// Source: ttmp-rs/examples/extract_dedupe_in_memory.rs (83 lines, 2.8 KiB, Rust)

use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fs::File;
use std::io::{Seek, SeekFrom};
use std::path::Path;

use sha3::{Digest, Sha3_256};
use ttmp::ttmp_extractor::TtmpExtractor;

pub fn main() {
let mut sha = Sha3_256::default();
let arg = std::env::args().skip(1).next().unwrap();
let file = File::open(&arg).unwrap();
let extractor = TtmpExtractor::new(file).unwrap();
let mut zip = extractor.zip().borrow_mut();
let files = extractor.all_files_sorted();
let mut data_file = zip.by_name("TTMPD.mpd").unwrap();
2022-12-01 00:09:26 +00:00
// let mut data = Vec::new();
// data_file.read_to_end(&mut data).unwrap();
// let mut cursor = Cursor::new(data);
2022-09-16 06:49:51 +00:00
std::fs::create_dir_all("files").unwrap();
let mut hashes: HashMap<String, Vec<SavedFile>> = HashMap::with_capacity(files.len());
let mut temp = tempfile::tempfile().unwrap();
2022-12-01 00:09:26 +00:00
let mut last_offset = None;
2022-09-16 06:49:51 +00:00
for file in files {
2022-12-01 00:09:26 +00:00
// handle deduped ttmps
if Some(file.file.mod_offset) == last_offset {
println!("already seen offset {}", file.file.mod_offset);
continue;
}
last_offset = Some(file.file.mod_offset);
2022-09-16 06:49:51 +00:00
temp.set_len(0).unwrap();
temp.seek(SeekFrom::Start(0)).unwrap();
2022-12-01 00:09:26 +00:00
println!("{:#?}", file);
2022-09-16 06:49:51 +00:00
// write each file into a temp file, then hash
// mod files can get quite large, so storing them entirely in memory is probably a bad idea
// let mut cursor = Cursor::new(Vec::with_capacity(file.file.mod_size));
2022-12-01 00:09:26 +00:00
// let before = cursor.position();
// println!("before: {}", before);
2022-09-16 06:49:51 +00:00
TtmpExtractor::extract_one_into(&file, &mut data_file, &mut temp).unwrap();
2022-12-01 00:09:26 +00:00
// let after = cursor.position();
// println!("after: {}", after);
// println!("size: {}", after - before);
2022-09-16 06:49:51 +00:00
// let data = cursor.into_inner();
// sha.update(&data);
temp.seek(SeekFrom::Start(0)).unwrap();
std::io::copy(&mut temp, &mut sha).unwrap();
temp.seek(SeekFrom::Start(0)).unwrap();
let hash = sha.finalize_reset();
let hash = hex::encode(&*hash);
let new = !hashes.contains_key(&hash);
let saved = SavedFile {
game_path: file.file.full_path.clone(),
group: file.group.map(ToOwned::to_owned),
option: file.option.map(ToOwned::to_owned),
};
hashes.entry(hash.clone()).or_default().push(saved);
if new {
let path = Path::new("files").join(&hash);
println!("writing {}", path.to_string_lossy());
2022-12-01 00:09:26 +00:00
std::io::copy(&mut temp, &mut File::create(&path).unwrap()).unwrap();
2022-09-16 06:49:51 +00:00
}
}
println!("{:#?}", hashes);
}
/// One extracted mod-pack entry, recorded under the hash of its contents.
///
/// Several `SavedFile`s may share a hash when different entries carry
/// identical data.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SavedFile {
    /// Full path of the entry, copied from the extracted file's `full_path`.
    pub game_path: String,
    /// Group the entry came from, if the extractor reported one.
    pub group: Option<String>,
    /// Option the entry came from, if the extractor reported one.
    pub option: Option<String>,
}