ttmp-rs/examples/extract_dedupe_in_memory.rs

83 lines
2.8 KiB
Rust

use std::collections::HashMap;
use std::fs::File;
use std::io::{Seek, SeekFrom};
use std::path::Path;
use sha3::{Digest, Sha3_256};
use ttmp::ttmp_extractor::TtmpExtractor;
pub fn main() {
let mut sha = Sha3_256::default();
let arg = std::env::args().skip(1).next().unwrap();
let file = File::open(&arg).unwrap();
let extractor = TtmpExtractor::new(file).unwrap();
let mut zip = extractor.zip().borrow_mut();
let files = extractor.all_files_sorted();
let mut data_file = zip.by_name("TTMPD.mpd").unwrap();
// let mut data = Vec::new();
// data_file.read_to_end(&mut data).unwrap();
// let mut cursor = Cursor::new(data);
std::fs::create_dir_all("files").unwrap();
let mut hashes: HashMap<String, Vec<SavedFile>> = HashMap::with_capacity(files.len());
let mut temp = tempfile::tempfile().unwrap();
let mut last_offset = None;
for file in files {
// handle deduped ttmps
if Some(file.file.mod_offset) == last_offset {
println!("already seen offset {}", file.file.mod_offset);
continue;
}
last_offset = Some(file.file.mod_offset);
temp.set_len(0).unwrap();
temp.seek(SeekFrom::Start(0)).unwrap();
println!("{:#?}", file);
// write each file into a temp file, then hash
// mod files can get quite large, so storing them entirely in memory is probably a bad idea
// let mut cursor = Cursor::new(Vec::with_capacity(file.file.mod_size));
// let before = cursor.position();
// println!("before: {}", before);
TtmpExtractor::extract_one_into(&file, &mut data_file, &mut temp).unwrap();
// let after = cursor.position();
// println!("after: {}", after);
// println!("size: {}", after - before);
// let data = cursor.into_inner();
// sha.update(&data);
temp.seek(SeekFrom::Start(0)).unwrap();
std::io::copy(&mut temp, &mut sha).unwrap();
temp.seek(SeekFrom::Start(0)).unwrap();
let hash = sha.finalize_reset();
let hash = hex::encode(&*hash);
let new = !hashes.contains_key(&hash);
let saved = SavedFile {
game_path: file.file.full_path.clone(),
group: file.group.map(ToOwned::to_owned),
option: file.option.map(ToOwned::to_owned),
};
hashes.entry(hash.clone()).or_default().push(saved);
if new {
let path = Path::new("files").join(&hash);
println!("writing {}", path.to_string_lossy());
std::io::copy(&mut temp, &mut File::create(&path).unwrap()).unwrap();
}
}
println!("{:#?}", hashes);
}
/// Metadata for one archive entry, grouped in `main` under the hash of the
/// entry's extracted contents.
#[derive(Debug)]
pub struct SavedFile {
    /// The entry's full path, copied from its `full_path` field.
    pub game_path: String,
    /// Owned copy of the entry's group, if it has one.
    pub group: Option<String>,
    /// Owned copy of the entry's option, if it has one.
    pub option: Option<String>,
}