ttmp-rs/examples/extract_dedupe_in_memory.rs

100 lines
3.1 KiB
Rust

use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::io::{Cursor, Seek, SeekFrom, Write};
use std::path::Path;
use sha3::{Digest, Sha3_256};
use ttmp::ttmp_extractor::{ModFile, TtmpExtractor};
pub fn main() {
let mut sha = Sha3_256::default();
let arg = std::env::args().skip(1).next().unwrap();
let file = File::open(&arg).unwrap();
let extractor = TtmpExtractor::new(file).unwrap();
let mut zip = extractor.zip().borrow_mut();
let files = extractor.all_files_sorted();
let mut data_file = zip.by_name("TTMPD.mpd").unwrap();
let version = &*extractor.manifest().version;
std::fs::create_dir_all("files").unwrap();
let mut hashes: HashMap<String, Vec<SavedFile>> = HashMap::with_capacity(files.len());
let mut temp = tempfile::tempfile().unwrap();
for file in files {
temp.set_len(0).unwrap();
temp.seek(SeekFrom::Start(0)).unwrap();
// write each file into a temp file, then hash
// mod files can get quite large, so storing them entirely in memory is probably a bad idea
// let mut cursor = Cursor::new(Vec::with_capacity(file.file.mod_size));
TtmpExtractor::extract_one_into(&file, &mut data_file, &mut temp).unwrap();
// let data = cursor.into_inner();
// sha.update(&data);
temp.seek(SeekFrom::Start(0)).unwrap();
std::io::copy(&mut temp, &mut sha).unwrap();
temp.seek(SeekFrom::Start(0)).unwrap();
let hash = sha.finalize_reset();
let hash = hex::encode(&*hash);
let new = !hashes.contains_key(&hash);
let saved = SavedFile {
author: extractor.manifest().author.clone(),
package: extractor.manifest().name.clone(),
package_version: extractor.manifest().version.clone(),
game_path: file.file.full_path.clone(),
group: file.group.map(ToOwned::to_owned),
option: file.option.map(ToOwned::to_owned),
};
hashes.entry(hash.clone()).or_default().push(saved);
if new {
let path = Path::new("files").join(&hash);
std::io::copy(&mut temp, &mut File::create(&path).unwrap()).unwrap();
// std::fs::write(&path, data).unwrap();
println!("writing {}", path.to_string_lossy());
}
}
println!("{:#?}", hashes);
}
/// Record describing where one extracted file came from.
///
/// `main` creates one of these for every file it extracts and groups them by
/// the hash of the file contents.
#[derive(Debug)]
pub struct SavedFile {
/// Copied from the `author` field of the extractor's manifest.
pub author: String,
/// Copied from the `name` field of the extractor's manifest.
pub package: String,
/// Copied from the `version` field of the extractor's manifest.
pub package_version: String,
/// Copied from the extracted file's `full_path`
/// (presumably the in-game path the file replaces; confirm against `ttmp`).
pub game_path: String,
/// Owned copy of the extracted file's `group`, if it has one.
pub group: Option<String>,
/// Owned copy of the extracted file's `option`, if it has one.
pub option: Option<String>,
}
/// A writer that duplicates everything written to it into two underlying
/// writers.
struct Multiplexer<W1, W2> {
    /// Primary sink; its accepted byte count drives each `write` call.
    one: W1,
    /// Secondary sink; always receives exactly the bytes `one` accepted.
    two: W2,
}

impl<W1, W2> Multiplexer<W1, W2> {
    /// Creates a multiplexer that forwards writes to both `one` and `two`.
    fn new(one: W1, two: W2) -> Self {
        Self { one, two }
    }
}

impl<W1: Write, W2: Write> Write for Multiplexer<W1, W2> {
    /// Writes a prefix of `buf` to the first sink, then mirrors exactly that
    /// prefix into the second sink.
    ///
    /// A single `write` call may accept fewer bytes than offered, and the two
    /// sinks may accept different amounts. Forwarding only what the first sink
    /// took (via `write_all`) keeps both outputs byte-for-byte identical and
    /// makes the returned count correct for both.
    ///
    /// # Errors
    ///
    /// Returns the first error from either sink. If the second sink fails
    /// after the first one succeeded, the first sink has already received
    /// bytes the second one may be missing.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let accepted = self.one.write(buf)?;
        self.two.write_all(&buf[..accepted])?;
        Ok(accepted)
    }

    /// Flushes both sinks, even if the first flush fails, and reports the
    /// first error encountered.
    fn flush(&mut self) -> std::io::Result<()> {
        let first = self.one.flush();
        let second = self.two.flush();
        first.and(second)
    }
}