feat(trainer): add import mode

This commit is contained in:
Anna 2021-07-17 22:20:21 -04:00
parent 39a6261a76
commit 78c5f8f8d2
2 changed files with 110 additions and 0 deletions

View File

@ -4,6 +4,7 @@ using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using ConsoleTables;
using CsvHelper;
@ -11,6 +12,8 @@ using CsvHelper.Configuration;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms.Text;
using Newtonsoft.Json;
using Newtonsoft.Json.Serialization;
using NoSoliciting.Interface;
using NoSoliciting.Internal.Interface;
@ -38,6 +41,49 @@ namespace NoSoliciting.Trainer {
Interactive,
InteractiveFull,
Normalise,
Import,
}
/// <summary>
/// Shape of the JSON report that <c>Import</c> deserialises from each <c>.eml</c> file.
/// Property names are mapped through <see cref="SnakeCaseNamingStrategy"/>.
/// </summary>
[Serializable, JsonObject(NamingStrategyType = typeof(SnakeCaseNamingStrategy))]
private class ReportInput {
    /// <summary>Report format version; get-only and initialised to 2.</summary>
    public uint ReportVersion { get; } = 2;

    /// <summary>Model version recorded in the report (not read by the visible import code).</summary>
    public uint ModelVersion { get; set; }

    /// <summary>Timestamp recorded in the report (not read by the visible import code).</summary>
    public DateTime Timestamp { get; set; }

    /// <summary>Numeric type value; <c>Import</c> passes it as the first argument of <c>Data</c>.</summary>
    public ushort Type { get; set; }

    /// <summary>Raw sender bytes (not read by the visible import code).</summary>
    public List<byte> Sender { get; set; }

    /// <summary>Raw content bytes; <c>Import</c> converts them to text with <c>XivString.GetText</c>.</summary>
    public List<byte> Content { get; set; }

    /// <summary>Optional reason text (not read by the visible import code).</summary>
    public string? Reason { get; set; }

    /// <summary>Optional classification; <c>Import</c> copies it into <c>Data.Category</c>.</summary>
    public string? SuggestedClassification { get; set; }
}
/// <summary>
/// Reads every <c>*.eml</c> file directly inside <paramref name="path"/>, decodes the
/// base64-encoded JSON report found on the first line starting with <c>JSON: </c>, and
/// prints the collected records to standard output as CSV, ordered by channel and then
/// by message.
/// </summary>
/// <remarks>
/// Files without a <c>JSON: </c> line are skipped silently. Files whose report
/// deserialises to null or has no content are skipped with a note on standard error,
/// so one bad report does not abort the whole import.
/// </remarks>
/// <param name="path">Directory to scan for <c>.eml</c> files.</param>
private static void Import(string path) {
    var allData = new List<Data>();

    foreach (var emlPath in Directory.GetFiles(path, "*.eml")) {
        // The prefix is a fixed marker, so compare ordinally rather than by culture.
        var json = File.ReadLines(emlPath)
            .FirstOrDefault(line => line.StartsWith("JSON: ", StringComparison.Ordinal));
        if (json == null) {
            continue;
        }

        var jsonText = Encoding.UTF8.GetString(Convert.FromBase64String(json.Split(": ")[1]));

        // DeserializeObject returns null for a JSON "null" document, and Content stays
        // null when the field is absent; neither can be turned into a record.
        var report = JsonConvert.DeserializeObject<ReportInput>(jsonText);
        if (report?.Content == null) {
            Console.Error.WriteLine($"Skipping {emlPath}: report has no content");
            continue;
        }

        var content = XivString.GetText(report.Content);
        var data = new Data(report.Type, content) {
            Category = report.SuggestedClassification,
        };
        allData.Add(data);
    }

    using var writer = new StringWriter();
    using var csv = new CsvWriter(writer, new CsvConfiguration(CultureInfo.InvariantCulture) {
        HeaderValidated = null,
    });

    csv.WriteRecords(allData
        .OrderBy(data => data.Channel)
        .ThenBy(data => data.Message));

    // Make sure anything still held by the CsvWriter reaches the StringWriter before
    // its contents are read back.
    csv.Flush();

    Console.WriteLine(writer.ToString());
}
private static void Main(string[] args) {
@ -47,9 +93,15 @@ namespace NoSoliciting.Trainer {
"interactive" => Mode.Interactive,
"interactive-full" => Mode.InteractiveFull,
"normalise" => Mode.Normalise,
"import" => Mode.Import,
_ => throw new ArgumentException("invalid argument"),
};
if (mode == Mode.Import) {
Import(args[1]);
return;
}
if (mode == Mode.Normalise) {
Console.WriteLine("Ready");

View File

@ -0,0 +1,58 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
namespace NoSoliciting.Trainer {
/// <summary>
/// Helpers for extracting plain text from byte strings that embed payloads delimited
/// by a start byte (2) and an end byte (3).
/// </summary>
public static class XivString {
    private const byte Start = 2;
    private const byte End = 3;

    /// <summary>
    /// Decodes <paramref name="bytes"/> as UTF-8 after removing every embedded payload.
    /// A payload is: start byte, one kind byte, an encoded length (see
    /// <see cref="GetInteger"/>), that many data bytes, then the end byte.
    /// </summary>
    /// <param name="bytes">Raw bytes to decode.</param>
    /// <returns>The text formed by all bytes outside of payloads.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// A payload is truncated, declares more data than is available, or is not
    /// terminated by the end byte.
    /// </exception>
    public static string GetText(IEnumerable<byte> bytes) {
        if (bytes == null) {
            throw new ArgumentNullException(nameof(bytes));
        }

        var stringBytes = new List<byte>();

        using var stream = new MemoryStream(bytes.ToArray());
        using var reader = new BinaryReader(stream);

        while (stream.Position < stream.Length) {
            var b = reader.ReadByte();
            if (b == Start) {
                SkipPayload(reader);
                continue;
            }

            stringBytes.Add(b);
        }

        return Encoding.UTF8.GetString(stringBytes.ToArray());
    }

    /// <summary>
    /// Consumes one payload (everything after its start byte) from <paramref name="reader"/>,
    /// reporting any malformed input as an <see cref="ArgumentException"/>.
    /// </summary>
    private static void SkipPayload(BinaryReader reader) {
        try {
            reader.ReadByte(); // kind
            var len = GetInteger(reader); // data length

            // ReadBytes silently returns fewer bytes at end of stream, and a huge length
            // would wrap negative when cast to int, so validate against what is left.
            var remaining = reader.BaseStream.Length - reader.BaseStream.Position;
            if (len > remaining) {
                throw new ArgumentException("Input was not a valid XivString");
            }

            reader.ReadBytes((int) len); // data

            var end = reader.ReadByte(); // end
            if (end != End) {
                throw new ArgumentException("Input was not a valid XivString");
            }
        } catch (EndOfStreamException e) {
            // The input ended in the middle of a payload.
            throw new ArgumentException("Input was not a valid XivString", e);
        }
    }

    // Thanks, Dalamud
    /// <summary>
    /// Reads one variable-length encoded unsigned integer from <paramref name="input"/>.
    /// </summary>
    /// <remarks>
    /// A marker byte below 0xD0 encodes the value <c>marker - 1</c> directly. Otherwise
    /// the low four bits of <c>marker + 1</c> form a mask: for each set bit (checked from
    /// bit 3 down to bit 0) one more byte is read into the matching position of a 4-byte
    /// buffer, which is then converted with <see cref="BitConverter.ToUInt32(byte[], int)"/>.
    /// </remarks>
    /// <param name="input">Reader positioned at the marker byte.</param>
    /// <returns>The decoded value.</returns>
    public static uint GetInteger(BinaryReader input) {
        uint marker = input.ReadByte();
        if (marker < 0xD0) {
            return marker - 1;
        }

        // the game adds 0xF0 marker for values >= 0xCF
        // usage of 0xD0-0xEF is unknown, should we throw here?
        // if (marker < 0xF0) throw new NotSupportedException();
        marker = (marker + 1) & 0b1111;

        var ret = new byte[4];
        for (var i = 3; i >= 0; i--) {
            ret[i] = (marker & (1 << i)) == 0 ? (byte) 0 : input.ReadByte();
        }

        return BitConverter.ToUInt32(ret, 0);
    }
}
}