feat(trainer): add import mode
This commit is contained in:
parent
39a6261a76
commit
78c5f8f8d2
|
@ -4,6 +4,7 @@ using System.Collections.Generic;
|
|||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using ConsoleTables;
|
||||
using CsvHelper;
|
||||
|
@ -11,6 +12,8 @@ using CsvHelper.Configuration;
|
|||
using Microsoft.ML;
|
||||
using Microsoft.ML.Data;
|
||||
using Microsoft.ML.Transforms.Text;
|
||||
using Newtonsoft.Json;
|
||||
using Newtonsoft.Json.Serialization;
|
||||
using NoSoliciting.Interface;
|
||||
using NoSoliciting.Internal.Interface;
|
||||
|
||||
|
@ -38,6 +41,49 @@ namespace NoSoliciting.Trainer {
|
|||
Interactive,
|
||||
InteractiveFull,
|
||||
Normalise,
|
||||
Import,
|
||||
}
|
||||
|
||||
/// <summary>
/// JSON shape of a report as it is embedded in an imported report e-mail.
/// Property names are mapped to snake_case keys by the naming strategy below.
/// </summary>
[Serializable]
[JsonObject(NamingStrategyType = typeof(SnakeCaseNamingStrategy))]
private class ReportInput {
    /// <summary>Version of the report format; fixed at 2 and not settable.</summary>
    public uint ReportVersion { get; } = 2;

    /// <summary>Model version recorded in the report.</summary>
    public uint ModelVersion { get; set; }

    /// <summary>Timestamp recorded in the report.</summary>
    public DateTime Timestamp { get; set; }

    /// <summary>Numeric message type; the importer passes it to <c>Data</c> together with the decoded text.</summary>
    public ushort Type { get; set; }

    /// <summary>Raw sender bytes. Never null after construction, so a report without this key yields an empty list.</summary>
    public List<byte> Sender { get; set; } = new List<byte>();

    /// <summary>
    /// Raw message bytes, decoded by the importer with <c>XivString.GetText</c>.
    /// Never null after construction, so a report without this key yields an empty list.
    /// </summary>
    public List<byte> Content { get; set; } = new List<byte>();

    /// <summary>Optional reason text recorded in the report.</summary>
    public string? Reason { get; set; }

    /// <summary>Optional classification suggested in the report; the importer uses it as the category.</summary>
    public string? SuggestedClassification { get; set; }
}
|
||||
|
||||
/// <summary>
/// Converts the report e-mails in a directory into CSV training data and prints it to standard output.
/// </summary>
/// <remarks>
/// Every <c>*.eml</c> file in <paramref name="path"/> is scanned for its first line that starts with
/// <c>"JSON: "</c>. The remainder of that line is base64-decoded, read as UTF-8 JSON and deserialised
/// into a <see cref="ReportInput"/>. Files without such a line are skipped silently; files whose JSON
/// yields no report or no content are skipped with a warning on standard error.
/// </remarks>
/// <param name="path">Directory containing the <c>*.eml</c> files to import.</param>
private static void Import(string path) {
    const string jsonPrefix = "JSON: ";

    var allData = new List<Data>();

    foreach (var emlPath in Directory.GetFiles(path, "*.eml")) {
        // ReadLines is lazy, so the scan stops at the first matching line.
        // The prefix is a fixed machine-readable marker: compare ordinally, not by culture.
        var json = File.ReadLines(emlPath)
            .FirstOrDefault(line => line.StartsWith(jsonPrefix, StringComparison.Ordinal));
        if (json == null) {
            continue;
        }

        // Everything after the prefix is the base64 payload.
        var payload = json.Substring(jsonPrefix.Length);
        var jsonText = Encoding.UTF8.GetString(Convert.FromBase64String(payload));

        // DeserializeObject returns null for an empty or literal "null" document.
        var report = JsonConvert.DeserializeObject<ReportInput>(jsonText);
        if (report?.Content == null) {
            Console.Error.WriteLine($"Skipping {emlPath}: report is missing or has no content");
            continue;
        }

        var content = XivString.GetText(report.Content);
        var data = new Data(report.Type, content) {
            Category = report.SuggestedClassification,
        };
        allData.Add(data);
    }

    using var writer = new StringWriter();
    using var csv = new CsvWriter(writer, new CsvConfiguration(CultureInfo.InvariantCulture) {
        HeaderValidated = null,
    });

    // Sort by channel, then message, so the output order does not depend on file enumeration order.
    csv.WriteRecords(allData
        .OrderBy(data => data.Channel)
        .ThenBy(data => data.Message));

    Console.WriteLine(writer.ToString());
}
|
||||
|
||||
private static void Main(string[] args) {
|
||||
|
@ -47,9 +93,15 @@ namespace NoSoliciting.Trainer {
|
|||
"interactive" => Mode.Interactive,
|
||||
"interactive-full" => Mode.InteractiveFull,
|
||||
"normalise" => Mode.Normalise,
|
||||
"import" => Mode.Import,
|
||||
_ => throw new ArgumentException("invalid argument"),
|
||||
};
|
||||
|
||||
if (mode == Mode.Import) {
|
||||
Import(args[1]);
|
||||
return;
|
||||
}
|
||||
|
||||
if (mode == Mode.Normalise) {
|
||||
Console.WriteLine("Ready");
|
||||
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
|
||||
namespace NoSoliciting.Trainer {
|
||||
/// <summary>
/// Extracts plain text from a byte sequence in which payloads are embedded between
/// a start byte (2) and an end byte (3).
/// </summary>
public static class XivString {
    private const byte Start = 2;
    private const byte End = 3;

    private const string InvalidMessage = "Input was not a valid XivString";

    /// <summary>
    /// Returns the text of <paramref name="bytes"/> with every embedded payload removed.
    /// </summary>
    /// <remarks>
    /// A payload is laid out as: start byte, one kind byte, an encoded length
    /// (see <see cref="GetInteger"/>), that many data bytes, end byte.
    /// All bytes outside payloads are decoded as UTF-8.
    /// </remarks>
    /// <param name="bytes">The raw bytes to decode.</param>
    /// <returns>The UTF-8 text formed by the bytes that are not part of any payload.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// A payload is truncated, declares more data than remains, or is not closed by the end byte.
    /// </exception>
    public static string GetText(IEnumerable<byte> bytes) {
        if (bytes == null) {
            throw new ArgumentNullException(nameof(bytes));
        }

        var stringBytes = new List<byte>();

        using var stream = new MemoryStream(bytes.ToArray());
        using var reader = new BinaryReader(stream);

        while (stream.Position < stream.Length) {
            var b = reader.ReadByte();
            if (b == Start) {
                SkipPayload(reader);
                continue;
            }

            stringBytes.Add(b);
        }

        return Encoding.UTF8.GetString(stringBytes.ToArray());
    }

    // Consumes one payload (kind, length, data, end byte); the start byte has already been read.
    private static void SkipPayload(BinaryReader reader) {
        var stream = reader.BaseStream;

        try {
            reader.ReadByte(); // kind
            var len = GetInteger(reader); // data length

            // Reject lengths that run past the input. This also keeps the int cast below in
            // range, because a MemoryStream never holds more than int.MaxValue bytes.
            if (len > stream.Length - stream.Position) {
                throw new ArgumentException(InvalidMessage);
            }

            reader.ReadBytes((int) len); // data

            if (reader.ReadByte() != End) {
                throw new ArgumentException(InvalidMessage);
            }
        } catch (EndOfStreamException e) {
            // Input ended in the middle of a payload: report it like any other malformed input.
            throw new ArgumentException(InvalidMessage, e);
        }
    }

    // Thanks, Dalamud

    /// <summary>
    /// Reads one variable-length encoded unsigned integer from <paramref name="input"/>.
    /// </summary>
    /// <remarks>
    /// A marker byte below 0xD0 encodes the value <c>marker - 1</c> directly. Otherwise the low
    /// four bits of <c>marker + 1</c> form a mask: bit <c>i</c> set means byte <c>i</c> of the
    /// result follows in the stream, most significant byte first; unset bytes are zero.
    /// </remarks>
    /// <param name="input">Reader positioned at the marker byte.</param>
    /// <returns>The decoded value.</returns>
    public static uint GetInteger(BinaryReader input) {
        uint marker = input.ReadByte();
        if (marker < 0xD0) {
            return marker - 1;
        }

        // the game adds 0xF0 marker for values >= 0xCF
        // usage of 0xD0-0xEF is unknown, should we throw here?
        // if (marker < 0xF0) throw new NotSupportedException();

        marker = (marker + 1) & 0b1111;

        // Build the value with shifts so the result does not depend on host endianness.
        uint value = 0;
        for (var i = 3; i >= 0; i--) {
            if ((marker & (1u << i)) != 0) {
                value |= (uint) input.ReadByte() << (8 * i);
            }
        }

        return value;
    }
}
|
||||
}
|
Loading…
Reference in New Issue