feat(trainer): add import feature
This commit is contained in:
parent
aac3b42b47
commit
21c7e01097
|
@ -11,6 +11,7 @@
|
||||||
<PackageReference Include="ConsoleTables" Version="2.4.2"/>
|
<PackageReference Include="ConsoleTables" Version="2.4.2"/>
|
||||||
<PackageReference Include="CsvHelper" Version="27.1.1"/>
|
<PackageReference Include="CsvHelper" Version="27.1.1"/>
|
||||||
<PackageReference Include="Microsoft.ML" Version="1.6.0"/>
|
<PackageReference Include="Microsoft.ML" Version="1.6.0"/>
|
||||||
|
<PackageReference Include="MimeKitLite" Version="2.13.0"/>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
|
|
@ -12,6 +12,7 @@ using CsvHelper.Configuration;
|
||||||
using Microsoft.ML;
|
using Microsoft.ML;
|
||||||
using Microsoft.ML.Data;
|
using Microsoft.ML.Data;
|
||||||
using Microsoft.ML.Transforms.Text;
|
using Microsoft.ML.Transforms.Text;
|
||||||
|
using MimeKit;
|
||||||
using Newtonsoft.Json;
|
using Newtonsoft.Json;
|
||||||
using Newtonsoft.Json.Serialization;
|
using Newtonsoft.Json.Serialization;
|
||||||
using NoSoliciting.Interface;
|
using NoSoliciting.Interface;
|
||||||
|
@ -60,9 +61,12 @@ namespace NoSoliciting.Trainer {
|
||||||
private static void Import(string path) {
|
private static void Import(string path) {
|
||||||
var allData = new List<Data>();
|
var allData = new List<Data>();
|
||||||
|
|
||||||
|
var opts = new ParserOptions {
|
||||||
|
CharsetEncoding = Encoding.UTF8,
|
||||||
|
};
|
||||||
foreach (var emlPath in Directory.GetFiles(path, "*.eml")) {
|
foreach (var emlPath in Directory.GetFiles(path, "*.eml")) {
|
||||||
var lines = File.ReadAllLines(emlPath);
|
var message = MimeMessage.Load(opts, new FileStream(emlPath, FileMode.Open));
|
||||||
var json = lines.FirstOrDefault(line => line.StartsWith("JSON: "));
|
var json = message.TextBody.Split('\r', '\n').FirstOrDefault(line => line.StartsWith("JSON: "));
|
||||||
if (json == null) {
|
if (json == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -73,16 +77,22 @@ namespace NoSoliciting.Trainer {
|
||||||
var data = new Data(report.Type, content) {
|
var data = new Data(report.Type, content) {
|
||||||
Category = report.SuggestedClassification,
|
Category = report.SuggestedClassification,
|
||||||
};
|
};
|
||||||
|
data.Message = data.Message
|
||||||
|
.Replace("\r\n", " ")
|
||||||
|
.Replace('\r', ' ')
|
||||||
|
.Replace('\n', ' ');
|
||||||
allData.Add(data);
|
allData.Add(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
var writer = new StringWriter();
|
var writer = new StringWriter();
|
||||||
using var csv = new CsvWriter(writer, new CsvConfiguration(CultureInfo.InvariantCulture) {
|
using var csv = new CsvWriter(writer, new CsvConfiguration(CultureInfo.InvariantCulture) {
|
||||||
HeaderValidated = null,
|
HeaderValidated = null,
|
||||||
|
Encoding = Encoding.UTF8,
|
||||||
});
|
});
|
||||||
csv.WriteRecords(allData
|
csv.WriteRecords(allData
|
||||||
.OrderBy(data => data.Channel)
|
.OrderBy(data => data.Channel)
|
||||||
.ThenBy(data => data.Message));
|
.ThenBy(data => data.Message));
|
||||||
|
Console.OutputEncoding = Encoding.UTF8;
|
||||||
Console.WriteLine(writer.ToString());
|
Console.WriteLine(writer.ToString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue