From 29bdb77041b4ecf219ad05a0c5424723297b9d59 Mon Sep 17 00:00:00 2001 From: Anna Date: Mon, 29 Aug 2022 21:33:36 -0400 Subject: [PATCH] classification --- .../NoSoliciting.Trainer.csproj | 5 ++++- NoSoliciting.Trainer/Program.cs | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/NoSoliciting.Trainer/NoSoliciting.Trainer.csproj b/NoSoliciting.Trainer/NoSoliciting.Trainer.csproj index ce5e5a2..755076e 100755 --- a/NoSoliciting.Trainer/NoSoliciting.Trainer.csproj +++ b/NoSoliciting.Trainer/NoSoliciting.Trainer.csproj @@ -10,8 +10,11 @@ - + + + + diff --git a/NoSoliciting.Trainer/Program.cs b/NoSoliciting.Trainer/Program.cs index ed0980f..3210a03 100644 --- a/NoSoliciting.Trainer/Program.cs +++ b/NoSoliciting.Trainer/Program.cs @@ -10,6 +10,7 @@ using CsvHelper; using CsvHelper.Configuration; using Microsoft.ML; using Microsoft.ML.Data; +using Microsoft.ML.TorchSharp; using Microsoft.ML.Transforms.Text; using MimeKit; using Newtonsoft.Json; @@ -211,12 +212,12 @@ namespace NoSoliciting.Trainer { .Append(ctx.Transforms.CustomMapping(compute.GetMapping(), "Compute")) .Append(ctx.Transforms.CustomMapping(normalise.GetMapping(), "Normalise")) .Append(ctx.Transforms.Text.NormalizeText("MsgNormal", nameof(Data.Normalise.Normalised.NormalisedMessage), keepPunctuations: false, keepNumbers: false)) - .Append(ctx.Transforms.Text.TokenizeIntoWords("MsgTokens", "MsgNormal")) - .Append(ctx.Transforms.Text.RemoveDefaultStopWords("MsgNoDefStop", "MsgTokens")) - .Append(ctx.Transforms.Text.RemoveStopWords("MsgNoStop", "MsgNoDefStop", StopWords)) - .Append(ctx.Transforms.Conversion.MapValueToKey("MsgKey", "MsgNoStop")) - .Append(ctx.Transforms.Text.ProduceNgrams("MsgNgrams", "MsgKey", weighting: NgramExtractingEstimator.WeightingCriteria.Tf)) - .Append(ctx.Transforms.NormalizeLpNorm("FeaturisedMessage", "MsgNgrams")) + // .Append(ctx.Transforms.Text.TokenizeIntoWords("MsgTokens", "MsgNormal")) + // .Append(ctx.Transforms.Text.RemoveDefaultStopWords("MsgNoDefStop", "MsgTokens")) + // .Append(ctx.Transforms.Text.RemoveStopWords("MsgNoStop", "MsgNoDefStop", StopWords)) + // .Append(ctx.Transforms.Conversion.MapValueToKey("MsgKey", "MsgNoStop")) + // .Append(ctx.Transforms.Text.ProduceNgrams("MsgNgrams", "MsgKey", weighting: NgramExtractingEstimator.WeightingCriteria.Tf)) + // .Append(ctx.Transforms.NormalizeLpNorm("FeaturisedMessage", "MsgNgrams")) .Append(ctx.Transforms.Conversion.ConvertType("CPartyFinder", nameof(Data.Computed.PartyFinder))) .Append(ctx.Transforms.Conversion.ConvertType("CShout", nameof(Data.Computed.Shout))) .Append(ctx.Transforms.Conversion.ConvertType("CTrade", nameof(Data.Computed.ContainsTradeWords))) @@ -224,8 +225,9 @@ namespace NoSoliciting.Trainer { .Append(ctx.Transforms.Conversion.ConvertType("HasWard", nameof(Data.Computed.ContainsWard))) .Append(ctx.Transforms.Conversion.ConvertType("HasPlot", nameof(Data.Computed.ContainsPlot))) .Append(ctx.Transforms.Conversion.ConvertType("HasNumbers", nameof(Data.Computed.ContainsHousingNumbers))) - .Append(ctx.Transforms.Concatenate("Features", "FeaturisedMessage", "CPartyFinder", "CShout", "CTrade", "HasWard", "HasPlot", "HasNumbers", "CSketch")) - .Append(ctx.MulticlassClassification.Trainers.SdcaMaximumEntropy(exampleWeightColumnName: "Weight")) + // .Append(ctx.Transforms.Concatenate("Features", "FeaturisedMessage", "CPartyFinder", "CShout", "CTrade", "HasWard", "HasPlot", "HasNumbers", "CSketch")) + // .Append(ctx.MulticlassClassification.Trainers.SdcaMaximumEntropy(exampleWeightColumnName: "Weight")) + .Append(ctx.MulticlassClassification.Trainers.TextClassification(sentence1ColumnName: "MsgNormal")) .Append(ctx.Transforms.Conversion.MapKeyToValue("PredictedLabel")); var train = mode switch {