classification

This commit is contained in:
Anna 2022-08-29 21:33:36 -04:00
parent 8751f0a78e
commit 29bdb77041
2 changed files with 14 additions and 9 deletions

View File

@ -10,8 +10,11 @@
<ItemGroup>
<PackageReference Include="ConsoleTables" Version="2.4.2"/>
<PackageReference Include="CsvHelper" Version="28.0.1"/>
<PackageReference Include="Microsoft.ML" Version="1.7.1"/>
<PackageReference Include="Microsoft.ML" Version="2.0.0-preview.22424.1"/>
<PackageReference Include="Microsoft.ML.TorchSharp" Version="0.20.0-preview.22424.1"/>
<PackageReference Include="MimeKitLite" Version="3.4.0"/>
<PackageReference Include="TorchSharp-cpu" Version="0.96.3"/>
<!-- <PackageReference Include="TorchSharp-cuda-linux" Version="0.96.3" />-->
</ItemGroup>
<ItemGroup>

View File

@ -10,6 +10,7 @@ using CsvHelper;
using CsvHelper.Configuration;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.TorchSharp;
using Microsoft.ML.Transforms.Text;
using MimeKit;
using Newtonsoft.Json;
@ -211,12 +212,12 @@ namespace NoSoliciting.Trainer {
.Append(ctx.Transforms.CustomMapping(compute.GetMapping(), "Compute"))
.Append(ctx.Transforms.CustomMapping(normalise.GetMapping(), "Normalise"))
.Append(ctx.Transforms.Text.NormalizeText("MsgNormal", nameof(Data.Normalise.Normalised.NormalisedMessage), keepPunctuations: false, keepNumbers: false))
.Append(ctx.Transforms.Text.TokenizeIntoWords("MsgTokens", "MsgNormal"))
.Append(ctx.Transforms.Text.RemoveDefaultStopWords("MsgNoDefStop", "MsgTokens"))
.Append(ctx.Transforms.Text.RemoveStopWords("MsgNoStop", "MsgNoDefStop", StopWords))
.Append(ctx.Transforms.Conversion.MapValueToKey("MsgKey", "MsgNoStop"))
.Append(ctx.Transforms.Text.ProduceNgrams("MsgNgrams", "MsgKey", weighting: NgramExtractingEstimator.WeightingCriteria.Tf))
.Append(ctx.Transforms.NormalizeLpNorm("FeaturisedMessage", "MsgNgrams"))
// .Append(ctx.Transforms.Text.TokenizeIntoWords("MsgTokens", "MsgNormal"))
// .Append(ctx.Transforms.Text.RemoveDefaultStopWords("MsgNoDefStop", "MsgTokens"))
// .Append(ctx.Transforms.Text.RemoveStopWords("MsgNoStop", "MsgNoDefStop", StopWords))
// .Append(ctx.Transforms.Conversion.MapValueToKey("MsgKey", "MsgNoStop"))
// .Append(ctx.Transforms.Text.ProduceNgrams("MsgNgrams", "MsgKey", weighting: NgramExtractingEstimator.WeightingCriteria.Tf))
// .Append(ctx.Transforms.NormalizeLpNorm("FeaturisedMessage", "MsgNgrams"))
.Append(ctx.Transforms.Conversion.ConvertType("CPartyFinder", nameof(Data.Computed.PartyFinder)))
.Append(ctx.Transforms.Conversion.ConvertType("CShout", nameof(Data.Computed.Shout)))
.Append(ctx.Transforms.Conversion.ConvertType("CTrade", nameof(Data.Computed.ContainsTradeWords)))
@ -224,8 +225,9 @@ namespace NoSoliciting.Trainer {
.Append(ctx.Transforms.Conversion.ConvertType("HasWard", nameof(Data.Computed.ContainsWard)))
.Append(ctx.Transforms.Conversion.ConvertType("HasPlot", nameof(Data.Computed.ContainsPlot)))
.Append(ctx.Transforms.Conversion.ConvertType("HasNumbers", nameof(Data.Computed.ContainsHousingNumbers)))
.Append(ctx.Transforms.Concatenate("Features", "FeaturisedMessage", "CPartyFinder", "CShout", "CTrade", "HasWard", "HasPlot", "HasNumbers", "CSketch"))
.Append(ctx.MulticlassClassification.Trainers.SdcaMaximumEntropy(exampleWeightColumnName: "Weight"))
// .Append(ctx.Transforms.Concatenate("Features", "FeaturisedMessage", "CPartyFinder", "CShout", "CTrade", "HasWard", "HasPlot", "HasNumbers", "CSketch"))
// .Append(ctx.MulticlassClassification.Trainers.SdcaMaximumEntropy(exampleWeightColumnName: "Weight"))
.Append(ctx.MulticlassClassification.Trainers.TextClassification(sentence1ColumnName: "MsgNormal"))
.Append(ctx.Transforms.Conversion.MapKeyToValue("PredictedLabel"));
var train = mode switch {