classification
This commit is contained in:
parent
8751f0a78e
commit
29bdb77041
|
@ -10,8 +10,11 @@
|
|||
<ItemGroup>
|
||||
<PackageReference Include="ConsoleTables" Version="2.4.2"/>
|
||||
<PackageReference Include="CsvHelper" Version="28.0.1"/>
|
||||
<PackageReference Include="Microsoft.ML" Version="1.7.1"/>
|
||||
<PackageReference Include="Microsoft.ML" Version="2.0.0-preview.22424.1"/>
|
||||
<PackageReference Include="Microsoft.ML.TorchSharp" Version="0.20.0-preview.22424.1"/>
|
||||
<PackageReference Include="MimeKitLite" Version="3.4.0"/>
|
||||
<PackageReference Include="TorchSharp-cpu" Version="0.96.3"/>
|
||||
<!-- <PackageReference Include="TorchSharp-cuda-linux" Version="0.96.3" />-->
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
|
|
@ -10,6 +10,7 @@ using CsvHelper;
|
|||
using CsvHelper.Configuration;
|
||||
using Microsoft.ML;
|
||||
using Microsoft.ML.Data;
|
||||
using Microsoft.ML.TorchSharp;
|
||||
using Microsoft.ML.Transforms.Text;
|
||||
using MimeKit;
|
||||
using Newtonsoft.Json;
|
||||
|
@ -211,12 +212,12 @@ namespace NoSoliciting.Trainer {
|
|||
.Append(ctx.Transforms.CustomMapping(compute.GetMapping(), "Compute"))
|
||||
.Append(ctx.Transforms.CustomMapping(normalise.GetMapping(), "Normalise"))
|
||||
.Append(ctx.Transforms.Text.NormalizeText("MsgNormal", nameof(Data.Normalise.Normalised.NormalisedMessage), keepPunctuations: false, keepNumbers: false))
|
||||
.Append(ctx.Transforms.Text.TokenizeIntoWords("MsgTokens", "MsgNormal"))
|
||||
.Append(ctx.Transforms.Text.RemoveDefaultStopWords("MsgNoDefStop", "MsgTokens"))
|
||||
.Append(ctx.Transforms.Text.RemoveStopWords("MsgNoStop", "MsgNoDefStop", StopWords))
|
||||
.Append(ctx.Transforms.Conversion.MapValueToKey("MsgKey", "MsgNoStop"))
|
||||
.Append(ctx.Transforms.Text.ProduceNgrams("MsgNgrams", "MsgKey", weighting: NgramExtractingEstimator.WeightingCriteria.Tf))
|
||||
.Append(ctx.Transforms.NormalizeLpNorm("FeaturisedMessage", "MsgNgrams"))
|
||||
// .Append(ctx.Transforms.Text.TokenizeIntoWords("MsgTokens", "MsgNormal"))
|
||||
// .Append(ctx.Transforms.Text.RemoveDefaultStopWords("MsgNoDefStop", "MsgTokens"))
|
||||
// .Append(ctx.Transforms.Text.RemoveStopWords("MsgNoStop", "MsgNoDefStop", StopWords))
|
||||
// .Append(ctx.Transforms.Conversion.MapValueToKey("MsgKey", "MsgNoStop"))
|
||||
// .Append(ctx.Transforms.Text.ProduceNgrams("MsgNgrams", "MsgKey", weighting: NgramExtractingEstimator.WeightingCriteria.Tf))
|
||||
// .Append(ctx.Transforms.NormalizeLpNorm("FeaturisedMessage", "MsgNgrams"))
|
||||
.Append(ctx.Transforms.Conversion.ConvertType("CPartyFinder", nameof(Data.Computed.PartyFinder)))
|
||||
.Append(ctx.Transforms.Conversion.ConvertType("CShout", nameof(Data.Computed.Shout)))
|
||||
.Append(ctx.Transforms.Conversion.ConvertType("CTrade", nameof(Data.Computed.ContainsTradeWords)))
|
||||
|
@ -224,8 +225,9 @@ namespace NoSoliciting.Trainer {
|
|||
.Append(ctx.Transforms.Conversion.ConvertType("HasWard", nameof(Data.Computed.ContainsWard)))
|
||||
.Append(ctx.Transforms.Conversion.ConvertType("HasPlot", nameof(Data.Computed.ContainsPlot)))
|
||||
.Append(ctx.Transforms.Conversion.ConvertType("HasNumbers", nameof(Data.Computed.ContainsHousingNumbers)))
|
||||
.Append(ctx.Transforms.Concatenate("Features", "FeaturisedMessage", "CPartyFinder", "CShout", "CTrade", "HasWard", "HasPlot", "HasNumbers", "CSketch"))
|
||||
.Append(ctx.MulticlassClassification.Trainers.SdcaMaximumEntropy(exampleWeightColumnName: "Weight"))
|
||||
// .Append(ctx.Transforms.Concatenate("Features", "FeaturisedMessage", "CPartyFinder", "CShout", "CTrade", "HasWard", "HasPlot", "HasNumbers", "CSketch"))
|
||||
// .Append(ctx.MulticlassClassification.Trainers.SdcaMaximumEntropy(exampleWeightColumnName: "Weight"))
|
||||
.Append(ctx.MulticlassClassification.Trainers.TextClassification(sentence1ColumnName: "MsgNormal"))
|
||||
.Append(ctx.Transforms.Conversion.MapKeyToValue("PredictedLabel"));
|
||||
|
||||
var train = mode switch {
|
||||
|
|
Loading…
Reference in New Issue