From 0a96858447b3057a594e6513e58fdfb59abc6176 Mon Sep 17 00:00:00 2001 From: Anna Date: Fri, 23 Apr 2021 13:21:49 -0400 Subject: [PATCH] fix(trainer): ignore numbers --- NoSoliciting.Trainer/Program.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NoSoliciting.Trainer/Program.cs b/NoSoliciting.Trainer/Program.cs index 9cc9d63..4ff94c1 100644 --- a/NoSoliciting.Trainer/Program.cs +++ b/NoSoliciting.Trainer/Program.cs @@ -124,7 +124,7 @@ namespace NoSoliciting.Trainer { var pipeline = ctx.Transforms.Conversion.MapValueToKey("Label", nameof(Data.Category)) .Append(ctx.Transforms.CustomMapping(compute.GetMapping(), "Compute")) .Append(ctx.Transforms.CustomMapping(normalise.GetMapping(), "Normalise")) - .Append(ctx.Transforms.Text.NormalizeText("MsgNormal", nameof(Data.Normalise.Normalised.NormalisedMessage), keepPunctuations: false)) + .Append(ctx.Transforms.Text.NormalizeText("MsgNormal", nameof(Data.Normalise.Normalised.NormalisedMessage), keepPunctuations: false, keepNumbers: false)) .Append(ctx.Transforms.Text.TokenizeIntoWords("MsgTokens", "MsgNormal")) .Append(ctx.Transforms.Text.RemoveDefaultStopWords("MsgNoDefStop", "MsgTokens")) .Append(ctx.Transforms.Text.RemoveStopWords("MsgNoStop", "MsgNoDefStop", StopWords))