2020-12-29 02:48:31 +00:00
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.Diagnostics.CodeAnalysis;
|
|
|
|
|
using System.Globalization;
|
|
|
|
|
using System.Linq;
|
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
|
using Microsoft.ML.Data;
|
|
|
|
|
using Microsoft.ML.Transforms;
|
2021-02-18 02:45:09 +00:00
|
|
|
|
using NoSoliciting.Interface;
|
2020-12-29 02:48:31 +00:00
|
|
|
|
|
2021-01-30 21:02:37 +00:00
|
|
|
|
namespace NoSoliciting.Internal.Interface {
|
2020-12-29 02:48:31 +00:00
|
|
|
|
[SuppressMessage("ReSharper", "UnusedMember.Global")]
|
2021-01-30 21:02:37 +00:00
|
|
|
|
[SuppressMessage("ReSharper", "AutoPropertyCanBeMadeGetOnly.Global")]
|
|
|
|
|
[SuppressMessage("ReSharper", "UnusedAutoPropertyAccessor.Global")]
|
|
|
|
|
[SuppressMessage("ReSharper", "MemberCanBePrivate.Global")]
|
2020-12-29 02:48:31 +00:00
|
|
|
|
public class Data {
|
|
|
|
|
/// <summary>
/// Value loaded from column 0. May be null; <c>Compute</c> only looks up a
/// weight when this is non-null.
/// </summary>
[LoadColumn(0)] public string? Category { get; set; }

/// <summary>
/// Value loaded from column 1. <c>Compute</c> compares it against 0, 11 and 30
/// to derive the <c>PartyFinder</c> and <c>Shout</c> flags.
/// </summary>
[LoadColumn(1)] public uint Channel { get; set; }

/// <summary>
/// Value loaded from column 2. Declared non-nullable but initialised with
/// <c>null!</c>, so it is actually null until something assigns it.
/// </summary>
[LoadColumn(2)] public string Message { get; set; } = null!;
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Creates an instance with every property left at its default value.
/// </summary>
public Data() { }

/// <summary>
/// Creates an instance from a channel and a message; <c>Category</c> is left unset.
/// </summary>
/// <param name="channel">Channel value, widened to <c>uint</c> when stored in <c>Channel</c>.</param>
/// <param name="message">Text stored in <c>Message</c>.</param>
public Data(ushort channel, string message) {
    this.Message = message;
    this.Channel = channel;
}
|
|
|
|
|
|
2021-02-18 02:45:09 +00:00
|
|
|
|
#region normalisation
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Custom mapping factory registered under the contract name "Normalise".
/// The mapping it returns writes the result of <c>NoSolUtil.Normalise</c> for
/// the row's message into <see cref="Normalised.NormalisedMessage"/>.
/// </summary>
[CustomMappingFactoryAttribute("Normalise")]
[SuppressMessage("ReSharper", "UnusedType.Global")]
public class Normalise : CustomMappingFactory<Data, Normalise.Normalised> {
    /// <summary>Output row produced by the mapping.</summary>
    public class Normalised {
        /// <summary>Result of normalising the source row's message.</summary>
        public string? NormalisedMessage { get; set; }
    }

    /// <summary>Returns the delegate that performs the normalisation.</summary>
    public override Action<Data, Normalised> GetMapping() => Apply;

    // Second argument `true` is passed through to NoSolUtil.Normalise unchanged;
    // its meaning is defined by that helper, which is not visible here.
    private static void Apply(Data source, Normalised target) =>
        target.NormalisedMessage = NoSolUtil.Normalise(source.Message, true);
}
|
|
|
|
|
|
|
|
|
|
#endregion
|
|
|
|
|
|
2020-12-29 02:48:31 +00:00
|
|
|
|
#region computed
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Custom mapping factory registered under the contract name "Compute".
/// The mapping it returns forwards each row to <c>Data.Compute</c> together
/// with the weights held by this factory.
/// </summary>
[CustomMappingFactoryAttribute("Compute")]
[SuppressMessage("ReSharper", "UnusedType.Global")]
public class ComputeContext : CustomMappingFactory<Data, Computed> {
    /// <summary>Category weights handed to <c>Data.Compute</c> for every row.</summary>
    private Dictionary<string, float> Weights { get; }

    /// <summary>Creates a factory with an empty weight table.</summary>
    public ComputeContext() : this(new Dictionary<string, float>()) {
    }

    /// <summary>Creates a factory that uses the supplied weight table as-is (no copy is made).</summary>
    /// <param name="weights">Weights keyed by category.</param>
    public ComputeContext(Dictionary<string, float> weights) {
        this.Weights = weights;
    }

    /// <summary>Returns the delegate that fills a <see cref="Computed"/> from a <see cref="Data"/> row.</summary>
    public override Action<Data, Computed> GetMapping() => this.Compute;

    // Thin adapter: the feature logic itself lives on Data.
    private void Compute(Data data, Computed computed) => data.Compute(computed, this.Weights);
}
|
|
|
|
|
|
2021-01-11 15:31:18 +00:00
|
|
|
|
// Options shared by every case-insensitive pattern in this group.
private const RegexOptions PatternOptions = RegexOptions.Compiled | RegexOptions.IgnoreCase;

// A message counts as mentioning a plot when any one of these matches.
private static readonly Regex[] PlotWords = {
    new Regex(@"\bplot\b", PatternOptions),
    new Regex(@"\bapartment\b", PatternOptions),
    new Regex(@"\bapt\b", PatternOptions),
    new Regex(@"p.{0,2}\d", PatternOptions),
};

// A message counts as mentioning a ward when any one of these matches.
private static readonly Regex[] WardWords = {
    new Regex(@"\bward\b", PatternOptions),
    new Regex(@"w.{0,2}\d", PatternOptions),
};

// One or two digits, up to two arbitrary characters, then one or two digits.
private static readonly Regex NumbersRegex = new Regex(
    @"\d{1,2}.{0,2}\d{1,2}",
    RegexOptions.Compiled
);

// Substrings looked up case-insensitively (via ContainsIgnoreCase) in Compute.
private static readonly string[] TradeWords = {
    "B> ",
    "S> ",
    "buy",
    "sell",
    "WTB",
    "WTS",
};

// ".com-" followed by word characters, a dot, and more word characters.
private static readonly Regex SketchUrlRegex = new Regex(
    @"\.com-\w+\.\w+",
    PatternOptions
);
|
2020-12-29 02:48:31 +00:00
|
|
|
|
|
2021-01-30 21:02:37 +00:00
|
|
|
|
/// <summary>
/// Output row of the "Compute" mapping: a weight plus boolean features that
/// <c>Data.Compute</c> derives from a single <see cref="Data"/> row.
/// </summary>
[SuppressMessage("ReSharper", "UnusedAutoPropertyAccessor.Global")]
public class Computed {
    /// <summary>Defaults to 1; overwritten when the row's category has an entry in the weight table.</summary>
    public float Weight { get; set; } = 1f;

    /// <summary>True when the row's channel is 0.</summary>
    public bool PartyFinder { get; set; }

    /// <summary>True when the row's channel is 11 or 30.</summary>
    public bool Shout { get; set; }

    /// <summary>True when any of the ward patterns matches the normalised message.</summary>
    public bool ContainsWard { get; set; }

    /// <summary>True when any of the plot patterns matches the normalised message.</summary>
    public bool ContainsPlot { get; set; }

    /// <summary>True when the numbers pattern matches the normalised message.</summary>
    public bool ContainsHousingNumbers { get; set; }

    /// <summary>True when the normalised message contains any trade word, ignoring case.</summary>
    public bool ContainsTradeWords { get; set; }

    /// <summary>True when the sketchy-URL pattern matches the normalised message.</summary>
    public bool ContainsSketchUrl { get; set; }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Fills <paramref name="output"/> with the weight and the boolean features
/// derived from this row's category, channel and normalised message.
/// </summary>
/// <param name="output">Object whose properties are overwritten.</param>
/// <param name="weights">Weights keyed by category; only consulted when <c>Category</c> is non-null.</param>
private void Compute(Computed output, IReadOnlyDictionary<string, float> weights) {
    // Leave output.Weight untouched when there is no category or no entry for it.
    var category = this.Category;
    if (category != null && weights.TryGetValue(category, out var categoryWeight)) {
        output.Weight = categoryWeight;
    }

    var text = NoSolUtil.Normalise(this.Message);

    // True as soon as one pattern of the group matches the normalised text.
    bool MatchesAny(Regex[] patterns) => Array.Exists(patterns, pattern => pattern.IsMatch(text));

    var channel = this.Channel;
    output.PartyFinder = channel == 0;
    output.Shout = channel == 11 || channel == 30;

    output.ContainsWard = MatchesAny(WardWords);
    output.ContainsPlot = MatchesAny(PlotWords);
    output.ContainsHousingNumbers = NumbersRegex.IsMatch(text);
    output.ContainsTradeWords = Array.Exists(TradeWords, tradeWord => text.ContainsIgnoreCase(tradeWord));
    output.ContainsSketchUrl = SketchUrlRegex.IsMatch(text);
}
|
|
|
|
|
|
|
|
|
|
#endregion
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-30 21:02:37 +00:00
|
|
|
|
/// <summary>
/// Result row read back from the model: the predicted label and the score vector.
/// </summary>
[SuppressMessage("ReSharper", "AutoPropertyCanBeMadeGetOnly.Global")]
[SuppressMessage("ReSharper", "UnusedMember.Global")]
public class Prediction {
    /// <summary>Bound to the "PredictedLabel" column; "UNKNOWN" until a value is assigned.</summary>
    [ColumnName("PredictedLabel")]
    public string Category { get; set; } = "UNKNOWN";

    /// <summary>
    /// Bound to the "Score" column. Defaults to the shared empty array so that
    /// constructing a prediction does not allocate a fresh zero-length array.
    /// </summary>
    [ColumnName("Score")]
    public float[] Probabilities { get; set; } = Array.Empty<float>();
}
|
|
|
|
|
|
|
|
|
|
/// <summary>String helpers used by the feature computation.</summary>
internal static class Ext {
    /// <summary>
    /// Reports whether <paramref name="needle"/> occurs anywhere in
    /// <paramref name="haystack"/>, comparing with the invariant culture and ignoring case.
    /// </summary>
    /// <param name="haystack">Text to search in.</param>
    /// <param name="needle">Text to search for.</param>
    /// <returns><c>true</c> when an occurrence is found; otherwise <c>false</c>.</returns>
    public static bool ContainsIgnoreCase(this string haystack, string needle) =>
        CultureInfo.InvariantCulture.CompareInfo.IndexOf(haystack, needle, CompareOptions.IgnoreCase) >= 0;
}
|
|
|
|
|
}
|