NoSoliciting/NoSoliciting/FilterUtil.cs
Anna d00b3b0845 feat: better handle punctuation
Certain symbols are turned into one space so the model sees multiple
words instead of one. Previously "[RP]Hi" would turn into "RPHi" and
be its own token. Now it turns into "RP" and "Hi", counting as two
tokens. This change increased the model's accuracy.

Also make "18", "http", "https", and LGBT-related words into stop
words (meaning they're ignored). Each of these stop words made the
model more accurate and reduced unwanted bias.

Messages destined for ML are now normalised by the plugin in the same
way the model's input is for training. This should make the results
come closer to expected.
2021-02-17 20:01:34 -05:00

132 lines
3.1 KiB
C#

using Dalamud.Data;
using Lumina.Excel.GeneratedSheets;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
namespace NoSoliciting {
/// <summary>
/// Static helpers that derive item-level information from the game's
/// <c>Item</c> excel sheet.
/// </summary>
public static class FilterUtil {
    /// <summary>
    /// Cached result of <see cref="MaxItemLevelAttainable"/>. Any value that is
    /// not greater than zero is treated as "not computed yet".
    /// </summary>
    private static int MaxItemLevel { get; set; }

    /// <summary>
    /// Equipment slots that are tracked separately while searching for the
    /// highest item level per slot.
    /// </summary>
    private enum Slot {
        MainHand,
        OffHand,
        Head,
        Chest,
        Hands,
        Waist,
        Legs,
        Feet,
        Earrings,
        Neck,
        Wrist,
        RingL,
        RingR,
    }

    /// <summary>
    /// Maps an item to the slot it is counted under.
    /// </summary>
    /// <param name="item">Item row to classify.</param>
    /// <returns>
    /// The first slot, in the fixed order below, whose column on the item's
    /// equip slot category is non-zero; <c>null</c> when the item has no equip
    /// slot category or every inspected column is zero.
    /// </returns>
    private static Slot? SlotFromItem(Item item) {
        var category = item.EquipSlotCategory.Value;
        if (category == null) {
            return null;
        }

        // The order is significant: an item whose category marks several slots
        // is attributed to the earliest one listed here only.
        return category.MainHand != 0 ? Slot.MainHand
            : category.Head != 0 ? Slot.Head
            : category.Body != 0 ? Slot.Chest
            : category.Gloves != 0 ? Slot.Hands
            : category.Waist != 0 ? Slot.Waist
            : category.Legs != 0 ? Slot.Legs
            : category.Feet != 0 ? Slot.Feet
            : category.OffHand != 0 ? Slot.OffHand
            : category.Ears != 0 ? Slot.Earrings
            : category.Neck != 0 ? Slot.Neck
            : category.Wrists != 0 ? Slot.Wrist
            : category.FingerL != 0 ? Slot.RingL
            : category.FingerR != 0 ? Slot.RingR
            : (Slot?) null;
    }

    /// <summary>
    /// Returns the average, over every slot that has at least one item, of the
    /// highest item level found for that slot, truncated to an integer.
    /// The result is stored in a static cache and returned directly on later
    /// calls once it is greater than zero.
    /// </summary>
    /// <param name="data">
    /// Data manager used to read the <c>Item</c> sheet. Only inspected when no
    /// positive value has been cached yet.
    /// </param>
    /// <returns>The cached or freshly computed item level.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="data"/> is <c>null</c> and no value has been cached.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// No item in the sheet maps to a slot, so there is nothing to average.
    /// </exception>
    public static int MaxItemLevelAttainable(DataManager data) {
        // Serve from the cache first; the argument is not validated on this path.
        if (MaxItemLevel > 0) {
            return MaxItemLevel;
        }

        if (data == null) {
            throw new ArgumentNullException(nameof(data), "DataManager cannot be null");
        }

        var highestPerSlot = new Dictionary<Slot, int>();

        foreach (var item in data.GetExcelSheet<Item>()) {
            var maybeSlot = SlotFromItem(item);
            if (!maybeSlot.HasValue) {
                continue;
            }

            var slot = maybeSlot.Value;

            // The item level is the row ID of the linked LevelItem row; items
            // without such a row count as level 0.
            var levelRow = item.LevelItem.Value;
            var itemLevel = levelRow == null ? 0 : (int) levelRow.RowId;

            // Record the level unless a strictly higher one is already stored.
            if (!highestPerSlot.TryGetValue(slot, out var recorded) || recorded <= itemLevel) {
                highestPerSlot[slot] = itemLevel;
            }
        }

        var mean = highestPerSlot.Values.Average();
        MaxItemLevel = (int) mean;
        return MaxItemLevel;
    }
}
/// <summary>
/// String extension helpers.
/// </summary>
public static class RmtExtensions {
    /// <summary>
    /// Checks whether <paramref name="needle"/> occurs anywhere inside
    /// <paramref name="haystack"/>, ignoring case and using the invariant
    /// culture's comparison rules.
    /// </summary>
    /// <param name="haystack">Text to search in.</param>
    /// <param name="needle">Text to search for.</param>
    /// <returns><c>true</c> when a match is found; otherwise <c>false</c>.</returns>
    public static bool ContainsIgnoreCase(this string haystack, string needle) {
        var invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
        var matchIndex = invariantCompare.IndexOf(haystack, needle, CompareOptions.IgnoreCase);
        return matchIndex >= 0;
    }
}
}