diff options
| author | Andrew Rabert <6550543+nvllsvm@users.noreply.github.com> | 2019-01-22 18:13:47 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-01-22 18:13:47 -0500 |
| commit | 28483bdb54be96ae83e0fded097f534d7e26ba1e (patch) | |
| tree | e7f4b92326417ebf55eecdf68a01d2c3b9e660d7 /Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs | |
| parent | 920c39454c05e979eabe81877269cd4517a03ccf (diff) | |
| parent | 8106c8393b711a7e1d40487e3caf2b014decbe28 (diff) | |
Merge pull request #651 from jellyfin/release-10.1.0
Release 10.1.0
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs')
| -rw-r--r-- | Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs | 118 |
1 files changed, 0 insertions, 118 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs deleted file mode 100644 index 0413edfad..000000000 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs +++ /dev/null @@ -1,118 +0,0 @@ -using System.Collections.Generic; -using System.Text.RegularExpressions; - -namespace NLangDetect.Core.Utils -{ - public class LangProfile - { - private const int MinimumFreq = 2; - private const int LessFreqRatio = 100000; - - public string name { get; set; } - - public Dictionary<string, int> freq { get; set; } - public int[] n_words { get; set; } - - #region Constructor(s) - - public LangProfile() - { - freq = new Dictionary<string, int>(); - n_words = new int[NGram.GramsCount]; - } - - public LangProfile(string name) - { - this.name = name; - freq = new Dictionary<string, int>(); - n_words = new int[NGram.GramsCount]; - } - - #endregion - - #region Public methods - - public void Add(string gram) - { - if (name == null || gram == null) return; // Illegal - int len = gram.Length; - if (len < 1 || len > NGram.GramsCount) return; // Illegal - - n_words[len - 1]++; - - if (freq.ContainsKey(gram)) - { - freq[gram] = freq[gram] + 1; - } - else - { - freq.Add(gram, 1); - } - } - - public void OmitLessFreq() - { - if (name == null) return; // Illegal - int threshold = n_words[0] / LessFreqRatio; - if (threshold < MinimumFreq) threshold = MinimumFreq; - - ICollection<string> keys = freq.Keys; - int roman = 0; - // TODO IMM HI: move up? - Regex regex1 = new Regex("^[A-Za-z]$", RegexOptions.Compiled); - List<string> keysToRemove = new List<string>(); - - foreach (string key in keys) - { - int count = freq[key]; - - if (count <= threshold) - { - n_words[key.Length - 1] -= count; - keysToRemove.Add(key); - } - else - { - if (regex1.IsMatch(key)) - { - roman += count; - } - } - } - - foreach (string keyToRemove in keysToRemove) - { - freq.Remove(keyToRemove); - } - - // roman check - keysToRemove = new List<string>(); - - if (roman < n_words[0] / 3) - { - ICollection<string> keys2 = freq.Keys; - - // TODO IMM HI: move up? - Regex regex2 = new Regex(".*[A-Za-z].*", RegexOptions.Compiled); - - foreach (string key in keys2) - { - int count = freq[key]; - - if (regex2.IsMatch(key)) - { - n_words[key.Length - 1] -= count; - keysToRemove.Add(key); - } - } - - foreach (string keyToRemove in keysToRemove) - { - freq.Remove(keyToRemove); - } - } - } - - #endregion - } -} |
