diff options
| author | Tthecreator <epostvanthomas@kpnmail.nl> | 2019-01-22 15:22:42 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-01-22 15:22:42 +0000 |
| commit | 189b99df16bd4c93cc96422d7282d01d9ff5b82f (patch) | |
| tree | 26d7da95fe3e3b2772b8b39a2463a6c0ac7652fc /Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs | |
| parent | a00c0defa8cb22774f5dc8a7d566eb36ac7307e8 (diff) | |
| parent | edcfd8b565f632088c8b1f826db8e2fbecf9790d (diff) | |
Merge pull request #1 from jellyfin/dev
Update from jellyfin repo
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs')
| -rw-r--r-- | Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs | 118 |
1 files changed, 0 insertions, 118 deletions
// Source: Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs
// (listed in the originating diff as a deleted file, mode 100644, blob 0413edfad).
using System.Collections.Generic;
using System.Text.RegularExpressions;

namespace NLangDetect.Core.Utils
{
    /// <summary>
    /// Occurrence counts of n-grams for one named profile, together with
    /// per-length totals.
    /// </summary>
    /// <remarks>
    /// <see cref="freq"/> maps each n-gram to the number of times it was added;
    /// <see cref="n_words"/> holds, at index <c>len - 1</c>, the running total of
    /// counts for n-grams of length <c>len</c>. The lower-case member names are
    /// part of the public surface and are kept as-is.
    /// </remarks>
    public class LangProfile
    {
        // Lower bound for the pruning threshold used by OmitLessFreq.
        private const int MinimumFreq = 2;

        // Divisor applied to n_words[0] to derive the pruning threshold.
        private const int LessFreqRatio = 100000;

        // Built once per process instead of once per OmitLessFreq call:
        // constructing a RegexOptions.Compiled regex is expensive, and the
        // patterns never change. Pattern text is unchanged.
        private static readonly Regex SingleRomanLetterRegex = new Regex("^[A-Za-z]$", RegexOptions.Compiled);
        private static readonly Regex AnyRomanLetterRegex = new Regex(".*[A-Za-z].*", RegexOptions.Compiled);

        /// <summary>Profile name; while null, <see cref="Add"/> and <see cref="OmitLessFreq"/> do nothing.</summary>
        public string name { get; set; }

        /// <summary>N-gram to occurrence-count map.</summary>
        public Dictionary<string, int> freq { get; set; }

        /// <summary>Total counts per n-gram length; index 0 is for length-1 grams.</summary>
        public int[] n_words { get; set; }

        #region Constructor(s)

        /// <summary>
        /// Creates an unnamed profile with an empty frequency map and a zeroed
        /// totals array of <c>NGram.GramsCount</c> entries.
        /// </summary>
        public LangProfile()
        {
            freq = new Dictionary<string, int>();
            n_words = new int[NGram.GramsCount];
        }

        /// <summary>Creates an empty profile with the given name.</summary>
        /// <param name="name">Profile name.</param>
        public LangProfile(string name)
            : this()
        {
            this.name = name;
        }

        #endregion

        #region Public methods

        /// <summary>
        /// Records one occurrence of <paramref name="gram"/>.
        /// </summary>
        /// <param name="gram">
        /// The n-gram to count. The call is silently ignored when the profile has
        /// no name, when <paramref name="gram"/> is null, or when its length is
        /// outside <c>1..NGram.GramsCount</c>.
        /// </param>
        public void Add(string gram)
        {
            if (name == null || gram == null)
            {
                return; // Illegal
            }

            int len = gram.Length;
            if (len < 1 || len > NGram.GramsCount)
            {
                return; // Illegal
            }

            n_words[len - 1]++;

            // TryGetValue leaves 'current' at 0 for an unseen gram, so a single
            // lookup plus a single write covers both the insert and update cases.
            int current;
            freq.TryGetValue(gram, out current);
            freq[gram] = current + 1;
        }

        /// <summary>
        /// Prunes the profile in two passes.
        /// </summary>
        /// <remarks>
        /// Pass 1 removes every n-gram whose count is less than or equal to a
        /// threshold of <c>n_words[0] / LessFreqRatio</c> (but at least
        /// <c>MinimumFreq</c>), subtracting the removed counts from
        /// <see cref="n_words"/>. While doing so it sums the counts of surviving
        /// keys that match <c>^[A-Za-z]$</c>.
        /// Pass 2 runs only when that sum is below one third of the updated
        /// <c>n_words[0]</c>; it removes every remaining key that contains an
        /// ASCII letter, again adjusting <see cref="n_words"/>.
        /// Does nothing when the profile has no name.
        /// </remarks>
        public void OmitLessFreq()
        {
            if (name == null)
            {
                return; // Illegal
            }

            int threshold = n_words[0] / LessFreqRatio;
            if (threshold < MinimumFreq)
            {
                threshold = MinimumFreq;
            }

            int roman = 0;
            List<string> keysToRemove = new List<string>();

            // Removal is deferred: the dictionary must not change while it is
            // being enumerated.
            foreach (KeyValuePair<string, int> entry in freq)
            {
                if (entry.Value <= threshold)
                {
                    n_words[entry.Key.Length - 1] -= entry.Value;
                    keysToRemove.Add(entry.Key);
                }
                else if (SingleRomanLetterRegex.IsMatch(entry.Key))
                {
                    roman += entry.Value;
                }
            }

            RemoveKeys(keysToRemove);

            // roman check
            if (roman < n_words[0] / 3)
            {
                keysToRemove = new List<string>();

                foreach (KeyValuePair<string, int> entry in freq)
                {
                    if (AnyRomanLetterRegex.IsMatch(entry.Key))
                    {
                        n_words[entry.Key.Length - 1] -= entry.Value;
                        keysToRemove.Add(entry.Key);
                    }
                }

                RemoveKeys(keysToRemove);
            }
        }

        #endregion

        // Drops each listed key from the frequency map.
        private void RemoveKeys(List<string> keys)
        {
            foreach (string key in keys)
            {
                freq.Remove(key);
            }
        }
    }
}
