about | summary | refs | log | tree | commit | diff
path: root/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs
diff options
context:
space:
mode:
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs')
-rw-r--r-- Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs 118
1 files changed, 0 insertions, 118 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs
deleted file mode 100644
index 78b44e1fc..000000000
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs
+++ /dev/null
@@ -1,118 +0,0 @@
-using System.Collections.Generic;
-using System.Text.RegularExpressions;
-
-namespace NLangDetect.Core.Utils
-{
- public class LangProfile
- {
-     // Grams counted this many times or fewer are always dropped by OmitLessFreq.
-     private const int MinimumFreq = 2;
-
-     // Divisor applied to the 1-gram total to derive the pruning threshold.
-     private const int LessFreqRatio = 100000;
-
-     // Built once and shared: constructing a compiled Regex inside every
-     // OmitLessFreq call repeated the pattern compilation for no benefit.
-     private static readonly Regex SingleRomanLetterRegex = new Regex("^[A-Za-z]$", RegexOptions.Compiled);
-
-     // Matches the same keys as ".*[A-Za-z].*", because IsMatch is unanchored.
-     private static readonly Regex AnyRomanLetterRegex = new Regex("[A-Za-z]", RegexOptions.Compiled);
-
-     /// <summary>Profile name; Add and OmitLessFreq do nothing while it is null.</summary>
-     public string name { get; set; }
-
-     /// <summary>Occurrence count for each recorded gram.</summary>
-     public Dictionary<string, int> freq { get; set; }
-
-     /// <summary>Total occurrences per gram length; the index is length minus one.</summary>
-     public int[] n_words { get; set; }
-
-     #region Constructor(s)
-
-     /// <summary>Creates an unnamed profile with empty counters.</summary>
-     public LangProfile()
-     {
-         freq = new Dictionary<string, int>();
-         n_words = new int[NGram.GramsCount];
-     }
-
-     /// <summary>Creates a named profile with empty counters.</summary>
-     /// <param name="name">Value assigned to the name property.</param>
-     public LangProfile(string name)
-         : this()
-     {
-         this.name = name;
-     }
-
-     #endregion
-
-     #region Public methods
-
-     /// <summary>
-     /// Counts one occurrence of a gram. The call is ignored when the profile
-     /// name or the gram is null, or when the gram length is outside
-     /// 1..NGram.GramsCount.
-     /// </summary>
-     /// <param name="gram">Gram to count.</param>
-     public void Add(string gram)
-     {
-         if (name == null || gram == null)
-         {
-             return; // Illegal
-         }
-
-         int len = gram.Length;
-         if (len < 1 || len > NGram.GramsCount)
-         {
-             return; // Illegal
-         }
-
-         n_words[len - 1]++;
-
-         // One lookup instead of ContainsKey plus indexer; a miss leaves current at 0.
-         int current;
-         freq.TryGetValue(gram, out current);
-         freq[gram] = current + 1;
-     }
-
-     /// <summary>
-     /// Prunes the profile. Grams whose count is at or below the threshold
-     /// (n_words[0] / LessFreqRatio, but never under MinimumFreq) are removed.
-     /// Then, if the retained single Roman letters total less than
-     /// n_words[0] / 3, every gram containing a Roman letter is removed too.
-     /// Removed counts are subtracted from n_words. Ignored when name is null.
-     /// </summary>
-     public void OmitLessFreq()
-     {
-         if (name == null)
-         {
-             return; // Illegal
-         }
-
-         int threshold = n_words[0] / LessFreqRatio;
-         if (threshold < MinimumFreq)
-         {
-             threshold = MinimumFreq;
-         }
-
-         int roman = 0;
-
-         // Removals are collected first because freq is being enumerated.
-         var doomed = new List<string>();
-
-         foreach (KeyValuePair<string, int> entry in freq)
-         {
-             if (entry.Value <= threshold)
-             {
-                 n_words[entry.Key.Length - 1] -= entry.Value;
-                 doomed.Add(entry.Key);
-             }
-             else if (SingleRomanLetterRegex.IsMatch(entry.Key))
-             {
-                 roman += entry.Value;
-             }
-         }
-
-         RemoveGrams(doomed);
-
-         // roman check; n_words[0] already excludes the grams dropped above
-         if (roman < n_words[0] / 3)
-         {
-             doomed.Clear();
-
-             foreach (KeyValuePair<string, int> entry in freq)
-             {
-                 if (AnyRomanLetterRegex.IsMatch(entry.Key))
-                 {
-                     n_words[entry.Key.Length - 1] -= entry.Value;
-                     doomed.Add(entry.Key);
-                 }
-             }
-
-             RemoveGrams(doomed);
-         }
-     }
-
-     #endregion
-
-     // Deletes every listed gram from freq.
-     private void RemoveGrams(List<string> grams)
-     {
-         foreach (string gram in grams)
-         {
-             freq.Remove(gram);
-         }
-     }
- }
-}