diff options
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs')
| -rw-r--r-- | Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs | 127 |
1 files changed, 127 insertions, 0 deletions
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using NLangDetect.Core.Utils;
using MediaBrowser.Model.Serialization;
using System.Linq;

namespace NLangDetect.Core
{
    /// <summary>
    /// Holds the loaded language profiles and creates <see cref="Detector"/> objects from them.
    /// All state lives in a single shared static instance; the constructor is private and the
    /// public surface consists of static methods operating on that instance.
    /// </summary>
    public class DetectorFactory
    {
        /// <summary>
        /// Maps each word found in a loaded profile to its probability vector. The vector is
        /// indexed by the profile index that was passed to <see cref="AddProfile"/>.
        /// </summary>
        public Dictionary<string, ProbVector> WordLangProbMap;

        /// <summary>
        /// Names of the loaded language profiles, in the order they were added.
        /// </summary>
        public List<string> Langlist;

        private static readonly DetectorFactory _instance = new DetectorFactory();

        #region Constructor(s)

        /// <summary>
        /// Creates the factory with an empty word map and an empty language list.
        /// </summary>
        private DetectorFactory()
        {
            WordLangProbMap = new Dictionary<string, ProbVector>();
            Langlist = new List<string>();
        }

        #endregion

        #region Public methods

        /// <summary>
        /// Loads every embedded resource of this assembly whose name contains
        /// "NLangDetect.Profiles", deserializes it as a <see cref="LangProfile"/> and registers it.
        /// Profiles receive consecutive indices starting at 0, in the order the resource names
        /// are returned by the assembly.
        /// </summary>
        /// <param name="json">Serializer used to read each profile resource stream.</param>
        /// <exception cref="ArgumentNullException"><paramref name="json"/> is null.</exception>
        /// <exception cref="NLangDetectException">A profile with an already registered name is loaded.</exception>
        public static void LoadProfiles(IJsonSerializer json)
        {
            if (json == null)
            {
                throw new ArgumentNullException("json");
            }

            var assembly = typeof(DetectorFactory).Assembly;
            var names = assembly.GetManifestResourceNames()
                .Where(i => i.IndexOf("NLangDetect.Profiles", StringComparison.Ordinal) != -1)
                .ToList();

            var index = 0;

            foreach (var name in names)
            {
                using (var stream = assembly.GetManifestResourceStream(name))
                {
                    var langProfile = (LangProfile)json.DeserializeFromStream(stream, typeof(LangProfile));

                    AddProfile(langProfile, index);
                }

                index++;
            }
        }

        /// <summary>
        /// Creates a detector backed by the currently loaded profiles.
        /// </summary>
        /// <returns>A new <see cref="Detector"/>.</returns>
        /// <exception cref="NLangDetectException">No profiles have been loaded.</exception>
        public static Detector Create()
        {
            return CreateDetector();
        }

        /// <summary>
        /// Creates a detector backed by the currently loaded profiles and passes
        /// <paramref name="alpha"/> to its <c>SetAlpha</c> method.
        /// </summary>
        /// <param name="alpha">Value forwarded to <c>Detector.SetAlpha</c>.</param>
        /// <returns>A new <see cref="Detector"/>.</returns>
        /// <exception cref="NLangDetectException">No profiles have been loaded.</exception>
        public static Detector Create(double alpha)
        {
            Detector detector = CreateDetector();

            detector.SetAlpha(alpha);

            return detector;
        }

        /// <summary>
        /// Stores <paramref name="seed"/> in the shared instance's <see cref="Seed"/> property.
        /// </summary>
        /// <param name="seed">The seed value, or null to clear it.</param>
        public static void SetSeed(int? seed)
        {
            _instance.Seed = seed;
        }

        #endregion

        #region Internal methods

        /// <summary>
        /// Registers a language profile: appends its name to <see cref="Langlist"/> and, for each
        /// word in the profile, stores <c>freq[word] / n_words[word.Length - 1]</c> at position
        /// <paramref name="index"/> of that word's probability vector.
        /// </summary>
        /// <param name="profile">The profile to register.</param>
        /// <param name="index">Slot in each word's probability vector that belongs to this profile.</param>
        /// <exception cref="ArgumentNullException"><paramref name="profile"/> is null.</exception>
        /// <exception cref="NLangDetectException">A profile with the same name is already registered.</exception>
        internal static void AddProfile(LangProfile profile, int index)
        {
            if (profile == null)
            {
                throw new ArgumentNullException("profile");
            }

            var lang = profile.name;

            if (_instance.Langlist.Contains(lang))
            {
                throw new NLangDetectException("duplicate the same language profile", ErrorCode.DuplicateLangError);
            }

            _instance.Langlist.Add(lang);

            foreach (string word in profile.freq.Keys)
            {
                // Single lookup: fetch the existing vector or create and register a new one,
                // then keep the reference instead of indexing the dictionary again.
                ProbVector probabilities;

                if (!_instance.WordLangProbMap.TryGetValue(word, out probabilities))
                {
                    probabilities = new ProbVector();
                    _instance.WordLangProbMap.Add(word, probabilities);
                }

                // NOTE(review): word.Length - 1 is used as an index into n_words without a range
                // check; an empty word or one longer than n_words would throw here. Confirm that
                // profile data guarantees the expected word lengths.
                double prob = (double)profile.freq[word] / profile.n_words[word.Length - 1];

                probabilities[index] = prob;
            }
        }

        /// <summary>
        /// Removes all registered languages and word probabilities from the shared instance.
        /// </summary>
        internal static void Clear()
        {
            _instance.Langlist.Clear();
            _instance.WordLangProbMap.Clear();
        }

        #endregion

        #region Private helper methods

        /// <summary>
        /// Builds a <see cref="Detector"/> over the shared instance.
        /// </summary>
        /// <exception cref="NLangDetectException">The language list is empty, i.e. no profiles were loaded.</exception>
        private static Detector CreateDetector()
        {
            if (_instance.Langlist.Count == 0)
            {
                throw new NLangDetectException("need to load profiles", ErrorCode.NeedLoadProfileError);
            }

            return new Detector(_instance);
        }

        #endregion

        #region Properties

        /// <summary>
        /// Seed value set through <see cref="SetSeed"/>; null when none has been set.
        /// </summary>
        public int? Seed { get; private set; }

        #endregion
    }
}
