aboutsummaryrefslogtreecommitdiff
path: root/Emby.Common.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs
blob: 9d75b8356661830cc5e522d26ff5695e98b215f8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using NLangDetect.Core.Utils;
using MediaBrowser.Model.Serialization;
using System.Linq;

namespace NLangDetect.Core
{
    /// <summary>
    /// Holds the loaded language profiles in a single private static instance and
    /// creates <see cref="Detector"/> objects that are handed that shared instance.
    /// </summary>
    public class DetectorFactory
    {
        /// <summary>
        /// Maps every key of a loaded profile's frequency table to a <see cref="ProbVector"/>
        /// that stores, at each language index, the key's frequency divided by the
        /// profile's count for keys of that length.
        /// </summary>
        public Dictionary<string, ProbVector> WordLangProbMap;

        /// <summary>
        /// Names of the profiles added so far, in insertion order. <see cref="LoadProfiles"/>
        /// keeps a profile's position in this list equal to the index it passes to
        /// <see cref="AddProfile"/>.
        /// </summary>
        public List<string> Langlist;

        private static readonly DetectorFactory _instance = new DetectorFactory();

        #region Constructor(s)

        // Private: the only instance is the static one above.
        private DetectorFactory()
        {
            WordLangProbMap = new Dictionary<string, ProbVector>();
            Langlist = new List<string>();
        }

        #endregion

        #region Public methods

        /// <summary>
        /// Deserializes every embedded resource of this assembly whose name contains
        /// "NLangDetect.Profiles" into a <see cref="LangProfile"/> and registers it.
        /// </summary>
        /// <param name="json">Serializer used to read each resource stream.</param>
        /// <exception cref="ArgumentNullException"><paramref name="json"/> is null.</exception>
        /// <exception cref="NLangDetectException">
        /// A profile with an already registered name is encountered
        /// (for example when this method is called a second time without <see cref="Clear"/>).
        /// </exception>
        public static void LoadProfiles(IJsonSerializer json)
        {
            if (json == null)
            {
                throw new ArgumentNullException("json");
            }

            var assembly = typeof(DetectorFactory).Assembly;
            var names = assembly.GetManifestResourceNames()
                      .Where(i => i.IndexOf("NLangDetect.Profiles", StringComparison.Ordinal) != -1)
                      .ToList();

            // Language index assigned to each profile; advances once per resource.
            var index = 0;

            foreach (var name in names)
            {
                using (var stream = assembly.GetManifestResourceStream(name))
                {
                    var langProfile = (LangProfile)json.DeserializeFromStream(stream, typeof(LangProfile));

                    AddProfile(langProfile, index);
                }

                index++;
            }
        }

        /// <summary>
        /// Creates a detector over the currently loaded profiles.
        /// </summary>
        /// <returns>A new <see cref="Detector"/>.</returns>
        /// <exception cref="NLangDetectException">No profile has been loaded yet.</exception>
        public static Detector Create()
        {
            return CreateDetector();
        }

        /// <summary>
        /// Creates a detector over the currently loaded profiles and passes
        /// <paramref name="alpha"/> to its <c>SetAlpha</c> method.
        /// </summary>
        /// <param name="alpha">Value forwarded to <c>Detector.SetAlpha</c>.</param>
        /// <returns>A new <see cref="Detector"/>.</returns>
        /// <exception cref="NLangDetectException">No profile has been loaded yet.</exception>
        public static Detector Create(double alpha)
        {
            Detector detector = CreateDetector();

            detector.SetAlpha(alpha);

            return detector;
        }

        /// <summary>
        /// Stores <paramref name="seed"/> on the shared instance (see <see cref="Seed"/>).
        /// </summary>
        /// <param name="seed">Seed value, or null to clear it.</param>
        public static void SetSeed(int? seed)
        {
            _instance.Seed = seed;
        }

        #endregion

        #region Internal methods

        /// <summary>
        /// Registers one language profile and merges its frequency table into
        /// <see cref="WordLangProbMap"/> under the given language index.
        /// </summary>
        /// <param name="profile">Profile to register.</param>
        /// <param name="index">Language index under which the probabilities are stored.</param>
        /// <exception cref="ArgumentNullException"><paramref name="profile"/> is null.</exception>
        /// <exception cref="NLangDetectException">
        /// A profile with the same name has already been registered.
        /// </exception>
        internal static void AddProfile(LangProfile profile, int index)
        {
            if (profile == null)
            {
                throw new ArgumentNullException("profile");
            }

            var lang = profile.name;

            if (_instance.Langlist.Contains(lang))
            {
                throw new NLangDetectException("duplicate the same language profile", ErrorCode.DuplicateLangError);
            }

            _instance.Langlist.Add(lang);

            var probabilities = _instance.WordLangProbMap;

            // Iterate key/value pairs so each word costs a single lookup in the profile.
            foreach (var entry in profile.freq)
            {
                string word = entry.Key;
                int length = word.Length;

                // n_words is indexed by (key length - 1). A key whose length falls outside
                // that table cannot be normalized, so it is skipped rather than letting an
                // index-out-of-range error abort the whole profile load.
                if (length < 1 || length > profile.n_words.Length)
                {
                    continue;
                }

                ProbVector vector;
                if (!probabilities.TryGetValue(word, out vector))
                {
                    vector = new ProbVector();
                    probabilities.Add(word, vector);
                }

                double prob = (double)entry.Value / profile.n_words[length - 1];

                vector[index] = prob;
            }
        }

        /// <summary>
        /// Removes every registered language and probability entry.
        /// The stored <see cref="Seed"/> is left untouched.
        /// </summary>
        internal static void Clear()
        {
            _instance.Langlist.Clear();
            _instance.WordLangProbMap.Clear();
        }

        #endregion

        #region Private helper methods

        // Shared by both Create overloads: refuses to build a detector before any profile is loaded.
        private static Detector CreateDetector()
        {
            if (_instance.Langlist.Count == 0)
            {
                throw new NLangDetectException("need to load profiles", ErrorCode.NeedLoadProfileError);
            }

            return new Detector(_instance);
        }

        #endregion

        #region Properties

        /// <summary>
        /// Seed last stored through <see cref="SetSeed"/>; null when none was set.
        /// </summary>
        public int? Seed { get; private set; }

        #endregion
    }
}