diff options
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/NLangDetect/Utils')
4 files changed, 215 insertions, 218 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs index 0413edfad..78b44e1fc 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs @@ -59,8 +59,8 @@ namespace NLangDetect.Core.Utils ICollection<string> keys = freq.Keys; int roman = 0; // TODO IMM HI: move up? - Regex regex1 = new Regex("^[A-Za-z]$", RegexOptions.Compiled); - List<string> keysToRemove = new List<string>(); + var regex1 = new Regex("^[A-Za-z]$", RegexOptions.Compiled); + var keysToRemove = new List<string>(); foreach (string key in keys) { @@ -93,7 +93,7 @@ namespace NLangDetect.Core.Utils ICollection<string> keys2 = freq.Keys; // TODO IMM HI: move up? - Regex regex2 = new Regex(".*[A-Za-z].*", RegexOptions.Compiled); + var regex2 = new Regex(".*[A-Za-z].*", RegexOptions.Compiled); foreach (string key in keys2) { diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs index 1d605cc47..879c0a09b 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs @@ -1,10 +1,9 @@ +using System; using System.Collections.Generic; using System.Globalization; using System.IO; -using System.Reflection; -using System.Text.RegularExpressions; using System.Linq; -using System; +using System.Text.RegularExpressions; namespace NLangDetect.Core.Utils { @@ -19,19 +18,17 @@ namespace NLangDetect.Core.Utils public static string getString(string key) { - string value; - return - _messages.TryGetValue(key, out value) + _messages.TryGetValue(key, out var value) ? value : string.Format("!{0}!", key); } private static Dictionary<string, string> LoadMessages() { - var manifestName = typeof(Messages).Assembly.GetManifestResourceNames().FirstOrDefault(i => i.IndexOf("messages.properties", StringComparison.Ordinal) != -1) ; + var manifestName = typeof(Messages).Assembly.GetManifestResourceNames().FirstOrDefault(i => i.IndexOf("messages.properties", StringComparison.Ordinal) != -1); - Stream messagesStream = + var messagesStream = typeof(Messages).Assembly .GetManifestResourceStream(manifestName); diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs index b1738f7ca..2d29ec697 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs @@ -6,14 +6,14 @@ using NLangDetect.Core.Extensions; namespace NLangDetect.Core.Utils { - public class NGram - { - public const int GramsCount = 3; + public class NGram + { + public const int GramsCount = 3; - private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE"); + private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE"); - private static readonly string[] CjkClass = - { + private static readonly string[] CjkClass = + { #region CJK classes Messages.getString("NGram.KANJI_1_0"), @@ -146,185 +146,185 @@ namespace NLangDetect.Core.Utils #endregion }; - private static readonly Dictionary<char, char> _cjkMap; + private static readonly Dictionary<char, char> _cjkMap; - private StringBuilder _grams; - private bool _capitalword; + private StringBuilder _grams; + private bool _capitalword; - #region Constructor(s) + #region Constructor(s) - static NGram() - { - _cjkMap = new Dictionary<char, char>(); + static NGram() + { + _cjkMap = new Dictionary<char, char>(); - foreach (string cjk_list in CjkClass) - { - char representative = cjk_list[0]; + foreach (string cjk_list in CjkClass) + { + char representative = cjk_list[0]; - for (int i = 0; i < cjk_list.Length; i++) - { - _cjkMap.Add(cjk_list[i], representative); + for (int i = 0; i < cjk_list.Length; i++) + { + _cjkMap.Add(cjk_list[i], representative); + } + } } - } - } - - public NGram() - { - _grams = new StringBuilder(" "); - _capitalword = false; - } - #endregion + public NGram() + { + _grams = new StringBuilder(" "); + _capitalword = false; + } - #region Public methods + #endregion - public static char Normalize(char ch) - { - UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock(); + #region Public methods - if (!unicodeBlock.HasValue) - { - return ch; - } + public static char Normalize(char ch) + { + UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock(); - switch (unicodeBlock.Value) - { - case UnicodeBlock.BasicLatin: - { - if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') + if (!unicodeBlock.HasValue) { - return ' '; + return ch; } - break; - } - - case UnicodeBlock.Latin1Supplement: - { - if (Latin1Excluded.IndexOf(ch) >= 0) + switch (unicodeBlock.Value) { - return ' '; + case UnicodeBlock.BasicLatin: + { + if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') + { + return ' '; + } + + break; + } + + case UnicodeBlock.Latin1Supplement: + { + if (Latin1Excluded.IndexOf(ch) >= 0) + { + return ' '; + } + + break; + } + + case UnicodeBlock.GeneralPunctuation: + { + return ' '; + } + + case UnicodeBlock.Arabic: + { + if (ch == '\u06cc') + { + return '\u064a'; + } + + break; + } + + case UnicodeBlock.LatinExtendedAdditional: + { + if (ch >= '\u1ea0') + { + return '\u1ec3'; + } + + break; + } + + case UnicodeBlock.Hiragana: + { + return '\u3042'; + } + + case UnicodeBlock.Katakana: + { + return '\u30a2'; + } + + case UnicodeBlock.Bopomofo: + case UnicodeBlock.BopomofoExtended: + { + return '\u3105'; + } + + case UnicodeBlock.CjkUnifiedIdeographs: + { + if (_cjkMap.ContainsKey(ch)) + { + return _cjkMap[ch]; + } + + break; + } + + case UnicodeBlock.HangulSyllables: + { + return '\uac00'; + } } - break; - } - - case UnicodeBlock.GeneralPunctuation: - { - return ' '; - } + return ch; + } - case UnicodeBlock.Arabic: - { - if (ch == '\u06cc') + public void AddChar(char ch) + { + ch = Normalize(ch); + char lastchar = _grams[_grams.Length - 1]; + if (lastchar == ' ') { - return '\u064a'; + _grams = new StringBuilder(" "); + _capitalword = false; + if (ch == ' ') return; } - - break; - } - - case UnicodeBlock.LatinExtendedAdditional: - { - if (ch >= '\u1ea0') + else if (_grams.Length >= GramsCount) { - return '\u1ec3'; + _grams.Remove(0, 1); } + _grams.Append(ch); - break; - } - - case UnicodeBlock.Hiragana: - { - return '\u3042'; - } - - case UnicodeBlock.Katakana: - { - return '\u30a2'; - } - - case UnicodeBlock.Bopomofo: - case UnicodeBlock.BopomofoExtended: - { - return '\u3105'; - } - - case UnicodeBlock.CjkUnifiedIdeographs: - { - if (_cjkMap.ContainsKey(ch)) + if (char.IsUpper(ch)) { - return _cjkMap[ch]; + if (char.IsUpper(lastchar)) _capitalword = true; } + else + { + _capitalword = false; + } + } - break; - } - - case UnicodeBlock.HangulSyllables: - { - return '\uac00'; - } - } - - return ch; - } + public string Get(int n) + { + if (_capitalword) + { + return null; + } - public void AddChar(char ch) - { - ch = Normalize(ch); - char lastchar = _grams[_grams.Length - 1]; - if (lastchar == ' ') - { - _grams = new StringBuilder(" "); - _capitalword = false; - if (ch == ' ') return; - } - else if (_grams.Length >= GramsCount) - { - _grams.Remove(0, 1); - } - _grams.Append(ch); - - if (char.IsUpper(ch)) - { - if (char.IsUpper(lastchar)) _capitalword = true; - } - else - { - _capitalword = false; - } - } + int len = _grams.Length; - public string Get(int n) - { - if (_capitalword) - { - return null; - } + if (n < 1 || n > 3 || len < n) + { + return null; + } - int len = _grams.Length; + if (n == 1) + { + char ch = _grams[len - 1]; - if (n < 1 || n > 3 || len < n) - { - return null; - } + if (ch == ' ') + { + return null; + } - if (n == 1) - { - char ch = _grams[len - 1]; + return ch.ToString(); + } - if (ch == ' ') - { - return null; + // TODO IMM HI: is ToString() here effective? + return _grams.ToString().SubSequence(len - n, len); } - return ch.ToString(); - } - - // TODO IMM HI: is ToString() here effective? - return _grams.ToString().SubSequence(len - n, len); + #endregion } - - #endregion - } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs index 896fd0960..4441ecd0f 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs @@ -2,75 +2,75 @@ using System.Text; namespace NLangDetect.Core.Utils { - public class TagExtractor - { - // TODO IMM HI: do the really need to be internal? - internal string Target; - internal int Threshold; - internal StringBuilder StringBuilder; - internal string Tag; - - #region Constructor(s) - - public TagExtractor(string tag, int threshold) + public class TagExtractor { - Target = tag; - Threshold = threshold; - Count = 0; - Clear(); - } + // TODO IMM HI: do the really need to be internal? + internal string Target; + internal int Threshold; + internal StringBuilder StringBuilder; + internal string Tag; - #endregion + #region Constructor(s) - #region Public methods - - public void Clear() - { - StringBuilder = new StringBuilder(); - Tag = null; - } + public TagExtractor(string tag, int threshold) + { + Target = tag; + Threshold = threshold; + Count = 0; + Clear(); + } - public void SetTag(string tag) - { - Tag = tag; - } + #endregion - public void Add(string line) - { - if (Tag == Target && line != null) - { - StringBuilder.Append(line); - } - } + #region Public methods - public void CloseTag(LangProfile profile) - { - if (profile != null && Tag == Target && StringBuilder.Length > Threshold) - { - var gram = new NGram(); + public void Clear() + { + StringBuilder = new StringBuilder(); + Tag = null; + } - for (int i = 0; i < StringBuilder.Length; i++) + public void SetTag(string tag) { - gram.AddChar(StringBuilder[i]); + Tag = tag; + } - for (int n = 1; n <= NGram.GramsCount; n++) - { - profile.Add(gram.Get(n)); - } + public void Add(string line) + { + if (Tag == Target && line != null) + { + StringBuilder.Append(line); + } } - Count++; - } + public void CloseTag(LangProfile profile) + { + if (profile != null && Tag == Target && StringBuilder.Length > Threshold) + { + var gram = new NGram(); + + for (int i = 0; i < StringBuilder.Length; i++) + { + gram.AddChar(StringBuilder[i]); - Clear(); - } + for (int n = 1; n <= NGram.GramsCount; n++) + { + profile.Add(gram.Get(n)); + } + } + + Count++; + } - #endregion + Clear(); + } + + #endregion - #region Properties + #region Properties - public int Count { get; private set; } + public int Count { get; private set; } - #endregion - } + #endregion + } } |
