diff options
| author | Erwin de Haan <EraYaN@users.noreply.github.com> | 2019-01-13 20:22:56 +0100 |
|---|---|---|
| committer | Erwin de Haan <EraYaN@users.noreply.github.com> | 2019-01-13 20:22:56 +0100 |
| commit | 25f0315e918cf6f8c26b1e435c236ff1dbcbc6a5 (patch) | |
| tree | 805191d28c22edcaf31ffb03ba355f5fbbc1c3da /Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs | |
| parent | 0efc699e3d4cef2cb5b36223873fa5ad98177d1c (diff) | |
Visual Studio Reformat: Emby.Server.Implementations Part T-T
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs')
| -rw-r--r-- | Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs | 304 |
1 files changed, 152 insertions, 152 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs index b1738f7ca..2d29ec697 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs @@ -6,14 +6,14 @@ using NLangDetect.Core.Extensions; namespace NLangDetect.Core.Utils { - public class NGram - { - public const int GramsCount = 3; + public class NGram + { + public const int GramsCount = 3; - private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE"); + private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE"); - private static readonly string[] CjkClass = - { + private static readonly string[] CjkClass = + { #region CJK classes Messages.getString("NGram.KANJI_1_0"), @@ -146,185 +146,185 @@ namespace NLangDetect.Core.Utils #endregion }; - private static readonly Dictionary<char, char> _cjkMap; + private static readonly Dictionary<char, char> _cjkMap; - private StringBuilder _grams; - private bool _capitalword; + private StringBuilder _grams; + private bool _capitalword; - #region Constructor(s) + #region Constructor(s) - static NGram() - { - _cjkMap = new Dictionary<char, char>(); + static NGram() + { + _cjkMap = new Dictionary<char, char>(); - foreach (string cjk_list in CjkClass) - { - char representative = cjk_list[0]; + foreach (string cjk_list in CjkClass) + { + char representative = cjk_list[0]; - for (int i = 0; i < cjk_list.Length; i++) - { - _cjkMap.Add(cjk_list[i], representative); + for (int i = 0; i < cjk_list.Length; i++) + { + _cjkMap.Add(cjk_list[i], representative); + } + } } - } - } - - public NGram() - { - _grams = new StringBuilder(" "); - _capitalword = false; - } - #endregion + public NGram() + { + _grams = new StringBuilder(" "); + _capitalword = false; + } - #region Public methods + #endregion - public static char Normalize(char ch) - { - UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock(); + #region Public methods - if (!unicodeBlock.HasValue) - { - return ch; - } + public static char Normalize(char ch) + { + UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock(); - switch (unicodeBlock.Value) - { - case UnicodeBlock.BasicLatin: - { - if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') + if (!unicodeBlock.HasValue) { - return ' '; + return ch; } - break; - } - - case UnicodeBlock.Latin1Supplement: - { - if (Latin1Excluded.IndexOf(ch) >= 0) + switch (unicodeBlock.Value) { - return ' '; + case UnicodeBlock.BasicLatin: + { + if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') + { + return ' '; + } + + break; + } + + case UnicodeBlock.Latin1Supplement: + { + if (Latin1Excluded.IndexOf(ch) >= 0) + { + return ' '; + } + + break; + } + + case UnicodeBlock.GeneralPunctuation: + { + return ' '; + } + + case UnicodeBlock.Arabic: + { + if (ch == '\u06cc') + { + return '\u064a'; + } + + break; + } + + case UnicodeBlock.LatinExtendedAdditional: + { + if (ch >= '\u1ea0') + { + return '\u1ec3'; + } + + break; + } + + case UnicodeBlock.Hiragana: + { + return '\u3042'; + } + + case UnicodeBlock.Katakana: + { + return '\u30a2'; + } + + case UnicodeBlock.Bopomofo: + case UnicodeBlock.BopomofoExtended: + { + return '\u3105'; + } + + case UnicodeBlock.CjkUnifiedIdeographs: + { + if (_cjkMap.ContainsKey(ch)) + { + return _cjkMap[ch]; + } + + break; + } + + case UnicodeBlock.HangulSyllables: + { + return '\uac00'; + } } - break; - } - - case UnicodeBlock.GeneralPunctuation: - { - return ' '; - } + return ch; + } - case UnicodeBlock.Arabic: - { - if (ch == '\u06cc') + public void AddChar(char ch) + { + ch = Normalize(ch); + char lastchar = _grams[_grams.Length - 1]; + if (lastchar == ' ') { - return '\u064a'; + _grams = new StringBuilder(" "); + _capitalword = false; + if (ch == ' ') return; } - - break; - } - - case UnicodeBlock.LatinExtendedAdditional: - { - if (ch >= '\u1ea0') + else if (_grams.Length >= GramsCount) { - return '\u1ec3'; + _grams.Remove(0, 1); } + _grams.Append(ch); - break; - } - - case UnicodeBlock.Hiragana: - { - return '\u3042'; - } - - case UnicodeBlock.Katakana: - { - return '\u30a2'; - } - - case UnicodeBlock.Bopomofo: - case UnicodeBlock.BopomofoExtended: - { - return '\u3105'; - } - - case UnicodeBlock.CjkUnifiedIdeographs: - { - if (_cjkMap.ContainsKey(ch)) + if (char.IsUpper(ch)) { - return _cjkMap[ch]; + if (char.IsUpper(lastchar)) _capitalword = true; } + else + { + _capitalword = false; + } + } - break; - } - - case UnicodeBlock.HangulSyllables: - { - return '\uac00'; - } - } - - return ch; - } + public string Get(int n) + { + if (_capitalword) + { + return null; + } - public void AddChar(char ch) - { - ch = Normalize(ch); - char lastchar = _grams[_grams.Length - 1]; - if (lastchar == ' ') - { - _grams = new StringBuilder(" "); - _capitalword = false; - if (ch == ' ') return; - } - else if (_grams.Length >= GramsCount) - { - _grams.Remove(0, 1); - } - _grams.Append(ch); - - if (char.IsUpper(ch)) - { - if (char.IsUpper(lastchar)) _capitalword = true; - } - else - { - _capitalword = false; - } - } + int len = _grams.Length; - public string Get(int n) - { - if (_capitalword) - { - return null; - } + if (n < 1 || n > 3 || len < n) + { + return null; + } - int len = _grams.Length; + if (n == 1) + { + char ch = _grams[len - 1]; - if (n < 1 || n > 3 || len < n) - { - return null; - } + if (ch == ' ') + { + return null; + } - if (n == 1) - { - char ch = _grams[len - 1]; + return ch.ToString(); + } - if (ch == ' ') - { - return null; + // TODO IMM HI: is ToString() here effective? + return _grams.ToString().SubSequence(len - n, len); } - return ch.ToString(); - } - - // TODO IMM HI: is ToString() here effective? - return _grams.ToString().SubSequence(len - n, len); + #endregion } - - #endregion - } } |
