aboutsummaryrefslogtreecommitdiff
path: root/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs
diff options
context:
space:
mode:
author: Erwin de Haan <EraYaN@users.noreply.github.com> 2019-01-13 20:22:56 +0100
committer: Erwin de Haan <EraYaN@users.noreply.github.com> 2019-01-13 20:22:56 +0100
commit: 25f0315e918cf6f8c26b1e435c236ff1dbcbc6a5 (patch)
tree: 805191d28c22edcaf31ffb03ba355f5fbbc1c3da /Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs
parent: 0efc699e3d4cef2cb5b36223873fa5ad98177d1c (diff)
Visual Studio Reformat: Emby.Server.Implementations Part T-T
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs')
-rw-r--r-- Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs 304
1 files changed, 152 insertions, 152 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs
index b1738f7ca..2d29ec697 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs
@@ -6,14 +6,14 @@ using NLangDetect.Core.Extensions;
namespace NLangDetect.Core.Utils
{
- public class NGram
- {
- public const int GramsCount = 3;
+ public class NGram
+ {
+ public const int GramsCount = 3;
- private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE");
+ private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE");
- private static readonly string[] CjkClass =
- {
+ private static readonly string[] CjkClass =
+ {
#region CJK classes
Messages.getString("NGram.KANJI_1_0"),
@@ -146,185 +146,185 @@ namespace NLangDetect.Core.Utils
#endregion
};
- private static readonly Dictionary<char, char> _cjkMap;
+ private static readonly Dictionary<char, char> _cjkMap;
- private StringBuilder _grams;
- private bool _capitalword;
+ private StringBuilder _grams;
+ private bool _capitalword;
- #region Constructor(s)
+ #region Constructor(s)
- static NGram()
- {
- _cjkMap = new Dictionary<char, char>();
+ static NGram()
+ {
+ _cjkMap = new Dictionary<char, char>();
- foreach (string cjk_list in CjkClass)
- {
- char representative = cjk_list[0];
+ foreach (string cjk_list in CjkClass)
+ {
+ char representative = cjk_list[0];
- for (int i = 0; i < cjk_list.Length; i++)
- {
- _cjkMap.Add(cjk_list[i], representative);
+ for (int i = 0; i < cjk_list.Length; i++)
+ {
+ _cjkMap.Add(cjk_list[i], representative);
+ }
+ }
}
- }
- }
-
- public NGram()
- {
- _grams = new StringBuilder(" ");
- _capitalword = false;
- }
- #endregion
+ public NGram()
+ {
+ _grams = new StringBuilder(" ");
+ _capitalword = false;
+ }
- #region Public methods
+ #endregion
- public static char Normalize(char ch)
- {
- UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock();
+ #region Public methods
- if (!unicodeBlock.HasValue)
- {
- return ch;
- }
+ public static char Normalize(char ch)
+ {
+ UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock();
- switch (unicodeBlock.Value)
- {
- case UnicodeBlock.BasicLatin:
- {
- if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z')
+ if (!unicodeBlock.HasValue)
{
- return ' ';
+ return ch;
}
- break;
- }
-
- case UnicodeBlock.Latin1Supplement:
- {
- if (Latin1Excluded.IndexOf(ch) >= 0)
+ switch (unicodeBlock.Value)
{
- return ' ';
+ case UnicodeBlock.BasicLatin:
+ {
+ if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z')
+ {
+ return ' ';
+ }
+
+ break;
+ }
+
+ case UnicodeBlock.Latin1Supplement:
+ {
+ if (Latin1Excluded.IndexOf(ch) >= 0)
+ {
+ return ' ';
+ }
+
+ break;
+ }
+
+ case UnicodeBlock.GeneralPunctuation:
+ {
+ return ' ';
+ }
+
+ case UnicodeBlock.Arabic:
+ {
+ if (ch == '\u06cc')
+ {
+ return '\u064a';
+ }
+
+ break;
+ }
+
+ case UnicodeBlock.LatinExtendedAdditional:
+ {
+ if (ch >= '\u1ea0')
+ {
+ return '\u1ec3';
+ }
+
+ break;
+ }
+
+ case UnicodeBlock.Hiragana:
+ {
+ return '\u3042';
+ }
+
+ case UnicodeBlock.Katakana:
+ {
+ return '\u30a2';
+ }
+
+ case UnicodeBlock.Bopomofo:
+ case UnicodeBlock.BopomofoExtended:
+ {
+ return '\u3105';
+ }
+
+ case UnicodeBlock.CjkUnifiedIdeographs:
+ {
+ if (_cjkMap.ContainsKey(ch))
+ {
+ return _cjkMap[ch];
+ }
+
+ break;
+ }
+
+ case UnicodeBlock.HangulSyllables:
+ {
+ return '\uac00';
+ }
}
- break;
- }
-
- case UnicodeBlock.GeneralPunctuation:
- {
- return ' ';
- }
+ return ch;
+ }
- case UnicodeBlock.Arabic:
- {
- if (ch == '\u06cc')
+ public void AddChar(char ch)
+ {
+ ch = Normalize(ch);
+ char lastchar = _grams[_grams.Length - 1];
+ if (lastchar == ' ')
{
- return '\u064a';
+ _grams = new StringBuilder(" ");
+ _capitalword = false;
+ if (ch == ' ') return;
}
-
- break;
- }
-
- case UnicodeBlock.LatinExtendedAdditional:
- {
- if (ch >= '\u1ea0')
+ else if (_grams.Length >= GramsCount)
{
- return '\u1ec3';
+ _grams.Remove(0, 1);
}
+ _grams.Append(ch);
- break;
- }
-
- case UnicodeBlock.Hiragana:
- {
- return '\u3042';
- }
-
- case UnicodeBlock.Katakana:
- {
- return '\u30a2';
- }
-
- case UnicodeBlock.Bopomofo:
- case UnicodeBlock.BopomofoExtended:
- {
- return '\u3105';
- }
-
- case UnicodeBlock.CjkUnifiedIdeographs:
- {
- if (_cjkMap.ContainsKey(ch))
+ if (char.IsUpper(ch))
{
- return _cjkMap[ch];
+ if (char.IsUpper(lastchar)) _capitalword = true;
}
+ else
+ {
+ _capitalword = false;
+ }
+ }
- break;
- }
-
- case UnicodeBlock.HangulSyllables:
- {
- return '\uac00';
- }
- }
-
- return ch;
- }
+ public string Get(int n)
+ {
+ if (_capitalword)
+ {
+ return null;
+ }
- public void AddChar(char ch)
- {
- ch = Normalize(ch);
- char lastchar = _grams[_grams.Length - 1];
- if (lastchar == ' ')
- {
- _grams = new StringBuilder(" ");
- _capitalword = false;
- if (ch == ' ') return;
- }
- else if (_grams.Length >= GramsCount)
- {
- _grams.Remove(0, 1);
- }
- _grams.Append(ch);
-
- if (char.IsUpper(ch))
- {
- if (char.IsUpper(lastchar)) _capitalword = true;
- }
- else
- {
- _capitalword = false;
- }
- }
+ int len = _grams.Length;
- public string Get(int n)
- {
- if (_capitalword)
- {
- return null;
- }
+ if (n < 1 || n > 3 || len < n)
+ {
+ return null;
+ }
- int len = _grams.Length;
+ if (n == 1)
+ {
+ char ch = _grams[len - 1];
- if (n < 1 || n > 3 || len < n)
- {
- return null;
- }
+ if (ch == ' ')
+ {
+ return null;
+ }
- if (n == 1)
- {
- char ch = _grams[len - 1];
+ return ch.ToString();
+ }
- if (ch == ' ')
- {
- return null;
+ // TODO IMM HI: is ToString() here effective?
+ return _grams.ToString().SubSequence(len - n, len);
}
- return ch.ToString();
- }
-
- // TODO IMM HI: is ToString() here effective?
- return _grams.ToString().SubSequence(len - n, len);
+ #endregion
}
-
- #endregion
- }
}