aboutsummaryrefslogtreecommitdiff
path: root/Emby.Server.Implementations/TextEncoding/NLangDetect
diff options
context:
space:
mode:
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/NLangDetect')
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/Detector.cs6
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs8
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs26
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs94
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs76
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs258
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs96
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs30
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs62
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/LanguageDetector.cs4
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs28
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs46
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs6
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs13
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs304
-rw-r--r--Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs110
16 files changed, 580 insertions, 587 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Detector.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Detector.cs
index 507dd5e42..991ee8688 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Detector.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Detector.cs
@@ -179,7 +179,7 @@ namespace NLangDetect.Core
DetectBlock();
}
- List<Language> list = SortProbability(_langprob);
+ var list = SortProbability(_langprob);
return list;
}
@@ -250,7 +250,7 @@ namespace NLangDetect.Core
_langprob = new double[_langlist.Count];
- Random rand = (_seed.HasValue ? new Random(_seed.Value) : new Random());
+ var rand = (_seed.HasValue ? new Random(_seed.Value) : new Random());
for (int t = 0; t < _trialsCount; t++)
{
@@ -305,7 +305,7 @@ namespace NLangDetect.Core
private List<string> ExtractNGrams()
{
var list = new List<string>();
- NGram ngram = new NGram();
+ var ngram = new NGram();
for (int i = 0; i < _text.Length; i++)
{
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs
index 9d75b8356..08e98d62e 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs
@@ -1,10 +1,8 @@
using System;
using System.Collections.Generic;
-using System.IO;
-using System.IO.Compression;
-using NLangDetect.Core.Utils;
-using MediaBrowser.Model.Serialization;
using System.Linq;
+using MediaBrowser.Model.Serialization;
+using NLangDetect.Core.Utils;
namespace NLangDetect.Core
{
@@ -56,7 +54,7 @@ namespace NLangDetect.Core
public static Detector Create(double alpha)
{
- Detector detector = CreateDetector();
+ var detector = CreateDetector();
detector.SetAlpha(alpha);
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs
index 3ffd3b2d9..87f07fc9c 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs
@@ -1,15 +1,15 @@
-namespace NLangDetect.Core
+namespace NLangDetect.Core
{
- public enum ErrorCode
- {
- NoTextError,
- FormatError,
- FileLoadError,
- DuplicateLangError,
- NeedLoadProfileError,
- CantDetectError,
- CantOpenTrainData,
- TrainDataFormatError,
- InitParamError,
- }
+ public enum ErrorCode
+ {
+ NoTextError,
+ FormatError,
+ FileLoadError,
+ DuplicateLangError,
+ NeedLoadProfileError,
+ CantDetectError,
+ CantOpenTrainData,
+ TrainDataFormatError,
+ InitParamError,
+ }
}
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs
index cd77a30eb..6e58a0be8 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs
@@ -1,14 +1,14 @@
-using System;
+using System;
namespace NLangDetect.Core.Extensions
{
- public static class CharExtensions
- {
- private const int MIN_CODE_POINT = 0x000000;
- private const int MAX_CODE_POINT = 0x10ffff;
+ public static class CharExtensions
+ {
+ private const int MIN_CODE_POINT = 0x000000;
+ private const int MAX_CODE_POINT = 0x10ffff;
- private static readonly int[] _unicodeBlockStarts =
- {
+ private static readonly int[] _unicodeBlockStarts =
+ {
#region Unicode block starts
0x0000, // Basic Latin
@@ -165,8 +165,8 @@ namespace NLangDetect.Core.Extensions
#endregion
};
- private static readonly UnicodeBlock?[] _unicodeBlocks =
- {
+ private static readonly UnicodeBlock?[] _unicodeBlocks =
+ {
#region Unicode blocks
UnicodeBlock.BasicLatin,
UnicodeBlock.Latin1Supplement,
@@ -322,53 +322,53 @@ namespace NLangDetect.Core.Extensions
#endregion
};
- #region Public methods
+ #region Public methods
- /// <remarks>
- /// Taken from JDK source: http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Character.java#Character.UnicodeBlock.0LATIN_EXTENDED_ADDITIONAL
- /// </remarks>
- public static UnicodeBlock? GetUnicodeBlock(this char ch)
- {
- int codePoint = ch;
+ /// <remarks>
+ /// Taken from JDK source: http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Character.java#Character.UnicodeBlock.0LATIN_EXTENDED_ADDITIONAL
+ /// </remarks>
+ public static UnicodeBlock? GetUnicodeBlock(this char ch)
+ {
+ int codePoint = ch;
- if (!IsValidCodePoint(codePoint))
- {
- throw new ArgumentException("Argument is not a valid code point.", nameof(ch));
- }
+ if (!IsValidCodePoint(codePoint))
+ {
+ throw new ArgumentException("Argument is not a valid code point.", nameof(ch));
+ }
- int top, bottom, current;
+ int top, bottom, current;
- bottom = 0;
- top = _unicodeBlockStarts.Length;
- current = top / 2;
+ bottom = 0;
+ top = _unicodeBlockStarts.Length;
+ current = top / 2;
- // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
- while (top - bottom > 1)
- {
- if (codePoint >= _unicodeBlockStarts[current])
- {
- bottom = current;
- }
- else
- {
- top = current;
- }
+ // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
+ while (top - bottom > 1)
+ {
+ if (codePoint >= _unicodeBlockStarts[current])
+ {
+ bottom = current;
+ }
+ else
+ {
+ top = current;
+ }
- current = (top + bottom) / 2;
- }
+ current = (top + bottom) / 2;
+ }
- return _unicodeBlocks[current];
- }
+ return _unicodeBlocks[current];
+ }
- #endregion
+ #endregion
- #region Private helper methods
+ #region Private helper methods
- private static bool IsValidCodePoint(int codePoint)
- {
- return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
- }
+ private static bool IsValidCodePoint(int codePoint)
+ {
+ return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
+ }
- #endregion
- }
+ #endregion
+ }
}
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs
index d55ca80df..5b2fce60b 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs
@@ -1,51 +1,51 @@
-using System;
+using System;
namespace NLangDetect.Core.Extensions
{
- public static class RandomExtensions
- {
- private const double _Epsilon = 2.22044604925031E-15;
-
- private static readonly object _mutex = new object();
-
- private static double _nextNextGaussian;
- private static bool _hasNextNextGaussian;
-
- /// <summary>
- /// Returns the next pseudorandom, Gaussian ("normally") distributed double value with mean 0.0 and standard deviation 1.0 from this random number generator's sequence.
- /// The general contract of nextGaussian is that one double value, chosen from (approximately) the usual normal distribution with mean 0.0 and standard deviation 1.0, is pseudorandomly generated and returned.
- /// </summary>
- /// <remarks>
- /// Taken from: http://download.oracle.com/javase/6/docs/api/java/util/Random.html (nextGaussian())
- /// </remarks>
- public static double NextGaussian(this Random random)
+ public static class RandomExtensions
{
- lock (_mutex)
- {
- if (_hasNextNextGaussian)
- {
- _hasNextNextGaussian = false;
+ private const double _Epsilon = 2.22044604925031E-15;
- return _nextNextGaussian;
- }
+ private static readonly object _mutex = new object();
- double v1, v2, s;
+ private static double _nextNextGaussian;
+ private static bool _hasNextNextGaussian;
- do
+ /// <summary>
+ /// Returns the next pseudorandom, Gaussian ("normally") distributed double value with mean 0.0 and standard deviation 1.0 from this random number generator's sequence.
+ /// The general contract of nextGaussian is that one double value, chosen from (approximately) the usual normal distribution with mean 0.0 and standard deviation 1.0, is pseudorandomly generated and returned.
+ /// </summary>
+ /// <remarks>
+ /// Taken from: http://download.oracle.com/javase/6/docs/api/java/util/Random.html (nextGaussian())
+ /// </remarks>
+ public static double NextGaussian(this Random random)
{
- v1 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0
- v2 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0
- s = v1 * v1 + v2 * v2;
- }
- while (s >= 1.0 || Math.Abs(s - 0.0) < _Epsilon);
+ lock (_mutex)
+ {
+ if (_hasNextNextGaussian)
+ {
+ _hasNextNextGaussian = false;
+
+ return _nextNextGaussian;
+ }
- double multiplier = Math.Sqrt(-2.0 * Math.Log(s) / s);
+ double v1, v2, s;
- _nextNextGaussian = v2 * multiplier;
- _hasNextNextGaussian = true;
+ do
+ {
+ v1 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0
+ v2 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0
+ s = v1 * v1 + v2 * v2;
+ }
+ while (s >= 1.0 || Math.Abs(s - 0.0) < _Epsilon);
- return v1 * multiplier;
- }
+ double multiplier = Math.Sqrt(-2.0 * Math.Log(s) / s);
+
+ _nextNextGaussian = v2 * multiplier;
+ _hasNextNextGaussian = true;
+
+ return v1 * multiplier;
+ }
+ }
}
- }
}
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs
index 71b5de75e..bcc81f968 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs
@@ -1,131 +1,131 @@
-namespace NLangDetect.Core.Extensions
+namespace NLangDetect.Core.Extensions
{
- public enum UnicodeBlock
- {
- BasicLatin,
- Latin1Supplement,
- LatinExtendedA,
- LatinExtendedB,
- IpaExtensions,
- SpacingModifierLetters,
- CombiningDiacriticalMarks,
- Greek,
- Cyrillic,
- CyrillicSupplementary,
- Armenian,
- Hebrew,
- Arabic,
- Syriac,
- Thaana,
- Devanagari,
- Bengali,
- Gurmukhi,
- Gujarati,
- Oriya,
- Tamil,
- Telugu,
- Kannada,
- Malayalam,
- Sinhala,
- Thai,
- Lao,
- Tibetan,
- Myanmar,
- Georgian,
- HangulJamo,
- Ethiopic,
- Cherokee,
- UnifiedCanadianAboriginalSyllabics,
- Ogham,
- Runic,
- Tagalog,
- Hanunoo,
- Buhid,
- Tagbanwa,
- Khmer,
- Mongolian,
- Limbu,
- TaiLe,
- KhmerSymbols,
- PhoneticExtensions,
- LatinExtendedAdditional,
- GreekExtended,
- GeneralPunctuation,
- SuperscriptsAndSubscripts,
- CurrencySymbols,
- CombiningMarksForSymbols,
- LetterlikeSymbols,
- NumberForms,
- Arrows,
- MathematicalOperators,
- MiscellaneousTechnical,
- ControlPictures,
- OpticalCharacterRecognition,
- EnclosedAlphanumerics,
- BoxDrawing,
- BlockElements,
- GeometricShapes,
- MiscellaneousSymbols,
- Dingbats,
- MiscellaneousMathematicalSymbolsA,
- SupplementalArrowsA,
- BraillePatterns,
- SupplementalArrowsB,
- MiscellaneousMathematicalSymbolsB,
- SupplementalMathematicalOperators,
- MiscellaneousSymbolsAndArrows,
- CjkRadicalsSupplement,
- KangxiRadicals,
- IdeographicDescriptionCharacters,
- CjkSymbolsAndPunctuation,
- Hiragana,
- Katakana,
- Bopomofo,
- HangulCompatibilityJamo,
- Kanbun,
- BopomofoExtended,
- KatakanaPhoneticExtensions,
- EnclosedCjkLettersAndMonths,
- CjkCompatibility,
- CjkUnifiedIdeographsExtensionA,
- YijingHexagramSymbols,
- CjkUnifiedIdeographs,
- YiSyllables,
- YiRadicals,
- HangulSyllables,
- HighSurrogates,
- HighPrivateUseSurrogates,
- LowSurrogates,
- PrivateUseArea,
- CjkCompatibilityIdeographs,
- AlphabeticPresentationForms,
- ArabicPresentationFormsA,
- VariationSelectors,
- CombiningHalfMarks,
- CjkCompatibilityForms,
- SmallFormVariants,
- ArabicPresentationFormsB,
- HalfwidthAndFullwidthForms,
- Specials,
- LinearBSyllabary,
- LinearBIdeograms,
- AegeanNumbers,
- OldItalic,
- Gothic,
- Ugaritic,
- Deseret,
- Shavian,
- Osmanya,
- CypriotSyllabary,
- ByzantineMusicalSymbols,
- MusicalSymbols,
- TaiXuanJingSymbols,
- MathematicalAlphanumericSymbols,
- CjkUnifiedIdeographsExtensionB,
- CjkCompatibilityIdeographsSupplement,
- Tags,
- VariationSelectorsSupplement,
- SupplementaryPrivateUseAreaA,
- SupplementaryPrivateUseAreaB,
- }
+ public enum UnicodeBlock
+ {
+ BasicLatin,
+ Latin1Supplement,
+ LatinExtendedA,
+ LatinExtendedB,
+ IpaExtensions,
+ SpacingModifierLetters,
+ CombiningDiacriticalMarks,
+ Greek,
+ Cyrillic,
+ CyrillicSupplementary,
+ Armenian,
+ Hebrew,
+ Arabic,
+ Syriac,
+ Thaana,
+ Devanagari,
+ Bengali,
+ Gurmukhi,
+ Gujarati,
+ Oriya,
+ Tamil,
+ Telugu,
+ Kannada,
+ Malayalam,
+ Sinhala,
+ Thai,
+ Lao,
+ Tibetan,
+ Myanmar,
+ Georgian,
+ HangulJamo,
+ Ethiopic,
+ Cherokee,
+ UnifiedCanadianAboriginalSyllabics,
+ Ogham,
+ Runic,
+ Tagalog,
+ Hanunoo,
+ Buhid,
+ Tagbanwa,
+ Khmer,
+ Mongolian,
+ Limbu,
+ TaiLe,
+ KhmerSymbols,
+ PhoneticExtensions,
+ LatinExtendedAdditional,
+ GreekExtended,
+ GeneralPunctuation,
+ SuperscriptsAndSubscripts,
+ CurrencySymbols,
+ CombiningMarksForSymbols,
+ LetterlikeSymbols,
+ NumberForms,
+ Arrows,
+ MathematicalOperators,
+ MiscellaneousTechnical,
+ ControlPictures,
+ OpticalCharacterRecognition,
+ EnclosedAlphanumerics,
+ BoxDrawing,
+ BlockElements,
+ GeometricShapes,
+ MiscellaneousSymbols,
+ Dingbats,
+ MiscellaneousMathematicalSymbolsA,
+ SupplementalArrowsA,
+ BraillePatterns,
+ SupplementalArrowsB,
+ MiscellaneousMathematicalSymbolsB,
+ SupplementalMathematicalOperators,
+ MiscellaneousSymbolsAndArrows,
+ CjkRadicalsSupplement,
+ KangxiRadicals,
+ IdeographicDescriptionCharacters,
+ CjkSymbolsAndPunctuation,
+ Hiragana,
+ Katakana,
+ Bopomofo,
+ HangulCompatibilityJamo,
+ Kanbun,
+ BopomofoExtended,
+ KatakanaPhoneticExtensions,
+ EnclosedCjkLettersAndMonths,
+ CjkCompatibility,
+ CjkUnifiedIdeographsExtensionA,
+ YijingHexagramSymbols,
+ CjkUnifiedIdeographs,
+ YiSyllables,
+ YiRadicals,
+ HangulSyllables,
+ HighSurrogates,
+ HighPrivateUseSurrogates,
+ LowSurrogates,
+ PrivateUseArea,
+ CjkCompatibilityIdeographs,
+ AlphabeticPresentationForms,
+ ArabicPresentationFormsA,
+ VariationSelectors,
+ CombiningHalfMarks,
+ CjkCompatibilityForms,
+ SmallFormVariants,
+ ArabicPresentationFormsB,
+ HalfwidthAndFullwidthForms,
+ Specials,
+ LinearBSyllabary,
+ LinearBIdeograms,
+ AegeanNumbers,
+ OldItalic,
+ Gothic,
+ Ugaritic,
+ Deseret,
+ Shavian,
+ Osmanya,
+ CypriotSyllabary,
+ ByzantineMusicalSymbols,
+ MusicalSymbols,
+ TaiXuanJingSymbols,
+ MathematicalAlphanumericSymbols,
+ CjkUnifiedIdeographsExtensionB,
+ CjkCompatibilityIdeographsSupplement,
+ Tags,
+ VariationSelectorsSupplement,
+ SupplementaryPrivateUseAreaA,
+ SupplementaryPrivateUseAreaB,
+ }
}
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs
index 5895f68ae..26157483b 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs
@@ -1,67 +1,67 @@
using System;
+using System.IO;
using System.IO.Compression;
using System.Xml;
using NLangDetect.Core.Utils;
-using System.IO;
namespace NLangDetect.Core
{
- // TODO IMM HI: xml reader not tested
- public static class GenProfile
- {
- #region Public methods
-
- public static LangProfile load(string lang, string file)
+ // TODO IMM HI: xml reader not tested
+ public static class GenProfile
{
- LangProfile profile = new LangProfile(lang);
- TagExtractor tagextractor = new TagExtractor("abstract", 100);
- Stream inputStream = null;
-
- try
- {
- inputStream = File.OpenRead(file);
-
- string extension = Path.GetExtension(file) ?? "";
+ #region Public methods
- if (extension.ToUpper() == ".GZ")
+ public static LangProfile load(string lang, string file)
{
- inputStream = new GZipStream(inputStream, CompressionMode.Decompress);
- }
+ var profile = new LangProfile(lang);
+ var tagextractor = new TagExtractor("abstract", 100);
+ Stream inputStream = null;
- using (XmlReader xmlReader = XmlReader.Create(inputStream))
- {
- while (xmlReader.Read())
- {
- switch (xmlReader.NodeType)
+ try
{
- case XmlNodeType.Element:
- tagextractor.SetTag(xmlReader.Name);
- break;
+ inputStream = File.OpenRead(file);
+
+ string extension = Path.GetExtension(file) ?? "";
+
+ if (extension.ToUpper() == ".GZ")
+ {
+ inputStream = new GZipStream(inputStream, CompressionMode.Decompress);
+ }
- case XmlNodeType.Text:
- tagextractor.Add(xmlReader.Value);
- break;
+ using (var xmlReader = XmlReader.Create(inputStream))
+ {
+ while (xmlReader.Read())
+ {
+ switch (xmlReader.NodeType)
+ {
+ case XmlNodeType.Element:
+ tagextractor.SetTag(xmlReader.Name);
+ break;
- case XmlNodeType.EndElement:
- tagextractor.CloseTag(profile);
- break;
+ case XmlNodeType.Text:
+ tagextractor.Add(xmlReader.Value);
+ break;
+
+ case XmlNodeType.EndElement:
+ tagextractor.CloseTag(profile);
+ break;
+ }
+ }
+ }
+ }
+ finally
+ {
+ if (inputStream != null)
+ {
+ inputStream.Close();
+ }
}
- }
- }
- }
- finally
- {
- if (inputStream != null)
- {
- inputStream.Close();
- }
- }
- Console.WriteLine(lang + ": " + tagextractor.Count);
+ Console.WriteLine(lang + ": " + tagextractor.Count);
- return profile;
- }
+ return profile;
+ }
- #endregion
- }
+ #endregion
+ }
}
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs
index 32e50a219..6ed1efa88 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs
@@ -1,22 +1,22 @@
-using System;
+using System;
namespace NLangDetect.Core
{
- [Serializable]
- public class InternalException : Exception
- {
- #region Constructor(s)
-
- public InternalException(string message, Exception innerException)
- : base(message, innerException)
+ [Serializable]
+ public class InternalException : Exception
{
- }
+ #region Constructor(s)
- public InternalException(string message)
- : this(message, null)
- {
- }
+ public InternalException(string message, Exception innerException)
+ : base(message, innerException)
+ {
+ }
+
+ public InternalException(string message)
+ : this(message, null)
+ {
+ }
- #endregion
- }
+ #endregion
+ }
}
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs
index f4b4b153e..e15263c05 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs
@@ -2,44 +2,44 @@ using System.Globalization;
namespace NLangDetect.Core
{
- // TODO IMM HI: name??
- public class Language
- {
- #region Constructor(s)
-
- public Language(string name, double probability)
+ // TODO IMM HI: name??
+ public class Language
{
- Name = name;
- Probability = probability;
- }
+ #region Constructor(s)
- #endregion
+ public Language(string name, double probability)
+ {
+ Name = name;
+ Probability = probability;
+ }
- #region Object overrides
+ #endregion
- public override string ToString()
- {
- if (Name == null)
- {
- return "";
- }
-
- return
- string.Format(
- CultureInfo.InvariantCulture.NumberFormat,
- "{0}:{1:0.000000}",
- Name,
- Probability);
- }
+ #region Object overrides
- #endregion
+ public override string ToString()
+ {
+ if (Name == null)
+ {
+ return "";
+ }
- #region Properties
+ return
+ string.Format(
+ CultureInfo.InvariantCulture.NumberFormat,
+ "{0}:{1:0.000000}",
+ Name,
+ Probability);
+ }
- public string Name { get; set; }
+ #endregion
- public double Probability { get; set; }
+ #region Properties
- #endregion
- }
+ public string Name { get; set; }
+
+ public double Probability { get; set; }
+
+ #endregion
+ }
}
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/LanguageDetector.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/LanguageDetector.cs
index 044c7e759..a26f236a8 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/LanguageDetector.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/LanguageDetector.cs
@@ -1,4 +1,4 @@
-using System;
+using System;
using MediaBrowser.Model.Serialization;
namespace NLangDetect.Core
@@ -25,7 +25,7 @@ namespace NLangDetect.Core
{
if (string.IsNullOrEmpty(plainText)) { throw new ArgumentException("Argument can't be null nor empty.", nameof(plainText)); }
- Detector detector = DetectorFactory.Create(_DefaultAlpha);
+ var detector = DetectorFactory.Create(_DefaultAlpha);
detector.Append(plainText);
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs
index e0d066020..800858bca 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs
@@ -1,23 +1,23 @@
-using System;
+using System;
namespace NLangDetect.Core
{
- public class NLangDetectException : Exception
- {
- #region Constructor(s)
-
- public NLangDetectException(string message, ErrorCode errorCode)
- : base(message)
+ public class NLangDetectException : Exception
{
- ErrorCode = errorCode;
- }
+ #region Constructor(s)
- #endregion
+ public NLangDetectException(string message, ErrorCode errorCode)
+ : base(message)
+ {
+ ErrorCode = errorCode;
+ }
- #region Properties
+ #endregion
- public ErrorCode ErrorCode { get; private set; }
+ #region Properties
- #endregion
- }
+ public ErrorCode ErrorCode { get; private set; }
+
+ #endregion
+ }
}
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs
index c5a20dbf0..d7afb4113 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs
@@ -1,35 +1,33 @@
-using System;
+using System;
using System.Collections.Generic;
namespace NLangDetect.Core
{
- public class ProbVector
- {
- private readonly Dictionary<int, double> _dict = new Dictionary<int, double>();
-
- public double this[int key]
+ public class ProbVector
{
- get
- {
- double value;
-
- return _dict.TryGetValue(key, out value) ? value : 0.0;
- }
+ private readonly Dictionary<int, double> _dict = new Dictionary<int, double>();
- set
- {
- if (Math.Abs(value) < double.Epsilon)
+ public double this[int key]
{
- if (_dict.ContainsKey(key))
- {
- _dict.Remove(key);
- }
+ get
+ {
+ return _dict.TryGetValue(key, out var value) ? value : 0.0;
+ }
- return;
- }
+ set
+ {
+ if (Math.Abs(value) < double.Epsilon)
+ {
+ if (_dict.ContainsKey(key))
+ {
+ _dict.Remove(key);
+ }
- _dict[key] = value;
- }
+ return;
+ }
+
+ _dict[key] = value;
+ }
+ }
}
- }
}
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs
index 0413edfad..78b44e1fc 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs
@@ -59,8 +59,8 @@ namespace NLangDetect.Core.Utils
ICollection<string> keys = freq.Keys;
int roman = 0;
// TODO IMM HI: move up?
- Regex regex1 = new Regex("^[A-Za-z]$", RegexOptions.Compiled);
- List<string> keysToRemove = new List<string>();
+ var regex1 = new Regex("^[A-Za-z]$", RegexOptions.Compiled);
+ var keysToRemove = new List<string>();
foreach (string key in keys)
{
@@ -93,7 +93,7 @@ namespace NLangDetect.Core.Utils
ICollection<string> keys2 = freq.Keys;
// TODO IMM HI: move up?
- Regex regex2 = new Regex(".*[A-Za-z].*", RegexOptions.Compiled);
+ var regex2 = new Regex(".*[A-Za-z].*", RegexOptions.Compiled);
foreach (string key in keys2)
{
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs
index 1d605cc47..879c0a09b 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs
@@ -1,10 +1,9 @@
+using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
-using System.Reflection;
-using System.Text.RegularExpressions;
using System.Linq;
-using System;
+using System.Text.RegularExpressions;
namespace NLangDetect.Core.Utils
{
@@ -19,19 +18,17 @@ namespace NLangDetect.Core.Utils
public static string getString(string key)
{
- string value;
-
return
- _messages.TryGetValue(key, out value)
+ _messages.TryGetValue(key, out var value)
? value
: string.Format("!{0}!", key);
}
private static Dictionary<string, string> LoadMessages()
{
- var manifestName = typeof(Messages).Assembly.GetManifestResourceNames().FirstOrDefault(i => i.IndexOf("messages.properties", StringComparison.Ordinal) != -1) ;
+ var manifestName = typeof(Messages).Assembly.GetManifestResourceNames().FirstOrDefault(i => i.IndexOf("messages.properties", StringComparison.Ordinal) != -1);
- Stream messagesStream =
+ var messagesStream =
typeof(Messages).Assembly
.GetManifestResourceStream(manifestName);
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs
index b1738f7ca..2d29ec697 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs
@@ -6,14 +6,14 @@ using NLangDetect.Core.Extensions;
namespace NLangDetect.Core.Utils
{
- public class NGram
- {
- public const int GramsCount = 3;
+ public class NGram
+ {
+ public const int GramsCount = 3;
- private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE");
+ private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE");
- private static readonly string[] CjkClass =
- {
+ private static readonly string[] CjkClass =
+ {
#region CJK classes
Messages.getString("NGram.KANJI_1_0"),
@@ -146,185 +146,185 @@ namespace NLangDetect.Core.Utils
#endregion
};
- private static readonly Dictionary<char, char> _cjkMap;
+ private static readonly Dictionary<char, char> _cjkMap;
- private StringBuilder _grams;
- private bool _capitalword;
+ private StringBuilder _grams;
+ private bool _capitalword;
- #region Constructor(s)
+ #region Constructor(s)
- static NGram()
- {
- _cjkMap = new Dictionary<char, char>();
+ static NGram()
+ {
+ _cjkMap = new Dictionary<char, char>();
- foreach (string cjk_list in CjkClass)
- {
- char representative = cjk_list[0];
+ foreach (string cjk_list in CjkClass)
+ {
+ char representative = cjk_list[0];
- for (int i = 0; i < cjk_list.Length; i++)
- {
- _cjkMap.Add(cjk_list[i], representative);
+ for (int i = 0; i < cjk_list.Length; i++)
+ {
+ _cjkMap.Add(cjk_list[i], representative);
+ }
+ }
}
- }
- }
-
- public NGram()
- {
- _grams = new StringBuilder(" ");
- _capitalword = false;
- }
- #endregion
+ public NGram()
+ {
+ _grams = new StringBuilder(" ");
+ _capitalword = false;
+ }
- #region Public methods
+ #endregion
- public static char Normalize(char ch)
- {
- UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock();
+ #region Public methods
- if (!unicodeBlock.HasValue)
- {
- return ch;
- }
+ public static char Normalize(char ch)
+ {
+ UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock();
- switch (unicodeBlock.Value)
- {
- case UnicodeBlock.BasicLatin:
- {
- if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z')
+ if (!unicodeBlock.HasValue)
{
- return ' ';
+ return ch;
}
- break;
- }
-
- case UnicodeBlock.Latin1Supplement:
- {
- if (Latin1Excluded.IndexOf(ch) >= 0)
+ switch (unicodeBlock.Value)
{
- return ' ';
+ case UnicodeBlock.BasicLatin:
+ {
+ if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z')
+ {
+ return ' ';
+ }
+
+ break;
+ }
+
+ case UnicodeBlock.Latin1Supplement:
+ {
+ if (Latin1Excluded.IndexOf(ch) >= 0)
+ {
+ return ' ';
+ }
+
+ break;
+ }
+
+ case UnicodeBlock.GeneralPunctuation:
+ {
+ return ' ';
+ }
+
+ case UnicodeBlock.Arabic:
+ {
+ if (ch == '\u06cc')
+ {
+ return '\u064a';
+ }
+
+ break;
+ }
+
+ case UnicodeBlock.LatinExtendedAdditional:
+ {
+ if (ch >= '\u1ea0')
+ {
+ return '\u1ec3';
+ }
+
+ break;
+ }
+
+ case UnicodeBlock.Hiragana:
+ {
+ return '\u3042';
+ }
+
+ case UnicodeBlock.Katakana:
+ {
+ return '\u30a2';
+ }
+
+ case UnicodeBlock.Bopomofo:
+ case UnicodeBlock.BopomofoExtended:
+ {
+ return '\u3105';
+ }
+
+ case UnicodeBlock.CjkUnifiedIdeographs:
+ {
+ if (_cjkMap.ContainsKey(ch))
+ {
+ return _cjkMap[ch];
+ }
+
+ break;
+ }
+
+ case UnicodeBlock.HangulSyllables:
+ {
+ return '\uac00';
+ }
}
- break;
- }
-
- case UnicodeBlock.GeneralPunctuation:
- {
- return ' ';
- }
+ return ch;
+ }
- case UnicodeBlock.Arabic:
- {
- if (ch == '\u06cc')
+ public void AddChar(char ch)
+ {
+ ch = Normalize(ch);
+ char lastchar = _grams[_grams.Length - 1];
+ if (lastchar == ' ')
{
- return '\u064a';
+ _grams = new StringBuilder(" ");
+ _capitalword = false;
+ if (ch == ' ') return;
}
-
- break;
- }
-
- case UnicodeBlock.LatinExtendedAdditional:
- {
- if (ch >= '\u1ea0')
+ else if (_grams.Length >= GramsCount)
{
- return '\u1ec3';
+ _grams.Remove(0, 1);
}
+ _grams.Append(ch);
- break;
- }
-
- case UnicodeBlock.Hiragana:
- {
- return '\u3042';
- }
-
- case UnicodeBlock.Katakana:
- {
- return '\u30a2';
- }
-
- case UnicodeBlock.Bopomofo:
- case UnicodeBlock.BopomofoExtended:
- {
- return '\u3105';
- }
-
- case UnicodeBlock.CjkUnifiedIdeographs:
- {
- if (_cjkMap.ContainsKey(ch))
+ if (char.IsUpper(ch))
{
- return _cjkMap[ch];
+ if (char.IsUpper(lastchar)) _capitalword = true;
}
+ else
+ {
+ _capitalword = false;
+ }
+ }
- break;
- }
-
- case UnicodeBlock.HangulSyllables:
- {
- return '\uac00';
- }
- }
-
- return ch;
- }
+ public string Get(int n)
+ {
+ if (_capitalword)
+ {
+ return null;
+ }
- public void AddChar(char ch)
- {
- ch = Normalize(ch);
- char lastchar = _grams[_grams.Length - 1];
- if (lastchar == ' ')
- {
- _grams = new StringBuilder(" ");
- _capitalword = false;
- if (ch == ' ') return;
- }
- else if (_grams.Length >= GramsCount)
- {
- _grams.Remove(0, 1);
- }
- _grams.Append(ch);
-
- if (char.IsUpper(ch))
- {
- if (char.IsUpper(lastchar)) _capitalword = true;
- }
- else
- {
- _capitalword = false;
- }
- }
+ int len = _grams.Length;
- public string Get(int n)
- {
- if (_capitalword)
- {
- return null;
- }
+ if (n < 1 || n > 3 || len < n)
+ {
+ return null;
+ }
- int len = _grams.Length;
+ if (n == 1)
+ {
+ char ch = _grams[len - 1];
- if (n < 1 || n > 3 || len < n)
- {
- return null;
- }
+ if (ch == ' ')
+ {
+ return null;
+ }
- if (n == 1)
- {
- char ch = _grams[len - 1];
+ return ch.ToString();
+ }
- if (ch == ' ')
- {
- return null;
+ // TODO IMM HI: is ToString() here effective?
+ return _grams.ToString().SubSequence(len - n, len);
}
- return ch.ToString();
- }
-
- // TODO IMM HI: is ToString() here effective?
- return _grams.ToString().SubSequence(len - n, len);
+ #endregion
}
-
- #endregion
- }
}
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs
index 896fd0960..4441ecd0f 100644
--- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs
+++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs
@@ -2,75 +2,75 @@ using System.Text;
namespace NLangDetect.Core.Utils
{
- public class TagExtractor
- {
- // TODO IMM HI: do the really need to be internal?
- internal string Target;
- internal int Threshold;
- internal StringBuilder StringBuilder;
- internal string Tag;
-
- #region Constructor(s)
-
- public TagExtractor(string tag, int threshold)
+ public class TagExtractor
{
- Target = tag;
- Threshold = threshold;
- Count = 0;
- Clear();
- }
+ // TODO IMM HI: do the really need to be internal?
+ internal string Target;
+ internal int Threshold;
+ internal StringBuilder StringBuilder;
+ internal string Tag;
- #endregion
+ #region Constructor(s)
- #region Public methods
-
- public void Clear()
- {
- StringBuilder = new StringBuilder();
- Tag = null;
- }
+ public TagExtractor(string tag, int threshold)
+ {
+ Target = tag;
+ Threshold = threshold;
+ Count = 0;
+ Clear();
+ }
- public void SetTag(string tag)
- {
- Tag = tag;
- }
+ #endregion
- public void Add(string line)
- {
- if (Tag == Target && line != null)
- {
- StringBuilder.Append(line);
- }
- }
+ #region Public methods
- public void CloseTag(LangProfile profile)
- {
- if (profile != null && Tag == Target && StringBuilder.Length > Threshold)
- {
- var gram = new NGram();
+ public void Clear()
+ {
+ StringBuilder = new StringBuilder();
+ Tag = null;
+ }
- for (int i = 0; i < StringBuilder.Length; i++)
+ public void SetTag(string tag)
{
- gram.AddChar(StringBuilder[i]);
+ Tag = tag;
+ }
- for (int n = 1; n <= NGram.GramsCount; n++)
- {
- profile.Add(gram.Get(n));
- }
+ public void Add(string line)
+ {
+ if (Tag == Target && line != null)
+ {
+ StringBuilder.Append(line);
+ }
}
- Count++;
- }
+ public void CloseTag(LangProfile profile)
+ {
+ if (profile != null && Tag == Target && StringBuilder.Length > Threshold)
+ {
+ var gram = new NGram();
+
+ for (int i = 0; i < StringBuilder.Length; i++)
+ {
+ gram.AddChar(StringBuilder[i]);
- Clear();
- }
+ for (int n = 1; n <= NGram.GramsCount; n++)
+ {
+ profile.Add(gram.Get(n));
+ }
+ }
+
+ Count++;
+ }
- #endregion
+ Clear();
+ }
+
+ #endregion
- #region Properties
+ #region Properties
- public int Count { get; private set; }
+ public int Count { get; private set; }
- #endregion
- }
+ #endregion
+ }
}