diff options
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/NLangDetect')
16 files changed, 580 insertions, 587 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Detector.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Detector.cs index 507dd5e42..991ee8688 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Detector.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Detector.cs @@ -179,7 +179,7 @@ namespace NLangDetect.Core DetectBlock(); } - List<Language> list = SortProbability(_langprob); + var list = SortProbability(_langprob); return list; } @@ -250,7 +250,7 @@ namespace NLangDetect.Core _langprob = new double[_langlist.Count]; - Random rand = (_seed.HasValue ? new Random(_seed.Value) : new Random()); + var rand = (_seed.HasValue ? new Random(_seed.Value) : new Random()); for (int t = 0; t < _trialsCount; t++) { @@ -305,7 +305,7 @@ namespace NLangDetect.Core private List<string> ExtractNGrams() { var list = new List<string>(); - NGram ngram = new NGram(); + var ngram = new NGram(); for (int i = 0; i < _text.Length; i++) { diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs index 9d75b8356..08e98d62e 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs @@ -1,10 +1,8 @@ using System; using System.Collections.Generic; -using System.IO; -using System.IO.Compression; -using NLangDetect.Core.Utils; -using MediaBrowser.Model.Serialization; using System.Linq; +using MediaBrowser.Model.Serialization; +using NLangDetect.Core.Utils; namespace NLangDetect.Core { @@ -56,7 +54,7 @@ namespace NLangDetect.Core public static Detector Create(double alpha) { - Detector detector = CreateDetector(); + var detector = CreateDetector(); detector.SetAlpha(alpha); diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs index 3ffd3b2d9..87f07fc9c 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs @@ -1,15 +1,15 @@ -namespace NLangDetect.Core +namespace NLangDetect.Core { - public enum ErrorCode - { - NoTextError, - FormatError, - FileLoadError, - DuplicateLangError, - NeedLoadProfileError, - CantDetectError, - CantOpenTrainData, - TrainDataFormatError, - InitParamError, - } + public enum ErrorCode + { + NoTextError, + FormatError, + FileLoadError, + DuplicateLangError, + NeedLoadProfileError, + CantDetectError, + CantOpenTrainData, + TrainDataFormatError, + InitParamError, + } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs index cd77a30eb..6e58a0be8 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs @@ -1,14 +1,14 @@ -using System; +using System; namespace NLangDetect.Core.Extensions { - public static class CharExtensions - { - private const int MIN_CODE_POINT = 0x000000; - private const int MAX_CODE_POINT = 0x10ffff; + public static class CharExtensions + { + private const int MIN_CODE_POINT = 0x000000; + private const int MAX_CODE_POINT = 0x10ffff; - private static readonly int[] _unicodeBlockStarts = - { + private static readonly int[] _unicodeBlockStarts = + { #region Unicode block starts 0x0000, // Basic Latin @@ -165,8 +165,8 @@ namespace NLangDetect.Core.Extensions #endregion }; - private static readonly UnicodeBlock?[] _unicodeBlocks = - { + private static readonly UnicodeBlock?[] _unicodeBlocks = + { #region Unicode blocks UnicodeBlock.BasicLatin, UnicodeBlock.Latin1Supplement, @@ -322,53 +322,53 @@ namespace NLangDetect.Core.Extensions #endregion }; - #region Public methods + #region Public methods - /// <remarks> - /// Taken from JDK source: http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Character.java#Character.UnicodeBlock.0LATIN_EXTENDED_ADDITIONAL - /// </remarks> - public static UnicodeBlock? GetUnicodeBlock(this char ch) - { - int codePoint = ch; + /// <remarks> + /// Taken from JDK source: http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Character.java#Character.UnicodeBlock.0LATIN_EXTENDED_ADDITIONAL + /// </remarks> + public static UnicodeBlock? GetUnicodeBlock(this char ch) + { + int codePoint = ch; - if (!IsValidCodePoint(codePoint)) - { - throw new ArgumentException("Argument is not a valid code point.", nameof(ch)); - } + if (!IsValidCodePoint(codePoint)) + { + throw new ArgumentException("Argument is not a valid code point.", nameof(ch)); + } - int top, bottom, current; + int top, bottom, current; - bottom = 0; - top = _unicodeBlockStarts.Length; - current = top / 2; + bottom = 0; + top = _unicodeBlockStarts.Length; + current = top / 2; - // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] - while (top - bottom > 1) - { - if (codePoint >= _unicodeBlockStarts[current]) - { - bottom = current; - } - else - { - top = current; - } + // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] + while (top - bottom > 1) + { + if (codePoint >= _unicodeBlockStarts[current]) + { + bottom = current; + } + else + { + top = current; + } - current = (top + bottom) / 2; - } + current = (top + bottom) / 2; + } - return _unicodeBlocks[current]; - } + return _unicodeBlocks[current]; + } - #endregion + #endregion - #region Private helper methods + #region Private helper methods - private static bool IsValidCodePoint(int codePoint) - { - return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT; - } + private static bool IsValidCodePoint(int codePoint) + { + return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT; + } - #endregion - } + #endregion + } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs index d55ca80df..5b2fce60b 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs @@ -1,51 +1,51 @@ -using System; +using System; namespace NLangDetect.Core.Extensions { - public static class RandomExtensions - { - private const double _Epsilon = 2.22044604925031E-15; - - private static readonly object _mutex = new object(); - - private static double _nextNextGaussian; - private static bool _hasNextNextGaussian; - - /// <summary> - /// Returns the next pseudorandom, Gaussian ("normally") distributed double value with mean 0.0 and standard deviation 1.0 from this random number generator's sequence. - /// The general contract of nextGaussian is that one double value, chosen from (approximately) the usual normal distribution with mean 0.0 and standard deviation 1.0, is pseudorandomly generated and returned. - /// </summary> - /// <remarks> - /// Taken from: http://download.oracle.com/javase/6/docs/api/java/util/Random.html (nextGaussian()) - /// </remarks> - public static double NextGaussian(this Random random) + public static class RandomExtensions { - lock (_mutex) - { - if (_hasNextNextGaussian) - { - _hasNextNextGaussian = false; + private const double _Epsilon = 2.22044604925031E-15; - return _nextNextGaussian; - } + private static readonly object _mutex = new object(); - double v1, v2, s; + private static double _nextNextGaussian; + private static bool _hasNextNextGaussian; - do + /// <summary> + /// Returns the next pseudorandom, Gaussian ("normally") distributed double value with mean 0.0 and standard deviation 1.0 from this random number generator's sequence. + /// The general contract of nextGaussian is that one double value, chosen from (approximately) the usual normal distribution with mean 0.0 and standard deviation 1.0, is pseudorandomly generated and returned. + /// </summary> + /// <remarks> + /// Taken from: http://download.oracle.com/javase/6/docs/api/java/util/Random.html (nextGaussian()) + /// </remarks> + public static double NextGaussian(this Random random) { - v1 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0 - v2 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0 - s = v1 * v1 + v2 * v2; - } - while (s >= 1.0 || Math.Abs(s - 0.0) < _Epsilon); + lock (_mutex) + { + if (_hasNextNextGaussian) + { + _hasNextNextGaussian = false; + + return _nextNextGaussian; + } - double multiplier = Math.Sqrt(-2.0 * Math.Log(s) / s); + double v1, v2, s; - _nextNextGaussian = v2 * multiplier; - _hasNextNextGaussian = true; + do + { + v1 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0 + v2 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0 + s = v1 * v1 + v2 * v2; + } + while (s >= 1.0 || Math.Abs(s - 0.0) < _Epsilon); - return v1 * multiplier; - } + double multiplier = Math.Sqrt(-2.0 * Math.Log(s) / s); + + _nextNextGaussian = v2 * multiplier; + _hasNextNextGaussian = true; + + return v1 * multiplier; + } + } } - } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs index 71b5de75e..bcc81f968 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs @@ -1,131 +1,131 @@ -namespace NLangDetect.Core.Extensions +namespace NLangDetect.Core.Extensions { - public enum UnicodeBlock - { - BasicLatin, - Latin1Supplement, - LatinExtendedA, - LatinExtendedB, - IpaExtensions, - SpacingModifierLetters, - CombiningDiacriticalMarks, - Greek, - Cyrillic, - CyrillicSupplementary, - Armenian, - Hebrew, - Arabic, - Syriac, - Thaana, - Devanagari, - Bengali, - Gurmukhi, - Gujarati, - Oriya, - Tamil, - Telugu, - Kannada, - Malayalam, - Sinhala, - Thai, - Lao, - Tibetan, - Myanmar, - Georgian, - HangulJamo, - Ethiopic, - Cherokee, - UnifiedCanadianAboriginalSyllabics, - Ogham, - Runic, - Tagalog, - Hanunoo, - Buhid, - Tagbanwa, - Khmer, - Mongolian, - Limbu, - TaiLe, - KhmerSymbols, - PhoneticExtensions, - LatinExtendedAdditional, - GreekExtended, - GeneralPunctuation, - SuperscriptsAndSubscripts, - CurrencySymbols, - CombiningMarksForSymbols, - LetterlikeSymbols, - NumberForms, - Arrows, - MathematicalOperators, - MiscellaneousTechnical, - ControlPictures, - OpticalCharacterRecognition, - EnclosedAlphanumerics, - BoxDrawing, - BlockElements, - GeometricShapes, - MiscellaneousSymbols, - Dingbats, - MiscellaneousMathematicalSymbolsA, - SupplementalArrowsA, - BraillePatterns, - SupplementalArrowsB, - MiscellaneousMathematicalSymbolsB, - SupplementalMathematicalOperators, - MiscellaneousSymbolsAndArrows, - CjkRadicalsSupplement, - KangxiRadicals, - IdeographicDescriptionCharacters, - CjkSymbolsAndPunctuation, - Hiragana, - Katakana, - Bopomofo, - HangulCompatibilityJamo, - Kanbun, - BopomofoExtended, - KatakanaPhoneticExtensions, - EnclosedCjkLettersAndMonths, - CjkCompatibility, - CjkUnifiedIdeographsExtensionA, - YijingHexagramSymbols, - CjkUnifiedIdeographs, - YiSyllables, - YiRadicals, - HangulSyllables, - HighSurrogates, - HighPrivateUseSurrogates, - LowSurrogates, - PrivateUseArea, - CjkCompatibilityIdeographs, - AlphabeticPresentationForms, - ArabicPresentationFormsA, - VariationSelectors, - CombiningHalfMarks, - CjkCompatibilityForms, - SmallFormVariants, - ArabicPresentationFormsB, - HalfwidthAndFullwidthForms, - Specials, - LinearBSyllabary, - LinearBIdeograms, - AegeanNumbers, - OldItalic, - Gothic, - Ugaritic, - Deseret, - Shavian, - Osmanya, - CypriotSyllabary, - ByzantineMusicalSymbols, - MusicalSymbols, - TaiXuanJingSymbols, - MathematicalAlphanumericSymbols, - CjkUnifiedIdeographsExtensionB, - CjkCompatibilityIdeographsSupplement, - Tags, - VariationSelectorsSupplement, - SupplementaryPrivateUseAreaA, - SupplementaryPrivateUseAreaB, - } + public enum UnicodeBlock + { + BasicLatin, + Latin1Supplement, + LatinExtendedA, + LatinExtendedB, + IpaExtensions, + SpacingModifierLetters, + CombiningDiacriticalMarks, + Greek, + Cyrillic, + CyrillicSupplementary, + Armenian, + Hebrew, + Arabic, + Syriac, + Thaana, + Devanagari, + Bengali, + Gurmukhi, + Gujarati, + Oriya, + Tamil, + Telugu, + Kannada, + Malayalam, + Sinhala, + Thai, + Lao, + Tibetan, + Myanmar, + Georgian, + HangulJamo, + Ethiopic, + Cherokee, + UnifiedCanadianAboriginalSyllabics, + Ogham, + Runic, + Tagalog, + Hanunoo, + Buhid, + Tagbanwa, + Khmer, + Mongolian, + Limbu, + TaiLe, + KhmerSymbols, + PhoneticExtensions, + LatinExtendedAdditional, + GreekExtended, + GeneralPunctuation, + SuperscriptsAndSubscripts, + CurrencySymbols, + CombiningMarksForSymbols, + LetterlikeSymbols, + NumberForms, + Arrows, + MathematicalOperators, + MiscellaneousTechnical, + ControlPictures, + OpticalCharacterRecognition, + EnclosedAlphanumerics, + BoxDrawing, + BlockElements, + GeometricShapes, + MiscellaneousSymbols, + Dingbats, + MiscellaneousMathematicalSymbolsA, + SupplementalArrowsA, + BraillePatterns, + SupplementalArrowsB, + MiscellaneousMathematicalSymbolsB, + SupplementalMathematicalOperators, + MiscellaneousSymbolsAndArrows, + CjkRadicalsSupplement, + KangxiRadicals, + IdeographicDescriptionCharacters, + CjkSymbolsAndPunctuation, + Hiragana, + Katakana, + Bopomofo, + HangulCompatibilityJamo, + Kanbun, + BopomofoExtended, + KatakanaPhoneticExtensions, + EnclosedCjkLettersAndMonths, + CjkCompatibility, + CjkUnifiedIdeographsExtensionA, + YijingHexagramSymbols, + CjkUnifiedIdeographs, + YiSyllables, + YiRadicals, + HangulSyllables, + HighSurrogates, + HighPrivateUseSurrogates, + LowSurrogates, + PrivateUseArea, + CjkCompatibilityIdeographs, + AlphabeticPresentationForms, + ArabicPresentationFormsA, + VariationSelectors, + CombiningHalfMarks, + CjkCompatibilityForms, + SmallFormVariants, + ArabicPresentationFormsB, + HalfwidthAndFullwidthForms, + Specials, + LinearBSyllabary, + LinearBIdeograms, + AegeanNumbers, + OldItalic, + Gothic, + Ugaritic, + Deseret, + Shavian, + Osmanya, + CypriotSyllabary, + ByzantineMusicalSymbols, + MusicalSymbols, + TaiXuanJingSymbols, + MathematicalAlphanumericSymbols, + CjkUnifiedIdeographsExtensionB, + CjkCompatibilityIdeographsSupplement, + Tags, + VariationSelectorsSupplement, + SupplementaryPrivateUseAreaA, + SupplementaryPrivateUseAreaB, + } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs index 5895f68ae..26157483b 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs @@ -1,67 +1,67 @@ using System; +using System.IO; using System.IO.Compression; using System.Xml; using NLangDetect.Core.Utils; -using System.IO; namespace NLangDetect.Core { - // TODO IMM HI: xml reader not tested - public static class GenProfile - { - #region Public methods - - public static LangProfile load(string lang, string file) + // TODO IMM HI: xml reader not tested + public static class GenProfile { - LangProfile profile = new LangProfile(lang); - TagExtractor tagextractor = new TagExtractor("abstract", 100); - Stream inputStream = null; - - try - { - inputStream = File.OpenRead(file); - - string extension = Path.GetExtension(file) ?? ""; + #region Public methods - if (extension.ToUpper() == ".GZ") + public static LangProfile load(string lang, string file) { - inputStream = new GZipStream(inputStream, CompressionMode.Decompress); - } + var profile = new LangProfile(lang); + var tagextractor = new TagExtractor("abstract", 100); + Stream inputStream = null; - using (XmlReader xmlReader = XmlReader.Create(inputStream)) - { - while (xmlReader.Read()) - { - switch (xmlReader.NodeType) + try { - case XmlNodeType.Element: - tagextractor.SetTag(xmlReader.Name); - break; + inputStream = File.OpenRead(file); + + string extension = Path.GetExtension(file) ?? ""; + + if (extension.ToUpper() == ".GZ") + { + inputStream = new GZipStream(inputStream, CompressionMode.Decompress); + } - case XmlNodeType.Text: - tagextractor.Add(xmlReader.Value); - break; + using (var xmlReader = XmlReader.Create(inputStream)) + { + while (xmlReader.Read()) + { + switch (xmlReader.NodeType) + { + case XmlNodeType.Element: + tagextractor.SetTag(xmlReader.Name); + break; - case XmlNodeType.EndElement: - tagextractor.CloseTag(profile); - break; + case XmlNodeType.Text: + tagextractor.Add(xmlReader.Value); + break; + + case XmlNodeType.EndElement: + tagextractor.CloseTag(profile); + break; + } + } + } + } + finally + { + if (inputStream != null) + { + inputStream.Close(); + } } - } - } - } - finally - { - if (inputStream != null) - { - inputStream.Close(); - } - } - Console.WriteLine(lang + ": " + tagextractor.Count); + Console.WriteLine(lang + ": " + tagextractor.Count); - return profile; - } + return profile; + } - #endregion - } + #endregion + } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs index 32e50a219..6ed1efa88 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs @@ -1,22 +1,22 @@ -using System; +using System; namespace NLangDetect.Core { - [Serializable] - public class InternalException : Exception - { - #region Constructor(s) - - public InternalException(string message, Exception innerException) - : base(message, innerException) + [Serializable] + public class InternalException : Exception { - } + #region Constructor(s) - public InternalException(string message) - : this(message, null) - { - } + public InternalException(string message, Exception innerException) + : base(message, innerException) + { + } + + public InternalException(string message) + : this(message, null) + { + } - #endregion - } + #endregion + } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs index f4b4b153e..e15263c05 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs @@ -2,44 +2,44 @@ using System.Globalization; namespace NLangDetect.Core { - // TODO IMM HI: name?? - public class Language - { - #region Constructor(s) - - public Language(string name, double probability) + // TODO IMM HI: name?? + public class Language { - Name = name; - Probability = probability; - } + #region Constructor(s) - #endregion + public Language(string name, double probability) + { + Name = name; + Probability = probability; + } - #region Object overrides + #endregion - public override string ToString() - { - if (Name == null) - { - return ""; - } - - return - string.Format( - CultureInfo.InvariantCulture.NumberFormat, - "{0}:{1:0.000000}", - Name, - Probability); - } + #region Object overrides - #endregion + public override string ToString() + { + if (Name == null) + { + return ""; + } - #region Properties + return + string.Format( + CultureInfo.InvariantCulture.NumberFormat, + "{0}:{1:0.000000}", + Name, + Probability); + } - public string Name { get; set; } + #endregion - public double Probability { get; set; } + #region Properties - #endregion - } + public string Name { get; set; } + + public double Probability { get; set; } + + #endregion + } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/LanguageDetector.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/LanguageDetector.cs index 044c7e759..a26f236a8 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/LanguageDetector.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/LanguageDetector.cs @@ -1,4 +1,4 @@ -using System; +using System; using MediaBrowser.Model.Serialization; namespace NLangDetect.Core @@ -25,7 +25,7 @@ namespace NLangDetect.Core { if (string.IsNullOrEmpty(plainText)) { throw new ArgumentException("Argument can't be null nor empty.", nameof(plainText)); } - Detector detector = DetectorFactory.Create(_DefaultAlpha); + var detector = DetectorFactory.Create(_DefaultAlpha); detector.Append(plainText); diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs index e0d066020..800858bca 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs @@ -1,23 +1,23 @@ -using System; +using System; namespace NLangDetect.Core { - public class NLangDetectException : Exception - { - #region Constructor(s) - - public NLangDetectException(string message, ErrorCode errorCode) - : base(message) + public class NLangDetectException : Exception { - ErrorCode = errorCode; - } + #region Constructor(s) - #endregion + public NLangDetectException(string message, ErrorCode errorCode) + : base(message) + { + ErrorCode = errorCode; + } - #region Properties + #endregion - public ErrorCode ErrorCode { get; private set; } + #region Properties - #endregion - } + public ErrorCode ErrorCode { get; private set; } + + #endregion + } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs index c5a20dbf0..d7afb4113 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs @@ -1,35 +1,33 @@ -using System; +using System; using System.Collections.Generic; namespace NLangDetect.Core { - public class ProbVector - { - private readonly Dictionary<int, double> _dict = new Dictionary<int, double>(); - - public double this[int key] + public class ProbVector { - get - { - double value; - - return _dict.TryGetValue(key, out value) ? value : 0.0; - } + private readonly Dictionary<int, double> _dict = new Dictionary<int, double>(); - set - { - if (Math.Abs(value) < double.Epsilon) + public double this[int key] { - if (_dict.ContainsKey(key)) - { - _dict.Remove(key); - } + get + { + return _dict.TryGetValue(key, out var value) ? value : 0.0; + } - return; - } + set + { + if (Math.Abs(value) < double.Epsilon) + { + if (_dict.ContainsKey(key)) + { + _dict.Remove(key); + } - _dict[key] = value; - } + return; + } + + _dict[key] = value; + } + } } - } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs index 0413edfad..78b44e1fc 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/LangProfile.cs @@ -59,8 +59,8 @@ namespace NLangDetect.Core.Utils ICollection<string> keys = freq.Keys; int roman = 0; // TODO IMM HI: move up? - Regex regex1 = new Regex("^[A-Za-z]$", RegexOptions.Compiled); - List<string> keysToRemove = new List<string>(); + var regex1 = new Regex("^[A-Za-z]$", RegexOptions.Compiled); + var keysToRemove = new List<string>(); foreach (string key in keys) { @@ -93,7 +93,7 @@ namespace NLangDetect.Core.Utils ICollection<string> keys2 = freq.Keys; // TODO IMM HI: move up? - Regex regex2 = new Regex(".*[A-Za-z].*", RegexOptions.Compiled); + var regex2 = new Regex(".*[A-Za-z].*", RegexOptions.Compiled); foreach (string key in keys2) { diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs index 1d605cc47..879c0a09b 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs @@ -1,10 +1,9 @@ +using System; using System.Collections.Generic; using System.Globalization; using System.IO; -using System.Reflection; -using System.Text.RegularExpressions; using System.Linq; -using System; +using System.Text.RegularExpressions; namespace NLangDetect.Core.Utils { @@ -19,19 +18,17 @@ namespace NLangDetect.Core.Utils public static string getString(string key) { - string value; - return - _messages.TryGetValue(key, out value) + _messages.TryGetValue(key, out var value) ? value : string.Format("!{0}!", key); } private static Dictionary<string, string> LoadMessages() { - var manifestName = typeof(Messages).Assembly.GetManifestResourceNames().FirstOrDefault(i => i.IndexOf("messages.properties", StringComparison.Ordinal) != -1) ; + var manifestName = typeof(Messages).Assembly.GetManifestResourceNames().FirstOrDefault(i => i.IndexOf("messages.properties", StringComparison.Ordinal) != -1); - Stream messagesStream = + var messagesStream = typeof(Messages).Assembly .GetManifestResourceStream(manifestName); diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs index b1738f7ca..2d29ec697 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs @@ -6,14 +6,14 @@ using NLangDetect.Core.Extensions; namespace NLangDetect.Core.Utils { - public class NGram - { - public const int GramsCount = 3; + public class NGram + { + public const int GramsCount = 3; - private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE"); + private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE"); - private static readonly string[] CjkClass = - { + private static readonly string[] CjkClass = + { #region CJK classes Messages.getString("NGram.KANJI_1_0"), @@ -146,185 +146,185 @@ namespace NLangDetect.Core.Utils #endregion }; - private static readonly Dictionary<char, char> _cjkMap; + private static readonly Dictionary<char, char> _cjkMap; - private StringBuilder _grams; - private bool _capitalword; + private StringBuilder _grams; + private bool _capitalword; - #region Constructor(s) + #region Constructor(s) - static NGram() - { - _cjkMap = new Dictionary<char, char>(); + static NGram() + { + _cjkMap = new Dictionary<char, char>(); - foreach (string cjk_list in CjkClass) - { - char representative = cjk_list[0]; + foreach (string cjk_list in CjkClass) + { + char representative = cjk_list[0]; - for (int i = 0; i < cjk_list.Length; i++) - { - _cjkMap.Add(cjk_list[i], representative); + for (int i = 0; i < cjk_list.Length; i++) + { + _cjkMap.Add(cjk_list[i], representative); + } + } } - } - } - - public NGram() - { - _grams = new StringBuilder(" "); - _capitalword = false; - } - #endregion + public NGram() + { + _grams = new StringBuilder(" "); + _capitalword = false; + } - #region Public methods + #endregion - public static char Normalize(char ch) - { - UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock(); + #region Public methods - if (!unicodeBlock.HasValue) - { - return ch; - } + public static char Normalize(char ch) + { + UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock(); - switch (unicodeBlock.Value) - { - case UnicodeBlock.BasicLatin: - { - if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') + if (!unicodeBlock.HasValue) { - return ' '; + return ch; } - break; - } - - case UnicodeBlock.Latin1Supplement: - { - if (Latin1Excluded.IndexOf(ch) >= 0) + switch (unicodeBlock.Value) { - return ' '; + case UnicodeBlock.BasicLatin: + { + if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') + { + return ' '; + } + + break; + } + + case UnicodeBlock.Latin1Supplement: + { + if (Latin1Excluded.IndexOf(ch) >= 0) + { + return ' '; + } + + break; + } + + case UnicodeBlock.GeneralPunctuation: + { + return ' '; + } + + case UnicodeBlock.Arabic: + { + if (ch == '\u06cc') + { + return '\u064a'; + } + + break; + } + + case UnicodeBlock.LatinExtendedAdditional: + { + if (ch >= '\u1ea0') + { + return '\u1ec3'; + } + + break; + } + + case UnicodeBlock.Hiragana: + { + return '\u3042'; + } + + case UnicodeBlock.Katakana: + { + return '\u30a2'; + } + + case UnicodeBlock.Bopomofo: + case UnicodeBlock.BopomofoExtended: + { + return '\u3105'; + } + + case UnicodeBlock.CjkUnifiedIdeographs: + { + if (_cjkMap.ContainsKey(ch)) + { + return _cjkMap[ch]; + } + + break; + } + + case UnicodeBlock.HangulSyllables: + { + return '\uac00'; + } } - break; - } - - case UnicodeBlock.GeneralPunctuation: - { - return ' '; - } + return ch; + } - case UnicodeBlock.Arabic: - { - if (ch == '\u06cc') + public void AddChar(char ch) + { + ch = Normalize(ch); + char lastchar = _grams[_grams.Length - 1]; + if (lastchar == ' ') { - return '\u064a'; + _grams = new StringBuilder(" "); + _capitalword = false; + if (ch == ' ') return; } - - break; - } - - case UnicodeBlock.LatinExtendedAdditional: - { - if (ch >= '\u1ea0') + else if (_grams.Length >= GramsCount) { - return '\u1ec3'; + _grams.Remove(0, 1); } + _grams.Append(ch); - break; - } - - case UnicodeBlock.Hiragana: - { - return '\u3042'; - } - - case UnicodeBlock.Katakana: - { - return '\u30a2'; - } - - case UnicodeBlock.Bopomofo: - case UnicodeBlock.BopomofoExtended: - { - return '\u3105'; - } - - case UnicodeBlock.CjkUnifiedIdeographs: - { - if (_cjkMap.ContainsKey(ch)) + if (char.IsUpper(ch)) { - return _cjkMap[ch]; + if (char.IsUpper(lastchar)) _capitalword = true; } + else + { + _capitalword = false; + } + } - break; - } - - case UnicodeBlock.HangulSyllables: - { - return '\uac00'; - } - } - - return ch; - } + public string Get(int n) + { + if (_capitalword) + { + return null; + } - public void AddChar(char ch) - { - ch = Normalize(ch); - char lastchar = _grams[_grams.Length - 1]; - if (lastchar == ' ') - { - _grams = new StringBuilder(" "); - _capitalword = false; - if (ch == ' ') return; - } - else if (_grams.Length >= GramsCount) - { - _grams.Remove(0, 1); - } - _grams.Append(ch); - - if (char.IsUpper(ch)) - { - if (char.IsUpper(lastchar)) _capitalword = true; - } - else - { - _capitalword = false; - } - } + int len = _grams.Length; - public string Get(int n) - { - if (_capitalword) - { - return null; - } + if (n < 1 || n > 3 || len < n) + { + return null; + } - int len = _grams.Length; + if (n == 1) + { + char ch = _grams[len - 1]; - if (n < 1 || n > 3 || len < n) - { - return null; - } + if (ch == ' ') + { + return null; + } - if (n == 1) - { - char ch = _grams[len - 1]; + return ch.ToString(); + } - if (ch == ' ') - { - return null; + // TODO IMM HI: is ToString() here effective? + return _grams.ToString().SubSequence(len - n, len); } - return ch.ToString(); - } - - // TODO IMM HI: is ToString() here effective? - return _grams.ToString().SubSequence(len - n, len); + #endregion } - - #endregion - } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs index 896fd0960..4441ecd0f 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs @@ -2,75 +2,75 @@ using System.Text; namespace NLangDetect.Core.Utils { - public class TagExtractor - { - // TODO IMM HI: do the really need to be internal? - internal string Target; - internal int Threshold; - internal StringBuilder StringBuilder; - internal string Tag; - - #region Constructor(s) - - public TagExtractor(string tag, int threshold) + public class TagExtractor { - Target = tag; - Threshold = threshold; - Count = 0; - Clear(); - } + // TODO IMM HI: do the really need to be internal? + internal string Target; + internal int Threshold; + internal StringBuilder StringBuilder; + internal string Tag; - #endregion + #region Constructor(s) - #region Public methods - - public void Clear() - { - StringBuilder = new StringBuilder(); - Tag = null; - } + public TagExtractor(string tag, int threshold) + { + Target = tag; + Threshold = threshold; + Count = 0; + Clear(); + } - public void SetTag(string tag) - { - Tag = tag; - } + #endregion - public void Add(string line) - { - if (Tag == Target && line != null) - { - StringBuilder.Append(line); - } - } + #region Public methods - public void CloseTag(LangProfile profile) - { - if (profile != null && Tag == Target && StringBuilder.Length > Threshold) - { - var gram = new NGram(); + public void Clear() + { + StringBuilder = new StringBuilder(); + Tag = null; + } - for (int i = 0; i < StringBuilder.Length; i++) + public void SetTag(string tag) { - gram.AddChar(StringBuilder[i]); + Tag = tag; + } - for (int n = 1; n <= NGram.GramsCount; n++) - { - profile.Add(gram.Get(n)); - } + public void Add(string line) + { + if (Tag == Target && line != null) + { + StringBuilder.Append(line); + } } - Count++; - } + public void CloseTag(LangProfile profile) + { + if (profile != null && Tag == Target && StringBuilder.Length > Threshold) + { + var gram = new NGram(); + + for (int i = 0; i < StringBuilder.Length; i++) + { + gram.AddChar(StringBuilder[i]); - Clear(); - } + for (int n = 1; n <= NGram.GramsCount; n++) + { + profile.Add(gram.Get(n)); + } + } + + Count++; + } - #endregion + Clear(); + } + + #endregion - #region Properties + #region Properties - public int Count { get; private set; } + public int Count { get; private set; } - #endregion - } + #endregion + } } |
