diff options
| author | Tthecreator <epostvanthomas@kpnmail.nl> | 2019-01-22 15:22:42 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-01-22 15:22:42 +0000 |
| commit | 189b99df16bd4c93cc96422d7282d01d9ff5b82f (patch) | |
| tree | 26d7da95fe3e3b2772b8b39a2463a6c0ac7652fc /Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions | |
| parent | a00c0defa8cb22774f5dc8a7d566eb36ac7307e8 (diff) | |
| parent | edcfd8b565f632088c8b1f826db8e2fbecf9790d (diff) | |
Merge pull request #1 from jellyfin/dev
Update from jellyfin repo
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions')
4 files changed, 0 insertions, 581 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs deleted file mode 100644 index cd77a30eb..000000000 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs +++ /dev/null @@ -1,374 +0,0 @@ -using System; - -namespace NLangDetect.Core.Extensions -{ - public static class CharExtensions - { - private const int MIN_CODE_POINT = 0x000000; - private const int MAX_CODE_POINT = 0x10ffff; - - private static readonly int[] _unicodeBlockStarts = - { - #region Unicode block starts - - 0x0000, // Basic Latin - 0x0080, // Latin-1 Supplement - 0x0100, // Latin Extended-A - 0x0180, // Latin Extended-B - 0x0250, // IPA Extensions - 0x02B0, // Spacing Modifier Letters - 0x0300, // Combining Diacritical Marks - 0x0370, // Greek and Coptic - 0x0400, // Cyrillic - 0x0500, // Cyrillic Supplementary - 0x0530, // Armenian - 0x0590, // Hebrew - 0x0600, // Arabic - 0x0700, // Syriac - 0x0750, // unassigned - 0x0780, // Thaana - 0x07C0, // unassigned - 0x0900, // Devanagari - 0x0980, // Bengali - 0x0A00, // Gurmukhi - 0x0A80, // Gujarati - 0x0B00, // Oriya - 0x0B80, // Tamil - 0x0C00, // Telugu - 0x0C80, // Kannada - 0x0D00, // Malayalam - 0x0D80, // Sinhala - 0x0E00, // Thai - 0x0E80, // Lao - 0x0F00, // Tibetan - 0x1000, // Myanmar - 0x10A0, // Georgian - 0x1100, // Hangul Jamo - 0x1200, // Ethiopic - 0x1380, // unassigned - 0x13A0, // Cherokee - 0x1400, // Unified Canadian Aboriginal Syllabics - 0x1680, // Ogham - 0x16A0, // Runic - 0x1700, // Tagalog - 0x1720, // Hanunoo - 0x1740, // Buhid - 0x1760, // Tagbanwa - 0x1780, // Khmer - 0x1800, // Mongolian - 0x18B0, // unassigned - 0x1900, // Limbu - 0x1950, // Tai Le - 0x1980, // unassigned - 0x19E0, // Khmer Symbols - 0x1A00, // unassigned - 0x1D00, // Phonetic Extensions - 0x1D80, // unassigned - 0x1E00, // Latin Extended Additional - 0x1F00, // Greek Extended - 0x2000, // General Punctuation - 0x2070, // Superscripts and Subscripts - 0x20A0, // Currency Symbols - 0x20D0, // Combining Diacritical Marks for Symbols - 0x2100, // Letterlike Symbols - 0x2150, // Number Forms - 0x2190, // Arrows - 0x2200, // Mathematical Operators - 0x2300, // Miscellaneous Technical - 0x2400, // Control Pictures - 0x2440, // Optical Character Recognition - 0x2460, // Enclosed Alphanumerics - 0x2500, // Box Drawing - 0x2580, // Block Elements - 0x25A0, // Geometric Shapes - 0x2600, // Miscellaneous Symbols - 0x2700, // Dingbats - 0x27C0, // Miscellaneous Mathematical Symbols-A - 0x27F0, // Supplemental Arrows-A - 0x2800, // Braille Patterns - 0x2900, // Supplemental Arrows-B - 0x2980, // Miscellaneous Mathematical Symbols-B - 0x2A00, // Supplemental Mathematical Operators - 0x2B00, // Miscellaneous Symbols and Arrows - 0x2C00, // unassigned - 0x2E80, // CJK Radicals Supplement - 0x2F00, // Kangxi Radicals - 0x2FE0, // unassigned - 0x2FF0, // Ideographic Description Characters - 0x3000, // CJK Symbols and Punctuation - 0x3040, // Hiragana - 0x30A0, // Katakana - 0x3100, // Bopomofo - 0x3130, // Hangul Compatibility Jamo - 0x3190, // Kanbun - 0x31A0, // Bopomofo Extended - 0x31C0, // unassigned - 0x31F0, // Katakana Phonetic Extensions - 0x3200, // Enclosed CJK Letters and Months - 0x3300, // CJK Compatibility - 0x3400, // CJK Unified Ideographs Extension A - 0x4DC0, // Yijing Hexagram Symbols - 0x4E00, // CJK Unified Ideographs - 0xA000, // Yi Syllables - 0xA490, // Yi Radicals - 0xA4D0, // unassigned - 0xAC00, // Hangul Syllables - 0xD7B0, // unassigned - 0xD800, // High Surrogates - 0xDB80, // High Private Use Surrogates - 0xDC00, // Low Surrogates - 0xE000, // Private Use - 0xF900, // CJK Compatibility Ideographs - 0xFB00, // Alphabetic Presentation Forms - 0xFB50, // Arabic Presentation Forms-A - 0xFE00, // Variation Selectors - 0xFE10, // unassigned - 0xFE20, // Combining Half Marks - 0xFE30, // CJK Compatibility Forms - 0xFE50, // Small Form Variants - 0xFE70, // Arabic Presentation Forms-B - 0xFF00, // Halfwidth and Fullwidth Forms - 0xFFF0, // Specials - 0x10000, // Linear B Syllabary - 0x10080, // Linear B Ideograms - 0x10100, // Aegean Numbers - 0x10140, // unassigned - 0x10300, // Old Italic - 0x10330, // Gothic - 0x10350, // unassigned - 0x10380, // Ugaritic - 0x103A0, // unassigned - 0x10400, // Deseret - 0x10450, // Shavian - 0x10480, // Osmanya - 0x104B0, // unassigned - 0x10800, // Cypriot Syllabary - 0x10840, // unassigned - 0x1D000, // Byzantine Musical Symbols - 0x1D100, // Musical Symbols - 0x1D200, // unassigned - 0x1D300, // Tai Xuan Jing Symbols - 0x1D360, // unassigned - 0x1D400, // Mathematical Alphanumeric Symbols - 0x1D800, // unassigned - 0x20000, // CJK Unified Ideographs Extension B - 0x2A6E0, // unassigned - 0x2F800, // CJK Compatibility Ideographs Supplement - 0x2FA20, // unassigned - 0xE0000, // Tags - 0xE0080, // unassigned - 0xE0100, // Variation Selectors Supplement - 0xE01F0, // unassigned - 0xF0000, // Supplementary Private Use Area-A - 0x100000, // Supplementary Private Use Area-B - - #endregion - }; - - private static readonly UnicodeBlock?[] _unicodeBlocks = - { - #region Unicode blocks - UnicodeBlock.BasicLatin, - UnicodeBlock.Latin1Supplement, - UnicodeBlock.LatinExtendedA, - UnicodeBlock.LatinExtendedB, - UnicodeBlock.IpaExtensions, - UnicodeBlock.SpacingModifierLetters, - UnicodeBlock.CombiningDiacriticalMarks, - UnicodeBlock.Greek, - UnicodeBlock.Cyrillic, - UnicodeBlock.CyrillicSupplementary, - UnicodeBlock.Armenian, - UnicodeBlock.Hebrew, - UnicodeBlock.Arabic, - UnicodeBlock.Syriac, - null, - UnicodeBlock.Thaana, - null, - UnicodeBlock.Devanagari, - UnicodeBlock.Bengali, - UnicodeBlock.Gurmukhi, - UnicodeBlock.Gujarati, - UnicodeBlock.Oriya, - UnicodeBlock.Tamil, - UnicodeBlock.Telugu, - UnicodeBlock.Kannada, - UnicodeBlock.Malayalam, - UnicodeBlock.Sinhala, - UnicodeBlock.Thai, - UnicodeBlock.Lao, - UnicodeBlock.Tibetan, - UnicodeBlock.Myanmar, - UnicodeBlock.Georgian, - UnicodeBlock.HangulJamo, - UnicodeBlock.Ethiopic, - null, - UnicodeBlock.Cherokee, - UnicodeBlock.UnifiedCanadianAboriginalSyllabics, - UnicodeBlock.Ogham, - UnicodeBlock.Runic, - UnicodeBlock.Tagalog, - UnicodeBlock.Hanunoo, - UnicodeBlock.Buhid, - UnicodeBlock.Tagbanwa, - UnicodeBlock.Khmer, - UnicodeBlock.Mongolian, - null, - UnicodeBlock.Limbu, - UnicodeBlock.TaiLe, - null, - UnicodeBlock.KhmerSymbols, - null, - UnicodeBlock.PhoneticExtensions, - null, - UnicodeBlock.LatinExtendedAdditional, - UnicodeBlock.GreekExtended, - UnicodeBlock.GeneralPunctuation, - UnicodeBlock.SuperscriptsAndSubscripts, - UnicodeBlock.CurrencySymbols, - UnicodeBlock.CombiningMarksForSymbols, - UnicodeBlock.LetterlikeSymbols, - UnicodeBlock.NumberForms, - UnicodeBlock.Arrows, - UnicodeBlock.MathematicalOperators, - UnicodeBlock.MiscellaneousTechnical, - UnicodeBlock.ControlPictures, - UnicodeBlock.OpticalCharacterRecognition, - UnicodeBlock.EnclosedAlphanumerics, - UnicodeBlock.BoxDrawing, - UnicodeBlock.BlockElements, - UnicodeBlock.GeometricShapes, - UnicodeBlock.MiscellaneousSymbols, - UnicodeBlock.Dingbats, - UnicodeBlock.MiscellaneousMathematicalSymbolsA, - UnicodeBlock.SupplementalArrowsA, - UnicodeBlock.BraillePatterns, - UnicodeBlock.SupplementalArrowsB, - UnicodeBlock.MiscellaneousMathematicalSymbolsB, - UnicodeBlock.SupplementalMathematicalOperators, - UnicodeBlock.MiscellaneousSymbolsAndArrows, - null, - UnicodeBlock.CjkRadicalsSupplement, - UnicodeBlock.KangxiRadicals, - null, - UnicodeBlock.IdeographicDescriptionCharacters, - UnicodeBlock.CjkSymbolsAndPunctuation, - UnicodeBlock.Hiragana, - UnicodeBlock.Katakana, - UnicodeBlock.Bopomofo, - UnicodeBlock.HangulCompatibilityJamo, - UnicodeBlock.Kanbun, - UnicodeBlock.BopomofoExtended, - null, - UnicodeBlock.KatakanaPhoneticExtensions, - UnicodeBlock.EnclosedCjkLettersAndMonths, - UnicodeBlock.CjkCompatibility, - UnicodeBlock.CjkUnifiedIdeographsExtensionA, - UnicodeBlock.YijingHexagramSymbols, - UnicodeBlock.CjkUnifiedIdeographs, - UnicodeBlock.YiSyllables, - UnicodeBlock.YiRadicals, - null, - UnicodeBlock.HangulSyllables, - null, - UnicodeBlock.HighSurrogates, - UnicodeBlock.HighPrivateUseSurrogates, - UnicodeBlock.LowSurrogates, - UnicodeBlock.PrivateUseArea, - UnicodeBlock.CjkCompatibilityIdeographs, - UnicodeBlock.AlphabeticPresentationForms, - UnicodeBlock.ArabicPresentationFormsA, - UnicodeBlock.VariationSelectors, - null, - UnicodeBlock.CombiningHalfMarks, - UnicodeBlock.CjkCompatibilityForms, - UnicodeBlock.SmallFormVariants, - UnicodeBlock.ArabicPresentationFormsB, - UnicodeBlock.HalfwidthAndFullwidthForms, - UnicodeBlock.Specials, - UnicodeBlock.LinearBSyllabary, - UnicodeBlock.LinearBIdeograms, - UnicodeBlock.AegeanNumbers, - null, - UnicodeBlock.OldItalic, - UnicodeBlock.Gothic, - null, - UnicodeBlock.Ugaritic, - null, - UnicodeBlock.Deseret, - UnicodeBlock.Shavian, - UnicodeBlock.Osmanya, - null, - UnicodeBlock.CypriotSyllabary, - null, - UnicodeBlock.ByzantineMusicalSymbols, - UnicodeBlock.MusicalSymbols, - null, - UnicodeBlock.TaiXuanJingSymbols, - null, - UnicodeBlock.MathematicalAlphanumericSymbols, - null, - UnicodeBlock.CjkUnifiedIdeographsExtensionB, - null, - UnicodeBlock.CjkCompatibilityIdeographsSupplement, - null, - UnicodeBlock.Tags, - null, - UnicodeBlock.VariationSelectorsSupplement, - null, - UnicodeBlock.SupplementaryPrivateUseAreaA, - UnicodeBlock.SupplementaryPrivateUseAreaB, - - #endregion - }; - - #region Public methods - - /// <remarks> - /// Taken from JDK source: http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Character.java#Character.UnicodeBlock.0LATIN_EXTENDED_ADDITIONAL - /// </remarks> - public static UnicodeBlock? GetUnicodeBlock(this char ch) - { - int codePoint = ch; - - if (!IsValidCodePoint(codePoint)) - { - throw new ArgumentException("Argument is not a valid code point.", nameof(ch)); - } - - int top, bottom, current; - - bottom = 0; - top = _unicodeBlockStarts.Length; - current = top / 2; - - // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] - while (top - bottom > 1) - { - if (codePoint >= _unicodeBlockStarts[current]) - { - bottom = current; - } - else - { - top = current; - } - - current = (top + bottom) / 2; - } - - return _unicodeBlocks[current]; - } - - #endregion - - #region Private helper methods - - private static bool IsValidCodePoint(int codePoint) - { - return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT; - } - - #endregion - } -} diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs deleted file mode 100644 index d55ca80df..000000000 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs +++ /dev/null @@ -1,51 +0,0 @@ -using System; - -namespace NLangDetect.Core.Extensions -{ - public static class RandomExtensions - { - private const double _Epsilon = 2.22044604925031E-15; - - private static readonly object _mutex = new object(); - - private static double _nextNextGaussian; - private static bool _hasNextNextGaussian; - - /// <summary> - /// Returns the next pseudorandom, Gaussian ("normally") distributed double value with mean 0.0 and standard deviation 1.0 from this random number generator's sequence. - /// The general contract of nextGaussian is that one double value, chosen from (approximately) the usual normal distribution with mean 0.0 and standard deviation 1.0, is pseudorandomly generated and returned. - /// </summary> - /// <remarks> - /// Taken from: http://download.oracle.com/javase/6/docs/api/java/util/Random.html (nextGaussian()) - /// </remarks> - public static double NextGaussian(this Random random) - { - lock (_mutex) - { - if (_hasNextNextGaussian) - { - _hasNextNextGaussian = false; - - return _nextNextGaussian; - } - - double v1, v2, s; - - do - { - v1 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0 - v2 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0 - s = v1 * v1 + v2 * v2; - } - while (s >= 1.0 || Math.Abs(s - 0.0) < _Epsilon); - - double multiplier = Math.Sqrt(-2.0 * Math.Log(s) / s); - - _nextNextGaussian = v2 * multiplier; - _hasNextNextGaussian = true; - - return v1 * multiplier; - } - } - } -} diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/StringExtensions.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/StringExtensions.cs deleted file mode 100644 index 5db68bbf1..000000000 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/StringExtensions.cs +++ /dev/null @@ -1,25 +0,0 @@ -using System; - -namespace NLangDetect.Core.Extensions -{ - public static class StringExtensions - { - /// <summary> - /// Returns a new character sequence that is a subsequence of this sequence. The subsequence starts with the character at the specified index and ends with the character at index end - 1. The length of the returned sequence is end - start, so if start == end then an empty sequence is returned. - /// </summary> - /// <param name="s"></param> - /// <param name="start">the start index, inclusive</param> - /// <param name="end">the end index, exclusive</param> - /// <returns>the specified subsequence</returns> - /// <exception cref="IndexOutOfRangeException"> if start or end are negative, if end is greater than length(), or if start is greater than end</exception> - public static string SubSequence(this string s, int start, int end) - { - if (start < 0) throw new ArgumentOutOfRangeException(nameof(start), "Argument must not be negative."); - if (end < 0) throw new ArgumentOutOfRangeException(nameof(end), "Argument must not be negative."); - if (end > s.Length) throw new ArgumentOutOfRangeException(nameof(end), "Argument must not be greater than the input string's length."); - if (start > end) throw new ArgumentOutOfRangeException(nameof(start), "Argument must not be greater than the 'end' argument."); - - return s.Substring(start, end - start); - } - } -} diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs deleted file mode 100644 index 71b5de75e..000000000 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs +++ /dev/null @@ -1,131 +0,0 @@ -namespace NLangDetect.Core.Extensions -{ - public enum UnicodeBlock - { - BasicLatin, - Latin1Supplement, - LatinExtendedA, - LatinExtendedB, - IpaExtensions, - SpacingModifierLetters, - CombiningDiacriticalMarks, - Greek, - Cyrillic, - CyrillicSupplementary, - Armenian, - Hebrew, - Arabic, - Syriac, - Thaana, - Devanagari, - Bengali, - Gurmukhi, - Gujarati, - Oriya, - Tamil, - Telugu, - Kannada, - Malayalam, - Sinhala, - Thai, - Lao, - Tibetan, - Myanmar, - Georgian, - HangulJamo, - Ethiopic, - Cherokee, - UnifiedCanadianAboriginalSyllabics, - Ogham, - Runic, - Tagalog, - Hanunoo, - Buhid, - Tagbanwa, - Khmer, - Mongolian, - Limbu, - TaiLe, - KhmerSymbols, - PhoneticExtensions, - LatinExtendedAdditional, - GreekExtended, - GeneralPunctuation, - SuperscriptsAndSubscripts, - CurrencySymbols, - CombiningMarksForSymbols, - LetterlikeSymbols, - NumberForms, - Arrows, - MathematicalOperators, - MiscellaneousTechnical, - ControlPictures, - OpticalCharacterRecognition, - EnclosedAlphanumerics, - BoxDrawing, - BlockElements, - GeometricShapes, - MiscellaneousSymbols, - Dingbats, - MiscellaneousMathematicalSymbolsA, - SupplementalArrowsA, - BraillePatterns, - SupplementalArrowsB, - MiscellaneousMathematicalSymbolsB, - SupplementalMathematicalOperators, - MiscellaneousSymbolsAndArrows, - CjkRadicalsSupplement, - KangxiRadicals, - IdeographicDescriptionCharacters, - CjkSymbolsAndPunctuation, - Hiragana, - Katakana, - Bopomofo, - HangulCompatibilityJamo, - Kanbun, - BopomofoExtended, - KatakanaPhoneticExtensions, - EnclosedCjkLettersAndMonths, - CjkCompatibility, - CjkUnifiedIdeographsExtensionA, - YijingHexagramSymbols, - CjkUnifiedIdeographs, - YiSyllables, - YiRadicals, - HangulSyllables, - HighSurrogates, - HighPrivateUseSurrogates, - LowSurrogates, - PrivateUseArea, - CjkCompatibilityIdeographs, - AlphabeticPresentationForms, - ArabicPresentationFormsA, - VariationSelectors, - CombiningHalfMarks, - CjkCompatibilityForms, - SmallFormVariants, - ArabicPresentationFormsB, - HalfwidthAndFullwidthForms, - Specials, - LinearBSyllabary, - LinearBIdeograms, - AegeanNumbers, - OldItalic, - Gothic, - Ugaritic, - Deseret, - Shavian, - Osmanya, - CypriotSyllabary, - ByzantineMusicalSymbols, - MusicalSymbols, - TaiXuanJingSymbols, - MathematicalAlphanumericSymbols, - CjkUnifiedIdeographsExtensionB, - CjkCompatibilityIdeographsSupplement, - Tags, - VariationSelectorsSupplement, - SupplementaryPrivateUseAreaA, - SupplementaryPrivateUseAreaB, - } -} |
