diff options
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs')
| -rw-r--r-- | Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs | 374 |
1 files changed, 0 insertions, 374 deletions
using System;

namespace NLangDetect.Core.Extensions
{
    /// <summary>
    /// Maps characters and Unicode code points to the <see cref="UnicodeBlock"/> that contains them.
    /// </summary>
    /// <remarks>
    /// The lookup uses two parallel tables: <c>_unicodeBlockStarts</c> holds the first code point of
    /// every range (in strictly ascending order) and <c>_unicodeBlocks</c> holds the block assigned to
    /// the range at the same index, or <c>null</c> when the range is unassigned.
    /// </remarks>
    public static class CharExtensions
    {
        private const int MinCodePoint = 0x000000;
        private const int MaxCodePoint = 0x10ffff;

        // First code point of each range. Must stay sorted ascending and index-aligned with _unicodeBlocks.
        private static readonly int[] _unicodeBlockStarts =
        {
            0x0000, // Basic Latin
            0x0080, // Latin-1 Supplement
            0x0100, // Latin Extended-A
            0x0180, // Latin Extended-B
            0x0250, // IPA Extensions
            0x02B0, // Spacing Modifier Letters
            0x0300, // Combining Diacritical Marks
            0x0370, // Greek and Coptic
            0x0400, // Cyrillic
            0x0500, // Cyrillic Supplementary
            0x0530, // Armenian
            0x0590, // Hebrew
            0x0600, // Arabic
            0x0700, // Syriac
            0x0750, // unassigned
            0x0780, // Thaana
            0x07C0, // unassigned
            0x0900, // Devanagari
            0x0980, // Bengali
            0x0A00, // Gurmukhi
            0x0A80, // Gujarati
            0x0B00, // Oriya
            0x0B80, // Tamil
            0x0C00, // Telugu
            0x0C80, // Kannada
            0x0D00, // Malayalam
            0x0D80, // Sinhala
            0x0E00, // Thai
            0x0E80, // Lao
            0x0F00, // Tibetan
            0x1000, // Myanmar
            0x10A0, // Georgian
            0x1100, // Hangul Jamo
            0x1200, // Ethiopic
            0x1380, // unassigned
            0x13A0, // Cherokee
            0x1400, // Unified Canadian Aboriginal Syllabics
            0x1680, // Ogham
            0x16A0, // Runic
            0x1700, // Tagalog
            0x1720, // Hanunoo
            0x1740, // Buhid
            0x1760, // Tagbanwa
            0x1780, // Khmer
            0x1800, // Mongolian
            0x18B0, // unassigned
            0x1900, // Limbu
            0x1950, // Tai Le
            0x1980, // unassigned
            0x19E0, // Khmer Symbols
            0x1A00, // unassigned
            0x1D00, // Phonetic Extensions
            0x1D80, // unassigned
            0x1E00, // Latin Extended Additional
            0x1F00, // Greek Extended
            0x2000, // General Punctuation
            0x2070, // Superscripts and Subscripts
            0x20A0, // Currency Symbols
            0x20D0, // Combining Diacritical Marks for Symbols
            0x2100, // Letterlike Symbols
            0x2150, // Number Forms
            0x2190, // Arrows
            0x2200, // Mathematical Operators
            0x2300, // Miscellaneous Technical
            0x2400, // Control Pictures
            0x2440, // Optical Character Recognition
            0x2460, // Enclosed Alphanumerics
            0x2500, // Box Drawing
            0x2580, // Block Elements
            0x25A0, // Geometric Shapes
            0x2600, // Miscellaneous Symbols
            0x2700, // Dingbats
            0x27C0, // Miscellaneous Mathematical Symbols-A
            0x27F0, // Supplemental Arrows-A
            0x2800, // Braille Patterns
            0x2900, // Supplemental Arrows-B
            0x2980, // Miscellaneous Mathematical Symbols-B
            0x2A00, // Supplemental Mathematical Operators
            0x2B00, // Miscellaneous Symbols and Arrows
            0x2C00, // unassigned
            0x2E80, // CJK Radicals Supplement
            0x2F00, // Kangxi Radicals
            0x2FE0, // unassigned
            0x2FF0, // Ideographic Description Characters
            0x3000, // CJK Symbols and Punctuation
            0x3040, // Hiragana
            0x30A0, // Katakana
            0x3100, // Bopomofo
            0x3130, // Hangul Compatibility Jamo
            0x3190, // Kanbun
            0x31A0, // Bopomofo Extended
            0x31C0, // unassigned
            0x31F0, // Katakana Phonetic Extensions
            0x3200, // Enclosed CJK Letters and Months
            0x3300, // CJK Compatibility
            0x3400, // CJK Unified Ideographs Extension A
            0x4DC0, // Yijing Hexagram Symbols
            0x4E00, // CJK Unified Ideographs
            0xA000, // Yi Syllables
            0xA490, // Yi Radicals
            0xA4D0, // unassigned
            0xAC00, // Hangul Syllables
            0xD7B0, // unassigned
            0xD800, // High Surrogates
            0xDB80, // High Private Use Surrogates
            0xDC00, // Low Surrogates
            0xE000, // Private Use
            0xF900, // CJK Compatibility Ideographs
            0xFB00, // Alphabetic Presentation Forms
            0xFB50, // Arabic Presentation Forms-A
            0xFE00, // Variation Selectors
            0xFE10, // unassigned
            0xFE20, // Combining Half Marks
            0xFE30, // CJK Compatibility Forms
            0xFE50, // Small Form Variants
            0xFE70, // Arabic Presentation Forms-B
            0xFF00, // Halfwidth and Fullwidth Forms
            0xFFF0, // Specials
            0x10000, // Linear B Syllabary
            0x10080, // Linear B Ideograms
            0x10100, // Aegean Numbers
            0x10140, // unassigned
            0x10300, // Old Italic
            0x10330, // Gothic
            0x10350, // unassigned
            0x10380, // Ugaritic
            0x103A0, // unassigned
            0x10400, // Deseret
            0x10450, // Shavian
            0x10480, // Osmanya
            0x104B0, // unassigned
            0x10800, // Cypriot Syllabary
            0x10840, // unassigned
            0x1D000, // Byzantine Musical Symbols
            0x1D100, // Musical Symbols
            0x1D200, // unassigned
            0x1D300, // Tai Xuan Jing Symbols
            0x1D360, // unassigned
            0x1D400, // Mathematical Alphanumeric Symbols
            0x1D800, // unassigned
            0x20000, // CJK Unified Ideographs Extension B
            0x2A6E0, // unassigned
            0x2F800, // CJK Compatibility Ideographs Supplement
            0x2FA20, // unassigned
            0xE0000, // Tags
            0xE0080, // unassigned
            0xE0100, // Variation Selectors Supplement
            0xE01F0, // unassigned
            0xF0000, // Supplementary Private Use Area-A
            0x100000, // Supplementary Private Use Area-B
        };

        // Block for the range starting at the same index in _unicodeBlockStarts; null marks an unassigned range.
        private static readonly UnicodeBlock?[] _unicodeBlocks =
        {
            UnicodeBlock.BasicLatin,
            UnicodeBlock.Latin1Supplement,
            UnicodeBlock.LatinExtendedA,
            UnicodeBlock.LatinExtendedB,
            UnicodeBlock.IpaExtensions,
            UnicodeBlock.SpacingModifierLetters,
            UnicodeBlock.CombiningDiacriticalMarks,
            UnicodeBlock.Greek,
            UnicodeBlock.Cyrillic,
            UnicodeBlock.CyrillicSupplementary,
            UnicodeBlock.Armenian,
            UnicodeBlock.Hebrew,
            UnicodeBlock.Arabic,
            UnicodeBlock.Syriac,
            null,
            UnicodeBlock.Thaana,
            null,
            UnicodeBlock.Devanagari,
            UnicodeBlock.Bengali,
            UnicodeBlock.Gurmukhi,
            UnicodeBlock.Gujarati,
            UnicodeBlock.Oriya,
            UnicodeBlock.Tamil,
            UnicodeBlock.Telugu,
            UnicodeBlock.Kannada,
            UnicodeBlock.Malayalam,
            UnicodeBlock.Sinhala,
            UnicodeBlock.Thai,
            UnicodeBlock.Lao,
            UnicodeBlock.Tibetan,
            UnicodeBlock.Myanmar,
            UnicodeBlock.Georgian,
            UnicodeBlock.HangulJamo,
            UnicodeBlock.Ethiopic,
            null,
            UnicodeBlock.Cherokee,
            UnicodeBlock.UnifiedCanadianAboriginalSyllabics,
            UnicodeBlock.Ogham,
            UnicodeBlock.Runic,
            UnicodeBlock.Tagalog,
            UnicodeBlock.Hanunoo,
            UnicodeBlock.Buhid,
            UnicodeBlock.Tagbanwa,
            UnicodeBlock.Khmer,
            UnicodeBlock.Mongolian,
            null,
            UnicodeBlock.Limbu,
            UnicodeBlock.TaiLe,
            null,
            UnicodeBlock.KhmerSymbols,
            null,
            UnicodeBlock.PhoneticExtensions,
            null,
            UnicodeBlock.LatinExtendedAdditional,
            UnicodeBlock.GreekExtended,
            UnicodeBlock.GeneralPunctuation,
            UnicodeBlock.SuperscriptsAndSubscripts,
            UnicodeBlock.CurrencySymbols,
            UnicodeBlock.CombiningMarksForSymbols,
            UnicodeBlock.LetterlikeSymbols,
            UnicodeBlock.NumberForms,
            UnicodeBlock.Arrows,
            UnicodeBlock.MathematicalOperators,
            UnicodeBlock.MiscellaneousTechnical,
            UnicodeBlock.ControlPictures,
            UnicodeBlock.OpticalCharacterRecognition,
            UnicodeBlock.EnclosedAlphanumerics,
            UnicodeBlock.BoxDrawing,
            UnicodeBlock.BlockElements,
            UnicodeBlock.GeometricShapes,
            UnicodeBlock.MiscellaneousSymbols,
            UnicodeBlock.Dingbats,
            UnicodeBlock.MiscellaneousMathematicalSymbolsA,
            UnicodeBlock.SupplementalArrowsA,
            UnicodeBlock.BraillePatterns,
            UnicodeBlock.SupplementalArrowsB,
            UnicodeBlock.MiscellaneousMathematicalSymbolsB,
            UnicodeBlock.SupplementalMathematicalOperators,
            UnicodeBlock.MiscellaneousSymbolsAndArrows,
            null,
            UnicodeBlock.CjkRadicalsSupplement,
            UnicodeBlock.KangxiRadicals,
            null,
            UnicodeBlock.IdeographicDescriptionCharacters,
            UnicodeBlock.CjkSymbolsAndPunctuation,
            UnicodeBlock.Hiragana,
            UnicodeBlock.Katakana,
            UnicodeBlock.Bopomofo,
            UnicodeBlock.HangulCompatibilityJamo,
            UnicodeBlock.Kanbun,
            UnicodeBlock.BopomofoExtended,
            null,
            UnicodeBlock.KatakanaPhoneticExtensions,
            UnicodeBlock.EnclosedCjkLettersAndMonths,
            UnicodeBlock.CjkCompatibility,
            UnicodeBlock.CjkUnifiedIdeographsExtensionA,
            UnicodeBlock.YijingHexagramSymbols,
            UnicodeBlock.CjkUnifiedIdeographs,
            UnicodeBlock.YiSyllables,
            UnicodeBlock.YiRadicals,
            null,
            UnicodeBlock.HangulSyllables,
            null,
            UnicodeBlock.HighSurrogates,
            UnicodeBlock.HighPrivateUseSurrogates,
            UnicodeBlock.LowSurrogates,
            UnicodeBlock.PrivateUseArea,
            UnicodeBlock.CjkCompatibilityIdeographs,
            UnicodeBlock.AlphabeticPresentationForms,
            UnicodeBlock.ArabicPresentationFormsA,
            UnicodeBlock.VariationSelectors,
            null,
            UnicodeBlock.CombiningHalfMarks,
            UnicodeBlock.CjkCompatibilityForms,
            UnicodeBlock.SmallFormVariants,
            UnicodeBlock.ArabicPresentationFormsB,
            UnicodeBlock.HalfwidthAndFullwidthForms,
            UnicodeBlock.Specials,
            UnicodeBlock.LinearBSyllabary,
            UnicodeBlock.LinearBIdeograms,
            UnicodeBlock.AegeanNumbers,
            null,
            UnicodeBlock.OldItalic,
            UnicodeBlock.Gothic,
            null,
            UnicodeBlock.Ugaritic,
            null,
            UnicodeBlock.Deseret,
            UnicodeBlock.Shavian,
            UnicodeBlock.Osmanya,
            null,
            UnicodeBlock.CypriotSyllabary,
            null,
            UnicodeBlock.ByzantineMusicalSymbols,
            UnicodeBlock.MusicalSymbols,
            null,
            UnicodeBlock.TaiXuanJingSymbols,
            null,
            UnicodeBlock.MathematicalAlphanumericSymbols,
            null,
            UnicodeBlock.CjkUnifiedIdeographsExtensionB,
            null,
            UnicodeBlock.CjkCompatibilityIdeographsSupplement,
            null,
            UnicodeBlock.Tags,
            null,
            UnicodeBlock.VariationSelectorsSupplement,
            null,
            UnicodeBlock.SupplementaryPrivateUseAreaA,
            UnicodeBlock.SupplementaryPrivateUseAreaB,
        };

        /// <summary>
        /// Gets the Unicode block that contains the given UTF-16 code unit.
        /// </summary>
        /// <param name="ch">The character to classify.</param>
        /// <returns>
        /// The block containing <paramref name="ch"/>, or <c>null</c> when it falls in a range
        /// the table marks as unassigned.
        /// </returns>
        /// <remarks>
        /// Taken from JDK source: http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Character.java#Character.UnicodeBlock.0LATIN_EXTENDED_ADDITIONAL
        /// A <see cref="char"/> is always within the valid code point range, so this overload never throws.
        /// </remarks>
        public static UnicodeBlock? GetUnicodeBlock(this char ch)
        {
            // The explicit cast selects the int overload; without it this call would recurse.
            return GetUnicodeBlock((int)ch);
        }

        /// <summary>
        /// Gets the Unicode block that contains the given code point, including supplementary-plane
        /// code points that a single <see cref="char"/> cannot represent.
        /// </summary>
        /// <param name="codePoint">A code point in the range 0x000000 to 0x10FFFF inclusive.</param>
        /// <returns>
        /// The block containing <paramref name="codePoint"/>, or <c>null</c> when it falls in a range
        /// the table marks as unassigned.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="codePoint"/> is outside the range 0x000000 to 0x10FFFF.
        /// </exception>
        public static UnicodeBlock? GetUnicodeBlock(int codePoint)
        {
            if (!IsValidCodePoint(codePoint))
            {
                throw new ArgumentException("Argument is not a valid code point.", nameof(codePoint));
            }

            // A non-negative result is an exact hit on a range start. A negative result is the
            // bitwise complement of the index of the first start greater than codePoint, so the
            // containing range is the one just before it. The table begins at 0 and codePoint >= 0,
            // so the computed index can never be negative.
            int searchResult = Array.BinarySearch(_unicodeBlockStarts, codePoint);
            int rangeIndex = searchResult >= 0 ? searchResult : ~searchResult - 1;

            return _unicodeBlocks[rangeIndex];
        }

        // True when codePoint lies within the inclusive bounds MinCodePoint..MaxCodePoint.
        private static bool IsValidCodePoint(int codePoint)
        {
            return codePoint >= MinCodePoint && codePoint <= MaxCodePoint;
        }
    }
}
