diff options
| author | hawken <hawken@thehawken.org> | 2019-01-07 23:24:34 +0000 |
|---|---|---|
| committer | hawken <hawken@thehawken.org> | 2019-01-07 23:24:34 +0000 |
| commit | ba1794f64bb1959b3af0fbbddca57df14a5544a9 (patch) | |
| tree | 8edc7dec5be3beb793ab00305f222fe15f2a52e7 /Emby.Server.Implementations/TextEncoding/UniversalDetector/Core | |
| parent | 4ab82f7436b80359ef4c5f08027b309d1e1cf3b0 (diff) | |
Remove tabs and trailing whitespace
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/UniversalDetector/Core')
| -rw-r--r-- | Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs | 147 |
1 files changed, 72 insertions, 75 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs index 8b5bc37d3..da5995932 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs @@ -38,12 +38,11 @@ namespace UniversalDetector.Core { /// <summary> - /// Base class for the Character Distribution Method, used for + /// Base class for the Character Distribution Method, used for /// the CJK encodings /// </summary> public abstract class CharDistributionAnalyser { - protected const float SURE_YES = 0.99f; protected const float SURE_NO = 0.01f; protected const int MINIMUM_DATA_THRESHOLD = 4; @@ -57,19 +56,19 @@ namespace UniversalDetector.Core //Total character encounted. protected int totalChars; - + // Mapping table to get frequency order from char order (get from GetOrder()) protected int[] charToFreqOrder; // Size of above table protected int tableSize; - //This is a constant value varies from language to language, it is used - // in calculating confidence. - protected float typicalDistributionRatio; + //This is a constant value varies from language to language, it is used + // in calculating confidence. + protected float typicalDistributionRatio; public CharDistributionAnalyser() - { + { Reset(); } @@ -77,10 +76,10 @@ namespace UniversalDetector.Core /// Feed a block of data and do distribution analysis /// </summary> /// </param> - //public abstract void HandleData(byte[] buf, int offset, int len); - + //public abstract void HandleData(byte[] buf, int offset, int len); + /// <summary> - /// we do not handle character base on its original encoding string, but + /// we do not handle character base on its original encoding string, but /// convert this encoding string to a number, here called order. /// This allow multiple encoding of a language to share one frequency table /// </summary> @@ -88,9 +87,9 @@ namespace UniversalDetector.Core /// <param name="offset"></param> /// <returns></returns> public abstract int GetOrder(byte[] buf, int offset); - + /// <summary> - /// Feed a character with known length + /// Feed a character with known length /// </summary> /// <param name="buf">A <see cref="System.Byte"/></param> /// <param name="offset">buf offset</param> @@ -107,13 +106,13 @@ namespace UniversalDetector.Core } } - public virtual void Reset() + public virtual void Reset() { done = false; totalChars = 0; freqChars = 0; } - + /// <summary> /// return confidence base on received data /// </summary> @@ -133,16 +132,16 @@ namespace UniversalDetector.Core //normalize confidence, (we don't want to be 100% sure) return SURE_YES; } - + //It is not necessary to receive all data to draw conclusion. For charset detection, // certain amount of data is enough - public bool GotEnoughData() + public bool GotEnoughData() { return totalChars > ENOUGH_DATA_THRESHOLD; } } - + public class GB18030DistributionAnalyser : CharDistributionAnalyser { // GB2312 most frequently used character table @@ -155,7 +154,7 @@ namespace UniversalDetector.Core * * Idea Distribution Ratio = 0.79135/(1-0.79135) = 3.79 * Random Distribution Ration = 512 / (3755 - 512) = 0.157 - * + * * Typical Distribution Ratio about 25% of Ideal one, still much higher that RDR *****************************************************************************/ @@ -400,8 +399,8 @@ namespace UniversalDetector.Core 381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189, 852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, //last 512 - /*************************************************************************************** - *Everything below is of no interest for detection purpose * + /*************************************************************************************** + *Everything below is of no interest for detection purpose * *************************************************************************************** 5508,6484,3900,3414,3974,4441,4024,3537,4037,5628,5099,3633,6485,3148,6486,3636, @@ -601,7 +600,7 @@ namespace UniversalDetector.Core tableSize = GB2312_TABLE_SIZE; typicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO; } - + /// <summary> /// for GB2312 encoding, we are interested /// first byte range: 0xb0 -- 0xfe @@ -609,20 +608,20 @@ namespace UniversalDetector.Core /// no validation needed here. State machine has done that /// </summary> /// <returns></returns> - public override int GetOrder(byte[] buf, int offset) - { - if (buf[offset] >= 0xB0 && buf[offset+1] >= 0xA1) + public override int GetOrder(byte[] buf, int offset) + { + if (buf[offset] >= 0xB0 && buf[offset+1] >= 0xA1) return 94 * (buf[offset] - 0xb0) + buf[offset+1] - 0xA1; else return -1; } } - + public class EUCTWDistributionAnalyser : CharDistributionAnalyser { // EUCTW frequency table - // Converted from big5 work - // by Taiwan's Mandarin Promotion Council + // Converted from big5 work + // by Taiwan's Mandarin Promotion Council // <http://www.edu.tw:81/mandr/> /****************************************************************************** * 128 --> 0.42261 @@ -633,7 +632,7 @@ namespace UniversalDetector.Core * * Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98 * Random Distribution Ration = 512/(5401-512)=0.105 - * + * * Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR *****************************************************************************/ @@ -979,8 +978,8 @@ namespace UniversalDetector.Core 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, // 8086 2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, // 8102 - /*************************************************************************************** - *Everything below is of no interest for detection purpose * + /*************************************************************************************** + *Everything below is of no interest for detection purpose * *************************************************************************************** 2515,1613,4582,8119,3312,3866,2516,8120,4058,8121,1637,4059,2466,4583,3867,8122, // 8118 @@ -1022,7 +1021,7 @@ namespace UniversalDetector.Core 8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, // 8694 8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, // 8710 8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, // 8726 - 8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741, // 8742 //13973 + 8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741, // 8742 //13973 ****************************************************************************************/ }; @@ -1038,15 +1037,15 @@ namespace UniversalDetector.Core /// second byte range: 0xa1 -- 0xfe /// no validation needed here. State machine has done that /// </summary> - public override int GetOrder(byte[] buf, int offset) - { - if (buf[offset] >= 0xC4) + public override int GetOrder(byte[] buf, int offset) + { + if (buf[offset] >= 0xC4) return 94 * (buf[offset] - 0xC4) + buf[offset+1] - 0xA1; else return -1; } } - + public class EUCKRDistributionAnalyser : CharDistributionAnalyser { // Sampling from about 20M text materials include literature and computer technology @@ -1215,8 +1214,8 @@ namespace UniversalDetector.Core 2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042, 670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, //512, 256 - /*************************************************************************************** - * Everything below is of no interest for detection purpose + /*************************************************************************************** + * Everything below is of no interest for detection purpose * *************************************************************************************** 2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658, @@ -1619,32 +1618,32 @@ namespace UniversalDetector.Core 8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719, 8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735, 8736,8737,8738,8739,8740,8741 */ }; - + public EUCKRDistributionAnalyser() { charToFreqOrder = EUCKR_CHAR2FREQ_ORDER; tableSize = EUCKR_TABLE_SIZE; - typicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO; + typicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO; } - + /// <summary> /// first byte range: 0xb0 -- 0xfe /// second byte range: 0xa1 -- 0xfe /// no validation needed here. State machine has done that /// </summary> - public override int GetOrder(byte[] buf, int offset) - { - if (buf[offset] >= 0xB0) + public override int GetOrder(byte[] buf, int offset) + { + if (buf[offset] >= 0xB0) return 94 * (buf[offset] - 0xB0) + buf[offset+1] - 0xA1; else return -1; } } - + public class BIG5DistributionAnalyser : CharDistributionAnalyser { // Big5 frequency table - // by Taiwan's Mandarin Promotion Council + // by Taiwan's Mandarin Promotion Council // <http://www.edu.tw:81/mandr/> /****************************************************************************** * 128 --> 0.42261 @@ -1655,7 +1654,7 @@ namespace UniversalDetector.Core * * Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98 * Random Distribution Ration = 512/(5401-512)=0.105 - * + * * Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR *****************************************************************************/ @@ -2001,8 +2000,8 @@ namespace UniversalDetector.Core 890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, // 5360 2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, // 5376 //last 512 - /*************************************************************************************** - *Everything below is of no interest for detection purpose * + /*************************************************************************************** + *Everything below is of no interest for detection purpose * *************************************************************************************** 2522,1613,4812,5799,3345,3945,2523,5800,4162,5801,1637,4163,2471,4813,3946,5802, // 5392 @@ -2545,29 +2544,29 @@ namespace UniversalDetector.Core 13968,13969,13970,13971,13972, //13973 ****************************************************************************************/ }; - + public BIG5DistributionAnalyser() { charToFreqOrder = BIG5_CHAR2FREQ_ORDER; tableSize = BIG5_TABLE_SIZE; - typicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO; + typicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO; } - + /// <summary> /// first byte range: 0xa4 -- 0xfe /// second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe /// no validation needed here. State machine has done that /// </summary> - public override int GetOrder(byte[] buf, int offset) - { + public override int GetOrder(byte[] buf, int offset) + { if (buf[offset] >= 0xA4) { if (buf[offset+1] >= 0xA1) return 157 * (buf[offset] - 0xA4) + buf[offset+1] - 0xA1 + 63; else return 157 * (buf[offset] - 0xA4) + buf[offset+1] - 0x40; } else { - return -1; - } + return -1; + } } } @@ -2575,7 +2574,7 @@ namespace UniversalDetector.Core { //Sampling from about 20M text materials include literature and computer technology // Japanese frequency table, applied to both S-JIS and EUC-JP - //They are sorted in order. + //They are sorted in order. /****************************************************************************** * 128 --> 0.77094 @@ -2586,8 +2585,8 @@ namespace UniversalDetector.Core * * Idea Distribution Ratio = 0.92635 / (1-0.92635) = 12.58 * Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191 - * - * Typical Distribution Ratio, 25% of IDR + * + * Typical Distribution Ratio, 25% of IDR *****************************************************************************/ protected static float SJIS_TYPICAL_DISTRIBUTION_RATIO = 3.0f; @@ -2869,8 +2868,8 @@ namespace UniversalDetector.Core 1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, // 4352 2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, // 4368 //last 512 - /*************************************************************************************** - *Everything below is of no interest for detection purpose * + /*************************************************************************************** + *Everything below is of no interest for detection purpose * *************************************************************************************** 2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, // 4384 @@ -3118,31 +3117,31 @@ namespace UniversalDetector.Core 8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, // 8256 8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, // 8272 ****************************************************************************************/ - }; + }; public SJISDistributionAnalyser() { charToFreqOrder = SJIS_CHAR2FREQ_ORDER; tableSize = SJIS_TABLE_SIZE; - typicalDistributionRatio = SJIS_TYPICAL_DISTRIBUTION_RATIO; + typicalDistributionRatio = SJIS_TYPICAL_DISTRIBUTION_RATIO; } - + /// <summary> /// first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe /// second byte range: 0x40 -- 0x7e, 0x81 -- oxfe /// no validation needed here. State machine has done that /// </summary> - public override int GetOrder(byte[] buf, int offset) - { + public override int GetOrder(byte[] buf, int offset) + { int order = 0; - - if (buf[offset] >= 0x81 && buf[offset] <= 0x9F) + + if (buf[offset] >= 0x81 && buf[offset] <= 0x9F) order = 188 * (buf[offset] - 0x81); - else if (buf[offset] >= 0xE0 && buf[offset] <= 0xEF) + else if (buf[offset] >= 0xE0 && buf[offset] <= 0xEF) order = 188 * (buf[offset] - 0xE0 + 31); else return -1; order += buf[offset+1] - 0x40; - + if (buf[offset+1] > 0x7F) order--; return order; @@ -3154,20 +3153,18 @@ namespace UniversalDetector.Core public EUCJPDistributionAnalyser() : base() { } - + /// <summary> /// first byte range: 0xa0 -- 0xfe /// second byte range: 0xa1 -- 0xfe /// no validation needed here. State machine has done that /// </summary> - public override int GetOrder(byte[] buf, int offset) - { - if (buf[offset] >= 0xA0) + public override int GetOrder(byte[] buf, int offset) + { + if (buf[offset] >= 0xA0) return 94 * (buf[offset] - 0xA1) + buf[offset+1] - 0xA1; else return -1; } } - - } |
