aboutsummaryrefslogtreecommitdiff
path: root/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core
diff options
context:
space:
mode:
authorBond-009 <bond.009@outlook.com>2019-01-16 19:10:42 +0100
committerGitHub <noreply@github.com>2019-01-16 19:10:42 +0100
commit900dc851e6c810f9e1772a6fb901a5a7e2801baf (patch)
tree205bac3cd6df971ee18739e59bd4da0ffe91718b /Emby.Server.Implementations/TextEncoding/UniversalDetector/Core
parent07a8e49c4b1e4a2dddbaa49ab6f1ff4f271fbf20 (diff)
parent933ef438894ed233fec46badf58dd4f26492e832 (diff)
Merge branch 'dev' into cleanup
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/UniversalDetector/Core')
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Big5Prober.cs18
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/BitPackage.cs20
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs147
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs44
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Charsets.cs44
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CodingStateMachine.cs32
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCJPProber.cs18
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCKRProber.cs14
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCTWProber.cs18
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscCharsetProber.cs14
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscSM.cs356
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/GB18030Prober.cs20
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/HebrewProber.cs130
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/JapaneseContextAnalyser.cs68
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangBulgarianModel.cs28
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangCyrillicModel.cs36
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangGreekModel.cs22
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangHebrewModel.cs16
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangHungarianModel.cs20
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangThaiModel.cs16
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs38
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSGroupProber.cs16
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSSM.cs834
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCSGroupProber.cs28
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCharsetProber.cs44
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SJISProber.cs20
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SMModel.cs20
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SequenceModel.cs40
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UTF8Prober.cs10
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs60
30 files changed, 1086 insertions, 1105 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Big5Prober.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Big5Prober.cs
index 760fca9bd..19152a7ac 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Big5Prober.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Big5Prober.cs
@@ -44,12 +44,12 @@ namespace UniversalDetector.Core
private CodingStateMachine codingSM;
private BIG5DistributionAnalyser distributionAnalyser;
private byte[] lastChar = new byte[2];
-
+
public Big5Prober()
{
this.codingSM = new CodingStateMachine(new BIG5SMModel());
this.distributionAnalyser = new BIG5DistributionAnalyser();
- this.Reset();
+ this.Reset();
}
public override ProbingState HandleData(byte[] buf, int offset, int len)
@@ -73,7 +73,7 @@ namespace UniversalDetector.Core
lastChar[1] = buf[offset];
distributionAnalyser.HandleOneChar(lastChar, 0, charLen);
} else {
- distributionAnalyser.HandleOneChar(buf, i-1, charLen);
+ distributionAnalyser.HandleOneChar(buf, i-1, charLen);
}
}
}
@@ -84,23 +84,23 @@ namespace UniversalDetector.Core
state = ProbingState.FoundIt;
return state;
}
-
+
public override void Reset()
{
- codingSM.Reset();
+ codingSM.Reset();
state = ProbingState.Detecting;
distributionAnalyser.Reset();
}
-
+
public override string GetCharsetName()
{
- return "Big-5";
+ return "Big-5";
}
-
+
public override float GetConfidence()
{
return distributionAnalyser.GetConfidence();
}
-
+
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/BitPackage.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/BitPackage.cs
index 16483e661..19bcdc779 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/BitPackage.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/BitPackage.cs
@@ -43,15 +43,15 @@ namespace UniversalDetector.Core
public static int INDEX_SHIFT_4BITS = 3;
public static int INDEX_SHIFT_8BITS = 2;
public static int INDEX_SHIFT_16BITS = 1;
-
+
public static int SHIFT_MASK_4BITS = 7;
public static int SHIFT_MASK_8BITS = 3;
public static int SHIFT_MASK_16BITS = 1;
-
+
public static int BIT_SHIFT_4BITS = 2;
public static int BIT_SHIFT_8BITS = 3;
public static int BIT_SHIFT_16BITS = 4;
-
+
public static int UNIT_MASK_4BITS = 0x0000000F;
public static int UNIT_MASK_8BITS = 0x000000FF;
public static int UNIT_MASK_16BITS = 0x0000FFFF;
@@ -61,7 +61,7 @@ namespace UniversalDetector.Core
private int bitShift;
private int unitMask;
private int[] data;
-
+
public BitPackage(int indexShift, int shiftMask,
int bitShift, int unitMask, int[] data)
{
@@ -71,27 +71,27 @@ namespace UniversalDetector.Core
this.unitMask = unitMask;
this.data = data;
}
-
+
public static int Pack16bits(int a, int b)
{
return ((b << 16) | a);
}
-
+
public static int Pack8bits(int a, int b, int c, int d)
{
return Pack16bits((b << 8) | a, (d << 8) | c);
}
-
- public static int Pack4bits(int a, int b, int c, int d,
+
+ public static int Pack4bits(int a, int b, int c, int d,
int e, int f, int g, int h)
{
return Pack8bits((b << 4) | a, (d << 4) | c,
(f << 4) | e, (h << 4) | g);
}
-
+
public int Unpack(int i)
{
- return (data[i >> indexShift] >>
+ return (data[i >> indexShift] >>
((i & shiftMask) << bitShift)) & unitMask;
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs
index 8b5bc37d3..da5995932 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs
@@ -38,12 +38,11 @@
namespace UniversalDetector.Core
{
/// <summary>
- /// Base class for the Character Distribution Method, used for
+ /// Base class for the Character Distribution Method, used for
/// the CJK encodings
/// </summary>
public abstract class CharDistributionAnalyser
{
-
protected const float SURE_YES = 0.99f;
protected const float SURE_NO = 0.01f;
protected const int MINIMUM_DATA_THRESHOLD = 4;
@@ -57,19 +56,19 @@ namespace UniversalDetector.Core
//Total characters encountered.
protected int totalChars;
-
+
// Mapping table to get frequency order from char order (get from GetOrder())
protected int[] charToFreqOrder;
// Size of above table
protected int tableSize;
- //This is a constant value varies from language to language, it is used
- // in calculating confidence.
- protected float typicalDistributionRatio;
+ //This is a constant value varies from language to language, it is used
+ // in calculating confidence.
+ protected float typicalDistributionRatio;
public CharDistributionAnalyser()
- {
+ {
Reset();
}
@@ -77,10 +76,10 @@ namespace UniversalDetector.Core
/// Feed a block of data and do distribution analysis
/// </summary>
/// </param>
- //public abstract void HandleData(byte[] buf, int offset, int len);
-
+ //public abstract void HandleData(byte[] buf, int offset, int len);
+
/// <summary>
- /// we do not handle character base on its original encoding string, but
+ /// we do not handle character base on its original encoding string, but
/// convert this encoding string to a number, here called order.
/// This allows multiple encodings of a language to share one frequency table
/// </summary>
@@ -88,9 +87,9 @@ namespace UniversalDetector.Core
/// <param name="offset"></param>
/// <returns></returns>
public abstract int GetOrder(byte[] buf, int offset);
-
+
/// <summary>
- /// Feed a character with known length
+ /// Feed a character with known length
/// </summary>
/// <param name="buf">A <see cref="System.Byte"/></param>
/// <param name="offset">buf offset</param>
@@ -107,13 +106,13 @@ namespace UniversalDetector.Core
}
}
- public virtual void Reset()
+ public virtual void Reset()
{
done = false;
totalChars = 0;
freqChars = 0;
}
-
+
/// <summary>
/// return confidence based on received data
/// </summary>
@@ -133,16 +132,16 @@ namespace UniversalDetector.Core
//normalize confidence, (we don't want to be 100% sure)
return SURE_YES;
}
-
+
//It is not necessary to receive all data to draw conclusion. For charset detection,
// certain amount of data is enough
- public bool GotEnoughData()
+ public bool GotEnoughData()
{
return totalChars > ENOUGH_DATA_THRESHOLD;
}
}
-
+
public class GB18030DistributionAnalyser : CharDistributionAnalyser
{
// GB2312 most frequently used character table
@@ -155,7 +154,7 @@ namespace UniversalDetector.Core
*
* Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79
* Random Distribution Ratio = 512 / (3755 - 512) = 0.157
- *
+ *
* Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
*****************************************************************************/
@@ -400,8 +399,8 @@ namespace UniversalDetector.Core
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189,
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, //last 512
- /***************************************************************************************
- *Everything below is of no interest for detection purpose *
+ /***************************************************************************************
+ *Everything below is of no interest for detection purpose *
***************************************************************************************
5508,6484,3900,3414,3974,4441,4024,3537,4037,5628,5099,3633,6485,3148,6486,3636,
@@ -601,7 +600,7 @@ namespace UniversalDetector.Core
tableSize = GB2312_TABLE_SIZE;
typicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO;
}
-
+
/// <summary>
/// for GB2312 encoding, we are interested
/// first byte range: 0xb0 -- 0xfe
@@ -609,20 +608,20 @@ namespace UniversalDetector.Core
/// no validation needed here. State machine has done that
/// </summary>
/// <returns></returns>
- public override int GetOrder(byte[] buf, int offset)
- {
- if (buf[offset] >= 0xB0 && buf[offset+1] >= 0xA1)
+ public override int GetOrder(byte[] buf, int offset)
+ {
+ if (buf[offset] >= 0xB0 && buf[offset+1] >= 0xA1)
return 94 * (buf[offset] - 0xb0) + buf[offset+1] - 0xA1;
else
return -1;
}
}
-
+
public class EUCTWDistributionAnalyser : CharDistributionAnalyser
{
// EUCTW frequency table
- // Converted from big5 work
- // by Taiwan's Mandarin Promotion Council
+ // Converted from big5 work
+ // by Taiwan's Mandarin Promotion Council
// <http://www.edu.tw:81/mandr/>
/******************************************************************************
* 128 --> 0.42261
@@ -633,7 +632,7 @@ namespace UniversalDetector.Core
*
* Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
* Random Distribution Ratio = 512/(5401-512)=0.105
- *
+ *
* Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
*****************************************************************************/
@@ -979,8 +978,8 @@ namespace UniversalDetector.Core
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, // 8086
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, // 8102
- /***************************************************************************************
- *Everything below is of no interest for detection purpose *
+ /***************************************************************************************
+ *Everything below is of no interest for detection purpose *
***************************************************************************************
2515,1613,4582,8119,3312,3866,2516,8120,4058,8121,1637,4059,2466,4583,3867,8122, // 8118
@@ -1022,7 +1021,7 @@ namespace UniversalDetector.Core
8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, // 8694
8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, // 8710
8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, // 8726
- 8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741, // 8742 //13973
+ 8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741, // 8742 //13973
****************************************************************************************/
};
@@ -1038,15 +1037,15 @@ namespace UniversalDetector.Core
/// second byte range: 0xa1 -- 0xfe
/// no validation needed here. State machine has done that
/// </summary>
- public override int GetOrder(byte[] buf, int offset)
- {
- if (buf[offset] >= 0xC4)
+ public override int GetOrder(byte[] buf, int offset)
+ {
+ if (buf[offset] >= 0xC4)
return 94 * (buf[offset] - 0xC4) + buf[offset+1] - 0xA1;
else
return -1;
}
}
-
+
public class EUCKRDistributionAnalyser : CharDistributionAnalyser
{
// Sampling from about 20M text materials include literature and computer technology
@@ -1215,8 +1214,8 @@ namespace UniversalDetector.Core
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, //512, 256
- /***************************************************************************************
- * Everything below is of no interest for detection purpose
+ /***************************************************************************************
+ * Everything below is of no interest for detection purpose *
***************************************************************************************
2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658,
@@ -1619,32 +1618,32 @@ namespace UniversalDetector.Core
8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,
8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,
8736,8737,8738,8739,8740,8741 */ };
-
+
public EUCKRDistributionAnalyser()
{
charToFreqOrder = EUCKR_CHAR2FREQ_ORDER;
tableSize = EUCKR_TABLE_SIZE;
- typicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO;
+ typicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO;
}
-
+
/// <summary>
/// first byte range: 0xb0 -- 0xfe
/// second byte range: 0xa1 -- 0xfe
/// no validation needed here. State machine has done that
/// </summary>
- public override int GetOrder(byte[] buf, int offset)
- {
- if (buf[offset] >= 0xB0)
+ public override int GetOrder(byte[] buf, int offset)
+ {
+ if (buf[offset] >= 0xB0)
return 94 * (buf[offset] - 0xB0) + buf[offset+1] - 0xA1;
else
return -1;
}
}
-
+
public class BIG5DistributionAnalyser : CharDistributionAnalyser
{
// Big5 frequency table
- // by Taiwan's Mandarin Promotion Council
+ // by Taiwan's Mandarin Promotion Council
// <http://www.edu.tw:81/mandr/>
/******************************************************************************
* 128 --> 0.42261
@@ -1655,7 +1654,7 @@ namespace UniversalDetector.Core
*
* Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
* Random Distribution Ratio = 512/(5401-512)=0.105
- *
+ *
* Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
*****************************************************************************/
@@ -2001,8 +2000,8 @@ namespace UniversalDetector.Core
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, // 5360
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, // 5376 //last 512
- /***************************************************************************************
- *Everything below is of no interest for detection purpose *
+ /***************************************************************************************
+ *Everything below is of no interest for detection purpose *
***************************************************************************************
2522,1613,4812,5799,3345,3945,2523,5800,4162,5801,1637,4163,2471,4813,3946,5802, // 5392
@@ -2545,29 +2544,29 @@ namespace UniversalDetector.Core
13968,13969,13970,13971,13972, //13973
****************************************************************************************/
};
-
+
public BIG5DistributionAnalyser()
{
charToFreqOrder = BIG5_CHAR2FREQ_ORDER;
tableSize = BIG5_TABLE_SIZE;
- typicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO;
+ typicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO;
}
-
+
/// <summary>
/// first byte range: 0xa4 -- 0xfe
/// second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
/// no validation needed here. State machine has done that
/// </summary>
- public override int GetOrder(byte[] buf, int offset)
- {
+ public override int GetOrder(byte[] buf, int offset)
+ {
if (buf[offset] >= 0xA4) {
if (buf[offset+1] >= 0xA1)
return 157 * (buf[offset] - 0xA4) + buf[offset+1] - 0xA1 + 63;
else
return 157 * (buf[offset] - 0xA4) + buf[offset+1] - 0x40;
} else {
- return -1;
- }
+ return -1;
+ }
}
}
@@ -2575,7 +2574,7 @@ namespace UniversalDetector.Core
{
//Sampling from about 20M text materials include literature and computer technology
// Japanese frequency table, applied to both S-JIS and EUC-JP
- //They are sorted in order.
+ //They are sorted in order.
/******************************************************************************
* 128 --> 0.77094
@@ -2586,8 +2585,8 @@ namespace UniversalDetector.Core
*
* Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
* Random Distribution Ratio = 512 / (2965+62+83+86-512) = 0.191
- *
- * Typical Distribution Ratio, 25% of IDR
+ *
+ * Typical Distribution Ratio, 25% of IDR
*****************************************************************************/
protected static float SJIS_TYPICAL_DISTRIBUTION_RATIO = 3.0f;
@@ -2869,8 +2868,8 @@ namespace UniversalDetector.Core
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, // 4352
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, // 4368 //last 512
- /***************************************************************************************
- *Everything below is of no interest for detection purpose *
+ /***************************************************************************************
+ *Everything below is of no interest for detection purpose *
***************************************************************************************
2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, // 4384
@@ -3118,31 +3117,31 @@ namespace UniversalDetector.Core
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, // 8256
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, // 8272
****************************************************************************************/
- };
+ };
public SJISDistributionAnalyser()
{
charToFreqOrder = SJIS_CHAR2FREQ_ORDER;
tableSize = SJIS_TABLE_SIZE;
- typicalDistributionRatio = SJIS_TYPICAL_DISTRIBUTION_RATIO;
+ typicalDistributionRatio = SJIS_TYPICAL_DISTRIBUTION_RATIO;
}
-
+
/// <summary>
/// first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
/// second byte range: 0x40 -- 0x7e, 0x81 -- 0xfe
/// no validation needed here. State machine has done that
/// </summary>
- public override int GetOrder(byte[] buf, int offset)
- {
+ public override int GetOrder(byte[] buf, int offset)
+ {
int order = 0;
-
- if (buf[offset] >= 0x81 && buf[offset] <= 0x9F)
+
+ if (buf[offset] >= 0x81 && buf[offset] <= 0x9F)
order = 188 * (buf[offset] - 0x81);
- else if (buf[offset] >= 0xE0 && buf[offset] <= 0xEF)
+ else if (buf[offset] >= 0xE0 && buf[offset] <= 0xEF)
order = 188 * (buf[offset] - 0xE0 + 31);
else
return -1;
order += buf[offset+1] - 0x40;
-
+
if (buf[offset+1] > 0x7F)
order--;
return order;
@@ -3154,20 +3153,18 @@ namespace UniversalDetector.Core
public EUCJPDistributionAnalyser() : base()
{
}
-
+
/// <summary>
/// first byte range: 0xa0 -- 0xfe
/// second byte range: 0xa1 -- 0xfe
/// no validation needed here. State machine has done that
/// </summary>
- public override int GetOrder(byte[] buf, int offset)
- {
- if (buf[offset] >= 0xA0)
+ public override int GetOrder(byte[] buf, int offset)
+ {
+ if (buf[offset] >= 0xA0)
return 94 * (buf[offset] - 0xA1) + buf[offset+1] - 0xA1;
else
return -1;
}
}
-
-
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs
index 3369dd430..cc4539058 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -40,10 +40,10 @@ using System.IO;
namespace UniversalDetector.Core
{
- public enum ProbingState {
+ public enum ProbingState {
Detecting = 0, // no sure answer yet, but caller can ask for confidence
FoundIt = 1, // positive answer
- NotMe = 2 // negative answer
+ NotMe = 2 // negative answer
};
public abstract class CharsetProber
@@ -51,16 +51,16 @@ namespace UniversalDetector.Core
protected const float SHORTCUT_THRESHOLD = 0.95F;
protected ProbingState state;
-
+
// ASCII codes
private const byte SPACE = 0x20;
private const byte CAPITAL_A = 0x41;
private const byte CAPITAL_Z = 0x5A;
private const byte SMALL_A = 0x61;
private const byte SMALL_Z = 0x7A;
- private const byte LESS_THAN = 0x3C;
+ private const byte LESS_THAN = 0x3C;
private const byte GREATER_THAN = 0x3E;
-
+
/// <summary>
/// Feed data to the prober
/// </summary>
@@ -71,44 +71,44 @@ namespace UniversalDetector.Core
/// A <see cref="ProbingState"/>
/// </returns>
public abstract ProbingState HandleData(byte[] buf, int offset, int len);
-
+
/// <summary>
/// Reset prober state
/// </summary>
public abstract void Reset();
public abstract string GetCharsetName();
-
+
public abstract float GetConfidence();
-
+
public virtual ProbingState GetState()
{
return state;
}
public virtual void SetOption()
- {
-
+ {
+
}
public virtual void DumpStatus()
- {
-
+ {
+
}
//
// Helper functions used in the Latin1 and Group probers
//
/// <summary>
- ///
+ ///
/// </summary>
/// <returns>filtered buffer</returns>
- protected static byte[] FilterWithoutEnglishLetters(byte[] buf, int offset, int len)
+ protected static byte[] FilterWithoutEnglishLetters(byte[] buf, int offset, int len)
{
byte[] result = null;
using (MemoryStream ms = new MemoryStream(buf.Length)) {
-
+
bool meetMSB = false;
int max = offset + len;
int prev = offset;
@@ -140,8 +140,8 @@ namespace UniversalDetector.Core
}
/// <summary>
- /// Do filtering to reduce load to probers (Remove ASCII symbols,
- /// collapse spaces). This filter applies to all scripts which contain
+ /// Do filtering to reduce load to probers (Remove ASCII symbols,
+ /// collapse spaces). This filter applies to all scripts which contain
/// both English characters and upper ASCII characters.
/// </summary>
/// <returns>a filtered copy of the input buffer</returns>
@@ -150,16 +150,16 @@ namespace UniversalDetector.Core
byte[] result = null;
using (MemoryStream ms = new MemoryStream(buf.Length)) {
-
+
bool inTag = false;
int max = offset + len;
int prev = offset;
int cur = offset;
while (cur < max) {
-
+
byte b = buf[cur];
-
+
if (b == GREATER_THAN)
inTag = false;
else if (b == LESS_THAN)
@@ -177,7 +177,7 @@ namespace UniversalDetector.Core
cur++;
}
- // If the current segment contains more than just a symbol
+ // If the current segment contains more than just a symbol
// and it is not inside a tag then keep it.
if (!inTag && cur > prev)
ms.Write(buf, prev, cur - prev);
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Charsets.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Charsets.cs
index a7c1be92a..00cd8826f 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Charsets.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Charsets.cs
@@ -20,7 +20,7 @@
*
* Contributor(s):
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -40,47 +40,47 @@ namespace UniversalDetector.Core
public static class Charsets
{
public const string ASCII = "ASCII";
-
+
public const string UTF8 = "UTF-8";
-
+
public const string UTF16_LE = "UTF-16LE";
-
+
public const string UTF16_BE = "UTF-16BE";
-
+
public const string UTF32_BE = "UTF-32BE";
-
+
public const string UTF32_LE = "UTF-32LE";
/// <summary>
/// Unusual BOM (3412 order)
/// </summary>
public const string UCS4_3412 = "X-ISO-10646-UCS-4-3412";
-
+
/// <summary>
/// Unusual BOM (2413 order)
/// </summary>
public const string UCS4_2413 = "X-ISO-10646-UCS-4-2413";
-
+
/// <summary>
/// Cyrillic (based on bulgarian and russian data)
/// </summary>
public const string WIN1251 = "windows-1251";
-
+
/// <summary>
/// Latin-1, almost identical to ISO-8859-1
/// </summary>
public const string WIN1252 = "windows-1252";
-
+
/// <summary>
/// Greek
/// </summary>
public const string WIN1253 = "windows-1253";
-
+
/// <summary>
/// Logical hebrew (includes ISO-8859-8-I and most of x-mac-hebrew)
/// </summary>
public const string WIN1255 = "windows-1255";
-
+
/// <summary>
/// Traditional chinese
/// </summary>
@@ -89,7 +89,7 @@ namespace UniversalDetector.Core
public const string EUCKR = "EUC-KR";
public const string EUCJP = "EUC-JP";
-
+
public const string EUCTW = "EUC-TW";
/// <summary>
@@ -98,11 +98,11 @@ namespace UniversalDetector.Core
public const string GB18030 = "gb18030";
public const string ISO2022_JP = "ISO-2022-JP";
-
+
public const string ISO2022_CN = "ISO-2022-CN";
-
+
public const string ISO2022_KR = "ISO-2022-KR";
-
+
/// <summary>
/// Simplified chinese
/// </summary>
@@ -111,15 +111,15 @@ namespace UniversalDetector.Core
public const string SHIFT_JIS = "Shift-JIS";
public const string MAC_CYRILLIC = "x-mac-cyrillic";
-
+
public const string KOI8R = "KOI8-R";
-
+
public const string IBM855 = "IBM855";
-
+
public const string IBM866 = "IBM866";
/// <summary>
- /// East-Europe. Disabled because too similar to windows-1252
+ /// East-Europe. Disabled because too similar to windows-1252
/// (latin-1). Should use tri-grams models to discriminate between
/// these two charsets.
/// </summary>
@@ -141,9 +141,9 @@ namespace UniversalDetector.Core
public const string ISO8859_8 = "ISO-8859-8";
/// <summary>
- /// Thai. This recognizer is not enabled yet.
+ /// Thai. This recognizer is not enabled yet.
/// </summary>
public const string TIS620 = "TIS620";
-
+
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CodingStateMachine.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CodingStateMachine.cs
index f837dd966..34f24161d 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CodingStateMachine.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CodingStateMachine.cs
@@ -22,7 +22,7 @@
* Shy Shalom <shooshX@gmail.com>
* Kohei TAKETA <k-tak@void.in> (Java port)
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -48,8 +48,8 @@ namespace UniversalDetector.Core
private SMModel model;
private int currentCharLen;
private int currentBytePos;
-
- public CodingStateMachine(SMModel model)
+
+ public CodingStateMachine(SMModel model)
{
this.currentState = SMModel.START;
this.model = model;
@@ -57,34 +57,28 @@ namespace UniversalDetector.Core
public int NextState(byte b)
{
- // for each byte we get its class, if it is first byte,
+ // for each byte we get its class, if it is first byte,
// we also get byte length
int byteCls = model.GetClass(b);
- if (currentState == SMModel.START) {
+ if (currentState == SMModel.START) {
currentBytePos = 0;
currentCharLen = model.charLenTable[byteCls];
}
-
- // from byte's class and stateTable, we get its next state
+
+ // from byte's class and stateTable, we get its next state
currentState = model.stateTable.Unpack(
currentState * model.ClassFactor + byteCls);
currentBytePos++;
return currentState;
}
-
- public void Reset()
- {
- currentState = SMModel.START;
- }
- public int CurrentCharLen
- {
- get { return currentCharLen; }
+ public void Reset()
+ {
+ currentState = SMModel.START;
}
- public string ModelName
- {
- get { return model.Name; }
- }
+ public int CurrentCharLen => currentCharLen;
+
+ public string ModelName => model.Name;
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCJPProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCJPProber.cs
index 050a9d9ce..eac67fe95 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCJPProber.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCJPProber.cs
@@ -43,25 +43,25 @@ namespace UniversalDetector.Core
private EUCJPContextAnalyser contextAnalyser;
private EUCJPDistributionAnalyser distributionAnalyser;
private byte[] lastChar = new byte[2];
-
+
public EUCJPProber()
{
codingSM = new CodingStateMachine(new EUCJPSMModel());
distributionAnalyser = new EUCJPDistributionAnalyser();
- contextAnalyser = new EUCJPContextAnalyser();
+ contextAnalyser = new EUCJPContextAnalyser();
Reset();
}
- public override string GetCharsetName()
+ public override string GetCharsetName()
{
return "EUC-JP";
}
-
+
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
int codingState;
int max = offset + len;
-
+
for (int i = offset; i < max; i++) {
codingState = codingSM.NextState(buf[i]);
if (codingState == SMModel.ERROR) {
@@ -83,7 +83,7 @@ namespace UniversalDetector.Core
distributionAnalyser.HandleOneChar(buf, i-1, charLen);
}
}
- }
+ }
lastChar[0] = buf[max-1];
if (state == ProbingState.Detecting)
if (contextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
@@ -93,18 +93,18 @@ namespace UniversalDetector.Core
public override void Reset()
{
- codingSM.Reset();
+ codingSM.Reset();
state = ProbingState.Detecting;
contextAnalyser.Reset();
distributionAnalyser.Reset();
}
-
+
public override float GetConfidence()
{
float contxtCf = contextAnalyser.GetConfidence();
float distribCf = distributionAnalyser.GetConfidence();
return (contxtCf > distribCf ? contxtCf : distribCf);
}
-
+
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCKRProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCKRProber.cs
index 67d4b0a72..b1543dae1 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCKRProber.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCKRProber.cs
@@ -46,15 +46,15 @@ namespace UniversalDetector.Core
public EUCKRProber()
{
codingSM = new CodingStateMachine(new EUCKRSMModel());
- distributionAnalyser = new EUCKRDistributionAnalyser();
+ distributionAnalyser = new EUCKRDistributionAnalyser();
Reset();
}
-
+
public override string GetCharsetName()
{
- return "EUC-KR";
+ return "EUC-KR";
}
-
+
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
int codingState;
@@ -81,12 +81,12 @@ namespace UniversalDetector.Core
}
}
lastChar[0] = buf[max-1];
-
+
if (state == ProbingState.Detecting)
if (distributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
state = ProbingState.FoundIt;
return state;
-
+
}
public override float GetConfidence()
@@ -96,7 +96,7 @@ namespace UniversalDetector.Core
public override void Reset()
{
- codingSM.Reset();
+ codingSM.Reset();
state = ProbingState.Detecting;
distributionAnalyser.Reset();
//mContextAnalyser.Reset();
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCTWProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCTWProber.cs
index a4e0b486e..65a521760 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCTWProber.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCTWProber.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -50,7 +50,7 @@ namespace UniversalDetector.Core
this.distributionAnalyser = new EUCTWDistributionAnalyser();
this.Reset();
}
-
+
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
int codingState;
@@ -77,21 +77,21 @@ namespace UniversalDetector.Core
}
}
lastChar[0] = buf[max-1];
-
+
if (state == ProbingState.Detecting)
if (distributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
state = ProbingState.FoundIt;
return state;
}
-
+
public override string GetCharsetName()
{
- return "x-euc-tw";
+ return "x-euc-tw";
}
-
+
public override void Reset()
{
- codingSM.Reset();
+ codingSM.Reset();
state = ProbingState.Detecting;
distributionAnalyser.Reset();
}
@@ -100,7 +100,7 @@ namespace UniversalDetector.Core
{
return distributionAnalyser.GetConfidence();
}
-
-
+
+
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscCharsetProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscCharsetProber.cs
index e9cefa9bc..f457bf490 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscCharsetProber.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscCharsetProber.cs
@@ -41,19 +41,19 @@ namespace UniversalDetector.Core
{
private const int CHARSETS_NUM = 4;
private string detectedCharset;
- private CodingStateMachine[] codingSM;
+ private CodingStateMachine[] codingSM;
int activeSM;
public EscCharsetProber()
{
- codingSM = new CodingStateMachine[CHARSETS_NUM];
+ codingSM = new CodingStateMachine[CHARSETS_NUM];
codingSM[0] = new CodingStateMachine(new HZSMModel());
codingSM[1] = new CodingStateMachine(new ISO2022CNSMModel());
codingSM[2] = new CodingStateMachine(new ISO2022JPSMModel());
codingSM[3] = new CodingStateMachine(new ISO2022KRSMModel());
Reset();
}
-
+
public override void Reset()
{
state = ProbingState.Detecting;
@@ -66,7 +66,7 @@ namespace UniversalDetector.Core
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
int max = offset + len;
-
+
for (int i = offset; i < max && state == ProbingState.Detecting; i++) {
for (int j = activeSM - 1; j >= 0; j--) {
// byte is feed to all active state machine
@@ -94,12 +94,12 @@ namespace UniversalDetector.Core
public override string GetCharsetName()
{
- return detectedCharset;
+ return detectedCharset;
}
-
+
public override float GetConfidence()
{
return 0.99f;
- }
+ }
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscSM.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscSM.cs
index 61ac5545f..6ebfa8a4c 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscSM.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscSM.cs
@@ -44,59 +44,59 @@ namespace UniversalDetector.Core
public class HZSMModel : SMModel
{
private readonly static int[] HZ_cls = {
- BitPackage.Pack4bits(1,0,0,0,0,0,0,0), // 00 - 07
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
- BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 28 - 2f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
- BitPackage.Pack4bits(0,0,0,4,0,5,2,0), // 78 - 7f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 80 - 87
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 88 - 8f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 90 - 97
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 98 - 9f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // a0 - a7
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // a8 - af
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b0 - b7
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b8 - bf
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // c0 - c7
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // c8 - cf
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // d0 - d7
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // d8 - df
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // e0 - e7
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // e8 - ef
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // f0 - f7
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1) // f8 - ff
+ BitPackage.Pack4bits(1,0,0,0,0,0,0,0), // 00 - 07
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
+ BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 28 - 2f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
+ BitPackage.Pack4bits(0,0,0,4,0,5,2,0), // 78 - 7f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 80 - 87
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 88 - 8f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 90 - 97
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 98 - 9f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // a0 - a7
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // a8 - af
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b0 - b7
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b8 - bf
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // c0 - c7
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // c8 - cf
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // d0 - d7
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // d8 - df
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // e0 - e7
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // e8 - ef
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // f0 - f7
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1) // f8 - ff
};
private readonly static int[] HZ_st = {
- BitPackage.Pack4bits(START, ERROR, 3, START, START, START, ERROR, ERROR),//00-07
- BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR, ITSME, ITSME, ITSME, ITSME),//08-0f
- BitPackage.Pack4bits(ITSME, ITSME, ERROR, ERROR, START, START, 4, ERROR),//10-17
- BitPackage.Pack4bits( 5, ERROR, 6, ERROR, 5, 5, 4, ERROR),//18-1f
- BitPackage.Pack4bits( 4, ERROR, 4, 4, 4, ERROR, 4, ERROR),//20-27
- BitPackage.Pack4bits( 4, ITSME, START, START, START, START, START, START) //28-2f
+ BitPackage.Pack4bits(START, ERROR, 3, START, START, START, ERROR, ERROR),//00-07
+ BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR, ITSME, ITSME, ITSME, ITSME),//08-0f
+ BitPackage.Pack4bits(ITSME, ITSME, ERROR, ERROR, START, START, 4, ERROR),//10-17
+ BitPackage.Pack4bits( 5, ERROR, 6, ERROR, 5, 5, 4, ERROR),//18-1f
+ BitPackage.Pack4bits( 4, ERROR, 4, 4, 4, ERROR, 4, ERROR),//20-27
+ BitPackage.Pack4bits( 4, ITSME, START, START, START, START, START, START) //28-2f
};
private readonly static int[] HZCharLenTable = {0, 0, 0, 0, 0, 0};
-
+
public HZSMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, HZ_cls),
6,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, HZ_st),
HZCharLenTable, "HZ-GB-2312")
@@ -104,65 +104,65 @@ namespace UniversalDetector.Core
}
}
-
+
public class ISO2022CNSMModel : SMModel
{
private readonly static int[] ISO2022CN_cls = {
- BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
- BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
- BitPackage.Pack4bits(0,3,0,0,0,0,0,0), // 28 - 2f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
- BitPackage.Pack4bits(0,0,0,4,0,0,0,0), // 40 - 47
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff
+ BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
+ BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
+ BitPackage.Pack4bits(0,3,0,0,0,0,0,0), // 28 - 2f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
+ BitPackage.Pack4bits(0,0,0,4,0,0,0,0), // 40 - 47
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff
};
private readonly static int[] ISO2022CN_st = {
- BitPackage.Pack4bits(START, 3,ERROR,START,START,START,START,START),//00-07
- BitPackage.Pack4bits(START,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//08-0f
- BitPackage.Pack4bits(ERROR,ERROR,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//10-17
- BitPackage.Pack4bits(ITSME,ITSME,ITSME,ERROR,ERROR,ERROR, 4,ERROR),//18-1f
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,ERROR,ERROR,ERROR,ERROR),//20-27
- BitPackage.Pack4bits( 5, 6,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//28-2f
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,ERROR,ERROR,ERROR,ERROR),//30-37
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ERROR,START) //38-3f
+ BitPackage.Pack4bits(START, 3,ERROR,START,START,START,START,START),//00-07
+ BitPackage.Pack4bits(START,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//08-0f
+ BitPackage.Pack4bits(ERROR,ERROR,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//10-17
+ BitPackage.Pack4bits(ITSME,ITSME,ITSME,ERROR,ERROR,ERROR, 4,ERROR),//18-1f
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,ERROR,ERROR,ERROR,ERROR),//20-27
+ BitPackage.Pack4bits( 5, 6,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//28-2f
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,ERROR,ERROR,ERROR,ERROR),//30-37
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ERROR,START) //38-3f
};
private readonly static int[] ISO2022CNCharLenTable = {0, 0, 0, 0, 0, 0, 0, 0, 0};
public ISO2022CNSMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022CN_cls),
9,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022CN_st),
ISO2022CNCharLenTable, "ISO-2022-CN")
@@ -170,130 +170,130 @@ namespace UniversalDetector.Core
}
}
-
+
public class ISO2022JPSMModel : SMModel
{
private readonly static int[] ISO2022JP_cls = {
- BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07
- BitPackage.Pack4bits(0,0,0,0,0,0,2,2), // 08 - 0f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
- BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
- BitPackage.Pack4bits(0,0,0,0,7,0,0,0), // 20 - 27
- BitPackage.Pack4bits(3,0,0,0,0,0,0,0), // 28 - 2f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
- BitPackage.Pack4bits(6,0,4,0,8,0,0,0), // 40 - 47
- BitPackage.Pack4bits(0,9,5,0,0,0,0,0), // 48 - 4f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff
+ BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07
+ BitPackage.Pack4bits(0,0,0,0,0,0,2,2), // 08 - 0f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
+ BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
+ BitPackage.Pack4bits(0,0,0,0,7,0,0,0), // 20 - 27
+ BitPackage.Pack4bits(3,0,0,0,0,0,0,0), // 28 - 2f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
+ BitPackage.Pack4bits(6,0,4,0,8,0,0,0), // 40 - 47
+ BitPackage.Pack4bits(0,9,5,0,0,0,0,0), // 48 - 4f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff
};
private readonly static int[] ISO2022JP_st = {
- BitPackage.Pack4bits(START, 3, ERROR,START,START,START,START,START),//00-07
- BitPackage.Pack4bits(START, START, ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//08-0f
- BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//10-17
- BitPackage.Pack4bits(ITSME, ITSME, ITSME,ITSME,ITSME,ITSME,ERROR,ERROR),//18-1f
- BitPackage.Pack4bits(ERROR, 5, ERROR,ERROR,ERROR, 4,ERROR,ERROR),//20-27
- BitPackage.Pack4bits(ERROR, ERROR, ERROR, 6,ITSME,ERROR,ITSME,ERROR),//28-2f
- BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//30-37
- BitPackage.Pack4bits(ERROR, ERROR, ERROR,ITSME,ERROR,ERROR,ERROR,ERROR),//38-3f
- BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ITSME,ERROR,START,START) //40-47
+ BitPackage.Pack4bits(START, 3, ERROR,START,START,START,START,START),//00-07
+ BitPackage.Pack4bits(START, START, ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//08-0f
+ BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//10-17
+ BitPackage.Pack4bits(ITSME, ITSME, ITSME,ITSME,ITSME,ITSME,ERROR,ERROR),//18-1f
+ BitPackage.Pack4bits(ERROR, 5, ERROR,ERROR,ERROR, 4,ERROR,ERROR),//20-27
+ BitPackage.Pack4bits(ERROR, ERROR, ERROR, 6,ITSME,ERROR,ITSME,ERROR),//28-2f
+ BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//30-37
+ BitPackage.Pack4bits(ERROR, ERROR, ERROR,ITSME,ERROR,ERROR,ERROR,ERROR),//38-3f
+ BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ITSME,ERROR,START,START) //40-47
};
private readonly static int[] ISO2022JPCharLenTable = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
public ISO2022JPSMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022JP_cls),
10,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022JP_st),
ISO2022JPCharLenTable, "ISO-2022-JP")
{
}
-
+
}
-
+
public class ISO2022KRSMModel : SMModel
- {
+ {
private readonly static int[] ISO2022KR_cls = {
- BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
- BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
- BitPackage.Pack4bits(0,0,0,0,3,0,0,0), // 20 - 27
- BitPackage.Pack4bits(0,4,0,0,0,0,0,0), // 28 - 2f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
- BitPackage.Pack4bits(0,0,0,5,0,0,0,0), // 40 - 47
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff
+ BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
+ BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
+ BitPackage.Pack4bits(0,0,0,0,3,0,0,0), // 20 - 27
+ BitPackage.Pack4bits(0,4,0,0,0,0,0,0), // 28 - 2f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
+ BitPackage.Pack4bits(0,0,0,5,0,0,0,0), // 40 - 47
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff
};
private readonly static int[] ISO2022KR_st = {
- BitPackage.Pack4bits(START, 3,ERROR,START,START,START,ERROR,ERROR),//00-07
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
- BitPackage.Pack4bits(ITSME,ITSME,ERROR,ERROR,ERROR, 4,ERROR,ERROR),//10-17
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 5,ERROR,ERROR,ERROR),//18-1f
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,START,START,START,START) //20-27
+ BitPackage.Pack4bits(START, 3,ERROR,START,START,START,ERROR,ERROR),//00-07
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
+ BitPackage.Pack4bits(ITSME,ITSME,ERROR,ERROR,ERROR, 4,ERROR,ERROR),//10-17
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 5,ERROR,ERROR,ERROR),//18-1f
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,START,START,START,START) //20-27
};
private readonly static int[] ISO2022KRCharLenTable = {0, 0, 0, 0, 0, 0};
public ISO2022KRSMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022KR_cls),
6,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022KR_st),
ISO2022KRCharLenTable, "ISO-2022-KR")
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/GB18030Prober.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/GB18030Prober.cs
index ac237c5cd..0d2ebd8c7 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/GB18030Prober.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/GB18030Prober.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -38,7 +38,7 @@
namespace UniversalDetector.Core
{
- // We use gb18030 to replace gb2312, because 18030 is a superset.
+ // We use gb18030 to replace gb2312, because 18030 is a superset.
public class GB18030Prober : CharsetProber
{
private CodingStateMachine codingSM;
@@ -52,18 +52,18 @@ namespace UniversalDetector.Core
analyser = new GB18030DistributionAnalyser();
Reset();
}
-
+
public override string GetCharsetName()
{
- return "gb18030";
+ return "gb18030";
}
-
+
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
int codingState = SMModel.START;
int max = offset + len;
-
+
for (int i = offset; i < max; i++) {
codingState = codingSM.NextState(buf[i]);
if (codingState == SMModel.ERROR) {
@@ -91,18 +91,18 @@ namespace UniversalDetector.Core
if (analyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
state = ProbingState.FoundIt;
}
-
+
return state;
}
-
+
public override float GetConfidence()
{
return analyser.GetConfidence();
}
-
+
public override void Reset()
{
- codingSM.Reset();
+ codingSM.Reset();
state = ProbingState.Detecting;
analyser.Reset();
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/HebrewProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/HebrewProber.cs
index 92974d3a8..2cbf33075 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/HebrewProber.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/HebrewProber.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -43,40 +43,40 @@ using System;
*
* Four main charsets exist in Hebrew:
* "ISO-8859-8" - Visual Hebrew
- * "windows-1255" - Logical Hebrew
+ * "windows-1255" - Logical Hebrew
* "ISO-8859-8-I" - Logical Hebrew
* "x-mac-hebrew" - ?? Logical Hebrew ??
*
* Both "ISO" charsets use a completely identical set of code points, whereas
- * "windows-1255" and "x-mac-hebrew" are two different proper supersets of
+ * "windows-1255" and "x-mac-hebrew" are two different proper supersets of
* these code points. windows-1255 defines additional characters in the range
- * 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
+ * 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
* diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
- * x-mac-hebrew defines similar additional code points but with a different
+ * x-mac-hebrew defines similar additional code points but with a different
* mapping.
*
- * As far as an average Hebrew text with no diacritics is concerned, all four
- * charsets are identical with respect to code points. Meaning that for the
- * main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
+ * As far as an average Hebrew text with no diacritics is concerned, all four
+ * charsets are identical with respect to code points. Meaning that for the
+ * main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
* (including final letters).
*
* The dominant difference between these charsets is their directionality.
* "Visual" directionality means that the text is ordered as if the renderer is
- * not aware of a BIDI rendering algorithm. The renderer sees the text and
- * draws it from left to right. The text itself when ordered naturally is read
+ * not aware of a BIDI rendering algorithm. The renderer sees the text and
+ * draws it from left to right. The text itself when ordered naturally is read
* backwards. A buffer of Visual Hebrew generally looks like so:
* "[last word of first line spelled backwards] [whole line ordered backwards
- * and spelled backwards] [first word of first line spelled backwards]
+ * and spelled backwards] [first word of first line spelled backwards]
* [end of line] [last word of second line] ... etc' "
* adding punctuation marks, numbers and English text to visual text is
* naturally also "visual" and from left to right.
- *
+ *
* "Logical" directionality means the text is ordered "naturally" according to
- * the order it is read. It is the responsibility of the renderer to display
- * the text from right to left. A BIDI algorithm is used to place general
+ * the order it is read. It is the responsibility of the renderer to display
+ * the text from right to left. A BIDI algorithm is used to place general
* punctuation marks, numbers and English text in the text.
*
- * Texts in x-mac-hebrew are almost impossible to find on the Internet. From
+ * Texts in x-mac-hebrew are almost impossible to find on the Internet. From
* what little evidence I could find, it seems that its general directionality
* is Logical.
*
@@ -84,17 +84,17 @@ using System;
* charsets:
* Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
* backwards while line order is natural. For charset recognition purposes
- * the line order is unimportant (In fact, for this implementation, even
+ * the line order is unimportant (In fact, for this implementation, even
* word order is unimportant).
* Logical Hebrew - "windows-1255" - normal, naturally ordered text.
*
- * "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
+ * "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
* specifically identified.
* "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
* that contain special punctuation marks or diacritics is displayed with
* some unconverted characters showing as question marks. This problem might
* be corrected using another model prober for x-mac-hebrew. Due to the fact
- * that x-mac-hebrew texts are so rare, writing another model prober isn't
+ * that x-mac-hebrew texts are so rare, writing another model prober isn't
* worth the effort and performance hit.
*
* *** The Prober ***
@@ -136,7 +136,7 @@ using System;
*/
namespace UniversalDetector.Core
{
-
+
/// <summary>
/// This prober doesn't actually recognize a language or a charset.
/// It is a helper prober for the use of the Hebrew model probers
@@ -165,49 +165,49 @@ namespace UniversalDetector.Core
protected const string VISUAL_HEBREW_NAME = "ISO-8859-8";
protected const string LOGICAL_HEBREW_NAME = "windows-1255";
-
+
// owned by the group prober.
protected CharsetProber logicalProber, visualProber;
- protected int finalCharLogicalScore, finalCharVisualScore;
-
+ protected int finalCharLogicalScore, finalCharVisualScore;
+
// The two last bytes seen in the previous buffer.
protected byte prev, beforePrev;
-
+
public HebrewProber()
{
Reset();
}
-
- public void SetModelProbers(CharsetProber logical, CharsetProber visual)
- {
- logicalProber = logical;
- visualProber = visual;
+
+ public void SetModelProbers(CharsetProber logical, CharsetProber visual)
+ {
+ logicalProber = logical;
+ visualProber = visual;
}
-
- /**
+
+ /**
* Final letter analysis for logical-visual decision.
- * Look for evidence that the received buffer is either logical Hebrew or
+ * Look for evidence that the received buffer is either logical Hebrew or
* visual Hebrew.
* The following cases are checked:
- * 1) A word longer than 1 letter, ending with a final letter. This is an
- * indication that the text is laid out "naturally" since the final letter
+ * 1) A word longer than 1 letter, ending with a final letter. This is an
+ * indication that the text is laid out "naturally" since the final letter
* really appears at the end. +1 for logical score.
* 2) A word longer than 1 letter, ending with a Non-Final letter. In normal
* Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, should not end with
* the Non-Final form of that letter. Exceptions to this rule are mentioned
* above in isNonFinal(). This is an indication that the text is laid out
* backwards. +1 for visual score
- * 3) A word longer than 1 letter, starting with a final letter. Final letters
- * should not appear at the beginning of a word. This is an indication that
+ * 3) A word longer than 1 letter, starting with a final letter. Final letters
+ * should not appear at the beginning of a word. This is an indication that
* the text is laid out backwards. +1 for visual score.
*
- * The visual score and logical score are accumulated throughout the text and
+ * The visual score and logical score are accumulated throughout the text and
* are finally checked against each other in GetCharSetName().
* No checking for final letters in the middle of words is done since that case
* is not an indication for either Logical or Visual text.
*
* The input buffer should not contain any white spaces that are not (' ')
- * or any low-ascii punctuation marks.
+ * or any low-ascii punctuation marks.
*/
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
@@ -218,31 +218,31 @@ namespace UniversalDetector.Core
int max = offset + len;
for (int i = offset; i < max; i++) {
-
+
byte b = buf[i];
-
+
// a word just ended
if (b == 0x20) {
// *(curPtr-2) was not a space so prev is not a 1 letter word
if (beforePrev != 0x20) {
// case (1) [-2:not space][-1:final letter][cur:space]
- if (IsFinal(prev))
+ if (IsFinal(prev))
finalCharLogicalScore++;
- // case (2) [-2:not space][-1:Non-Final letter][cur:space]
+ // case (2) [-2:not space][-1:Non-Final letter][cur:space]
else if (IsNonFinal(prev))
finalCharVisualScore++;
}
-
+
} else {
// case (3) [-2:space][-1:final letter][cur:not space]
- if ((beforePrev == 0x20) && (IsFinal(prev)) && (b != ' '))
+ if ((beforePrev == 0x20) && (IsFinal(prev)) && (b != ' '))
++finalCharVisualScore;
}
beforePrev = prev;
prev = b;
}
- // Forever detecting, till the end or until both model probers
+ // Forever detecting, till the end or until both model probers
// return NotMe (handled above).
return ProbingState.Detecting;
}
@@ -252,7 +252,7 @@ namespace UniversalDetector.Core
{
// If the final letter score distance is dominant enough, rely on it.
int finalsub = finalCharLogicalScore - finalCharVisualScore;
- if (finalsub >= MIN_FINAL_CHAR_DISTANCE)
+ if (finalsub >= MIN_FINAL_CHAR_DISTANCE)
return LOGICAL_HEBREW_NAME;
if (finalsub <= -(MIN_FINAL_CHAR_DISTANCE))
return VISUAL_HEBREW_NAME;
@@ -263,9 +263,9 @@ namespace UniversalDetector.Core
return LOGICAL_HEBREW_NAME;
if (modelsub < -(MIN_MODEL_DISTANCE))
return VISUAL_HEBREW_NAME;
-
+
// Still no good, back to final letter distance, maybe it'll save the day.
- if (finalsub < 0)
+ if (finalsub < 0)
return VISUAL_HEBREW_NAME;
// (finalsub > 0 - Logical) or (don't know what to do) default to Logical.
@@ -280,10 +280,10 @@ namespace UniversalDetector.Core
beforePrev = 0x20;
}
- public override ProbingState GetState()
+ public override ProbingState GetState()
{
// Remain active as long as any of the model probers are active.
- if (logicalProber.GetState() == ProbingState.NotMe &&
+ if (logicalProber.GetState() == ProbingState.NotMe &&
visualProber.GetState() == ProbingState.NotMe)
return ProbingState.NotMe;
return ProbingState.Detecting;
@@ -293,31 +293,31 @@ namespace UniversalDetector.Core
{
//Console.WriteLine(" HEB: {0} - {1} [Logical-Visual score]", finalCharLogicalScore, finalCharVisualScore);
}
-
+
public override float GetConfidence()
- {
+ {
return 0.0f;
}
-
+
protected static bool IsFinal(byte b)
{
- return (b == FINAL_KAF || b == FINAL_MEM || b == FINAL_NUN
- || b == FINAL_PE || b == FINAL_TSADI);
+ return (b == FINAL_KAF || b == FINAL_MEM || b == FINAL_NUN
+ || b == FINAL_PE || b == FINAL_TSADI);
}
-
+
protected static bool IsNonFinal(byte b)
{
- // The normal Tsadi is not a good Non-Final letter due to words like
- // 'lechotet' (to chat) containing an apostrophe after the tsadi. This
- // apostrophe is converted to a space in FilterWithoutEnglishLetters causing
- // the Non-Final tsadi to appear at an end of a word even though this is not
+ // The normal Tsadi is not a good Non-Final letter due to words like
+ // 'lechotet' (to chat) containing an apostrophe after the tsadi. This
+ // apostrophe is converted to a space in FilterWithoutEnglishLetters causing
+ // the Non-Final tsadi to appear at an end of a word even though this is not
// the case in the original text.
- // The letters Pe and Kaf rarely display a related behavior of not being a
- // good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' for
- // example legally end with a Non-Final Pe or Kaf. However, the benefit of
- // these letters as Non-Final letters outweighs the damage since these words
- // are quite rare.
- return (b == NORMAL_KAF || b == NORMAL_MEM || b == NORMAL_NUN
+ // The letters Pe and Kaf rarely display a related behavior of not being a
+ // good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' for
+ // example legally end with a Non-Final Pe or Kaf. However, the benefit of
+ // these letters as Non-Final letters outweighs the damage since these words
+ // are quite rare.
+ return (b == NORMAL_KAF || b == NORMAL_MEM || b == NORMAL_NUN
|| b == NORMAL_PE);
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/JapaneseContextAnalyser.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/JapaneseContextAnalyser.cs
index 93b9d7580..7d28224c5 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/JapaneseContextAnalyser.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/JapaneseContextAnalyser.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -48,7 +48,7 @@ namespace UniversalDetector.Core
// hiragana frequency category table
// This is hiragana 2-char sequence table, the number in each cell represents its frequency category
- protected static byte[,] jp2CharContext = {
+ protected static byte[,] jp2CharContext = {
{ 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,},
{ 2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4,},
{ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,},
@@ -133,35 +133,35 @@ namespace UniversalDetector.Core
{ 0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3,},
{ 0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1,},
};
-
+
// category counters, each integer counts sequence in its category
int[] relSample = new int[CATEGORIES_NUM];
// total sequence received
int totalRel;
-
+
// The order of previous char
int lastCharOrder;
- // if last byte in current buffer is not the last byte of a character,
+ // if last byte in current buffer is not the last byte of a character,
// we need to know how many byte to skip in next buffer.
int needToSkipCharNum;
- // If this flag is set to true, detection is done and conclusion has
+ // If this flag is set to true, detection is done and conclusion has
// been made
bool done;
-
+
public JapaneseContextAnalyser()
{
- Reset();
+ Reset();
}
-
+
public float GetConfidence()
{
// This is just one way to calculate confidence. It works well for me.
if (totalRel > MINIMUM_DATA_THRESHOLD)
return ((float)(totalRel - relSample[0]))/totalRel;
- else
+ else
return DONT_KNOW;
}
@@ -170,15 +170,15 @@ namespace UniversalDetector.Core
int charLen = 0;
int max = offset + len;
-
+
if (done)
return;
- // The buffer we got is byte oriented, and a character may span
+ // The buffer we got is byte oriented, and a character may span
// more than one buffer. In case the last one or two byte in last
- // buffer is not complete, we record how many byte needed to
+ // buffer is not complete, we record how many byte needed to
// complete that character and skip these bytes here. We can choose
- // to record those bytes as well and analyse the character once it
+ // to record those bytes as well and analyse the character once it
// is complete, but since a character will not make much difference,
// skipping it will simplify our logic and improve performance.
for (int i = needToSkipCharNum+offset; i < max; ) {
@@ -200,14 +200,14 @@ namespace UniversalDetector.Core
}
}
}
-
+
public void HandleOneChar(byte[] buf, int offset, int charLen)
{
- if (totalRel > MAX_REL_THRESHOLD)
+ if (totalRel > MAX_REL_THRESHOLD)
done = true;
- if (done)
+ if (done)
return;
-
+
// Only 2-bytes characters are of our interest
int order = (charLen == 2) ? GetOrder(buf, offset) : -1;
if (order != -1 && lastCharOrder != -1) {
@@ -217,7 +217,7 @@ namespace UniversalDetector.Core
}
lastCharOrder = order;
}
-
+
public void Reset()
{
totalRel = 0;
@@ -228,18 +228,18 @@ namespace UniversalDetector.Core
done = false;
}
}
-
+
protected abstract int GetOrder(byte[] buf, int offset, out int charLen);
-
+
protected abstract int GetOrder(byte[] buf, int offset);
-
- public bool GotEnoughData()
+
+ public bool GotEnoughData()
{
return totalRel > ENOUGH_REL_THRESHOLD;
}
-
+
}
-
+
public class SJISContextAnalyser : JapaneseContextAnalyser
{
private const byte HIRAGANA_FIRST_BYTE = 0x82;
@@ -247,10 +247,10 @@ namespace UniversalDetector.Core
protected override int GetOrder(byte[] buf, int offset, out int charLen)
{
//find out current char's byte length
- if (buf[offset] >= 0x81 && buf[offset] <= 0x9F
+ if (buf[offset] >= 0x81 && buf[offset] <= 0x9F
|| buf[offset] >= 0xe0 && buf[offset] <= 0xFC)
charLen = 2;
- else
+ else
charLen = 1;
// return its order if it is hiragana
@@ -259,7 +259,7 @@ namespace UniversalDetector.Core
if (low >= 0x9F && low <= 0xF1)
return low - 0x9F;
}
- return -1;
+ return -1;
}
protected override int GetOrder(byte[] buf, int offset)
@@ -274,15 +274,15 @@ namespace UniversalDetector.Core
}
}
-
+
public class EUCJPContextAnalyser : JapaneseContextAnalyser
{
private const byte HIRAGANA_FIRST_BYTE = 0xA4;
-
+
protected override int GetOrder(byte[] buf, int offset, out int charLen)
{
byte high = buf[offset];
-
+
//find out current char's byte length
if (high == 0x8E || high >= 0xA1 && high <= 0xFE)
charLen = 2;
@@ -297,9 +297,9 @@ namespace UniversalDetector.Core
if (low >= 0xA1 && low <= 0xF3)
return low - 0xA1;
}
- return -1;
+ return -1;
}
-
+
protected override int GetOrder(byte[] buf, int offset)
{
// We are only interested in Hiragana
@@ -309,7 +309,7 @@ namespace UniversalDetector.Core
return low - 0xA1;
}
return -1;
- }
+ }
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangBulgarianModel.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangBulgarianModel.cs
index 4b6729ed3..5b18480d2 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangBulgarianModel.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangBulgarianModel.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -37,15 +37,15 @@
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
-{
+{
public abstract class BulgarianModel : SequenceModel
{
- //Model Table:
+ //Model Table:
//total sequences: 100%
//first 512 sequences: 96.9392%
//first 1024 sequences:3.0618%
//rest sequences: 0.2992%
- //negative sequences: 0.0020%
+ //negative sequences: 0.0020%
private static byte[] BULGARIAN_LANG_MODEL = {
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
@@ -175,15 +175,15 @@ namespace UniversalDetector.Core
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-
+
};
- public BulgarianModel(byte[] charToOrderMap, string name)
+ public BulgarianModel(byte[] charToOrderMap, string name)
: base(charToOrderMap, BULGARIAN_LANG_MODEL, 0.969392f, false, name)
{
- }
+ }
}
-
+
public class Latin5BulgarianModel : BulgarianModel
{
//255: Control characters that usually does not exist in any text
@@ -191,7 +191,7 @@ namespace UniversalDetector.Core
//253: symbol (punctuation) that does not belong to word
//252: 0 - 9
// Character Mapping Table:
- // this table is modified base on win1251BulgarianCharToOrderMap, so
+ // this table is modified base on win1251BulgarianCharToOrderMap, so
// only number <64 is sure valid
private static byte[] LATIN5_CHAR_TO_ORDER_MAP = {
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
@@ -209,14 +209,14 @@ namespace UniversalDetector.Core
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, //c0
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, //d0
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, //e0
- 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, //f0
+ 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, //f0
};
-
+
public Latin5BulgarianModel() : base(LATIN5_CHAR_TO_ORDER_MAP, "ISO-8859-5")
{
}
}
-
+
public class Win1251BulgarianModel : BulgarianModel
{
private static byte[] WIN1251__CHAR_TO_ORDER_MAP = {
@@ -236,8 +236,8 @@ namespace UniversalDetector.Core
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, //d0
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, //e0
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, //f0
- };
-
+ };
+
public Win1251BulgarianModel() : base(WIN1251__CHAR_TO_ORDER_MAP, "windows-1251")
{
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangCyrillicModel.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangCyrillicModel.cs
index 5e55a4839..1210b6d5d 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangCyrillicModel.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangCyrillicModel.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -40,12 +40,12 @@ namespace UniversalDetector.Core
{
public abstract class CyrillicModel : SequenceModel
{
- // Model Table:
+ // Model Table:
// total sequences: 100%
// first 512 sequences: 97.6601%
// first 1024 sequences: 2.3389%
// rest sequences: 0.1237%
- // negative sequences: 0.0009%
+ // negative sequences: 0.0009%
protected readonly static byte[] RUSSIAN_LANG_MODEL = {
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
@@ -176,13 +176,13 @@ namespace UniversalDetector.Core
0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
};
-
- public CyrillicModel(byte[] charToOrderMap, string name)
+
+ public CyrillicModel(byte[] charToOrderMap, string name)
: base(charToOrderMap, RUSSIAN_LANG_MODEL, 0.976601f, false, name)
{
}
}
-
+
public class Koi8rModel : CyrillicModel
{
private readonly static byte[] KOI8R_CHAR_TO_ORDER_MAP = {
@@ -203,12 +203,12 @@ namespace UniversalDetector.Core
59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, //e0
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, //f0
};
-
+
public Koi8rModel() : base(KOI8R_CHAR_TO_ORDER_MAP, "KOI8-R")
{
}
}
-
+
public class Win1251Model : CyrillicModel
{
private readonly static byte[] WIN1251_CHAR_TO_ORDER_MAP = {
@@ -229,12 +229,12 @@ namespace UniversalDetector.Core
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
};
-
+
public Win1251Model() : base(WIN1251_CHAR_TO_ORDER_MAP, "windows-1251")
{
}
}
-
+
public class Latin5Model : CyrillicModel
{
private readonly static byte[] LATIN5_CHAR_TO_ORDER_MAP = {
@@ -254,13 +254,13 @@ namespace UniversalDetector.Core
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
- };
-
+ };
+
public Latin5Model() : base(LATIN5_CHAR_TO_ORDER_MAP, "ISO-8859-5")
{
}
}
-
+
public class MacCyrillicModel : CyrillicModel
{
private readonly static byte[] MACCYRILLIC_CHAR_TO_ORDER_MAP = {
@@ -281,7 +281,7 @@ namespace UniversalDetector.Core
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
};
-
+
public MacCyrillicModel() : base(MACCYRILLIC_CHAR_TO_ORDER_MAP,
"x-mac-cyrillic")
{
@@ -308,7 +308,7 @@ namespace UniversalDetector.Core
43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
};
-
+
public Ibm855Model() : base(IBM855_BYTE_TO_ORDER_MAP, "IBM855")
{
}
@@ -334,12 +334,12 @@ namespace UniversalDetector.Core
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
};
-
+
public Ibm866Model() : base(IBM866_CHAR_TO_ORDER_MAP, "IBM866")
{
}
}
-
-
+
+
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangGreekModel.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangGreekModel.cs
index 563ba52c2..2fe1e97c0 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangGreekModel.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangGreekModel.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -37,15 +37,15 @@
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
-{
+{
public abstract class GreekModel : SequenceModel
{
- // Model Table:
+ // Model Table:
// total sequences: 100%
// first 512 sequences: 98.2851%
// first 1024 sequences:1.7001%
// rest sequences: 0.0359%
- // negative sequences: 0.0148%
+ // negative sequences: 0.0148%
private readonly static byte[] GREEK_LANG_MODEL = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -177,12 +177,12 @@ namespace UniversalDetector.Core
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
- public GreekModel(byte[] charToOrderMap, string name)
+ public GreekModel(byte[] charToOrderMap, string name)
: base(charToOrderMap, GREEK_LANG_MODEL, 0.982851f, false, name)
{
- }
+ }
}
-
+
public class Latin7Model : GreekModel
{
/****************************************************************
@@ -210,12 +210,12 @@ namespace UniversalDetector.Core
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, //e0
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, //f0
};
-
+
public Latin7Model() : base(LATIN7_CHAR_TO_ORDER_MAP, "ISO-8859-7")
{
}
}
-
+
public class Win1253Model : GreekModel
{
private readonly static byte[] WIN1253__CHAR_TO_ORDER_MAP = {
@@ -235,8 +235,8 @@ namespace UniversalDetector.Core
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, //d0
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, //e0
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, //f0
- };
-
+ };
+
public Win1253Model() : base(WIN1253__CHAR_TO_ORDER_MAP, "windows-1253")
{
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangHebrewModel.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangHebrewModel.cs
index 030fcc598..180ab8a63 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangHebrewModel.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangHebrewModel.cs
@@ -37,15 +37,15 @@
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
-{
+{
public abstract class HebrewModel : SequenceModel
{
- //Model Table:
+ //Model Table:
//total sequences: 100%
//first 512 sequences: 98.4004%
//first 1024 sequences: 1.5981%
//rest sequences: 0.087%
- //negative sequences: 0.0015%
+ //negative sequences: 0.0015%
private readonly static byte[] HEBREW_LANG_MODEL = {
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
@@ -177,12 +177,12 @@ namespace UniversalDetector.Core
0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
};
- public HebrewModel(byte[] charToOrderMap, string name)
+ public HebrewModel(byte[] charToOrderMap, string name)
: base(charToOrderMap, HEBREW_LANG_MODEL, 0.984004f, false, name)
{
- }
+ }
}
-
+
public class Win1255Model : HebrewModel
{
/*
@@ -192,7 +192,7 @@ namespace UniversalDetector.Core
252: 0 - 9
*/
//Windows-1255 language model
- //Character Mapping Table:
+ //Character Mapping Table:
private readonly static byte[] WIN1255_CHAR_TO_ORDER_MAP = {
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
@@ -211,7 +211,7 @@ namespace UniversalDetector.Core
9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,
};
-
+
public Win1255Model() : base(WIN1255_CHAR_TO_ORDER_MAP, "windows-1255")
{
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangHungarianModel.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangHungarianModel.cs
index d7eee2251..d95ec4c8e 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangHungarianModel.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangHungarianModel.cs
@@ -36,15 +36,15 @@
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
-{
+{
public abstract class HungarianModel : SequenceModel
{
- //Model Table:
+ //Model Table:
//total sequences: 100%
//first 512 sequences: 94.7368%
//first 1024 sequences:5.2623%
//rest sequences: 0.8894%
- //negative sequences: 0.0009%
+ //negative sequences: 0.0009%
private readonly static byte[] HUNGARIAN_LANG_MODEL = {
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
@@ -176,13 +176,13 @@ namespace UniversalDetector.Core
0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
};
- public HungarianModel(byte[] charToOrderMap, string name)
- : base(charToOrderMap, HUNGARIAN_LANG_MODEL, 0.947368f,
+ public HungarianModel(byte[] charToOrderMap, string name)
+ : base(charToOrderMap, HUNGARIAN_LANG_MODEL, 0.947368f,
false, name)
{
- }
+ }
}
-
+
public class Latin2HungarianModel : HungarianModel
{
private readonly static byte[] LATIN2_CHAR_TO_ORDER_MAP = {
@@ -203,12 +203,12 @@ namespace UniversalDetector.Core
82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
};
-
+
public Latin2HungarianModel() : base(LATIN2_CHAR_TO_ORDER_MAP, "ISO-8859-2")
{
}
}
-
+
public class Win1250HungarianModel : HungarianModel
{
private readonly static byte[] WIN1250_CHAR_TO_ORDER_MAP = {
@@ -229,7 +229,7 @@ namespace UniversalDetector.Core
84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
};
-
+
public Win1250HungarianModel() : base(WIN1250_CHAR_TO_ORDER_MAP, "windows-1250")
{
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangThaiModel.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangThaiModel.cs
index bdda20f14..b5dae7a34 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangThaiModel.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/LangThaiModel.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -37,7 +37,7 @@
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
-{
+{
public class ThaiModel : SequenceModel
{
/****************************************************************
@@ -46,7 +46,7 @@ namespace UniversalDetector.Core
253: symbol (punctuation) that does not belong to word
252: 0 - 9
*****************************************************************/
- // The following result for thai was collected from a limited sample (1M)
+ // The following result for thai was collected from a limited sample (1M)
private readonly static byte[] TIS620_CHAR_TO_ORDER_MAP = {
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
@@ -66,12 +66,12 @@ namespace UniversalDetector.Core
68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
};
- //Model Table:
+ //Model Table:
//total sequences: 100%
//first 512 sequences: 92.6386%
//first 1024 sequences:7.3177%
//rest sequences: 1.0230%
- //negative sequences: 0.0436%
+ //negative sequences: 0.0436%
private readonly static byte[] THAI_LANG_MODEL = {
0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
@@ -203,11 +203,11 @@ namespace UniversalDetector.Core
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
- public ThaiModel(byte[] charToOrderMap, string name)
- : base(TIS620_CHAR_TO_ORDER_MAP, THAI_LANG_MODEL,
+ public ThaiModel(byte[] charToOrderMap, string name)
+ : base(TIS620_CHAR_TO_ORDER_MAP, THAI_LANG_MODEL,
0.926386f, false, "TIS-620")
{
- }
+ }
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs
index c79a10aa7..5d57e30e1 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -40,7 +40,7 @@ using System;
namespace UniversalDetector.Core
{
- // TODO: Using trigrams the detector should be able to discriminate between
+ // TODO: Using trigrams the detector should be able to discriminate between
// latin-1 and iso8859-2
public class Latin1Prober : CharsetProber
{
@@ -54,9 +54,9 @@ namespace UniversalDetector.Core
private const int ACO = 5; // accent capital other
private const int ASV = 6; // accent small vowel
private const int ASO = 7; // accent small other
-
+
private const int CLASS_NUM = 8; // total classes
-
+
private readonly static byte[] Latin1_CharToClass = {
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 00 - 07
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 08 - 0F
@@ -92,36 +92,36 @@ namespace UniversalDetector.Core
ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, // F8 - FF
};
- /* 0 : illegal
- 1 : very unlikely
- 2 : normal
+ /* 0 : illegal
+ 1 : very unlikely
+ 2 : normal
3 : very likely
*/
private readonly static byte[] Latin1ClassModel = {
/* UDF OTH ASC ASS ACV ACO ASV ASO */
/*UDF*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*OTH*/ 0, 3, 3, 3, 3, 3, 3, 3,
- /*ASC*/ 0, 3, 3, 3, 3, 3, 3, 3,
+ /*ASC*/ 0, 3, 3, 3, 3, 3, 3, 3,
/*ASS*/ 0, 3, 3, 3, 1, 1, 3, 3,
/*ACV*/ 0, 3, 3, 3, 1, 2, 1, 2,
- /*ACO*/ 0, 3, 3, 3, 3, 3, 3, 3,
- /*ASV*/ 0, 3, 1, 3, 1, 1, 1, 3,
+ /*ACO*/ 0, 3, 3, 3, 3, 3, 3, 3,
+ /*ASV*/ 0, 3, 1, 3, 1, 1, 1, 3,
/*ASO*/ 0, 3, 1, 3, 1, 1, 3, 3,
};
private byte lastCharClass;
private int[] freqCounter = new int[FREQ_CAT_NUM];
-
+
public Latin1Prober()
{
Reset();
}
- public override string GetCharsetName()
+ public override string GetCharsetName()
{
return "windows-1252";
}
-
+
public override void Reset()
{
state = ProbingState.Detecting;
@@ -129,12 +129,12 @@ namespace UniversalDetector.Core
for (int i = 0; i < FREQ_CAT_NUM; i++)
freqCounter[i] = 0;
}
-
+
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
byte[] newbuf = FilterWithEnglishLetters(buf, offset, len);
byte charClass, freq;
-
+
for (int i = 0; i < newbuf.Length; i++) {
charClass = Latin1_CharToClass[newbuf[i]];
freq = Latin1ClassModel[lastCharClass * CLASS_NUM + charClass];
@@ -152,21 +152,21 @@ namespace UniversalDetector.Core
{
if (state == ProbingState.NotMe)
return 0.01f;
-
+
float confidence = 0.0f;
int total = 0;
for (int i = 0; i < FREQ_CAT_NUM; i++) {
total += freqCounter[i];
}
-
+
if (total <= 0) {
confidence = 0.0f;
} else {
confidence = freqCounter[3] * 1.0f / total;
confidence -= freqCounter[1] * 20.0f / total;
}
-
- // lower the confidence of latin1 so that other more accurate detector
+
+ // lower the confidence of latin1 so that other more accurate detector
// can take priority.
return confidence < 0.0f ? 0.0f : confidence * 0.5f;
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSGroupProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSGroupProber.cs
index abf49aacd..b4f6928a4 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSGroupProber.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSGroupProber.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -46,13 +46,13 @@ namespace UniversalDetector.Core
public class MBCSGroupProber : CharsetProber
{
private const int PROBERS_NUM = 7;
- private readonly static string[] ProberName =
+ private readonly static string[] ProberName =
{ "UTF8", "SJIS", "EUCJP", "GB18030", "EUCKR", "Big5", "EUCTW" };
private CharsetProber[] probers = new CharsetProber[PROBERS_NUM];
private bool[] isActive = new bool[PROBERS_NUM];
private int bestGuess;
private int activeNum;
-
+
public MBCSGroupProber()
{
probers[0] = new UTF8Prober();
@@ -62,7 +62,7 @@ namespace UniversalDetector.Core
probers[4] = new EUCKRProber();
probers[5] = new Big5Prober();
probers[6] = new EUCTWProber();
- Reset();
+ Reset();
}
public override string GetCharsetName()
@@ -99,7 +99,7 @@ namespace UniversalDetector.Core
//assume previous is not ascii, it will do no harm except add some noise
bool keepNext = true;
int max = offset + len;
-
+
for (int i = offset; i < max; i++) {
if ((buf[i] & 0x80) != 0) {
highbyteBuf[hptr++] = buf[i];
@@ -112,9 +112,9 @@ namespace UniversalDetector.Core
}
}
}
-
+
ProbingState st = ProbingState.NotMe;
-
+
for (int i = 0; i < probers.Length; i++) {
if (!isActive[i])
continue;
@@ -139,7 +139,7 @@ namespace UniversalDetector.Core
{
float bestConf = 0.0f;
float cf = 0.0f;
-
+
if (state == ProbingState.FoundIt) {
return 0.99f;
} else if (state == ProbingState.NotMe) {
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSSM.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSSM.cs
index 7aa8581bc..65e04292a 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSSM.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSSM.cs
@@ -42,79 +42,79 @@ namespace UniversalDetector.Core
{
private readonly static int[] UTF8_cls = {
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
- BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
- BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 40 - 47
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 48 - 4f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 50 - 57
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 58 - 5f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 60 - 67
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 68 - 6f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 70 - 77
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 78 - 7f
- BitPackage.Pack4bits(2,2,2,2,3,3,3,3), // 80 - 87
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 88 - 8f
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 90 - 97
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 98 - 9f
- BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // a0 - a7
- BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // a8 - af
- BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // b0 - b7
- BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // b8 - bf
- BitPackage.Pack4bits(0,0,6,6,6,6,6,6), // c0 - c7
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c8 - cf
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d0 - d7
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d8 - df
- BitPackage.Pack4bits(7,8,8,8,8,8,8,8), // e0 - e7
- BitPackage.Pack4bits(8,8,8,8,8,9,8,8), // e8 - ef
- BitPackage.Pack4bits(10,11,11,11,11,11,11,11), // f0 - f7
- BitPackage.Pack4bits(12,13,13,13,14,15,0,0) // f8 - ff
+ BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
+ BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 40 - 47
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 48 - 4f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 50 - 57
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 58 - 5f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 60 - 67
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 68 - 6f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 70 - 77
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 78 - 7f
+ BitPackage.Pack4bits(2,2,2,2,3,3,3,3), // 80 - 87
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 88 - 8f
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 90 - 97
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 98 - 9f
+ BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // a0 - a7
+ BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // a8 - af
+ BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // b0 - b7
+ BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // b8 - bf
+ BitPackage.Pack4bits(0,0,6,6,6,6,6,6), // c0 - c7
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c8 - cf
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d0 - d7
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d8 - df
+ BitPackage.Pack4bits(7,8,8,8,8,8,8,8), // e0 - e7
+ BitPackage.Pack4bits(8,8,8,8,8,9,8,8), // e8 - ef
+ BitPackage.Pack4bits(10,11,11,11,11,11,11,11), // f0 - f7
+ BitPackage.Pack4bits(12,13,13,13,14,15,0,0) // f8 - ff
};
private readonly static int[] UTF8_st = {
- BitPackage.Pack4bits(ERROR,START,ERROR,ERROR,ERROR,ERROR, 12, 10),//00-07
- BitPackage.Pack4bits( 9, 11, 8, 7, 6, 5, 4, 3),//08-0f
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//10-17
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//18-1f
- BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//20-27
- BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//28-2f
- BitPackage.Pack4bits(ERROR,ERROR, 5, 5, 5, 5,ERROR,ERROR),//30-37
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//38-3f
- BitPackage.Pack4bits(ERROR,ERROR,ERROR, 5, 5, 5,ERROR,ERROR),//40-47
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//48-4f
- BitPackage.Pack4bits(ERROR,ERROR, 7, 7, 7, 7,ERROR,ERROR),//50-57
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//58-5f
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 7, 7,ERROR,ERROR),//60-67
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//68-6f
- BitPackage.Pack4bits(ERROR,ERROR, 9, 9, 9, 9,ERROR,ERROR),//70-77
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//78-7f
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR, 9,ERROR,ERROR),//80-87
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//88-8f
- BitPackage.Pack4bits(ERROR,ERROR, 12, 12, 12, 12,ERROR,ERROR),//90-97
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//98-9f
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR, 12,ERROR,ERROR),//a0-a7
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//a8-af
- BitPackage.Pack4bits(ERROR,ERROR, 12, 12, 12,ERROR,ERROR,ERROR),//b0-b7
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//b8-bf
- BitPackage.Pack4bits(ERROR,ERROR,START,START,START,START,ERROR,ERROR),//c0-c7
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR) //c8-cf
+ BitPackage.Pack4bits(ERROR,START,ERROR,ERROR,ERROR,ERROR, 12, 10),//00-07
+ BitPackage.Pack4bits( 9, 11, 8, 7, 6, 5, 4, 3),//08-0f
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//10-17
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//18-1f
+ BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//20-27
+ BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//28-2f
+ BitPackage.Pack4bits(ERROR,ERROR, 5, 5, 5, 5,ERROR,ERROR),//30-37
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//38-3f
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR, 5, 5, 5,ERROR,ERROR),//40-47
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//48-4f
+ BitPackage.Pack4bits(ERROR,ERROR, 7, 7, 7, 7,ERROR,ERROR),//50-57
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//58-5f
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 7, 7,ERROR,ERROR),//60-67
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//68-6f
+ BitPackage.Pack4bits(ERROR,ERROR, 9, 9, 9, 9,ERROR,ERROR),//70-77
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//78-7f
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR, 9,ERROR,ERROR),//80-87
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//88-8f
+ BitPackage.Pack4bits(ERROR,ERROR, 12, 12, 12, 12,ERROR,ERROR),//90-97
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//98-9f
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR, 12,ERROR,ERROR),//a0-a7
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//a8-af
+ BitPackage.Pack4bits(ERROR,ERROR, 12, 12, 12,ERROR,ERROR,ERROR),//b0-b7
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//b8-bf
+ BitPackage.Pack4bits(ERROR,ERROR,START,START,START,START,ERROR,ERROR),//c0-c7
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR) //c8-cf
};
- private readonly static int[] UTF8CharLenTable =
+ private readonly static int[] UTF8CharLenTable =
{0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6 };
-
+
public UTF8SMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UTF8_cls),
16,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UTF8_st),
UTF8CharLenTable, "UTF-8")
@@ -122,68 +122,68 @@ namespace UniversalDetector.Core
}
}
-
+
public class GB18030SMModel : SMModel
{
private readonly static int[] GB18030_cls = {
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
- BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
- BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 30 - 37
- BitPackage.Pack4bits(3,3,1,1,1,1,1,1), // 38 - 3f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
- BitPackage.Pack4bits(2,2,2,2,2,2,2,4), // 78 - 7f
- BitPackage.Pack4bits(5,6,6,6,6,6,6,6), // 80 - 87
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 88 - 8f
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 90 - 97
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 98 - 9f
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // a0 - a7
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // a8 - af
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // b0 - b7
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // b8 - bf
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c0 - c7
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c8 - cf
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d0 - d7
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d8 - df
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // e0 - e7
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // e8 - ef
- BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // f0 - f7
- BitPackage.Pack4bits(6,6,6,6,6,6,6,0) // f8 - ff
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
+ BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
+ BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 30 - 37
+ BitPackage.Pack4bits(3,3,1,1,1,1,1,1), // 38 - 3f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,4), // 78 - 7f
+ BitPackage.Pack4bits(5,6,6,6,6,6,6,6), // 80 - 87
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 88 - 8f
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 90 - 97
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 98 - 9f
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // a0 - a7
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // a8 - af
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // b0 - b7
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // b8 - bf
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c0 - c7
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c8 - cf
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d0 - d7
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d8 - df
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // e0 - e7
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // e8 - ef
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // f0 - f7
+ BitPackage.Pack4bits(6,6,6,6,6,6,6,0) // f8 - ff
};
private readonly static int[] GB18030_st = {
- BitPackage.Pack4bits(ERROR,START,START,START,START,START, 3,ERROR),//00-07
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//08-0f
- BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ERROR,ERROR,START),//10-17
- BitPackage.Pack4bits( 4,ERROR,START,START,ERROR,ERROR,ERROR,ERROR),//18-1f
- BitPackage.Pack4bits(ERROR,ERROR, 5,ERROR,ERROR,ERROR,ITSME,ERROR),//20-27
- BitPackage.Pack4bits(ERROR,ERROR,START,START,START,START,START,START) //28-2f
+ BitPackage.Pack4bits(ERROR,START,START,START,START,START, 3,ERROR),//00-07
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//08-0f
+ BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ERROR,ERROR,START),//10-17
+ BitPackage.Pack4bits( 4,ERROR,START,START,ERROR,ERROR,ERROR,ERROR),//18-1f
+ BitPackage.Pack4bits(ERROR,ERROR, 5,ERROR,ERROR,ERROR,ITSME,ERROR),//20-27
+ BitPackage.Pack4bits(ERROR,ERROR,START,START,START,START,START,START) //28-2f
};
- // To be accurate, the length of class 6 can be either 2 or 4.
- // But it is not necessary to discriminate between the two since
- // it is used for frequency analysis only, and we are validating
- // each code range there as well. So it is safe to set it to be
- // 2 here.
+ // To be accurate, the length of class 6 can be either 2 or 4.
+ // But it is not necessary to discriminate between the two since
+ // it is used for frequency analysis only, and we are validating
+ // each code range there as well. So it is safe to set it to be
+ // 2 here.
private readonly static int[] GB18030CharLenTable = {0, 1, 1, 1, 1, 1, 2};
-
+
public GB18030SMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, GB18030_cls),
7,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, GB18030_st),
GB18030CharLenTable, "GB18030")
@@ -191,60 +191,60 @@ namespace UniversalDetector.Core
}
}
-
+
public class BIG5SMModel : SMModel
{
private readonly static int[] BIG5_cls = {
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
- BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
- BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
- BitPackage.Pack4bits(2,2,2,2,2,2,2,1), // 78 - 7f
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 80 - 87
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 88 - 8f
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 90 - 97
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 98 - 9f
- BitPackage.Pack4bits(4,3,3,3,3,3,3,3), // a0 - a7
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // a8 - af
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // b0 - b7
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // b8 - bf
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c0 - c7
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c8 - cf
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d0 - d7
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d8 - df
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e8 - ef
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // f0 - f7
- BitPackage.Pack4bits(3,3,3,3,3,3,3,0) // f8 - ff
+ BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
+ BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,1), // 78 - 7f
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 80 - 87
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 88 - 8f
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 90 - 97
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 98 - 9f
+ BitPackage.Pack4bits(4,3,3,3,3,3,3,3), // a0 - a7
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // a8 - af
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // b0 - b7
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // b8 - bf
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c0 - c7
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c8 - cf
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d0 - d7
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d8 - df
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e8 - ef
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // f0 - f7
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,0) // f8 - ff
};
private readonly static int[] BIG5_st = {
- BitPackage.Pack4bits(ERROR,START,START, 3,ERROR,ERROR,ERROR,ERROR),//00-07
- BitPackage.Pack4bits(ERROR,ERROR,ITSME,ITSME,ITSME,ITSME,ITSME,ERROR),//08-0f
- BitPackage.Pack4bits(ERROR,START,START,START,START,START,START,START) //10-17
+ BitPackage.Pack4bits(ERROR,START,START, 3,ERROR,ERROR,ERROR,ERROR),//00-07
+ BitPackage.Pack4bits(ERROR,ERROR,ITSME,ITSME,ITSME,ITSME,ITSME,ERROR),//08-0f
+ BitPackage.Pack4bits(ERROR,START,START,START,START,START,START,START) //10-17
};
private readonly static int[] BIG5CharLenTable = {0, 1, 1, 2, 0};
-
+
public BIG5SMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, BIG5_cls),
5,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, BIG5_st),
BIG5CharLenTable, "Big5")
@@ -252,63 +252,63 @@ namespace UniversalDetector.Core
}
}
-
+
public class EUCJPSMModel : SMModel
{
private readonly static int[] EUCJP_cls = {
- //BitPacket.Pack4bits(5,4,4,4,4,4,4,4), // 00 - 07
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 00 - 07
- BitPackage.Pack4bits(4,4,4,4,4,4,5,5), // 08 - 0f
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 10 - 17
- BitPackage.Pack4bits(4,4,4,5,4,4,4,4), // 18 - 1f
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 20 - 27
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 28 - 2f
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 30 - 37
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 38 - 3f
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 40 - 47
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 48 - 4f
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 50 - 57
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 58 - 5f
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 60 - 67
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 68 - 6f
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 70 - 77
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 78 - 7f
- BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 80 - 87
- BitPackage.Pack4bits(5,5,5,5,5,5,1,3), // 88 - 8f
- BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 90 - 97
- BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 98 - 9f
- BitPackage.Pack4bits(5,2,2,2,2,2,2,2), // a0 - a7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,5) // f8 - ff
+ //BitPacket.Pack4bits(5,4,4,4,4,4,4,4), // 00 - 07
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 00 - 07
+ BitPackage.Pack4bits(4,4,4,4,4,4,5,5), // 08 - 0f
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 10 - 17
+ BitPackage.Pack4bits(4,4,4,5,4,4,4,4), // 18 - 1f
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 20 - 27
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 28 - 2f
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 30 - 37
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 38 - 3f
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 40 - 47
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 48 - 4f
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 50 - 57
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 58 - 5f
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 60 - 67
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 68 - 6f
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 70 - 77
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 78 - 7f
+ BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 80 - 87
+ BitPackage.Pack4bits(5,5,5,5,5,5,1,3), // 88 - 8f
+ BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 90 - 97
+ BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 98 - 9f
+ BitPackage.Pack4bits(5,2,2,2,2,2,2,2), // a0 - a7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,5) // f8 - ff
};
private readonly static int[] EUCJP_st = {
- BitPackage.Pack4bits( 3, 4, 3, 5,START,ERROR,ERROR,ERROR),//00-07
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
- BitPackage.Pack4bits(ITSME,ITSME,START,ERROR,START,ERROR,ERROR,ERROR),//10-17
- BitPackage.Pack4bits(ERROR,ERROR,START,ERROR,ERROR,ERROR, 3,ERROR),//18-1f
- BitPackage.Pack4bits( 3,ERROR,ERROR,ERROR,START,START,START,START) //20-27
+ BitPackage.Pack4bits( 3, 4, 3, 5,START,ERROR,ERROR,ERROR),//00-07
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
+ BitPackage.Pack4bits(ITSME,ITSME,START,ERROR,START,ERROR,ERROR,ERROR),//10-17
+ BitPackage.Pack4bits(ERROR,ERROR,START,ERROR,ERROR,ERROR, 3,ERROR),//18-1f
+ BitPackage.Pack4bits( 3,ERROR,ERROR,ERROR,START,START,START,START) //20-27
};
private readonly static int[] EUCJPCharLenTable = { 2, 2, 2, 3, 1, 0 };
-
+
public EUCJPSMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCJP_cls),
6,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCJP_st),
EUCJPCharLenTable, "EUC-JP")
@@ -316,60 +316,60 @@ namespace UniversalDetector.Core
}
}
-
+
public class EUCKRSMModel : SMModel
{
private readonly static int[] EUCKR_cls = {
- //BitPacket.Pack4bits(0,1,1,1,1,1,1,1), // 00 - 07
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
- BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
- BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 40 - 47
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 48 - 4f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 50 - 57
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 58 - 5f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 60 - 67
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 68 - 6f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 70 - 77
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 78 - 7f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
- BitPackage.Pack4bits(0,2,2,2,2,2,2,2), // a0 - a7
- BitPackage.Pack4bits(2,2,2,2,2,3,3,3), // a8 - af
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
- BitPackage.Pack4bits(2,3,2,2,2,2,2,2), // c8 - cf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,0) // f8 - ff
+ //BitPacket.Pack4bits(0,1,1,1,1,1,1,1), // 00 - 07
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
+ BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
+ BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 40 - 47
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 48 - 4f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 50 - 57
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 58 - 5f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 60 - 67
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 68 - 6f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 70 - 77
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 78 - 7f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
+ BitPackage.Pack4bits(0,2,2,2,2,2,2,2), // a0 - a7
+ BitPackage.Pack4bits(2,2,2,2,2,3,3,3), // a8 - af
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
+ BitPackage.Pack4bits(2,3,2,2,2,2,2,2), // c8 - cf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,0) // f8 - ff
};
private readonly static int[] EUCKR_st = {
- BitPackage.Pack4bits(ERROR,START, 3,ERROR,ERROR,ERROR,ERROR,ERROR),//00-07
- BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ERROR,ERROR,START,START) //08-0f
+ BitPackage.Pack4bits(ERROR,START, 3,ERROR,ERROR,ERROR,ERROR,ERROR),//00-07
+ BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ERROR,ERROR,START,START) //08-0f
};
private readonly static int[] EUCKRCharLenTable = { 0, 1, 2, 0 };
-
+
public EUCKRSMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCKR_cls),
4,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCKR_st),
EUCKRCharLenTable, "EUC-KR")
@@ -377,127 +377,127 @@ namespace UniversalDetector.Core
}
}
-
+
public class EUCTWSMModel : SMModel
{
private readonly static int[] EUCTW_cls = {
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 00 - 07
- BitPackage.Pack4bits(2,2,2,2,2,2,0,0), // 08 - 0f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 10 - 17
- BitPackage.Pack4bits(2,2,2,0,2,2,2,2), // 18 - 1f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 20 - 27
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 28 - 2f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 30 - 37
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 38 - 3f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 78 - 7f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
- BitPackage.Pack4bits(0,0,0,0,0,0,6,0), // 88 - 8f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
- BitPackage.Pack4bits(0,3,4,4,4,4,4,4), // a0 - a7
- BitPackage.Pack4bits(5,5,1,1,1,1,1,1), // a8 - af
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b0 - b7
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b8 - bf
- BitPackage.Pack4bits(1,1,3,1,3,3,3,3), // c0 - c7
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c8 - cf
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d0 - d7
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d8 - df
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e8 - ef
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // f0 - f7
- BitPackage.Pack4bits(3,3,3,3,3,3,3,0) // f8 - ff
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 00 - 07
+ BitPackage.Pack4bits(2,2,2,2,2,2,0,0), // 08 - 0f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 10 - 17
+ BitPackage.Pack4bits(2,2,2,0,2,2,2,2), // 18 - 1f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 20 - 27
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 28 - 2f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 30 - 37
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 38 - 3f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 78 - 7f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
+ BitPackage.Pack4bits(0,0,0,0,0,0,6,0), // 88 - 8f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
+ BitPackage.Pack4bits(0,3,4,4,4,4,4,4), // a0 - a7
+ BitPackage.Pack4bits(5,5,1,1,1,1,1,1), // a8 - af
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b0 - b7
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b8 - bf
+ BitPackage.Pack4bits(1,1,3,1,3,3,3,3), // c0 - c7
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c8 - cf
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d0 - d7
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d8 - df
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e8 - ef
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // f0 - f7
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,0) // f8 - ff
};
private readonly static int[] EUCTW_st = {
- BitPackage.Pack4bits(ERROR,ERROR,START, 3, 3, 3, 4,ERROR),//00-07
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//08-0f
- BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ERROR,START,ERROR),//10-17
- BitPackage.Pack4bits(START,START,START,ERROR,ERROR,ERROR,ERROR,ERROR),//18-1f
- BitPackage.Pack4bits( 5,ERROR,ERROR,ERROR,START,ERROR,START,START),//20-27
- BitPackage.Pack4bits(START,ERROR,START,START,START,START,START,START) //28-2f
+ BitPackage.Pack4bits(ERROR,ERROR,START, 3, 3, 3, 4,ERROR),//00-07
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//08-0f
+ BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ERROR,START,ERROR),//10-17
+ BitPackage.Pack4bits(START,START,START,ERROR,ERROR,ERROR,ERROR,ERROR),//18-1f
+ BitPackage.Pack4bits( 5,ERROR,ERROR,ERROR,START,ERROR,START,START),//20-27
+ BitPackage.Pack4bits(START,ERROR,START,START,START,START,START,START) //28-2f
};
private readonly static int[] EUCTWCharLenTable = { 0, 0, 1, 2, 2, 2, 3 };
-
+
public EUCTWSMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCTW_cls),
7,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCTW_st),
EUCTWCharLenTable, "EUC-TW")
{
}
- }
-
+ }
+
public class SJISSMModel : SMModel
{
private readonly static int[] SJIS_cls = {
- //BitPacket.Pack4bits(0,1,1,1,1,1,1,1), // 00 - 07
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
- BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
- BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
- BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
- BitPackage.Pack4bits(2,2,2,2,2,2,2,1), // 78 - 7f
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 80 - 87
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 88 - 8f
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 90 - 97
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 98 - 9f
- //0xa0 is illegal in sjis encoding, but some pages does
+ //BitPacket.Pack4bits(0,1,1,1,1,1,1,1), // 00 - 07
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
+ BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
+ BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
+ BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,1), // 78 - 7f
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 80 - 87
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 88 - 8f
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 90 - 97
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 98 - 9f
+ //0xa0 is illegal in sjis encoding, but some pages does
//contain such byte. We need to be more error forgiven.
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
- BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
- BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7
- BitPackage.Pack4bits(3,3,3,3,3,4,4,4), // e8 - ef
- BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // f0 - f7
- BitPackage.Pack4bits(4,4,4,4,4,0,0,0) // f8 - ff
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
+ BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
+ BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7
+ BitPackage.Pack4bits(3,3,3,3,3,4,4,4), // e8 - ef
+ BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // f0 - f7
+ BitPackage.Pack4bits(4,4,4,4,4,0,0,0) // f8 - ff
};
private readonly static int[] SJIS_st = {
- BitPackage.Pack4bits(ERROR,START,START, 3,ERROR,ERROR,ERROR,ERROR),//00-07
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
- BitPackage.Pack4bits(ITSME,ITSME,ERROR,ERROR,START,START,START,START) //10-17
+ BitPackage.Pack4bits(ERROR,START,START, 3,ERROR,ERROR,ERROR,ERROR),//00-07
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
+ BitPackage.Pack4bits(ITSME,ITSME,ERROR,ERROR,START,START,START,START) //10-17
};
private readonly static int[] SJISCharLenTable = { 0, 1, 1, 2, 0, 0 };
-
+
public SJISSMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, SJIS_cls),
6,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, SJIS_st),
SJISCharLenTable, "Shift_JIS")
@@ -505,64 +505,64 @@ namespace UniversalDetector.Core
}
}
-
+
public class UCS2BESMModel : SMModel
{
private readonly static int[] UCS2BE_cls = {
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 00 - 07
- BitPackage.Pack4bits(0,0,1,0,0,2,0,0), // 08 - 0f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
- BitPackage.Pack4bits(0,0,0,3,0,0,0,0), // 18 - 1f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
- BitPackage.Pack4bits(0,3,3,3,3,3,0,0), // 28 - 2f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a0 - a7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a8 - af
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b0 - b7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b8 - bf
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c0 - c7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c8 - cf
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d0 - d7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d8 - df
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7
- BitPackage.Pack4bits(0,0,0,0,0,0,4,5) // f8 - ff
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 00 - 07
+ BitPackage.Pack4bits(0,0,1,0,0,2,0,0), // 08 - 0f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
+ BitPackage.Pack4bits(0,0,0,3,0,0,0,0), // 18 - 1f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
+ BitPackage.Pack4bits(0,3,3,3,3,3,0,0), // 28 - 2f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a0 - a7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a8 - af
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b0 - b7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b8 - bf
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c0 - c7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c8 - cf
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d0 - d7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d8 - df
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7
+ BitPackage.Pack4bits(0,0,0,0,0,0,4,5) // f8 - ff
};
private readonly static int[] UCS2BE_st = {
- BitPackage.Pack4bits( 5, 7, 7,ERROR, 4, 3,ERROR,ERROR),//00-07
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
- BitPackage.Pack4bits(ITSME,ITSME, 6, 6, 6, 6,ERROR,ERROR),//10-17
- BitPackage.Pack4bits( 6, 6, 6, 6, 6,ITSME, 6, 6),//18-1f
- BitPackage.Pack4bits( 6, 6, 6, 6, 5, 7, 7,ERROR),//20-27
- BitPackage.Pack4bits( 5, 8, 6, 6,ERROR, 6, 6, 6),//28-2f
- BitPackage.Pack4bits( 6, 6, 6, 6,ERROR,ERROR,START,START) //30-37
+ BitPackage.Pack4bits( 5, 7, 7,ERROR, 4, 3,ERROR,ERROR),//00-07
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
+ BitPackage.Pack4bits(ITSME,ITSME, 6, 6, 6, 6,ERROR,ERROR),//10-17
+ BitPackage.Pack4bits( 6, 6, 6, 6, 6,ITSME, 6, 6),//18-1f
+ BitPackage.Pack4bits( 6, 6, 6, 6, 5, 7, 7,ERROR),//20-27
+ BitPackage.Pack4bits( 5, 8, 6, 6,ERROR, 6, 6, 6),//28-2f
+ BitPackage.Pack4bits( 6, 6, 6, 6,ERROR,ERROR,START,START) //30-37
};
private readonly static int[] UCS2BECharLenTable = { 2, 2, 2, 0, 2, 2 };
-
+
public UCS2BESMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UCS2BE_cls),
6,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UCS2BE_st),
UCS2BECharLenTable, "UTF-16BE")
@@ -570,64 +570,64 @@ namespace UniversalDetector.Core
}
}
-
+
public class UCS2LESMModel : SMModel
{
private readonly static int[] UCS2LE_cls = {
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 00 - 07
- BitPackage.Pack4bits(0,0,1,0,0,2,0,0), // 08 - 0f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
- BitPackage.Pack4bits(0,0,0,3,0,0,0,0), // 18 - 1f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
- BitPackage.Pack4bits(0,3,3,3,3,3,0,0), // 28 - 2f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a0 - a7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a8 - af
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b0 - b7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b8 - bf
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c0 - c7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c8 - cf
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d0 - d7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d8 - df
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef
- BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7
- BitPackage.Pack4bits(0,0,0,0,0,0,4,5) // f8 - ff
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 00 - 07
+ BitPackage.Pack4bits(0,0,1,0,0,2,0,0), // 08 - 0f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
+ BitPackage.Pack4bits(0,0,0,3,0,0,0,0), // 18 - 1f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
+ BitPackage.Pack4bits(0,3,3,3,3,3,0,0), // 28 - 2f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a0 - a7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a8 - af
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b0 - b7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b8 - bf
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c0 - c7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c8 - cf
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d0 - d7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d8 - df
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef
+ BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7
+ BitPackage.Pack4bits(0,0,0,0,0,0,4,5) // f8 - ff
};
private readonly static int[] UCS2LE_st = {
- BitPackage.Pack4bits( 6, 6, 7, 6, 4, 3,ERROR,ERROR),//00-07
- BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
- BitPackage.Pack4bits(ITSME,ITSME, 5, 5, 5,ERROR,ITSME,ERROR),//10-17
- BitPackage.Pack4bits( 5, 5, 5,ERROR, 5,ERROR, 6, 6),//18-1f
- BitPackage.Pack4bits( 7, 6, 8, 8, 5, 5, 5,ERROR),//20-27
- BitPackage.Pack4bits( 5, 5, 5,ERROR,ERROR,ERROR, 5, 5),//28-2f
- BitPackage.Pack4bits( 5, 5, 5,ERROR, 5,ERROR,START,START) //30-37
+ BitPackage.Pack4bits( 6, 6, 7, 6, 4, 3,ERROR,ERROR),//00-07
+ BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
+ BitPackage.Pack4bits(ITSME,ITSME, 5, 5, 5,ERROR,ITSME,ERROR),//10-17
+ BitPackage.Pack4bits( 5, 5, 5,ERROR, 5,ERROR, 6, 6),//18-1f
+ BitPackage.Pack4bits( 7, 6, 8, 8, 5, 5, 5,ERROR),//20-27
+ BitPackage.Pack4bits( 5, 5, 5,ERROR,ERROR,ERROR, 5, 5),//28-2f
+ BitPackage.Pack4bits( 5, 5, 5,ERROR, 5,ERROR,START,START) //30-37
};
private readonly static int[] UCS2LECharLenTable = { 2, 2, 2, 2, 2, 2 };
-
+
public UCS2LESMModel() : base(
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UCS2LE_cls),
6,
- new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
- BitPackage.SHIFT_MASK_4BITS,
+ new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
+ BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UCS2LE_st),
UCS2LECharLenTable, "UTF-16LE")
@@ -635,6 +635,6 @@ namespace UniversalDetector.Core
}
}
-
-
+
+
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCSGroupProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCSGroupProber.cs
index d8f496474..640b19c4a 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCSGroupProber.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCSGroupProber.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -43,11 +43,11 @@ namespace UniversalDetector.Core
public class SBCSGroupProber : CharsetProber
{
private const int PROBERS_NUM = 13;
- private CharsetProber[] probers = new CharsetProber[PROBERS_NUM];
+ private CharsetProber[] probers = new CharsetProber[PROBERS_NUM];
private bool[] isActive = new bool[PROBERS_NUM];
private int bestGuess;
private int activeNum;
-
+
public SBCSGroupProber()
{
probers[0] = new SingleByteCharSetProber(new Win1251Model());
@@ -62,24 +62,24 @@ namespace UniversalDetector.Core
probers[9] = new SingleByteCharSetProber(new Win1251BulgarianModel());
HebrewProber hebprober = new HebrewProber();
probers[10] = hebprober;
- // Logical
- probers[11] = new SingleByteCharSetProber(new Win1255Model(), false, hebprober);
+ // Logical
+ probers[11] = new SingleByteCharSetProber(new Win1255Model(), false, hebprober);
// Visual
- probers[12] = new SingleByteCharSetProber(new Win1255Model(), true, hebprober);
+ probers[12] = new SingleByteCharSetProber(new Win1255Model(), true, hebprober);
hebprober.SetModelProbers(probers[11], probers[12]);
- // disable latin2 before latin1 is available, otherwise all latin1
+ // disable latin2 before latin1 is available, otherwise all latin1
// will be detected as latin2 because of their similarity.
//probers[13] = new SingleByteCharSetProber(new Latin2HungarianModel());
- //probers[14] = new SingleByteCharSetProber(new Win1250HungarianModel());
+ //probers[14] = new SingleByteCharSetProber(new Win1250HungarianModel());
Reset();
}
-
- public override ProbingState HandleData(byte[] buf, int offset, int len)
+
+ public override ProbingState HandleData(byte[] buf, int offset, int len)
{
ProbingState st = ProbingState.NotMe;
-
+
//apply filter to original buffer, and we got new buffer back
- //depend on what script it is, we will feed them the new buffer
+ //depend on what script it is, we will feed them the new buffer
//we got after applying proper filter
//this is done without any consideration to KeepEnglishLetters
//of each prober since as of now, there are no probers here which
@@ -87,12 +87,12 @@ namespace UniversalDetector.Core
byte[] newBuf = FilterWithoutEnglishLetters(buf, offset, len);
if (newBuf.Length == 0)
return state; // Nothing to see here, move on.
-
+
for (int i = 0; i < PROBERS_NUM; i++) {
if (!isActive[i])
continue;
st = probers[i].HandleData(newBuf, 0, newBuf.Length);
-
+
if (st == ProbingState.FoundIt) {
bestGuess = i;
state = ProbingState.FoundIt;
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCharsetProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCharsetProber.cs
index 5a3496075..65c0f8ca8 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCharsetProber.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCharsetProber.cs
@@ -20,7 +20,7 @@
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
- * Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
+ * Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@@ -51,11 +51,11 @@ namespace UniversalDetector.Core
private const int NUMBER_OF_SEQ_CAT = 4;
private const int POSITIVE_CAT = NUMBER_OF_SEQ_CAT-1;
private const int NEGATIVE_CAT = 0;
-
+
protected SequenceModel model;
-
- // true if we need to reverse every pair in the model lookup
- bool reversed;
+
+ // true if we need to reverse every pair in the model lookup
+ bool reversed;
// char order of last character
byte lastOrder;
@@ -63,38 +63,38 @@ namespace UniversalDetector.Core
int totalSeqs;
int totalChar;
int[] seqCounters = new int[NUMBER_OF_SEQ_CAT];
-
+
// characters that fall in our sampling range
int freqChar;
-
+
// Optional auxiliary prober for name decision. created and destroyed by the GroupProber
- CharsetProber nameProber;
-
- public SingleByteCharSetProber(SequenceModel model)
+ CharsetProber nameProber;
+
+ public SingleByteCharSetProber(SequenceModel model)
: this(model, false, null)
{
-
+
}
-
- public SingleByteCharSetProber(SequenceModel model, bool reversed,
+
+ public SingleByteCharSetProber(SequenceModel model, bool reversed,
CharsetProber nameProber)
{
this.model = model;
this.reversed = reversed;
this.nameProber = nameProber;
- Reset();
+ Reset();
}
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
int max = offset + len;
-
+
for (int i = offset; i < max; i++) {
byte order = model.GetOrder(buf[i]);
if (order < SYMBOL_CAT_ORDER)
totalChar++;
-
+
if (order < SAMPLE_SIZE) {
freqChar++;
@@ -120,7 +120,7 @@ namespace UniversalDetector.Core
}
return state;
}
-
+
public override void DumpStatus()
{
//Console.WriteLine(" SBCS: {0} [{1}]", GetConfidence(), GetCharsetName());
@@ -146,9 +146,9 @@ namespace UniversalDetector.Core
r = 0.99f;
return r;
}
- return 0.01f;
+ return 0.01f;
}
-
+
public override void Reset()
{
state = ProbingState.Detecting;
@@ -159,12 +159,12 @@ namespace UniversalDetector.Core
totalChar = 0;
freqChar = 0;
}
-
- public override string GetCharsetName()
+
+ public override string GetCharsetName()
{
return (nameProber == null) ? model.CharsetName
: nameProber.GetCharsetName();
}
-
+
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SJISProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SJISProber.cs
index 515cd2498..e1fbb873e 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SJISProber.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SJISProber.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -50,25 +50,25 @@ namespace UniversalDetector.Core
private SJISContextAnalyser contextAnalyser;
private SJISDistributionAnalyser distributionAnalyser;
private byte[] lastChar = new byte[2];
-
+
public SJISProber()
{
codingSM = new CodingStateMachine(new SJISSMModel());
distributionAnalyser = new SJISDistributionAnalyser();
- contextAnalyser = new SJISContextAnalyser();
+ contextAnalyser = new SJISContextAnalyser();
Reset();
}
-
+
public override string GetCharsetName()
{
- return "Shift-JIS";
+ return "Shift-JIS";
}
-
+
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
int codingState;
int max = offset + len;
-
+
for (int i = offset; i < max; i++) {
codingState = codingSM.NextState(buf[i]);
if (codingState == SMModel.ERROR) {
@@ -90,7 +90,7 @@ namespace UniversalDetector.Core
distributionAnalyser.HandleOneChar(buf, i-1, charLen);
}
}
- }
+ }
lastChar[0] = buf[max-1];
if (state == ProbingState.Detecting)
if (contextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
@@ -100,12 +100,12 @@ namespace UniversalDetector.Core
public override void Reset()
{
- codingSM.Reset();
+ codingSM.Reset();
state = ProbingState.Detecting;
contextAnalyser.Reset();
distributionAnalyser.Reset();
}
-
+
public override float GetConfidence()
{
float contxtCf = contextAnalyser.GetConfidence();
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SMModel.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SMModel.cs
index 2321ecad2..cb2f201aa 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SMModel.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SMModel.cs
@@ -52,21 +52,17 @@ namespace UniversalDetector.Core
public BitPackage classTable;
public BitPackage stateTable;
public int[] charLenTable;
-
+
private string name;
- public string Name {
- get { return name; }
- }
+ public string Name => name;
private int classFactor;
- public int ClassFactor {
- get { return classFactor; }
- }
+ public int ClassFactor => classFactor;
public SMModel(BitPackage classTable, int classFactor,
- BitPackage stateTable, int[] charLenTable, String name)
+ BitPackage stateTable, int[] charLenTable, string name)
{
this.classTable = classTable;
this.classFactor = classFactor;
@@ -74,10 +70,10 @@ namespace UniversalDetector.Core
this.charLenTable = charLenTable;
this.name = name;
}
-
+
public int GetClass(byte b)
- {
- return classTable.Unpack((int)b);
+ {
+ return classTable.Unpack((int)b);
}
- }
+ }
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SequenceModel.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SequenceModel.cs
index 9048796b5..b813dda76 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SequenceModel.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SequenceModel.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -39,42 +39,36 @@
using System;
namespace UniversalDetector.Core
-{
+{
public abstract class SequenceModel
{
// [256] table use to find a char's order
protected byte[] charToOrderMap;
-
- // [SAMPLE_SIZE][SAMPLE_SIZE] table to find a 2-char sequence's
- // frequency
+
+ // [SAMPLE_SIZE][SAMPLE_SIZE] table to find a 2-char sequence's
+ // frequency
protected byte[] precedenceMatrix;
-
+
// freqSeqs / totalSeqs
protected float typicalPositiveRatio;
- public float TypicalPositiveRatio {
- get { return typicalPositiveRatio; }
- }
-
+ public float TypicalPositiveRatio => typicalPositiveRatio;
+
// not used
protected bool keepEnglishLetter;
- public bool KeepEnglishLetter {
- get { return keepEnglishLetter; }
- }
-
- protected String charsetName;
+ public bool KeepEnglishLetter => keepEnglishLetter;
+
+ protected string charsetName;
+
+ public string CharsetName => charsetName;
- public string CharsetName {
- get { return charsetName; }
- }
-
public SequenceModel(
byte[] charToOrderMap,
byte[] precedenceMatrix,
float typicalPositiveRatio,
bool keepEnglishLetter,
- String charsetName)
+ string charsetName)
{
this.charToOrderMap = charToOrderMap;
this.precedenceMatrix = precedenceMatrix;
@@ -82,16 +76,16 @@ namespace UniversalDetector.Core
this.keepEnglishLetter = keepEnglishLetter;
this.charsetName = charsetName;
}
-
+
public byte GetOrder(byte b)
{
return charToOrderMap[b];
}
-
+
public byte GetPrecedence(int pos)
{
return precedenceMatrix[pos];
}
-
+
}
}
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UTF8Prober.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UTF8Prober.cs
index 084797c5e..a469e2a0c 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UTF8Prober.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UTF8Prober.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -46,11 +46,11 @@ namespace UniversalDetector.Core
public UTF8Prober()
{
- numOfMBChar = 0;
+ numOfMBChar = 0;
codingSM = new CodingStateMachine(new UTF8SMModel());
Reset();
}
-
+
public override string GetCharsetName() {
return "UTF-8";
}
@@ -66,7 +66,7 @@ namespace UniversalDetector.Core
{
int codingState = SMModel.START;
int max = offset + len;
-
+
for (int i = offset; i < max; i++) {
codingState = codingSM.NextState(buf[i]);
@@ -97,7 +97,7 @@ namespace UniversalDetector.Core
{
float unlike = 0.99f;
float confidence = 0.0f;
-
+
if (numOfMBChar < 6) {
for (int i = 0; i < numOfMBChar; i++)
unlike *= ONE_CHAR_PROB;
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs
index 0c9a4ee60..4dcb282cc 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -41,7 +41,7 @@ namespace UniversalDetector.Core
enum InputState { PureASCII=0, EscASCII=1, Highbyte=2 };
- public abstract class UniversalDetector
+ public abstract class UniversalDetector
{
protected const int FILTER_CHINESE_SIMPLIFIED = 1;
protected const int FILTER_CHINESE_TRADITIONAL = 2;
@@ -49,12 +49,12 @@ namespace UniversalDetector.Core
protected const int FILTER_KOREAN = 8;
protected const int FILTER_NON_CJK = 16;
protected const int FILTER_ALL = 31;
- protected static int FILTER_CHINESE =
+ protected static int FILTER_CHINESE =
FILTER_CHINESE_SIMPLIFIED | FILTER_CHINESE_TRADITIONAL;
- protected static int FILTER_CJK =
- FILTER_JAPANESE | FILTER_KOREAN | FILTER_CHINESE_SIMPLIFIED
+ protected static int FILTER_CJK =
+ FILTER_JAPANESE | FILTER_KOREAN | FILTER_CHINESE_SIMPLIFIED
| FILTER_CHINESE_TRADITIONAL;
-
+
protected const float SHORTCUT_THRESHOLD = 0.95f;
protected const float MINIMUM_THRESHOLD = 0.20f;
@@ -70,16 +70,16 @@ namespace UniversalDetector.Core
protected CharsetProber escCharsetProber;
protected string detectedCharset;
- public UniversalDetector(int languageFilter) {
+ public UniversalDetector(int languageFilter) {
this.start = true;
this.inputState = InputState.PureASCII;
- this.lastChar = 0x00;
+ this.lastChar = 0x00;
this.bestGuess = -1;
this.languageFilter = languageFilter;
}
public virtual void Feed(byte[] buf, int offset, int len)
- {
+ {
if (done) {
return;
}
@@ -125,7 +125,7 @@ namespace UniversalDetector.Core
}
for (int i = 0; i < len; i++) {
-
+
// other than 0xa0, if every other character is ascii, the page is ascii
if ((buf[i] & 0x80) != 0 && buf[i] != 0xA0) {
// we got a non-ascii byte (high-byte)
@@ -143,9 +143,9 @@ namespace UniversalDetector.Core
if (charsetProbers[1] == null)
charsetProbers[1] = new SBCSGroupProber();
if (charsetProbers[2] == null)
- charsetProbers[2] = new Latin1Prober();
+ charsetProbers[2] = new Latin1Prober();
}
- } else {
+ } else {
if (inputState == InputState.PureASCII &&
(buf[i] == 0x33 || (buf[i] == 0x7B && lastChar == 0x7E))) {
// found escape character or HZ "~{"
@@ -154,9 +154,9 @@ namespace UniversalDetector.Core
lastChar = buf[i];
}
}
-
+
ProbingState st = ProbingState.NotMe;
-
+
switch (inputState) {
case InputState.EscASCII:
if (escCharsetProber == null) {
@@ -172,18 +172,18 @@ namespace UniversalDetector.Core
for (int i = 0; i < PROBERS_NUM; i++) {
if (charsetProbers[i] != null) {
st = charsetProbers[i].HandleData(buf, offset, len);
- #if DEBUG
+ #if DEBUG
charsetProbers[i].DumpStatus();
- #endif
+ #endif
if (st == ProbingState.FoundIt) {
done = true;
detectedCharset = charsetProbers[i].GetCharsetName();
return;
- }
+ }
}
}
break;
- default:
+ default:
// pure ascii
break;
}
@@ -191,13 +191,13 @@ namespace UniversalDetector.Core
}
/// <summary>
- /// Notify detector that no further data is available.
+ /// Notify detector that no further data is available.
/// </summary>
public virtual void DataEnd()
{
if (!gotData) {
- // we haven't got any data yet, return immediately
- // caller program sometimes call DataEnd before anything has
+ // we haven't got any data yet, return immediately
+ // caller program sometimes call DataEnd before anything has
// been sent to detector
return;
}
@@ -206,7 +206,7 @@ namespace UniversalDetector.Core
done = true;
Report(detectedCharset, 1.0f);
return;
- }
+ }
if (inputState == InputState.Highbyte) {
float proberConfidence = 0.0f;
@@ -221,22 +221,22 @@ namespace UniversalDetector.Core
}
}
}
-
+
if (maxProberConfidence > MINIMUM_THRESHOLD) {
Report(charsetProbers[maxProber].GetCharsetName(), maxProberConfidence);
- }
-
+ }
+
} else if (inputState == InputState.PureASCII) {
Report("ASCII", 1.0f);
- }
+ }
}
/// <summary>
/// Clear internal state of charset detector.
- /// In the original interface this method is protected.
+ /// In the original interface this method is protected.
/// </summary>
- public virtual void Reset()
- {
+ public virtual void Reset()
+ {
done = false;
start = true;
detectedCharset = null;
@@ -250,7 +250,7 @@ namespace UniversalDetector.Core
if (charsetProbers[i] != null)
charsetProbers[i].Reset();
}
-
+
protected abstract void Report(string charset, float confidence);
}