diff options
| author | Erwin de Haan <EraYaN@users.noreply.github.com> | 2019-01-13 20:22:56 +0100 |
|---|---|---|
| committer | Erwin de Haan <EraYaN@users.noreply.github.com> | 2019-01-13 20:22:56 +0100 |
| commit | 25f0315e918cf6f8c26b1e435c236ff1dbcbc6a5 (patch) | |
| tree | 805191d28c22edcaf31ffb03ba355f5fbbc1c3da /Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs | |
| parent | 0efc699e3d4cef2cb5b36223873fa5ad98177d1c (diff) | |
Visual Studio Reformat: Emby.Server.Implementations Part T-T
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs')
| -rw-r--r-- | Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs | 134 |
1 files changed, 81 insertions, 53 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs index 4dcb282cc..28a50ea3e 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs @@ -39,7 +39,7 @@ namespace UniversalDetector.Core { - enum InputState { PureASCII=0, EscASCII=1, Highbyte=2 }; + enum InputState { PureASCII = 0, EscASCII = 1, Highbyte = 2 }; public abstract class UniversalDetector { @@ -70,7 +70,8 @@ namespace UniversalDetector.Core protected CharsetProber escCharsetProber; protected string detectedCharset; - public UniversalDetector(int languageFilter) { + public UniversalDetector(int languageFilter) + { this.start = true; this.inputState = InputState.PureASCII; this.lastChar = 0x00; @@ -80,7 +81,8 @@ namespace UniversalDetector.Core public virtual void Feed(byte[] buf, int offset, int len) { - if (done) { + if (done) + { return; } @@ -88,52 +90,60 @@ namespace UniversalDetector.Core gotData = true; // If the data starts with BOM, we know it is UTF - if (start) { + if (start) + { start = false; - if (len > 3) { - switch (buf[0]) { - case 0xEF: - if (0xBB == buf[1] && 0xBF == buf[2]) - detectedCharset = "UTF-8"; - break; - case 0xFE: - if (0xFF == buf[1] && 0x00 == buf[2] && 0x00 == buf[3]) - // FE FF 00 00 UCS-4, unusual octet order BOM (3412) - detectedCharset = "X-ISO-10646-UCS-4-3412"; - else if (0xFF == buf[1]) - detectedCharset = "UTF-16BE"; - break; - case 0x00: - if (0x00 == buf[1] && 0xFE == buf[2] && 0xFF == buf[3]) - detectedCharset = "UTF-32BE"; - else if (0x00 == buf[1] && 0xFF == buf[2] && 0xFE == buf[3]) - // 00 00 FF FE UCS-4, unusual octet order BOM (2143) - detectedCharset = "X-ISO-10646-UCS-4-2143"; - break; - case 0xFF: - if (0xFE == buf[1] && 0x00 == buf[2] && 0x00 == buf[3]) - detectedCharset = "UTF-32LE"; - else if (0xFE == buf[1]) - detectedCharset = "UTF-16LE"; - break; + if (len > 3) + { + switch (buf[0]) + { + case 0xEF: + if (0xBB == buf[1] && 0xBF == buf[2]) + detectedCharset = "UTF-8"; + break; + case 0xFE: + if (0xFF == buf[1] && 0x00 == buf[2] && 0x00 == buf[3]) + // FE FF 00 00 UCS-4, unusual octet order BOM (3412) + detectedCharset = "X-ISO-10646-UCS-4-3412"; + else if (0xFF == buf[1]) + detectedCharset = "UTF-16BE"; + break; + case 0x00: + if (0x00 == buf[1] && 0xFE == buf[2] && 0xFF == buf[3]) + detectedCharset = "UTF-32BE"; + else if (0x00 == buf[1] && 0xFF == buf[2] && 0xFE == buf[3]) + // 00 00 FF FE UCS-4, unusual octet order BOM (2143) + detectedCharset = "X-ISO-10646-UCS-4-2143"; + break; + case 0xFF: + if (0xFE == buf[1] && 0x00 == buf[2] && 0x00 == buf[3]) + detectedCharset = "UTF-32LE"; + else if (0xFE == buf[1]) + detectedCharset = "UTF-16LE"; + break; } // switch } - if (detectedCharset != null) { + if (detectedCharset != null) + { done = true; return; } } - for (int i = 0; i < len; i++) { + for (int i = 0; i < len; i++) + { // other than 0xa0, if every other character is ascii, the page is ascii - if ((buf[i] & 0x80) != 0 && buf[i] != 0xA0) { + if ((buf[i] & 0x80) != 0 && buf[i] != 0xA0) + { // we got a non-ascii byte (high-byte) - if (inputState != InputState.Highbyte) { + if (inputState != InputState.Highbyte) + { inputState = InputState.Highbyte; // kill EscCharsetProber if it is active - if (escCharsetProber != null) { + if (escCharsetProber != null) + { escCharsetProber = null; } @@ -145,9 +155,12 @@ namespace UniversalDetector.Core if (charsetProbers[2] == null) charsetProbers[2] = new Latin1Prober(); } - } else { + } + else + { if (inputState == InputState.PureASCII && - (buf[i] == 0x33 || (buf[i] == 0x7B && lastChar == 0x7E))) { + (buf[i] == 0x33 || (buf[i] == 0x7B && lastChar == 0x7E))) + { // found escape character or HZ "~{" inputState = InputState.EscASCII; } @@ -157,25 +170,31 @@ namespace UniversalDetector.Core ProbingState st = ProbingState.NotMe; - switch (inputState) { + switch (inputState) + { case InputState.EscASCII: - if (escCharsetProber == null) { + if (escCharsetProber == null) + { escCharsetProber = new EscCharsetProber(); } st = escCharsetProber.HandleData(buf, offset, len); - if (st == ProbingState.FoundIt) { + if (st == ProbingState.FoundIt) + { done = true; detectedCharset = escCharsetProber.GetCharsetName(); } break; case InputState.Highbyte: - for (int i = 0; i < PROBERS_NUM; i++) { - if (charsetProbers[i] != null) { + for (int i = 0; i < PROBERS_NUM; i++) + { + if (charsetProbers[i] != null) + { st = charsetProbers[i].HandleData(buf, offset, len); - #if DEBUG +#if DEBUG charsetProbers[i].DumpStatus(); - #endif - if (st == ProbingState.FoundIt) { +#endif + if (st == ProbingState.FoundIt) + { done = true; detectedCharset = charsetProbers[i].GetCharsetName(); return; @@ -195,38 +214,47 @@ namespace UniversalDetector.Core /// </summary> public virtual void DataEnd() { - if (!gotData) { + if (!gotData) + { // we haven't got any data yet, return immediately // caller program sometimes call DataEnd before anything has // been sent to detector return; } - if (detectedCharset != null) { + if (detectedCharset != null) + { done = true; Report(detectedCharset, 1.0f); return; } - if (inputState == InputState.Highbyte) { + if (inputState == InputState.Highbyte) + { float proberConfidence = 0.0f; float maxProberConfidence = 0.0f; int maxProber = 0; - for (int i = 0; i < PROBERS_NUM; i++) { - if (charsetProbers[i] != null) { + for (int i = 0; i < PROBERS_NUM; i++) + { + if (charsetProbers[i] != null) + { proberConfidence = charsetProbers[i].GetConfidence(); - if (proberConfidence > maxProberConfidence) { + if (proberConfidence > maxProberConfidence) + { maxProberConfidence = proberConfidence; maxProber = i; } } } - if (maxProberConfidence > MINIMUM_THRESHOLD) { + if (maxProberConfidence > MINIMUM_THRESHOLD) + { Report(charsetProbers[maxProber].GetCharsetName(), maxProberConfidence); } - } else if (inputState == InputState.PureASCII) { + } + else if (inputState == InputState.PureASCII) + { Report("ASCII", 1.0f); } } |
