aboutsummaryrefslogtreecommitdiff
path: root/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs
diff options
context:
space:
mode:
authorBond-009 <bond.009@outlook.com>2019-01-16 19:10:42 +0100
committerGitHub <noreply@github.com>2019-01-16 19:10:42 +0100
commit900dc851e6c810f9e1772a6fb901a5a7e2801baf (patch)
tree205bac3cd6df971ee18739e59bd4da0ffe91718b /Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs
parent07a8e49c4b1e4a2dddbaa49ab6f1ff4f271fbf20 (diff)
parent933ef438894ed233fec46badf58dd4f26492e832 (diff)
Merge branch 'dev' into cleanup
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs')
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs38
1 files changed, 19 insertions, 19 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs
index c79a10aa7..5d57e30e1 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -40,7 +40,7 @@ using System;
namespace UniversalDetector.Core
{
- // TODO: Using trigrams the detector should be able to discriminate between
+ // TODO: Using trigrams the detector should be able to discriminate between
// latin-1 and iso8859-2
public class Latin1Prober : CharsetProber
{
@@ -54,9 +54,9 @@ namespace UniversalDetector.Core
private const int ACO = 5; // accent capital other
private const int ASV = 6; // accent small vowel
private const int ASO = 7; // accent small other
-
+
private const int CLASS_NUM = 8; // total classes
-
+
private readonly static byte[] Latin1_CharToClass = {
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 00 - 07
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 08 - 0F
@@ -92,36 +92,36 @@ namespace UniversalDetector.Core
ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, // F8 - FF
};
- /* 0 : illegal
- 1 : very unlikely
- 2 : normal
+ /* 0 : illegal
+ 1 : very unlikely
+ 2 : normal
3 : very likely
*/
private readonly static byte[] Latin1ClassModel = {
/* UDF OTH ASC ASS ACV ACO ASV ASO */
/*UDF*/ 0, 0, 0, 0, 0, 0, 0, 0,
/*OTH*/ 0, 3, 3, 3, 3, 3, 3, 3,
- /*ASC*/ 0, 3, 3, 3, 3, 3, 3, 3,
+ /*ASC*/ 0, 3, 3, 3, 3, 3, 3, 3,
/*ASS*/ 0, 3, 3, 3, 1, 1, 3, 3,
/*ACV*/ 0, 3, 3, 3, 1, 2, 1, 2,
- /*ACO*/ 0, 3, 3, 3, 3, 3, 3, 3,
- /*ASV*/ 0, 3, 1, 3, 1, 1, 1, 3,
+ /*ACO*/ 0, 3, 3, 3, 3, 3, 3, 3,
+ /*ASV*/ 0, 3, 1, 3, 1, 1, 1, 3,
/*ASO*/ 0, 3, 1, 3, 1, 1, 3, 3,
};
private byte lastCharClass;
private int[] freqCounter = new int[FREQ_CAT_NUM];
-
+
public Latin1Prober()
{
Reset();
}
- public override string GetCharsetName()
+ public override string GetCharsetName()
{
return "windows-1252";
}
-
+
public override void Reset()
{
state = ProbingState.Detecting;
@@ -129,12 +129,12 @@ namespace UniversalDetector.Core
for (int i = 0; i < FREQ_CAT_NUM; i++)
freqCounter[i] = 0;
}
-
+
public override ProbingState HandleData(byte[] buf, int offset, int len)
{
byte[] newbuf = FilterWithEnglishLetters(buf, offset, len);
byte charClass, freq;
-
+
for (int i = 0; i < newbuf.Length; i++) {
charClass = Latin1_CharToClass[newbuf[i]];
freq = Latin1ClassModel[lastCharClass * CLASS_NUM + charClass];
@@ -152,21 +152,21 @@ namespace UniversalDetector.Core
{
if (state == ProbingState.NotMe)
return 0.01f;
-
+
float confidence = 0.0f;
int total = 0;
for (int i = 0; i < FREQ_CAT_NUM; i++) {
total += freqCounter[i];
}
-
+
if (total <= 0) {
confidence = 0.0f;
} else {
confidence = freqCounter[3] * 1.0f / total;
confidence -= freqCounter[1] * 20.0f / total;
}
-
- // lower the confidence of latin1 so that other more accurate detector
+
+ // lower the confidence of latin1 so that other more accurate detector
// can take priority.
return confidence < 0.0f ? 0.0f : confidence * 0.5f;
}