aboutsummaryrefslogtreecommitdiff
path: root/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs
diff options
context:
space:
mode:
Diffstat (limited to 'Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs')
-rw-r--r--Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs44
1 files changed, 22 insertions, 22 deletions
diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs
index 3369dd430..cc4539058 100644
--- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs
+++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs
@@ -21,7 +21,7 @@
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
- *
+ *
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
@@ -40,10 +40,10 @@ using System.IO;
namespace UniversalDetector.Core
{
/// <summary>
/// Result state reported by a charset prober: still undecided
/// (<c>Detecting</c>), a positive match (<c>FoundIt</c>), or a
/// negative match (<c>NotMe</c>).
/// </summary>
- public enum ProbingState {
+ public enum ProbingState {
Detecting = 0, // no sure answer yet, but caller can ask for confidence
FoundIt = 1, // positive answer
- NotMe = 2 // negative answer
+ NotMe = 2 // negative answer
};
public abstract class CharsetProber
@@ -51,16 +51,16 @@ namespace UniversalDetector.Core
protected const float SHORTCUT_THRESHOLD = 0.95F;
protected ProbingState state;
-
+
// ASCII codes
private const byte SPACE = 0x20;
private const byte CAPITAL_A = 0x41;
private const byte CAPITAL_Z = 0x5A;
private const byte SMALL_A = 0x61;
private const byte SMALL_Z = 0x7A;
- private const byte LESS_THAN = 0x3C;
+ private const byte LESS_THAN = 0x3C;
private const byte GREATER_THAN = 0x3E;
-
+
/// <summary>
/// Feed data to the prober
/// </summary>
@@ -71,44 +71,44 @@ namespace UniversalDetector.Core
/// A <see cref="ProbingState"/>
/// </returns>
public abstract ProbingState HandleData(byte[] buf, int offset, int len);
-
+
/// <summary>
/// Reset prober state
/// </summary>
public abstract void Reset();
/// <summary>
/// Gets the charset name reported by this prober; each concrete
/// prober supplies its own implementation.
/// </summary>
/// <returns>the charset name as a string</returns>
public abstract string GetCharsetName();
-
+
/// <summary>
/// Gets this prober's confidence in its answer; each concrete
/// prober supplies its own implementation.
/// </summary>
/// <returns>
/// the confidence value.
/// NOTE(review): presumably in the range [0, 1], given
/// SHORTCUT_THRESHOLD is 0.95 - confirm against the implementations.
/// </returns>
public abstract float GetConfidence();
-
+
/// <summary>
/// Gets the current probing state. The base implementation returns
/// the protected <c>state</c> field unchanged; subclasses may override.
/// </summary>
/// <returns>the current <see cref="ProbingState"/></returns>
public virtual ProbingState GetState()
{
return state;
}
/// <summary>
/// Extension hook with an intentionally empty base implementation:
/// calling it on the base class does nothing. Subclasses may override.
/// </summary>
public virtual void SetOption()
- {
-
+ {
+
}
/// <summary>
/// Extension hook with an intentionally empty base implementation:
/// calling it on the base class does nothing. Subclasses may override
/// (presumably to emit diagnostic output - confirm against the subclasses).
/// </summary>
public virtual void DumpStatus()
- {
-
+ {
+
}
//
// Helper functions used in the Latin1 and Group probers
//
/// <summary>
- ///
+ ///
/// </summary>
/// <returns>filtered buffer</returns>
- protected static byte[] FilterWithoutEnglishLetters(byte[] buf, int offset, int len)
+ protected static byte[] FilterWithoutEnglishLetters(byte[] buf, int offset, int len)
{
byte[] result = null;
using (MemoryStream ms = new MemoryStream(buf.Length)) {
-
+
bool meetMSB = false;
int max = offset + len;
int prev = offset;
@@ -140,8 +140,8 @@ namespace UniversalDetector.Core
}
/// <summary>
- /// Do filtering to reduce load to probers (Remove ASCII symbols,
- /// collapse spaces). This filter applies to all scripts which contain
+ /// Do filtering to reduce load to probers (Remove ASCII symbols,
+ /// collapse spaces). This filter applies to all scripts which contain
/// both English characters and upper ASCII characters.
/// </summary>
/// <returns>a filtered copy of the input buffer</returns>
@@ -150,16 +150,16 @@ namespace UniversalDetector.Core
byte[] result = null;
using (MemoryStream ms = new MemoryStream(buf.Length)) {
-
+
bool inTag = false;
int max = offset + len;
int prev = offset;
int cur = offset;
while (cur < max) {
-
+
byte b = buf[cur];
-
+
if (b == GREATER_THAN)
inTag = false;
else if (b == LESS_THAN)
@@ -177,7 +177,7 @@ namespace UniversalDetector.Core
cur++;
}
- // If the current segment contains more than just a symbol
+ // If the current segment contains more than just a symbol
// and it is not inside a tag then keep it.
if (!inTag && cur > prev)
ms.Write(buf, prev, cur - prev);