From 0e7cbb04651bd9b65668ca1635a4625640639560 Mon Sep 17 00:00:00 2001 From: Luke Pulverenti Date: Sat, 17 Jun 2017 18:59:17 -0400 Subject: add subtitle language detection --- .../TextEncoding/TextEncoding.cs | 63 ++++++++++++++++++++-- 1 file changed, 60 insertions(+), 3 deletions(-) (limited to 'Emby.Common.Implementations/TextEncoding/TextEncoding.cs') diff --git a/Emby.Common.Implementations/TextEncoding/TextEncoding.cs b/Emby.Common.Implementations/TextEncoding/TextEncoding.cs index 49b424d5f..a5caae391 100644 --- a/Emby.Common.Implementations/TextEncoding/TextEncoding.cs +++ b/Emby.Common.Implementations/TextEncoding/TextEncoding.cs @@ -8,6 +8,8 @@ using System.Threading.Tasks; using MediaBrowser.Model.MediaInfo; using MediaBrowser.Model.Logging; using UniversalDetector; +using NLangDetect.Core; +using MediaBrowser.Model.Serialization; namespace Emby.Common.Implementations.TextEncoding { @@ -15,11 +17,13 @@ namespace Emby.Common.Implementations.TextEncoding { private readonly IFileSystem _fileSystem; private readonly ILogger _logger; + private IJsonSerializer _json; - public TextEncoding(IFileSystem fileSystem, ILogger logger) + public TextEncoding(IFileSystem fileSystem, ILogger logger, IJsonSerializer json) { _fileSystem = fileSystem; _logger = logger; + _json = json; } public Encoding GetASCIIEncoding() @@ -63,6 +67,7 @@ namespace Emby.Common.Implementations.TextEncoding } } + private bool _langDetectInitialized; public string GetDetectedEncodingName(byte[] bytes, string language) { var encoding = GetInitialEncoding(bytes); @@ -72,6 +77,22 @@ namespace Emby.Common.Implementations.TextEncoding return "utf-8"; } + if (!_langDetectInitialized) + { + _langDetectInitialized = true; + LanguageDetector.Initialize(_json); + } + + if (string.IsNullOrWhiteSpace(language)) + { + language = DetectLanguage(bytes); + + if (!string.IsNullOrWhiteSpace(language)) + { + _logger.Debug("Text language detected as {0}", language); + } + } + var charset = DetectCharset(bytes, 
language); if (!string.IsNullOrWhiteSpace(charset)) @@ -95,6 +116,35 @@ namespace Emby.Common.Implementations.TextEncoding return null; } + private string DetectLanguage(byte[] bytes) + { + try + { + return LanguageDetector.DetectLanguage(Encoding.UTF8.GetString(bytes)); + } + catch (NLangDetectException ex) + { + } + + try + { + return LanguageDetector.DetectLanguage(Encoding.ASCII.GetString(bytes)); + } + catch (NLangDetectException ex) + { + } + + try + { + return LanguageDetector.DetectLanguage(Encoding.Unicode.GetString(bytes)); + } + catch (NLangDetectException ex) + { + } + + return null; + } + public Encoding GetEncodingFromCharset(string charset) { if (string.IsNullOrWhiteSpace(charset)) @@ -136,22 +186,29 @@ namespace Emby.Common.Implementations.TextEncoding case "cze": case "ces": case "slo": - case "slk": - case "slv": case "srp": case "hrv": case "rum": case "ron": case "rup": + return "windows-1250"; + // albanian case "alb": case "sqi": return "windows-1250"; + // slovak + case "slk": + case "slv": + return "windows-1250"; case "ara": return "windows-1256"; case "heb": return "windows-1255"; case "grc": + return "windows-1253"; + // greek case "gre": + case "ell": return "windows-1253"; case "crh": case "ota": -- cgit v1.2.3 From ce47f6338950853110ecc1d6bbd312b532b2ed8a Mon Sep 17 00:00:00 2001 From: Luke Pulverenti Date: Sun, 18 Jun 2017 03:11:55 -0400 Subject: 3.2.20.5 --- .../TextEncoding/TextEncoding.cs | 18 +++++++++--------- Emby.Server.Implementations/Dto/DtoService.cs | 6 +++--- .../ServerManager/WebSocketConnection.cs | 2 +- .../Subtitles/SubtitleEncoder.cs | 4 ++-- MediaBrowser.Model/Text/ITextEncoding.cs | 4 ++-- SharedVersion.cs | 2 +- 6 files changed, 18 insertions(+), 18 deletions(-) (limited to 'Emby.Common.Implementations/TextEncoding/TextEncoding.cs') diff --git a/Emby.Common.Implementations/TextEncoding/TextEncoding.cs b/Emby.Common.Implementations/TextEncoding/TextEncoding.cs index a5caae391..021caec75 100644 --- 
a/Emby.Common.Implementations/TextEncoding/TextEncoding.cs +++ b/Emby.Common.Implementations/TextEncoding/TextEncoding.cs @@ -68,7 +68,7 @@ namespace Emby.Common.Implementations.TextEncoding } private bool _langDetectInitialized; - public string GetDetectedEncodingName(byte[] bytes, string language) + public string GetDetectedEncodingName(byte[] bytes, string language, bool enableLanguageDetection) { var encoding = GetInitialEncoding(bytes); @@ -77,14 +77,14 @@ namespace Emby.Common.Implementations.TextEncoding return "utf-8"; } - if (!_langDetectInitialized) + if (string.IsNullOrWhiteSpace(language) && enableLanguageDetection) { - _langDetectInitialized = true; - LanguageDetector.Initialize(_json); - } + if (!_langDetectInitialized) + { + _langDetectInitialized = true; + LanguageDetector.Initialize(_json); + } - if (string.IsNullOrWhiteSpace(language)) - { language = DetectLanguage(bytes); if (!string.IsNullOrWhiteSpace(language)) @@ -167,9 +167,9 @@ namespace Emby.Common.Implementations.TextEncoding } } - public Encoding GetDetectedEncoding(byte[] bytes, string language) + public Encoding GetDetectedEncoding(byte[] bytes, string language, bool enableLanguageDetection) { - var charset = GetDetectedEncodingName(bytes, language); + var charset = GetDetectedEncodingName(bytes, language, enableLanguageDetection); return GetEncodingFromCharset(charset); } diff --git a/Emby.Server.Implementations/Dto/DtoService.cs b/Emby.Server.Implementations/Dto/DtoService.cs index ae988f938..7352073bd 100644 --- a/Emby.Server.Implementations/Dto/DtoService.cs +++ b/Emby.Server.Implementations/Dto/DtoService.cs @@ -778,16 +778,16 @@ namespace Emby.Server.Implementations.Dto .Select(i => new NameIdPair { Name = i, - Id = GetStudioId(i, item) + Id = GetGenreId(i, item) }) .ToArray(); } - private string GetStudioId(string name, BaseItem owner) + private string GetGenreId(string name, BaseItem owner) { if (owner is IHasMusicGenres) { - return 
_libraryManager.GetGameGenreId(name).ToString("N"); + return _libraryManager.GetMusicGenreId(name).ToString("N"); } if (owner is Game || owner is GameSystem) diff --git a/Emby.Server.Implementations/ServerManager/WebSocketConnection.cs b/Emby.Server.Implementations/ServerManager/WebSocketConnection.cs index e4392d7e6..4d5192fea 100644 --- a/Emby.Server.Implementations/ServerManager/WebSocketConnection.cs +++ b/Emby.Server.Implementations/ServerManager/WebSocketConnection.cs @@ -136,7 +136,7 @@ namespace Emby.Server.Implementations.ServerManager return; } - var charset = _textEncoding.GetDetectedEncodingName(bytes, null); + var charset = _textEncoding.GetDetectedEncodingName(bytes, null, false); if (string.Equals(charset, "utf-8", StringComparison.OrdinalIgnoreCase)) { diff --git a/MediaBrowser.MediaEncoding/Subtitles/SubtitleEncoder.cs b/MediaBrowser.MediaEncoding/Subtitles/SubtitleEncoder.cs index f8f95684d..247c5274f 100644 --- a/MediaBrowser.MediaEncoding/Subtitles/SubtitleEncoder.cs +++ b/MediaBrowser.MediaEncoding/Subtitles/SubtitleEncoder.cs @@ -198,7 +198,7 @@ namespace MediaBrowser.MediaEncoding.Subtitles { var bytes = await GetBytes(path, protocol, cancellationToken).ConfigureAwait(false); - var charset = _textEncoding.GetDetectedEncodingName(bytes, language); + var charset = _textEncoding.GetDetectedEncodingName(bytes, language, true); _logger.Debug("charset {0} detected for {1}", charset ?? "null", path); if (!string.IsNullOrEmpty(charset)) @@ -705,7 +705,7 @@ namespace MediaBrowser.MediaEncoding.Subtitles { var bytes = await GetBytes(path, protocol, cancellationToken).ConfigureAwait(false); - var charset = _textEncoding.GetDetectedEncodingName(bytes, language); + var charset = _textEncoding.GetDetectedEncodingName(bytes, language, true); _logger.Debug("charset {0} detected for {1}", charset ?? 
"null", path); diff --git a/MediaBrowser.Model/Text/ITextEncoding.cs b/MediaBrowser.Model/Text/ITextEncoding.cs index 7424f00f5..96dca0c04 100644 --- a/MediaBrowser.Model/Text/ITextEncoding.cs +++ b/MediaBrowser.Model/Text/ITextEncoding.cs @@ -7,8 +7,8 @@ namespace MediaBrowser.Model.Text { Encoding GetASCIIEncoding(); - string GetDetectedEncodingName(byte[] bytes, string language); - Encoding GetDetectedEncoding(byte[] bytes, string language); + string GetDetectedEncodingName(byte[] bytes, string language, bool enableLanguageDetection); + Encoding GetDetectedEncoding(byte[] bytes, string language, bool enableLanguageDetection); Encoding GetEncodingFromCharset(string charset); } } diff --git a/SharedVersion.cs b/SharedVersion.cs index adeff3508..939f3f388 100644 --- a/SharedVersion.cs +++ b/SharedVersion.cs @@ -1,3 +1,3 @@ using System.Reflection; -[assembly: AssemblyVersion("3.2.20.4")] +[assembly: AssemblyVersion("3.2.20.5")] -- cgit v1.2.3 From 905f02e6d9dc9785c9a39904a30b5933898e66dd Mon Sep 17 00:00:00 2001 From: Luke Pulverenti Date: Tue, 20 Jun 2017 15:38:42 -0400 Subject: 3.2.20.7 --- .../TextEncoding/TextEncoding.cs | 3 ++ .../LiveTv/LiveStreamHelper.cs | 2 +- .../Session/HttpSessionController.cs | 37 +++++++++++----------- SharedVersion.cs | 2 +- 4 files changed, 24 insertions(+), 20 deletions(-) (limited to 'Emby.Common.Implementations/TextEncoding/TextEncoding.cs') diff --git a/Emby.Common.Implementations/TextEncoding/TextEncoding.cs b/Emby.Common.Implementations/TextEncoding/TextEncoding.cs index 021caec75..54c47d62c 100644 --- a/Emby.Common.Implementations/TextEncoding/TextEncoding.cs +++ b/Emby.Common.Implementations/TextEncoding/TextEncoding.cs @@ -214,7 +214,10 @@ namespace Emby.Common.Implementations.TextEncoding case "ota": case "tur": return "windows-1254"; + // bulgarian + case "bul": case "bgr": + return "windows-1251"; case "rus": return "windows-1251"; case "vie": diff --git a/Emby.Server.Implementations/LiveTv/LiveStreamHelper.cs 
b/Emby.Server.Implementations/LiveTv/LiveStreamHelper.cs index 75f423181..428b6202b 100644 --- a/Emby.Server.Implementations/LiveTv/LiveStreamHelper.cs +++ b/Emby.Server.Implementations/LiveTv/LiveStreamHelper.cs @@ -17,7 +17,7 @@ namespace Emby.Server.Implementations.LiveTv private readonly ILogger _logger; const int ProbeAnalyzeDurationMs = 2000; - const int PlaybackAnalyzeDurationMs = 1000; + const int PlaybackAnalyzeDurationMs = 2000; public LiveStreamHelper(IMediaEncoder mediaEncoder, ILogger logger) { diff --git a/Emby.Server.Implementations/Session/HttpSessionController.cs b/Emby.Server.Implementations/Session/HttpSessionController.cs index 92fa6c424..dbac76bb4 100644 --- a/Emby.Server.Implementations/Session/HttpSessionController.cs +++ b/Emby.Server.Implementations/Session/HttpSessionController.cs @@ -159,24 +159,25 @@ namespace Emby.Server.Implementations.Session public Task SendMessage<T>(string name, T data, CancellationToken cancellationToken) { - var url = PostUrl + "/" + name; - - var options = new HttpRequestOptions - { - Url = url, - CancellationToken = cancellationToken, - BufferContent = false - }; - - options.RequestContent = _json.SerializeToString(data); - options.RequestContentType = "application/json"; - - return _httpClient.Post(new HttpRequestOptions - { - Url = url, - CancellationToken = cancellationToken, - BufferContent = false - }); + return Task.FromResult(true); + //var url = PostUrl + "/" + name; + + //var options = new HttpRequestOptions + //{ + // Url = url, + // CancellationToken = cancellationToken, + // BufferContent = false + //}; + + //options.RequestContent = _json.SerializeToString(data); + //options.RequestContentType = "application/json"; + + //return _httpClient.Post(new HttpRequestOptions + //{ + // Url = url, + // CancellationToken = cancellationToken, + // BufferContent = false + //}); } private string ToQueryString(Dictionary<string, string> nvc) diff --git a/SharedVersion.cs b/SharedVersion.cs index b7341d9eb..38f901e92 100644 --- 
a/SharedVersion.cs +++ b/SharedVersion.cs @@ -1,3 +1,3 @@ using System.Reflection; -[assembly: AssemblyVersion("3.2.20.6")] +[assembly: AssemblyVersion("3.2.20.7")] -- cgit v1.2.3