diff options
| author | Luis Miguel Almánzar <ruisu15@gmail.com> | 2014-05-10 22:29:34 -0400 |
|---|---|---|
| committer | Luis Miguel Almánzar <ruisu15@gmail.com> | 2014-05-10 22:29:51 -0400 |
| commit | f2237b858ad4defe47d7671cffdf3febeff3ad00 (patch) | |
| tree | 9656f9d7a4ec296fc366275b26baba4f9ee3a19b | |
| parent | 4e710ec0a49c2869805801264a411076c576a3a5 (diff) | |
implemented SRT Parser
4 files changed, 212 insertions, 6 deletions
diff --git a/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs b/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs index 410c0bbdd..89676ba7e 100644 --- a/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs +++ b/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs @@ -1,17 +1,61 @@ using System; using System.Collections.Generic; +using System.Globalization; using System.IO; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using System.Text.RegularExpressions; namespace MediaBrowser.MediaEncoding.Subtitles { public class SrtParser : ISubtitleParser { - public SubtitleTrackInfo Parse(Stream stream) - { - throw new NotImplementedException(); + private readonly CultureInfo _usCulture = new CultureInfo("en-US"); + public SubtitleTrackInfo Parse(Stream stream) { + var trackInfo = new SubtitleTrackInfo(); + using ( var reader = new StreamReader(stream)) + { + string line; + while ((line = reader.ReadLine()) != null) + { + if (string.IsNullOrWhiteSpace(line)) + { + continue; + } + var subEvent = new SubtitleTrackEvent {Id = line}; + line = reader.ReadLine(); + var time = Regex.Split(line, @"[\t ]*-->[\t ]*"); + subEvent.StartPositionTicks = GetTicks(time[0]); + var endTime = time[1]; + var idx = endTime.IndexOf(" ", StringComparison.Ordinal); + if (idx > 0) + endTime = endTime.Substring(0, idx); + subEvent.EndPositionTicks = GetTicks(endTime); + var multiline = new List<string>(); + while ((line = reader.ReadLine()) != null) + { + if (string.IsNullOrEmpty(line)) + { + break; + } + multiline.Add(line); + } + subEvent.Text = string.Join(@"\N", multiline); + subEvent.Text = Regex.Replace(subEvent.Text, "\\{(\\\\[\\w]+\\(?([\\w\\d]+,?)+\\)?)+\\}", string.Empty, RegexOptions.IgnoreCase); + subEvent.Text = Regex.Replace(subEvent.Text, "<", "<", RegexOptions.IgnoreCase); + subEvent.Text = Regex.Replace(subEvent.Text, ">", ">", RegexOptions.IgnoreCase); + subEvent.Text = Regex.Replace(subEvent.Text, "<(\\/?(font|b|u|i|s))((\\s+(\\w|\\w[\\w\\-]*\\w)(\\s*=\\s*(?:\\\".*?\\\"|'.*?'|[^'\\\">\\s]+))?)+\\s*|\\s*)(\\/?)>", "<$1$3$7>", RegexOptions.IgnoreCase); + subEvent.Text = Regex.Replace(subEvent.Text, @"\\N", "<br />",RegexOptions.IgnoreCase); + trackInfo.TrackEvents.Add(subEvent); + } + } + return trackInfo; + } + + long GetTicks(string time) { + TimeSpan span; + return TimeSpan.TryParseExact(time, @"hh\:mm\:ss\.fff", _usCulture, out span) + ? span.Ticks + : (TimeSpan.TryParseExact(time, @"hh\:mm\:ss\,fff", _usCulture, out span) + ? span.Ticks : 0); } } } diff --git a/MediaBrowser.Tests/MediaBrowser.Tests.csproj b/MediaBrowser.Tests/MediaBrowser.Tests.csproj index 6ae7544b8..46f748130 100644 --- a/MediaBrowser.Tests/MediaBrowser.Tests.csproj +++ b/MediaBrowser.Tests/MediaBrowser.Tests.csproj @@ -50,6 +50,7 @@ </Otherwise> </Choose> <ItemGroup> + <Compile Include="MediaEncoding\Subtitles\SrtParserTests.cs" /> <Compile Include="Providers\MovieDbProviderTests.cs" /> <Compile Include="Resolvers\MovieResolverTests.cs" /> <Compile Include="Resolvers\TvUtilTests.cs" /> @@ -61,6 +62,10 @@ <Project>{17e1f4e6-8abd-4fe5-9ecf-43d4b6087ba2}</Project> <Name>MediaBrowser.Controller</Name> </ProjectReference> + <ProjectReference Include="..\MediaBrowser.MediaEncoding\MediaBrowser.MediaEncoding.csproj"> + <Project>{0BD82FA6-EB8A-4452-8AF5-74F9C3849451}</Project> + <Name>MediaBrowser.MediaEncoding</Name> + </ProjectReference> <ProjectReference Include="..\MediaBrowser.Model\MediaBrowser.Model.csproj"> <Project>{7eeeb4bb-f3e8-48fc-b4c5-70f0fff8329b}</Project> <Name>MediaBrowser.Model</Name> @@ -77,6 +82,11 @@ <ItemGroup> <None Include="app.config" /> </ItemGroup> + <ItemGroup> + <None Include="MediaEncoding\Subtitles\TestSubtitles\unit.srt"> + <CopyToOutputDirectory>Always</CopyToOutputDirectory> + </None> + </ItemGroup> <Choose> <When Condition="'$(VisualStudioVersion)' == '10.0' And '$(IsCodedUITest)' == 'True'"> <ItemGroup> diff --git a/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs b/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs new file mode 100644 index 000000000..0d86fbdcd --- /dev/null +++ b/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs @@ -0,0 +1,108 @@ +using System; +using System.Collections.Generic; +using System.IO; +using MediaBrowser.MediaEncoding.Subtitles; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace MediaBrowser.Tests.MediaEncoding.Subtitles { + + [TestClass] + public class SrtParserTests { + + [TestMethod] + public void TestParse() { + + var expectedSubs = + new SubtitleTrackInfo { + TrackEvents = new List<SubtitleTrackEvent> { + new SubtitleTrackEvent { + Id = "1", + StartPositionTicks = 24000000, + EndPositionTicks = 52000000, + Text = + "[Background Music Playing]" + }, + new SubtitleTrackEvent { + Id = "2", + StartPositionTicks = 157120000, + EndPositionTicks = 173990000, + Text = + "Oh my god, Watch out!<br />It's coming!!" + }, + new SubtitleTrackEvent { + Id = "3", + StartPositionTicks = 257120000, + EndPositionTicks = 303990000, + Text = "[Bird noises]" + }, + new SubtitleTrackEvent { + Id = "4", + StartPositionTicks = 310000000, + EndPositionTicks = 319990000, + Text = + "This text is <font color=\"red\">RED</font> and has not been positioned." + }, + new SubtitleTrackEvent { + Id = "5", + StartPositionTicks = 320000000, + EndPositionTicks = 329990000, + Text = + "This is a<br />new line, as is<br />this" + }, + new SubtitleTrackEvent { + Id = "6", + StartPositionTicks = 330000000, + EndPositionTicks = 339990000, + Text = + "This contains nested <b>bold, <i>italic, <u>underline</u> and <s>strike-through</s></u></i></b> HTML tags" + }, + new SubtitleTrackEvent { + Id = "7", + StartPositionTicks = 340000000, + EndPositionTicks = 349990000, + Text = + "Unclosed but <b>supported HTML tags are left in, SSA italics aren't" + }, + new SubtitleTrackEvent { + Id = "8", + StartPositionTicks = 350000000, + EndPositionTicks = 359990000, + Text = + "<ggg>Unsupported</ggg> HTML tags are escaped and left in, even if <hhh>not closed." + }, + new SubtitleTrackEvent { + Id = "9", + StartPositionTicks = 360000000, + EndPositionTicks = 369990000, + Text = + "Multiple SSA tags are stripped" + }, + new SubtitleTrackEvent { + Id = "10", + StartPositionTicks = 370000000, + EndPositionTicks = 379990000, + Text = + "Greater than (<) and less than (>) are shown" + } + } + }; + + var sut = new SrtParser(); + + var stream = File.OpenRead(@"MediaEncoding\Subtitles\TestSubtitles\unit.srt"); + + var result = sut.Parse(stream); + + Assert.IsNotNull(result); + Assert.AreEqual(expectedSubs.TrackEvents.Count,result.TrackEvents.Count); + for (int i = 0; i < expectedSubs.TrackEvents.Count; i++) + { + Assert.AreEqual(expectedSubs.TrackEvents[i].Id, result.TrackEvents[i].Id); + Assert.AreEqual(expectedSubs.TrackEvents[i].StartPositionTicks, result.TrackEvents[i].StartPositionTicks); + Assert.AreEqual(expectedSubs.TrackEvents[i].EndPositionTicks, result.TrackEvents[i].EndPositionTicks); + Assert.AreEqual(expectedSubs.TrackEvents[i].Text, result.TrackEvents[i].Text); + } + + } + } +}
\ No newline at end of file diff --git a/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt b/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt new file mode 100644 index 000000000..5f6e5636e --- /dev/null +++ b/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt @@ -0,0 +1,44 @@ + + +1 +00:00:02.400 --> 00:00:05.200 +[Background Music Playing] + +2 +00:00:15,712 --> 00:00:17,399 X1:000 X2:000 Y1:050 Y2:100 +Oh my god, Watch out! +It's coming!! + +3 +00:00:25,712 --> 00:00:30,399 +[Bird noises] + +4 +00:00:31,000 --> 00:00:31,999 +This text is <font color="red">RED</font> and has not been {\pos(142,120)}positioned. + +5 +00:00:32,000 --> 00:00:32,999 +This is a\nnew line, as is\Nthis + +6 +00:00:33,000 --> 00:00:33,999 +This contains nested <b>bold, <i>italic, <u>underline</u> and <s>strike-through</s></u></i></b> HTML tags + +7 +00:00:34,000 --> 00:00:34,999 +Unclosed but <b>supported HTML tags are left in, {\i1} SSA italics aren't + +8 +00:00:35,000 --> 00:00:35,999 +<ggg>Unsupported</ggg> HTML tags are escaped and left in, even if <hhh>not closed. + +9 +00:00:36,000 --> 00:00:36,999 +Multiple {\pos(142,120)\b1}SSA tags are stripped + +10 +00:00:37,000 --> 00:00:37,999 +Greater than (<) and less than (>) are shown + + |
