aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luis Miguel Almánzar <ruisu15@gmail.com> 2014-05-10 22:29:34 -0400
committer Luis Miguel Almánzar <ruisu15@gmail.com> 2014-05-10 22:29:51 -0400
commit f2237b858ad4defe47d7671cffdf3febeff3ad00 (patch)
tree 9656f9d7a4ec296fc366275b26baba4f9ee3a19b
parent 4e710ec0a49c2869805801264a411076c576a3a5 (diff)
implemented SRT Parser
-rw-r--r-- MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs 56
-rw-r--r-- MediaBrowser.Tests/MediaBrowser.Tests.csproj 10
-rw-r--r-- MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs 108
-rw-r--r-- MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt 44
4 files changed, 212 insertions, 6 deletions
diff --git a/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs b/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs
index 410c0bbdd..89676ba7e 100644
--- a/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs
+++ b/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs
@@ -1,17 +1,61 @@
using System;
using System.Collections.Generic;
+using System.Globalization;
using System.IO;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using System.Text.RegularExpressions;
namespace MediaBrowser.MediaEncoding.Subtitles
{
public class SrtParser : ISubtitleParser
{
- public SubtitleTrackInfo Parse(Stream stream)
- {
- throw new NotImplementedException();
+ private readonly CultureInfo _usCulture = new CultureInfo("en-US");
+        /// <summary>
+        /// Parses SubRip (SRT) subtitle data read from <paramref name="stream"/>.
+        /// </summary>
+        /// <remarks>
+        /// A cue is read as an identifier line, a "start --&gt; end" timing line and then text lines up to
+        /// the next empty line. SSA override blocks such as {\pos(142,120)} are removed, the tags
+        /// font/b/u/i/s are kept as markup, every other angle bracket is HTML-escaped, and line breaks
+        /// (real ones as well as literal \n or \N sequences) become &lt;br /&gt;.
+        /// A cue whose timing line is missing or has no "--&gt;" separator is skipped instead of throwing.
+        /// The stream is closed when parsing finishes because the reader that wraps it is disposed.
+        /// </remarks>
+        /// <param name="stream">Readable stream positioned at the start of the SRT content.</param>
+        /// <returns>A track info whose TrackEvents holds the parsed cues in file order.</returns>
+        public SubtitleTrackInfo Parse(Stream stream)
+        {
+            var trackInfo = new SubtitleTrackInfo();
+            using (var reader = new StreamReader(stream))
+            {
+                string line;
+                while ((line = reader.ReadLine()) != null)
+                {
+                    // Blank lines only separate cues.
+                    if (string.IsNullOrWhiteSpace(line))
+                    {
+                        continue;
+                    }
+
+                    var subEvent = new SubtitleTrackEvent { Id = line };
+
+                    line = reader.ReadLine();
+                    if (line == null)
+                    {
+                        // The file ended right after an identifier line: there is no cue to build.
+                        break;
+                    }
+
+                    var time = Regex.Split(line, @"[\t ]*-->[\t ]*");
+                    if (time.Length < 2)
+                    {
+                        // Not a timing line. Drop the remainder of this malformed cue (up to the next
+                        // empty line) so its text lines are not mistaken for identifiers.
+                        while (!string.IsNullOrEmpty(line))
+                        {
+                            line = reader.ReadLine();
+                        }
+                        continue;
+                    }
+
+                    // Trim so an indented timing line still parses.
+                    subEvent.StartPositionTicks = GetTicks(time[0].Trim());
+
+                    // The end time may be followed by display coordinates ("X1:000 X2:000 ...");
+                    // keep only the timestamp itself.
+                    var endTime = time[1].Trim();
+                    var idx = endTime.IndexOfAny(new[] { ' ', '\t' });
+                    if (idx > 0)
+                    {
+                        endTime = endTime.Substring(0, idx);
+                    }
+                    subEvent.EndPositionTicks = GetTicks(endTime);
+
+                    // Collect the text lines of the cue; an empty line (or end of file) terminates it.
+                    var multiline = new List<string>();
+                    while ((line = reader.ReadLine()) != null)
+                    {
+                        if (string.IsNullOrEmpty(line))
+                        {
+                            break;
+                        }
+                        multiline.Add(line);
+                    }
+
+                    // Join with the SSA line-break marker; it is converted to <br /> in the last step.
+                    var text = string.Join(@"\N", multiline);
+                    // Strip SSA override blocks such as {\pos(142,120)} or {\i1}.
+                    text = Regex.Replace(text, "\\{(\\\\[\\w]+\\(?([\\w\\d]+,?)+\\)?)+\\}", string.Empty, RegexOptions.IgnoreCase);
+                    // Escape every angle bracket first ...
+                    text = text.Replace("<", "&lt;").Replace(">", "&gt;");
+                    // ... then turn the supported tags (font, b, u, i, s) back into real markup.
+                    text = Regex.Replace(text, "&lt;(\\/?(font|b|u|i|s))((\\s+(\\w|\\w[\\w\\-]*\\w)(\\s*=\\s*(?:\\\".*?\\\"|'.*?'|[^'\\\">\\s]+))?)+\\s*|\\s*)(\\/?)&gt;", "<$1$3$7>", RegexOptions.IgnoreCase);
+                    // IgnoreCase makes this match both the joiner above and literal \n sequences in the text.
+                    text = Regex.Replace(text, @"\\N", "<br />", RegexOptions.IgnoreCase);
+                    subEvent.Text = text;
+
+                    trackInfo.TrackEvents.Add(subEvent);
+                }
+            }
+            return trackInfo;
+        }
+
+        /// <summary>
+        /// Converts an SRT timestamp to ticks.
+        /// </summary>
+        /// <param name="time">Timestamp written as hh:mm:ss.fff or hh:mm:ss,fff.</param>
+        /// <returns>The tick count of the parsed time, or 0 when the text matches neither format.</returns>
+        private long GetTicks(string time)
+        {
+            TimeSpan parsed;
+
+            // Dot as the millisecond separator.
+            if (TimeSpan.TryParseExact(time, @"hh\:mm\:ss\.fff", _usCulture, out parsed))
+            {
+                return parsed.Ticks;
+            }
+
+            // Comma as the millisecond separator.
+            if (TimeSpan.TryParseExact(time, @"hh\:mm\:ss\,fff", _usCulture, out parsed))
+            {
+                return parsed.Ticks;
+            }
+
+            return 0;
         }
}
}
diff --git a/MediaBrowser.Tests/MediaBrowser.Tests.csproj b/MediaBrowser.Tests/MediaBrowser.Tests.csproj
index 6ae7544b8..46f748130 100644
--- a/MediaBrowser.Tests/MediaBrowser.Tests.csproj
+++ b/MediaBrowser.Tests/MediaBrowser.Tests.csproj
@@ -50,6 +50,7 @@
</Otherwise>
</Choose>
<ItemGroup>
+ <Compile Include="MediaEncoding\Subtitles\SrtParserTests.cs" />
<Compile Include="Providers\MovieDbProviderTests.cs" />
<Compile Include="Resolvers\MovieResolverTests.cs" />
<Compile Include="Resolvers\TvUtilTests.cs" />
@@ -61,6 +62,10 @@
<Project>{17e1f4e6-8abd-4fe5-9ecf-43d4b6087ba2}</Project>
<Name>MediaBrowser.Controller</Name>
</ProjectReference>
+ <ProjectReference Include="..\MediaBrowser.MediaEncoding\MediaBrowser.MediaEncoding.csproj">
+ <Project>{0BD82FA6-EB8A-4452-8AF5-74F9C3849451}</Project>
+ <Name>MediaBrowser.MediaEncoding</Name>
+ </ProjectReference>
<ProjectReference Include="..\MediaBrowser.Model\MediaBrowser.Model.csproj">
<Project>{7eeeb4bb-f3e8-48fc-b4c5-70f0fff8329b}</Project>
<Name>MediaBrowser.Model</Name>
@@ -77,6 +82,11 @@
<ItemGroup>
<None Include="app.config" />
</ItemGroup>
+ <ItemGroup>
+ <None Include="MediaEncoding\Subtitles\TestSubtitles\unit.srt">
+ <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+ </None>
+ </ItemGroup>
<Choose>
<When Condition="'$(VisualStudioVersion)' == '10.0' And '$(IsCodedUITest)' == 'True'">
<ItemGroup>
diff --git a/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs b/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs
new file mode 100644
index 000000000..0d86fbdcd
--- /dev/null
+++ b/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs
@@ -0,0 +1,108 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using MediaBrowser.MediaEncoding.Subtitles;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+namespace MediaBrowser.Tests.MediaEncoding.Subtitles {
+
+    /// <summary>
+    /// Tests for <see cref="SrtParser"/> driven by the unit.srt fixture.
+    /// </summary>
+    [TestClass]
+    public class SrtParserTests {
+
+        /// <summary>
+        /// Parses the fixture and compares every event's id, start, end and text with the expected values.
+        /// </summary>
+        [TestMethod]
+        public void TestParse() {
+
+            var expectedSubs = new SubtitleTrackInfo {
+                TrackEvents = new List<SubtitleTrackEvent> {
+                    Cue("1", 24000000, 52000000,
+                        "[Background Music Playing]"),
+                    Cue("2", 157120000, 173990000,
+                        "Oh my god, Watch out!<br />It's coming!!"),
+                    Cue("3", 257120000, 303990000,
+                        "[Bird noises]"),
+                    Cue("4", 310000000, 319990000,
+                        "This text is <font color=\"red\">RED</font> and has not been positioned."),
+                    Cue("5", 320000000, 329990000,
+                        "This is a<br />new line, as is<br />this"),
+                    Cue("6", 330000000, 339990000,
+                        "This contains nested <b>bold, <i>italic, <u>underline</u> and <s>strike-through</s></u></i></b> HTML tags"),
+                    Cue("7", 340000000, 349990000,
+                        "Unclosed but <b>supported HTML tags are left in, SSA italics aren't"),
+                    Cue("8", 350000000, 359990000,
+                        "&lt;ggg&gt;Unsupported&lt;/ggg&gt; HTML tags are escaped and left in, even if &lt;hhh&gt;not closed."),
+                    Cue("9", 360000000, 369990000,
+                        "Multiple SSA tags are stripped"),
+                    Cue("10", 370000000, 379990000,
+                        "Greater than (&lt;) and less than (&gt;) are shown")
+                }
+            };
+
+            var parser = new SrtParser();
+            var fixture = File.OpenRead(@"MediaEncoding\Subtitles\TestSubtitles\unit.srt");
+
+            var result = parser.Parse(fixture);
+
+            Assert.IsNotNull(result);
+            Assert.AreEqual(expectedSubs.TrackEvents.Count, result.TrackEvents.Count);
+
+            for (var index = 0; index < expectedSubs.TrackEvents.Count; index++)
+            {
+                var want = expectedSubs.TrackEvents[index];
+                var got = result.TrackEvents[index];
+
+                Assert.AreEqual(want.Id, got.Id);
+                Assert.AreEqual(want.StartPositionTicks, got.StartPositionTicks);
+                Assert.AreEqual(want.EndPositionTicks, got.EndPositionTicks);
+                Assert.AreEqual(want.Text, got.Text);
+            }
+        }
+
+        /// <summary>
+        /// Builds one expected subtitle event from its four compared fields.
+        /// </summary>
+        private static SubtitleTrackEvent Cue(string id, long startTicks, long endTicks, string text) {
+            return new SubtitleTrackEvent {
+                Id = id,
+                StartPositionTicks = startTicks,
+                EndPositionTicks = endTicks,
+                Text = text
+            };
+        }
+    }
+} \ No newline at end of file
diff --git a/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt b/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt
new file mode 100644
index 000000000..5f6e5636e
--- /dev/null
+++ b/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt
@@ -0,0 +1,44 @@
+
+
+1
+00:00:02.400 --> 00:00:05.200
+[Background Music Playing]
+
+2
+00:00:15,712 --> 00:00:17,399 X1:000 X2:000 Y1:050 Y2:100
+Oh my god, Watch out!
+It's coming!!
+
+3
+00:00:25,712 --> 00:00:30,399
+[Bird noises]
+
+4
+00:00:31,000 --> 00:00:31,999
+This text is <font color="red">RED</font> and has not been {\pos(142,120)}positioned.
+
+5
+00:00:32,000 --> 00:00:32,999
+This is a\nnew line, as is\Nthis
+
+6
+00:00:33,000 --> 00:00:33,999
+This contains nested <b>bold, <i>italic, <u>underline</u> and <s>strike-through</s></u></i></b> HTML tags
+
+7
+00:00:34,000 --> 00:00:34,999
+Unclosed but <b>supported HTML tags are left in, {\i1} SSA italics aren't
+
+8
+00:00:35,000 --> 00:00:35,999
+<ggg>Unsupported</ggg> HTML tags are escaped and left in, even if <hhh>not closed.
+
+9
+00:00:36,000 --> 00:00:36,999
+Multiple {\pos(142,120)\b1}SSA tags are stripped
+
+10
+00:00:37,000 --> 00:00:37,999
+Greater than (<) and less than (>) are shown
+
+