aboutsummaryrefslogtreecommitdiff
path: root/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs
blob: 0606dbdb295eefd1fb10a03c058be187e0a8052e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text.RegularExpressions;
using System.Threading;
using MediaBrowser.Model.Extensions;
using MediaBrowser.Model.MediaInfo;
using Microsoft.Extensions.Logging;

namespace MediaBrowser.MediaEncoding.Subtitles
{
    /// <summary>
    /// Parses SubRip (.srt) subtitle data into a <see cref="SubtitleTrackInfo"/>.
    /// </summary>
    public class SrtParser : ISubtitleParser
    {
        // Separator between the start and end time on a timing line, including surrounding blanks.
        private static readonly Regex _timeSeparatorRegex = new Regex(
            @"[\t ]*-->[\t ]*",
            RegexOptions.Compiled);

        // Matches brace-delimited override blocks such as {\an8} or {\c&HFFFFFF&}.
        private static readonly Regex _overrideTagRegex = new Regex(
            @"\{(?:\\\d?[\w.-]+(?:\([^\)]*\)|&H?[0-9A-Fa-f]+&|))+\}",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        // Matches already-escaped font/b/u/i/s tags so they can be turned back into real markup.
        private static readonly Regex _escapedFormattingTagRegex = new Regex(
            "&lt;(\\/?(font|b|u|i|s))((\\s+(\\w|\\w[\\w\\-]*\\w)(\\s*=\\s*(?:\\\".*?\\\"|'.*?'|[^'\\\">\\s]+))?)+\\s*|\\s*)(\\/?)&gt;",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        // Characters that may separate the end time from trailing data on a timing line.
        private static readonly char[] _timeFieldDelimiters = { ' ', '\t' };

        private readonly ILogger _logger;

        private readonly CultureInfo _usCulture = new CultureInfo("en-US");

        /// <summary>
        /// Initializes a new instance of the <see cref="SrtParser"/> class.
        /// </summary>
        /// <param name="logger">Logger used to report lines that cannot be interpreted.</param>
        public SrtParser(ILogger logger)
        {
            _logger = logger;
        }

        /// <summary>
        /// Reads the stream line by line and builds one track event per cue.
        /// </summary>
        /// <param name="stream">The stream containing the srt data. It is wrapped in a
        /// <see cref="StreamReader"/> that is disposed before this method returns.</param>
        /// <param name="cancellationToken">Checked once at the start of every cue.</param>
        /// <returns>A track whose <c>TrackEvents</c> holds the parsed cues in file order.</returns>
        /// <exception cref="OperationCanceledException">The token was cancelled while parsing.</exception>
        public SubtitleTrackInfo Parse(Stream stream, CancellationToken cancellationToken)
        {
            var trackInfo = new SubtitleTrackInfo();
            var trackEvents = new List<SubtitleTrackEvent>();
            using (var reader = new StreamReader(stream))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    cancellationToken.ThrowIfCancellationRequested();

                    // Skip blank lines between cues.
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    // First non-blank line of a cue is taken as its identifier.
                    var subEvent = new SubtitleTrackEvent { Id = line };
                    line = reader.ReadLine();

                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    var time = _timeSeparatorRegex.Split(line);

                    if (time.Length < 2)
                    {
                        // This occurs when subtitle text has an empty line as part of the text:
                        // the remainder of that text is then mistaken for the start of a new cue.
                        _logger.LogWarning("Unrecognized line in srt: {0}", line);
                        continue;
                    }

                    subEvent.StartPositionTicks = GetTicks(time[0]);

                    // Anything after the end time (e.g. position data) is not part of the timestamp.
                    var endTime = time[1].Trim();
                    var idx = endTime.IndexOfAny(_timeFieldDelimiters);
                    if (idx > 0)
                    {
                        endTime = endTime.Substring(0, idx);
                    }

                    subEvent.EndPositionTicks = GetTicks(endTime);

                    // Cue text runs until the next empty line or the end of the stream.
                    // A whitespace-only line does not end the cue; it is kept as text.
                    var multiline = new List<string>();
                    while ((line = reader.ReadLine()) != null)
                    {
                        if (string.IsNullOrEmpty(line))
                        {
                            break;
                        }

                        multiline.Add(line);
                    }

                    subEvent.Text = NormalizeText(string.Join(ParserValues.NewLine, multiline));
                    trackEvents.Add(subEvent);
                }
            }

            trackInfo.TrackEvents = trackEvents.ToArray();
            return trackInfo;
        }

        /// <summary>
        /// Converts raw cue text into the stored form: expands <c>\N</c> line breaks, removes
        /// brace-delimited override blocks, and escapes angle brackets except for the
        /// font/b/u/i/s formatting tags.
        /// </summary>
        /// <param name="text">The cue text with its lines already joined.</param>
        /// <returns>The cleaned cue text.</returns>
        private static string NormalizeText(string text)
        {
            text = text.Replace(@"\N", ParserValues.NewLine, StringComparison.OrdinalIgnoreCase);
            text = _overrideTagRegex.Replace(text, string.Empty);

            // Escape every angle bracket first, then restore only the recognised formatting tags.
            text = text.Replace("<", "&lt;");
            text = text.Replace(">", "&gt;");
            return _escapedFormattingTagRegex.Replace(text, "<$1$3$7>");
        }

        /// <summary>
        /// Converts an <c>hh:mm:ss.fff</c> or <c>hh:mm:ss,fff</c> timestamp into ticks.
        /// </summary>
        /// <param name="time">The timestamp text; surrounding whitespace is ignored.</param>
        /// <returns>The tick count, or 0 when the text matches neither format.</returns>
        private long GetTicks(string time)
        {
            // TryParseExact does not tolerate stray blanks, so strip them before parsing.
            time = time.Trim();

            if (TimeSpan.TryParseExact(time, @"hh\:mm\:ss\.fff", _usCulture, out var span))
            {
                return span.Ticks;
            }

            return TimeSpan.TryParseExact(time, @"hh\:mm\:ss\,fff", _usCulture, out span)
                ? span.Ticks
                : 0;
        }
    }
}