diff options
| author | Piotr Niełacny <piotr.nielacny@gmail.com> | 2026-05-28 19:26:28 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-05-28 19:26:28 +0200 |
| commit | 8d544e48424d9ddbb1f97d354ed6e6a3f749cbfb (patch) | |
| tree | e148601627ecfde177d5ec1ca12ce18402674436 | |
| parent | ef9468e85774d4d3847476565796343100ea36f5 (diff) | |
Fix A/V desync when resuming HLS with video transcode + audio copy (#16580)
Fix A/V desync when resuming HLS with video transcode + audio copy
4 files changed, 153 insertions, 28 deletions
diff --git a/MediaBrowser.Controller/MediaEncoding/EncodingHelper.cs b/MediaBrowser.Controller/MediaEncoding/EncodingHelper.cs index 8688ea4b6c..ff8d84d45e 100644 --- a/MediaBrowser.Controller/MediaEncoding/EncodingHelper.cs +++ b/MediaBrowser.Controller/MediaEncoding/EncodingHelper.cs @@ -86,6 +86,7 @@ namespace MediaBrowser.Controller.MediaEncoding private readonly Version _minFFmpegQsvVppScaleModeOption = new Version(6, 0); private readonly Version _minFFmpegRkmppHevcDecDoviRpu = new Version(7, 1, 1); private readonly Version _minFFmpegReadrateCatchupOption = new Version(8, 0); + private readonly Version _minFFmpegNoiseBsfDrop = new Version(5, 0); private static readonly string[] _videoProfilesH264 = [ @@ -1547,20 +1548,61 @@ namespace MediaBrowser.Controller.MediaEncoding public string GetAudioBitStreamArguments(EncodingJobInfo state, string segmentContainer, string mediaSourceContainer) { - var bitStreamArgs = string.Empty; + var filters = new List<string>(); + + var noiseFilter = GetCopiedAudioTrimBsf(state); + if (!string.IsNullOrEmpty(noiseFilter)) + { + filters.Add(noiseFilter); + } + var segmentFormat = GetSegmentFileExtension(segmentContainer).TrimStart('.'); // Apply aac_adtstoasc bitstream filter when media source is in mpegts. if (string.Equals(segmentFormat, "mp4", StringComparison.OrdinalIgnoreCase) && (string.Equals(mediaSourceContainer, "ts", StringComparison.OrdinalIgnoreCase) || string.Equals(mediaSourceContainer, "aac", StringComparison.OrdinalIgnoreCase) - || string.Equals(mediaSourceContainer, "hls", StringComparison.OrdinalIgnoreCase))) + || string.Equals(mediaSourceContainer, "hls", StringComparison.OrdinalIgnoreCase)) + && IsAAC(state.AudioStream)) { - bitStreamArgs = GetBitStreamArgs(state, MediaStreamType.Audio); - bitStreamArgs = string.IsNullOrEmpty(bitStreamArgs) ? string.Empty : " " + bitStreamArgs; + filters.Add("aac_adtstoasc"); } - return bitStreamArgs; + return filters.Count == 0 + ? string.Empty + : " -bsf:a " + string.Join(',', filters); + } + + // When video is transcoded, accurate_seek (the default) trims video to the + // exact seek point via decoder-side frame discard. But stream-copied audio + // bypasses the decoder, so it starts from the nearest keyframe — potentially + // seconds before the target. Use the noise bsf to drop copied audio packets + // before the seek target, achieving the same trim precision without + // re-encoding. The noise bsf's drop= parameter requires ffmpeg >= 5.0. + // Important: make sure not to use it with wtv because it breaks seeking + private string GetCopiedAudioTrimBsf(EncodingJobInfo state) + { + if (state.TranscodingType is not TranscodingJobType.Hls + || !state.IsVideoRequest + || IsCopyCodec(state.OutputVideoCodec) + || !IsCopyCodec(state.OutputAudioCodec) + || string.Equals(state.InputContainer, "wtv", StringComparison.OrdinalIgnoreCase) + || _mediaEncoder.EncoderVersion < _minFFmpegNoiseBsfDrop) + { + return null; + } + + var startTicks = state.BaseRequest.StartTimeTicks ?? 0; + if (startTicks <= 0) + { + return null; + } + + var seekSeconds = startTicks / (double)TimeSpan.TicksPerSecond; + return string.Format( + CultureInfo.InvariantCulture, + "noise=drop='lt(pts*tb\\,{0:F3})'", + seekSeconds); } public static string GetSegmentFileExtension(string segmentContainer) @@ -3006,23 +3048,6 @@ namespace MediaBrowser.Controller.MediaEncoding } seekParam += string.Format(CultureInfo.InvariantCulture, "-ss {0}", _mediaEncoder.GetTimeParameter(seekTick)); - - if (state.IsVideoRequest) - { - // If we are remuxing, then the copied stream cannot be seeked accurately (it will seek to the nearest - // keyframe). If we are using fMP4, then force all other streams to use the same inaccurate seeking to - // avoid A/V sync issues which cause playback issues on some devices. - // When remuxing video, the segment start times correspond to key frames in the source stream, so this - // option shouldn't change the seeked point that much. - // Important: make sure not to use it with wtv because it breaks seeking - if (state.TranscodingType is TranscodingJobType.Hls - && string.Equals(segmentContainer, "mp4", StringComparison.OrdinalIgnoreCase) - && (IsCopyCodec(state.OutputVideoCodec) || IsCopyCodec(state.OutputAudioCodec)) - && !string.Equals(state.InputContainer, "wtv", StringComparison.OrdinalIgnoreCase)) - { - seekParam += " -noaccurate_seek"; - } - } } return seekParam; diff --git a/MediaBrowser.Model/Configuration/EncodingOptions.cs b/MediaBrowser.Model/Configuration/EncodingOptions.cs index 98fc2e632f..f5bb5330ed 100644 --- a/MediaBrowser.Model/Configuration/EncodingOptions.cs +++ b/MediaBrowser.Model/Configuration/EncodingOptions.cs @@ -61,7 +61,7 @@ public class EncodingOptions SubtitleExtractionTimeoutMinutes = 30; AllowOnDemandMetadataBasedKeyframeExtractionForExtensions = ["mkv"]; HardwareDecodingCodecs = ["h264", "vc1"]; - HlsAudioSeekStrategy = HlsAudioSeekStrategy.DisableAccurateSeek; + HlsAudioSeekStrategy = HlsAudioSeekStrategy.TrimCopiedAudio; } /// <summary> @@ -307,6 +307,6 @@ public class EncodingOptions /// <summary> /// Gets or sets the method used for audio seeking in HLS. /// </summary> - [DefaultValue(HlsAudioSeekStrategy.DisableAccurateSeek)] + [DefaultValue(HlsAudioSeekStrategy.TrimCopiedAudio)] public HlsAudioSeekStrategy HlsAudioSeekStrategy { get; set; } } diff --git a/MediaBrowser.Model/Configuration/HlsAudioSeekStrategy.cs b/MediaBrowser.Model/Configuration/HlsAudioSeekStrategy.cs index 49feeb435f..c9155faeb1 100644 --- a/MediaBrowser.Model/Configuration/HlsAudioSeekStrategy.cs +++ b/MediaBrowser.Model/Configuration/HlsAudioSeekStrategy.cs @@ -7,11 +7,12 @@ namespace MediaBrowser.Model.Configuration public enum HlsAudioSeekStrategy { /// <summary> - /// If the video stream is transcoded and the audio stream is copied, - /// seek the video stream to the same keyframe as the audio stream. The - /// resulting timestamps in the output streams may be inaccurate. + /// When video is transcoded and audio is copied, use a bitstream filter + /// to drop copied audio packets before the seek point, aligning them + /// with the accurately-seeked video. Timestamps are accurate and audio + /// remains stream-copied (no re-encoding overhead). /// </summary> - DisableAccurateSeek = 0, + TrimCopiedAudio = 0, /// <summary> /// Prevent audio streams from being copied if the video stream is transcoded. diff --git a/tests/Jellyfin.Controller.Tests/MediaEncoding/EncodingHelperAudioBitStreamTests.cs b/tests/Jellyfin.Controller.Tests/MediaEncoding/EncodingHelperAudioBitStreamTests.cs new file mode 100644 index 0000000000..2dcb898051 --- /dev/null +++ b/tests/Jellyfin.Controller.Tests/MediaEncoding/EncodingHelperAudioBitStreamTests.cs @@ -0,0 +1,99 @@ +using System; +using System.Globalization; +using MediaBrowser.Common.Configuration; +using MediaBrowser.Controller.IO; +using MediaBrowser.Controller.MediaEncoding; +using MediaBrowser.Model.Dlna; +using MediaBrowser.Model.Entities; +using Microsoft.Extensions.Configuration; +using Moq; +using Xunit; +using IConfigurationManager = MediaBrowser.Common.Configuration.IConfigurationManager; + +namespace Jellyfin.Controller.Tests.MediaEncoding +{ + public class EncodingHelperAudioBitStreamTests + { + private const string BothFilters = " -bsf:a noise=drop='lt(pts*tb\\,63.063)',aac_adtstoasc"; + private const string NoiseOnly = " -bsf:a noise=drop='lt(pts*tb\\,63.063)'"; + private const string AdtsOnly = " -bsf:a aac_adtstoasc"; + private const long DefaultSeekTicks = 630_630_000L; + private const string DefaultFfmpegVersion = "5.0"; + + private static EncodingHelper CreateHelper(string ffmpegVersion) + { + var mediaEncoder = new Mock<IMediaEncoder>(); + mediaEncoder + .Setup(e => e.GetTimeParameter(It.IsAny<long>())) + .Returns((long ticks) => TimeSpan.FromTicks(ticks).ToString(@"hh\:mm\:ss\.fff", CultureInfo.InvariantCulture)); + mediaEncoder + .SetupGet(e => e.EncoderVersion) + .Returns(Version.Parse(ffmpegVersion)); + + return new EncodingHelper( + Mock.Of<IApplicationPaths>(), + mediaEncoder.Object, + Mock.Of<ISubtitleEncoder>(), + Mock.Of<IConfiguration>(), + Mock.Of<IConfigurationManager>(), + Mock.Of<IPathManager>()); + } + + private static EncodingJobInfo CreateState( + TranscodingJobType jobType, + string outputVideoCodec, + string outputAudioCodec, + string audioStreamCodec, + string inputContainer, + long startTimeTicks) + { + return new EncodingJobInfo(jobType) + { + IsVideoRequest = true, + OutputVideoCodec = outputVideoCodec, + OutputAudioCodec = outputAudioCodec, + InputContainer = inputContainer, + RunTimeTicks = TimeSpan.FromMinutes(10).Ticks, + AudioStream = new MediaStream + { + Type = MediaStreamType.Audio, + Codec = audioStreamCodec + }, + BaseRequest = new BaseEncodingJobOptions + { + StartTimeTicks = startTimeTicks + } + }; + } + + [Theory] + [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", BothFilters)] + [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "aac", BothFilters)] + [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "hls", BothFilters)] + [InlineData(TranscodingJobType.Progressive, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", AdtsOnly)] + [InlineData(TranscodingJobType.Hls, "copy", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", AdtsOnly)] + [InlineData(TranscodingJobType.Hls, "libx264", "aac", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", AdtsOnly)] + [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "wtv", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", AdtsOnly)] + [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", 0L, DefaultFfmpegVersion, "mp4", "ts", AdtsOnly)] + [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, "4.4.6", "mp4", "ts", AdtsOnly)] + [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "ts", "ts", NoiseOnly)] + [InlineData(TranscodingJobType.Hls, "libx264", "copy", "aac", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "mkv", NoiseOnly)] + [InlineData(TranscodingJobType.Hls, "libx264", "copy", "ac3", "ts", DefaultSeekTicks, DefaultFfmpegVersion, "mp4", "ts", NoiseOnly)] + public void AudioBitStreamArguments_AppliesGates( + TranscodingJobType jobType, + string outputVideoCodec, + string outputAudioCodec, + string audioStreamCodec, + string inputContainer, + long startTicks, + string ffmpegVersion, + string segmentContainer, + string mediaSourceContainer, + string expected) + { + var state = CreateState(jobType, outputVideoCodec, outputAudioCodec, audioStreamCodec, inputContainer, startTicks); + var result = CreateHelper(ffmpegVersion).GetAudioBitStreamArguments(state, segmentContainer, mediaSourceContainer); + Assert.Equal(expected, result); + } + } +} |
