aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShadowghost <Ghost_of_Stone@web.de>2026-05-09 11:18:05 +0200
committerShadowghost <Ghost_of_Stone@web.de>2026-05-09 11:18:05 +0200
commit02835c61442547e0b6490751eb74241e95d69c6e (patch)
tree319415f19aa401a90e439a77da8f47168f0aaf2e
parentb7b405dc839598892cadd236f009f1d21a76a4d3 (diff)
Add People Deduplication
-rw-r--r--Jellyfin.Server.Implementations/Item/PeopleRepository.cs19
-rw-r--r--Jellyfin.Server/Migrations/Routines/MergeDuplicatePeople.cs294
2 files changed, 304 insertions, 9 deletions
diff --git a/Jellyfin.Server.Implementations/Item/PeopleRepository.cs b/Jellyfin.Server.Implementations/Item/PeopleRepository.cs
index 6cc9729bbe..a0ffe9aea0 100644
--- a/Jellyfin.Server.Implementations/Item/PeopleRepository.cs
+++ b/Jellyfin.Server.Implementations/Item/PeopleRepository.cs
@@ -46,9 +46,10 @@ public class PeopleRepository(IDbContextFactory<JellyfinDbContext> dbProvider, I
{
// The Peoples table has one row per (Name, PersonType), so the same person can
// appear multiple times (e.g. as Actor and GuestStar). Collapse to one row per
- // name so /Persons doesn't return the same BaseItem id repeatedly.
+ // name so /Persons doesn't return the same BaseItem id repeatedly. Lowercase the
+ // grouping key so case-only duplicates collapse together.
var representativeIds = dbQuery
- .GroupBy(e => e.Name)
+ .GroupBy(e => e.Name.ToLower())
.Select(g => g.Min(e => e.Id));
dbQuery = context.Peoples.AsNoTracking()
.Where(p => representativeIds.Contains(p.Id))
@@ -102,16 +103,16 @@ public class PeopleRepository(IDbContextFactory<JellyfinDbContext> dbProvider, I
person.Role = person.Role?.Trim() ?? string.Empty;
}
- // multiple metadata providers can provide the _same_ person
- people = people.DistinctBy(e => e.Name + "-" + e.Type).ToArray();
- var personKeys = people.Select(e => e.Name + "-" + e.Type).ToArray();
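+ // Multiple metadata providers can provide the _same_ person; dedupe case-insensitively.
+ // NOTE(review): these keys are lowercased in memory with ToLowerInvariant(), while the
+ // database-side SelectionKey below uses ToLower(), which is presumably translated to SQL lower().
+ // SQLite's built-in lower() only folds ASCII, so names containing non-ASCII capitals may never
+ // match their existing row and could be inserted again - confirm against the provider in use.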
+ people = people.DistinctBy(e => e.Name.ToLowerInvariant() + "-" + e.Type).ToArray();
+ var personKeys = people.Select(e => e.Name.ToLowerInvariant() + "-" + e.Type).ToArray();
using var context = _dbProvider.CreateDbContext();
using var transaction = context.Database.BeginTransaction();
var existingPersons = context.Peoples.Select(e => new
{
item = e,
- SelectionKey = e.Name + "-" + e.PersonType
+ SelectionKey = e.Name.ToLower() + "-" + e.PersonType
})
.Where(p => personKeys.Contains(p.SelectionKey))
.Select(f => f.item)
@@ -119,7 +120,7 @@ public class PeopleRepository(IDbContextFactory<JellyfinDbContext> dbProvider, I
var toAdd = people
.Where(e => e.Type is not PersonKind.Artist && e.Type is not PersonKind.AlbumArtist)
- .Where(e => !existingPersons.Any(f => f.Name == e.Name && f.PersonType == e.Type.ToString()))
+ .Where(e => !existingPersons.Any(f => string.Equals(f.Name, e.Name, StringComparison.OrdinalIgnoreCase) && f.PersonType == e.Type.ToString()))
.Select(Map);
context.Peoples.AddRange(toAdd);
context.SaveChanges();
@@ -137,8 +138,8 @@ public class PeopleRepository(IDbContextFactory<JellyfinDbContext> dbProvider, I
continue;
}
- var entityPerson = personsEntities.First(e => e.Name == person.Name && e.PersonType == person.Type.ToString());
- var existingMap = existingMaps.FirstOrDefault(e => e.People.Name == person.Name && e.People.PersonType == person.Type.ToString() && e.Role == person.Role);
+ var entityPerson = personsEntities.First(e => string.Equals(e.Name, person.Name, StringComparison.OrdinalIgnoreCase) && e.PersonType == person.Type.ToString());
+ var existingMap = existingMaps.FirstOrDefault(e => string.Equals(e.People.Name, person.Name, StringComparison.OrdinalIgnoreCase) && e.People.PersonType == person.Type.ToString() && e.Role == person.Role);
if (existingMap is null)
{
context.PeopleBaseItemMap.Add(new PeopleBaseItemMap()
diff --git a/Jellyfin.Server/Migrations/Routines/MergeDuplicatePeople.cs b/Jellyfin.Server/Migrations/Routines/MergeDuplicatePeople.cs
new file mode 100644
index 0000000000..d092555139
--- /dev/null
+++ b/Jellyfin.Server/Migrations/Routines/MergeDuplicatePeople.cs
@@ -0,0 +1,294 @@
+#pragma warning disable RS0030 // Do not use banned APIs
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using Jellyfin.Database.Implementations;
+using Jellyfin.Server.ServerSetupApp;
+using MediaBrowser.Controller.Library;
+using MediaBrowser.Controller.Persistence;
+using Microsoft.EntityFrameworkCore;
+using Microsoft.Extensions.Logging;
+
+namespace Jellyfin.Server.Migrations.Routines;
+
+/// <summary>
+/// Merges case-only duplicate people. Two passes:
+/// 1) Person BaseItems whose Name differs only by casing — Person.GetPath hashes the name
+/// verbatim, so two casings produce two distinct Person rows in BaseItems.
+/// 2) Peoples lookup rows whose Name differs only by casing within the same PersonType —
+/// UpdatePeople used to insert a second Peoples row when a metadata provider returned
+/// a different casing than the row already in the table.
+/// Both bugs cause the /Persons endpoint to list the same person twice.
+/// </summary>
+[JellyfinMigration("2026-05-08T13:00:00", nameof(MergeDuplicatePeople))]
+[JellyfinMigrationBackup(JellyfinDb = true)]
+public class MergeDuplicatePeople : IAsyncMigrationRoutine
+{
+ private const string PersonType = "MediaBrowser.Controller.Entities.Person";
+
+ private readonly IStartupLogger<MergeDuplicatePeople> _logger;
+ private readonly IDbContextFactory<JellyfinDbContext> _dbContextFactory;
+ private readonly ILibraryManager _libraryManager;
+ private readonly IItemPersistenceService _persistenceService;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="MergeDuplicatePeople"/> class.
+    /// </summary>
+    /// <param name="logger">Startup logger the migration writes its progress messages to.</param>
+    /// <param name="dbContextFactory">Factory used to create the database context the migration runs on.</param>
+    /// <param name="libraryManager">Library manager used to resolve and delete the duplicate Person items.</param>
+    /// <param name="persistenceService">Persistence service used to delete duplicates the library manager could not resolve.</param>
+    public MergeDuplicatePeople(
+        IStartupLogger<MergeDuplicatePeople> logger,
+        IDbContextFactory<JellyfinDbContext> dbContextFactory,
+        ILibraryManager libraryManager,
+        IItemPersistenceService persistenceService)
+    {
+        // Plain dependency capture; nothing is validated or resolved here.
+        (_logger, _dbContextFactory, _libraryManager, _persistenceService)
+            = (logger, dbContextFactory, libraryManager, persistenceService);
+    }
+
+    /// <inheritdoc/>
+    public async Task PerformAsync(CancellationToken cancellationToken)
+    {
+        // Both passes share one context, which is disposed asynchronously when the method exits.
+        var dbContext = await _dbContextFactory.CreateDbContextAsync(cancellationToken).ConfigureAwait(false);
+        await using var dbContextScope = dbContext.ConfigureAwait(false);
+
+        // Person BaseItems are merged first, then the Peoples lookup rows.
+        await MergePersonBaseItemsAsync(dbContext, cancellationToken).ConfigureAwait(false);
+        await MergePeoplesRowsAsync(dbContext, cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <summary>
+    /// Merges Person BaseItems whose names differ only by casing into a single row per name.
+    /// </summary>
+    /// <remarks>
+    /// For each group of duplicates one keeper is picked, rows that reference the other
+    /// members are redirected to the keeper, and the remaining duplicates are deleted
+    /// once every group has been processed.
+    /// </remarks>
+    /// <param name="context">The database context used for all queries and bulk updates.</param>
+    /// <param name="cancellationToken">Checked before each group and passed to the database calls.</param>
+    /// <returns>A task representing the asynchronous operation.</returns>
+    private async Task MergePersonBaseItemsAsync(JellyfinDbContext context, CancellationToken cancellationToken)
+    {
+        // Only Id and Name are needed to find the groups; the per-row statistics used to
+        // pick a keeper are loaded per group further down.
+        var persons = await context.BaseItems
+            .Where(b => b.Type == PersonType && b.Name != null)
+            .Select(b => new { b.Id, b.Name })
+            .ToListAsync(cancellationToken)
+            .ConfigureAwait(false);
+
+        // Grouping is done in memory on the invariant-lowercased name.
+        var groups = persons
+            .GroupBy(p => p.Name!.ToLowerInvariant())
+            .Where(g => g.Count() > 1)
+            .ToList();
+
+        if (groups.Count == 0)
+        {
+            _logger.LogInformation("No case-only duplicate Person BaseItems found.");
+            return;
+        }
+
+        _logger.LogInformation("Found {Count} groups of case-only duplicate Person BaseItems.", groups.Count);
+
+        var idsToDelete = new List<Guid>();
+        foreach (var group in groups)
+        {
+            cancellationToken.ThrowIfCancellationRequested();
+
+            var groupIds = group.Select(g => g.Id).ToArray();
+
+            // Pick the keeper: the Person with the most UserData rows (favorites, image
+            // refresh state) is the one users have actually interacted with.
+            var stats = await context.BaseItems
+                .Where(b => groupIds.Contains(b.Id))
+                .Select(b => new
+                {
+                    b.Id,
+                    b.Name,
+                    b.DateCreated,
+                    UserDataCount = context.UserData.Count(u => u.ItemId == b.Id),
+                    LinkedCount = context.LinkedChildren.Count(l => l.ParentId == b.Id || l.ChildId == b.Id),
+                })
+                .ToListAsync(cancellationToken)
+                .ConfigureAwait(false);
+
+            // Ties on UserData are broken by the number of LinkedChildren rows, then by the
+            // earliest DateCreated.
+            var keeper = stats
+                .OrderByDescending(s => s.UserDataCount)
+                .ThenByDescending(s => s.LinkedCount)
+                .ThenBy(s => s.DateCreated)
+                .First();
+
+            foreach (var dup in stats.Where(s => s.Id != keeper.Id))
+            {
+                await RedirectPersonReferencesAsync(context, dup.Id, keeper.Id, cancellationToken).ConfigureAwait(false);
+                idsToDelete.Add(dup.Id);
+            }
+
+            _logger.LogDebug(
+                "Merged Person BaseItems for '{Name}' into {KeeperId} ({Removed} removed).",
+                keeper.Name,
+                keeper.Id,
+                stats.Count - 1);
+        }
+
+        if (idsToDelete.Count == 0)
+        {
+            return;
+        }
+
+        DeleteDuplicatePersonItems(idsToDelete);
+
+        _logger.LogInformation("Removed {Count} duplicate Person BaseItems.", idsToDelete.Count);
+    }
+
+    /// <summary>
+    /// Points every row that references the duplicate Person BaseItem at the keeper instead.
+    /// </summary>
+    /// <remarks>
+    /// For AncestorIds, LinkedChildren and UserData, rows of the duplicate that the keeper already
+    /// has a matching counterpart for are deleted before the redirect, presumably so the update
+    /// cannot produce duplicate rows. The statements are order-dependent: each delete must run
+    /// before the update on the same table.
+    /// </remarks>
+    /// <param name="context">The database context used for the bulk updates and deletes.</param>
+    /// <param name="dupId">Id of the duplicate Person BaseItem that is going away.</param>
+    /// <param name="keeperId">Id of the Person BaseItem that survives the merge.</param>
+    /// <param name="cancellationToken">Passed to every bulk operation.</param>
+    /// <returns>A task representing the asynchronous operation.</returns>
+    private static async Task RedirectPersonReferencesAsync(
+        JellyfinDbContext context,
+        Guid dupId,
+        Guid keeperId,
+        CancellationToken cancellationToken)
+    {
+        // BaseItems that name the duplicate as parent or owner.
+        await context.BaseItems
+            .Where(b => b.ParentId == dupId)
+            .ExecuteUpdateAsync(s => s.SetProperty(b => b.ParentId, keeperId), cancellationToken)
+            .ConfigureAwait(false);
+
+        await context.BaseItems
+            .Where(b => b.OwnerId == dupId)
+            .ExecuteUpdateAsync(s => s.SetProperty(b => b.OwnerId, keeperId), cancellationToken)
+            .ConfigureAwait(false);
+
+        // AncestorIds: drop rows the keeper already has for the same item, then redirect the rest.
+        await context.AncestorIds
+            .Where(a => a.ParentItemId == dupId
+                && context.AncestorIds.Any(k => k.ParentItemId == keeperId && k.ItemId == a.ItemId))
+            .ExecuteDeleteAsync(cancellationToken)
+            .ConfigureAwait(false);
+        await context.AncestorIds
+            .Where(a => a.ParentItemId == dupId)
+            .ExecuteUpdateAsync(s => s.SetProperty(a => a.ParentItemId, keeperId), cancellationToken)
+            .ConfigureAwait(false);
+
+        // LinkedChildren, duplicate on the parent side.
+        await context.LinkedChildren
+            .Where(l => l.ParentId == dupId
+                && context.LinkedChildren.Any(k => k.ParentId == keeperId && k.ChildId == l.ChildId))
+            .ExecuteDeleteAsync(cancellationToken)
+            .ConfigureAwait(false);
+        await context.LinkedChildren
+            .Where(l => l.ParentId == dupId)
+            .ExecuteUpdateAsync(s => s.SetProperty(l => l.ParentId, keeperId), cancellationToken)
+            .ConfigureAwait(false);
+
+        // LinkedChildren, duplicate on the child side.
+        await context.LinkedChildren
+            .Where(l => l.ChildId == dupId
+                && context.LinkedChildren.Any(k => k.ChildId == keeperId && k.ParentId == l.ParentId))
+            .ExecuteDeleteAsync(cancellationToken)
+            .ConfigureAwait(false);
+        await context.LinkedChildren
+            .Where(l => l.ChildId == dupId)
+            .ExecuteUpdateAsync(s => s.SetProperty(l => l.ChildId, keeperId), cancellationToken)
+            .ConfigureAwait(false);
+
+        // UserData: where the keeper already has a row for the same user and custom data key,
+        // the keeper's row wins and the duplicate's row is dropped.
+        await context.UserData
+            .Where(u => u.ItemId == dupId
+                && context.UserData.Any(k => k.ItemId == keeperId && k.UserId == u.UserId && k.CustomDataKey == u.CustomDataKey))
+            .ExecuteDeleteAsync(cancellationToken)
+            .ConfigureAwait(false);
+        await context.UserData
+            .Where(u => u.ItemId == dupId)
+            .ExecuteUpdateAsync(s => s.SetProperty(u => u.ItemId, keeperId), cancellationToken)
+            .ConfigureAwait(false);
+    }
+
+    /// <summary>
+    /// Deletes the duplicate Person BaseItems once their references have been redirected.
+    /// </summary>
+    /// <param name="idsToDelete">Ids of the duplicate Person BaseItems to remove.</param>
+    private void DeleteDuplicatePersonItems(List<Guid> idsToDelete)
+    {
+        // Resolve via LibraryManager so DeleteItemsUnsafeFast can also remove the
+        // %MetadataPath%/People/<Letter>/<Name> directories the duplicate stubs left behind.
+        var itemsToDelete = idsToDelete
+            .Select(id => _libraryManager.GetItemById(id))
+            .Where(item => item is not null)
+            .ToList();
+        if (itemsToDelete.Count > 0)
+        {
+            _libraryManager.DeleteItemsUnsafeFast(itemsToDelete!);
+        }
+
+        // Ids the library manager could not resolve are removed through the persistence service instead.
+        var deletedIds = itemsToDelete.Select(i => i!.Id).ToHashSet();
+        var unresolvedIds = idsToDelete.Where(id => !deletedIds.Contains(id)).ToList();
+        if (unresolvedIds.Count > 0)
+        {
+            _persistenceService.DeleteItem(unresolvedIds);
+        }
+    }
+
+    /// <summary>
+    /// Collapses Peoples lookup rows that share a PersonType and whose names differ only by casing.
+    /// </summary>
+    /// <param name="context">The database context used for all queries and bulk updates.</param>
+    /// <param name="cancellationToken">Checked before each group and passed to the database calls.</param>
+    /// <returns>A task representing the asynchronous operation.</returns>
+    private async Task MergePeoplesRowsAsync(JellyfinDbContext context, CancellationToken cancellationToken)
+    {
+        var allRows = await context.Peoples
+            .Select(p => new { p.Id, p.Name, p.PersonType })
+            .ToListAsync(cancellationToken)
+            .ConfigureAwait(false);
+
+        // Grouped in memory by (invariant-lowercased name, type); only groups with more than
+        // one member need merging.
+        var duplicateGroups = allRows
+            .GroupBy(p => (Name: p.Name.ToLowerInvariant(), p.PersonType))
+            .Where(g => g.Count() > 1)
+            .ToList();
+
+        if (duplicateGroups.Count == 0)
+        {
+            _logger.LogInformation("No case-only duplicate Peoples rows found.");
+            return;
+        }
+
+        _logger.LogInformation("Found {Count} groups of case-only duplicate Peoples rows.", duplicateGroups.Count);
+
+        var obsoleteIds = new List<Guid>();
+        foreach (var duplicateGroup in duplicateGroups)
+        {
+            cancellationToken.ThrowIfCancellationRequested();
+
+            var memberIds = duplicateGroup.Select(member => member.Id).ToArray();
+
+            // Pick the keeper: the row referenced by the most BaseItems is the one most
+            // tracks/movies already point at; the duplicates are usually orphan stubs left
+            // by a casing-mismatched insert.
+            var candidates = await context.Peoples
+                .Where(p => memberIds.Contains(p.Id))
+                .Select(p => new
+                {
+                    p.Id,
+                    p.Name,
+                    MapCount = context.PeopleBaseItemMap.Count(m => m.PeopleId == p.Id),
+                })
+                .ToListAsync(cancellationToken)
+                .ConfigureAwait(false);
+
+            // Equal map counts are resolved by the lowest Id.
+            var survivor = candidates
+                .OrderByDescending(c => c.MapCount)
+                .ThenBy(c => c.Id)
+                .First();
+
+            foreach (var candidate in candidates)
+            {
+                if (candidate.Id == survivor.Id)
+                {
+                    continue;
+                }
+
+                await RedirectPeopleMapRowsAsync(context, candidate.Id, survivor.Id, cancellationToken).ConfigureAwait(false);
+                obsoleteIds.Add(candidate.Id);
+            }
+
+            _logger.LogDebug(
+                "Merged Peoples rows for '{Name}' into {KeeperId} ({Removed} removed).",
+                survivor.Name,
+                survivor.Id,
+                candidates.Count - 1);
+        }
+
+        if (obsoleteIds.Count == 0)
+        {
+            return;
+        }
+
+        // The duplicates no longer have map rows pointing at them and are removed in one statement.
+        await context.Peoples
+            .Where(p => obsoleteIds.Contains(p.Id))
+            .ExecuteDeleteAsync(cancellationToken)
+            .ConfigureAwait(false);
+
+        _logger.LogInformation("Removed {Count} duplicate Peoples rows.", obsoleteIds.Count);
+    }
+
+    /// <summary>
+    /// Moves the PeopleBaseItemMap rows of a duplicate Peoples row over to the keeper row.
+    /// </summary>
+    /// <param name="context">The database context used for the bulk delete and update.</param>
+    /// <param name="dupId">Id of the duplicate Peoples row.</param>
+    /// <param name="keeperId">Id of the Peoples row that survives the merge.</param>
+    /// <param name="cancellationToken">Passed to both bulk operations.</param>
+    /// <returns>A task representing the asynchronous operation.</returns>
+    private static async Task RedirectPeopleMapRowsAsync(
+        JellyfinDbContext context,
+        Guid dupId,
+        Guid keeperId,
+        CancellationToken cancellationToken)
+    {
+        // PeopleBaseItemMap PK is (ItemId, PeopleId, Role); drop dup rows that would
+        // collide on (ItemId, Role) before redirecting PeopleId. Role is nullable, so
+        // match nulls explicitly.
+        await context.PeopleBaseItemMap
+            .Where(m => m.PeopleId == dupId
+                && context.PeopleBaseItemMap.Any(k => k.PeopleId == keeperId
+                    && k.ItemId == m.ItemId
+                    && (k.Role == m.Role || (k.Role == null && m.Role == null))))
+            .ExecuteDeleteAsync(cancellationToken)
+            .ConfigureAwait(false);
+
+        // Whatever is left cannot collide any more and is simply re-pointed at the keeper.
+        await context.PeopleBaseItemMap
+            .Where(m => m.PeopleId == dupId)
+            .ExecuteUpdateAsync(s => s.SetProperty(m => m.PeopleId, keeperId), cancellationToken)
+            .ConfigureAwait(false);
+    }
+}