From 92c76de2ba01608e37a3f7ba311d2711b2230dc8 Mon Sep 17 00:00:00 2001
From: Luke Pulverenti
Date: Mon, 20 Jan 2014 14:55:49 -0500
Subject: #680 - improve name comparisons

---
 .../FileOrganization/TvFileSorter.cs               | 70 ++++++++++++++++++++--
 1 file changed, 66 insertions(+), 4 deletions(-)

(limited to 'MediaBrowser.Server.Implementations/FileOrganization/TvFileSorter.cs')

diff --git a/MediaBrowser.Server.Implementations/FileOrganization/TvFileSorter.cs b/MediaBrowser.Server.Implementations/FileOrganization/TvFileSorter.cs
index 72f0da207..e0efa0c3f 100644
--- a/MediaBrowser.Server.Implementations/FileOrganization/TvFileSorter.cs
+++ b/MediaBrowser.Server.Implementations/FileOrganization/TvFileSorter.cs
@@ -1,4 +1,5 @@
-using MediaBrowser.Common.IO;
+using System.Text;
+using MediaBrowser.Common.IO;
 using MediaBrowser.Controller.Entities.TV;
 using MediaBrowser.Controller.FileOrganization;
 using MediaBrowser.Controller.IO;
@@ -429,9 +430,7 @@ namespace MediaBrowser.Server.Implementations.FileOrganization
         {
             var score = 0;
 
-            // TODO: Improve this - should ignore spaces, periods, underscores, most likely all symbols and
-            // possibly remove sorting words like "the", "and", etc.
-            if (string.Equals(sortedName, series.Name, StringComparison.OrdinalIgnoreCase))
+            if (IsNameMatch(sortedName, series.Name))
             {
                 score++;
 
@@ -452,6 +451,69 @@ namespace MediaBrowser.Server.Implementations.FileOrganization
             return new Tuple<Series, int>(series, score);
         }
 
+        /// <summary>
+        /// Determines whether two names refer to the same title, ignoring case, diacritics,
+        /// separator symbols, spaces and the articles "a" and "the".
+        /// </summary>
+        /// <param name="name1">The first name.</param>
+        /// <param name="name2">The second name.</param>
+        /// <returns><c>true</c> if both names reduce to the same comparable form; otherwise, <c>false</c>.</returns>
+        private bool IsNameMatch(string name1, string name2)
+        {
+            name1 = GetComparableName(name1);
+            name2 = GetComparableName(name2);
+
+            return string.Equals(name1, name2, StringComparison.OrdinalIgnoreCase);
+        }
+
+        /// <summary>
+        /// Reduces a name to a key suitable for loose equality comparisons.
+        /// </summary>
+        /// <param name="name">The name to normalize; <c>null</c> is treated as empty.</param>
+        /// <returns>The lower-cased name without diacritics, separators or the words "a" and "the".</returns>
+        private string GetComparableName(string name)
+        {
+            if (string.IsNullOrEmpty(name))
+            {
+                return string.Empty;
+            }
+
+            // Invariant casing keeps the key independent of the server culture (e.g. Turkish dotless i).
+            name = RemoveDiacritics(name).ToLowerInvariant();
+
+            // TODO: Other symbols (hyphens, colons, apostrophes) and words such as "and" are still significant.
+            var words = name.Split(new[] { ' ', '.', '_', '&', '!', ',' }, StringSplitOptions.RemoveEmptyEntries);
+
+            var builder = new StringBuilder(name.Length);
+
+            // Dropping whole words handles adjacent articles ("the a ..."), which chained
+            // Replace(" a ", ...) calls miss because both matches need the same space.
+            foreach (var word in words)
+            {
+                if (word != "a" && word != "the")
+                {
+                    builder.Append(word);
+                }
+            }
+
+            // Spaces are already gone; Trim still drops other whitespace left at the edges.
+            return builder.ToString().Trim();
+        }
+
+        /// <summary>
+        /// Removes the diacritics.
+        /// </summary>
+        /// <param name="text">The text; must not be <c>null</c>.</param>
+        /// <returns>The text with non-spacing marks stripped, recomposed to normalization form C.</returns>
+        private string RemoveDiacritics(string text)
+        {
+            return string.Concat(
+                text.Normalize(NormalizationForm.FormD)
+                .Where(ch => CharUnicodeInfo.GetUnicodeCategory(ch) !=
+                    UnicodeCategory.NonSpacingMark)
+                ).Normalize(NormalizationForm.FormC);
+        }
+
         /// <summary>
         /// Deletes the left over files.
         /// </summary>
-- 
cgit v1.2.3