diff options
| author | Erik Rigtorp <erik@rigtorp.se> | 2020-05-07 18:45:04 -0700 |
|---|---|---|
| committer | Erik Rigtorp <erik@rigtorp.se> | 2020-06-11 17:26:38 -0700 |
| commit | 82e8865147848a6d4f3fefc4b6a4d51cb8fd72b5 (patch) | |
| tree | 35cc0856c36a7b2245c37cc897d0ba74e233bb5b | |
| parent | 762e0c8d1759f7e3ced6b9c14df894d4a3d92b62 (diff) | |
Make tvdb name normalizer unicode aware
| -rw-r--r-- | MediaBrowser.Providers/Plugins/TheTvdb/TvdbSeriesProvider.cs | 42 |
1 file changed, 5 insertions, 37 deletions
```diff
diff --git a/MediaBrowser.Providers/Plugins/TheTvdb/TvdbSeriesProvider.cs b/MediaBrowser.Providers/Plugins/TheTvdb/TvdbSeriesProvider.cs
index 541471561..066d13eec 100644
--- a/MediaBrowser.Providers/Plugins/TheTvdb/TvdbSeriesProvider.cs
+++ b/MediaBrowser.Providers/Plugins/TheTvdb/TvdbSeriesProvider.cs
@@ -275,16 +275,6 @@ namespace MediaBrowser.Providers.Plugins.TheTvdb
         }
 
         /// <summary>
-        /// The remove.
-        /// </summary>
-        const string remove = "\"'!`?";
-
-        /// <summary>
-        /// The spacers.
-        /// </summary>
-        const string spacers = "/,.:;\\(){}[]+-_=–*"; // (there are two types of dashes, short and long)
-
-        /// <summary>
         /// Gets the name of the comparable.
         /// </summary>
         /// <param name="name">The name.</param>
@@ -293,33 +283,11 @@ namespace MediaBrowser.Providers.Plugins.TheTvdb
         {
             name = name.ToLowerInvariant();
             name = name.Normalize(NormalizationForm.FormKD);
-            var sb = new StringBuilder();
-            foreach (var c in name)
-            {
-                if (c >= 0x2B0 && c <= 0x0333)
-                {
-                    // skip char modifier and diacritics
-                }
-                else if (remove.IndexOf(c) > -1)
-                {
-                    // skip chars we are removing
-                }
-                else if (spacers.IndexOf(c) > -1)
-                {
-                    sb.Append(" ");
-                }
-                else if (c == '&')
-                {
-                    sb.Append(" and ");
-                }
-                else
-                {
-                    sb.Append(c);
-                }
-            }
-
-            sb.Replace(", the", string.Empty).Replace("the ", " ").Replace(" the ", " ");
-            return Regex.Replace(sb.ToString().Trim(), @"\s+", " ");
+            name = name.Replace(", the", string.Empty).Replace("the ", " ").Replace(" the ", " ");
+            name = name.Replace("&", " and " );
+            name = Regex.Replace(name, @"[\p{Lm}\p{Mn}]", string.Empty); // Remove diacritics, etc
+            name = Regex.Replace(name, @"[\W\p{Pc}]+", " "); // Replace sequences of non-word characters and _ with " "
+            return name.Trim();
         }
 
         private void MapSeriesToResult(MetadataResult<Series> result, TvDbSharper.Dto.Series tvdbSeries, string metadataLanguage)
```
