diff options
| author | dkanada <dkanada@users.noreply.github.com> | 2020-06-12 13:38:09 +0900 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-06-12 13:38:09 +0900 |
| commit | 91fcd56380c6991cbf484bbc1b0ce08b0fab6c1e (patch) | |
| tree | 5b7775c4d7fcad34044992edf5004e41a3f2bb7c | |
| parent | 72a688aa7ae7dedd8829c6afb6c619171bf0d9e8 (diff) | |
| parent | 82e8865147848a6d4f3fefc4b6a4d51cb8fd72b5 (diff) | |
Merge pull request #3071 from rigtorp/tvdb-normalize
Make tvdb name normalizer unicode aware
| -rw-r--r-- | MediaBrowser.Providers/Plugins/TheTvdb/TvdbSeriesProvider.cs | 42 |
1 file changed, 5 insertions, 37 deletions
diff --git a/MediaBrowser.Providers/Plugins/TheTvdb/TvdbSeriesProvider.cs b/MediaBrowser.Providers/Plugins/TheTvdb/TvdbSeriesProvider.cs index 541471561..066d13eec 100644 --- a/MediaBrowser.Providers/Plugins/TheTvdb/TvdbSeriesProvider.cs +++ b/MediaBrowser.Providers/Plugins/TheTvdb/TvdbSeriesProvider.cs @@ -275,16 +275,6 @@ namespace MediaBrowser.Providers.Plugins.TheTvdb } /// <summary> - /// The remove. - /// </summary> - const string remove = "\"'!`?"; - - /// <summary> - /// The spacers. - /// </summary> - const string spacers = "/,.:;\\(){}[]+-_=–*"; // (there are two types of dashes, short and long) - - /// <summary> /// Gets the name of the comparable. /// </summary> /// <param name="name">The name.</param> @@ -293,33 +283,11 @@ namespace MediaBrowser.Providers.Plugins.TheTvdb { name = name.ToLowerInvariant(); name = name.Normalize(NormalizationForm.FormKD); - var sb = new StringBuilder(); - foreach (var c in name) - { - if (c >= 0x2B0 && c <= 0x0333) - { - // skip char modifier and diacritics - } - else if (remove.IndexOf(c) > -1) - { - // skip chars we are removing - } - else if (spacers.IndexOf(c) > -1) - { - sb.Append(" "); - } - else if (c == '&') - { - sb.Append(" and "); - } - else - { - sb.Append(c); - } - } - - sb.Replace(", the", string.Empty).Replace("the ", " ").Replace(" the ", " "); - return Regex.Replace(sb.ToString().Trim(), @"\s+", " "); + name = name.Replace(", the", string.Empty).Replace("the ", " ").Replace(" the ", " "); + name = name.Replace("&", " and " ); + name = Regex.Replace(name, @"[\p{Lm}\p{Mn}]", string.Empty); // Remove diacritics, etc + name = Regex.Replace(name, @"[\W\p{Pc}]+", " "); // Replace sequences of non-word characters and _ with " " + return name.Trim(); } private void MapSeriesToResult(MetadataResult<Series> result, TvDbSharper.Dto.Series tvdbSeries, string metadataLanguage) |
