aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MediaBrowser.Providers/Tmdb/Movies/TmdbSearch.cs67
1 files changed, 40 insertions, 27 deletions
diff --git a/MediaBrowser.Providers/Tmdb/Movies/TmdbSearch.cs b/MediaBrowser.Providers/Tmdb/Movies/TmdbSearch.cs
index 223cef086..08c1afec2 100644
--- a/MediaBrowser.Providers/Tmdb/Movies/TmdbSearch.cs
+++ b/MediaBrowser.Providers/Tmdb/Movies/TmdbSearch.cs
@@ -5,6 +5,7 @@ using System.Linq;
using System.Net;
using System.Threading;
using System.Threading.Tasks;
+using System.Text.RegularExpressions;
using MediaBrowser.Common.Net;
using MediaBrowser.Controller.Library;
using MediaBrowser.Controller.Providers;
@@ -19,6 +20,20 @@ namespace MediaBrowser.Providers.Tmdb.Movies
public class TmdbSearch
{
private static readonly CultureInfo EnUs = new CultureInfo("en-US");
+
+ private static readonly Regex cleanEnclosed = new Regex(@"\p{Ps}.*\p{Pe}", RegexOptions.Compiled);
+ private static readonly Regex cleanNonWord = new Regex(@"[\W_]+", RegexOptions.Compiled);
+ private static readonly Regex cleanStopWords = new Regex(@"\b( # Start at word boundary
+ 19[0-9]{2}|20[0-9]{2}| # 1900-2099
+ S[0-9]{2}| # Season
+ E[0-9]{2}| # Episode
+ (2160|1080|720|576|480)[ip]?| # Resolution
+ [xh]?264| # Encoding
+ (web|dvd|bd|hdtv|hd)rip| # *Rip
+ web|hdtv|mp4|bluray|ktr|dl|single|imageset|internal|doku|dubbed|retail|xxx|flac
+ ).* # Match rest of string",
+ RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase);
+
private const string Search3 = TmdbUtils.BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}";
private readonly ILogger _logger;
@@ -61,19 +76,18 @@ namespace MediaBrowser.Providers.Tmdb.Movies
var tmdbImageUrl = tmdbSettings.images.GetImageUrl("original");
- if (!string.IsNullOrWhiteSpace(name))
- {
- var parsedName = _libraryManager.ParseName(name);
- var yearInName = parsedName.Year;
- name = parsedName.Name;
- year = year ?? yearInName;
- }
+ // Does this mean we are reparsing already parsed ItemLookupInfo?
+ var parsedName = _libraryManager.ParseName(name);
+ var yearInName = parsedName.Year;
+ name = parsedName.Name;
+ year = year ?? yearInName;
- _logger.LogInformation("MovieDbProvider: Finding id for item: " + name);
+ _logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", name, year);
var language = idInfo.MetadataLanguage.ToLowerInvariant();
- //nope - search for it
- //var searchType = item is BoxSet ? "collection" : "movie";
+ // Replace sequences of non-word characters with space
+ // TMDB expects a space separated list of words make sure that is the case
+ name = cleanNonWord.Replace(name, " ").Trim();
var results = await GetSearchResults(name, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);
@@ -86,36 +100,35 @@ namespace MediaBrowser.Providers.Tmdb.Movies
}
}
+ // Ideally retrying alternatives should be done outside the search
+ // provider so that the retry logic can be common for all search
+ // providers
if (results.Count == 0)
{
- // try with dot and _ turned to space
- var originalName = name;
-
- name = name.Replace(",", " ");
- name = name.Replace(".", " ");
- name = name.Replace("_", " ");
- name = name.Replace("-", " ");
- name = name.Replace("!", " ");
- name = name.Replace("?", " ");
-
- var parenthIndex = name.IndexOf('(');
- if (parenthIndex != -1)
- {
- name = name.Substring(0, parenthIndex);
- }
+ name = parsedName.Name;
+
+ // Remove things enclosed in []{}() etc
+ name = cleanEnclosed.Replace(name, string.Empty);
+ // Replace sequences of non-word characters with space
+ name = cleanNonWord.Replace(name, " ");
+
+ // Clean based on common stop words / tokens
+ name = cleanStopWords.Replace(name, string.Empty);
+
+ // Trim whitespace
name = name.Trim();
// Search again if the new name is different
- if (!string.Equals(name, originalName))
+ if (!string.Equals(name, parsedName.Name) && !string.IsNullOrWhiteSpace(name))
{
+ _logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", name, year);
results = await GetSearchResults(name, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);
if (results.Count == 0 && !string.Equals(language, "en", StringComparison.OrdinalIgnoreCase))
{
//one more time, in english
results = await GetSearchResults(name, searchType, year, "en", tmdbImageUrl, cancellationToken).ConfigureAwait(false);
-
}
}
}