aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Bond-009 <bond.009@outlook.com> 2022-05-05 19:59:17 +0200
committer crobibero <cody@robibe.ro> 2022-05-20 18:30:56 -0400
commit 2b1a915eadbf26320f2c0baf988abc434102816d (patch)
tree 2b45aebdf5575c4e9ec792d93131b65393e25b2a /src
parent e263e9c2b129ef20c696ce458e00cd4d3367c520 (diff)
Merge pull request #7604 from Jellifi007/fixes-diactritics
Co-authored-by: Cody Robibero <cody@robibe.ro> (cherry picked from commit 8d1d9734381472b301deb0118bbb8da2a769a65e) Signed-off-by: crobibero <cody@robibe.ro>
Diffstat (limited to 'src')
-rw-r--r-- src/Jellyfin.Extensions/StringExtensions.cs 42
1 files changed, 42 insertions, 0 deletions
diff --git a/src/Jellyfin.Extensions/StringExtensions.cs b/src/Jellyfin.Extensions/StringExtensions.cs
index 3a7707253..dadc9f1d5 100644
--- a/src/Jellyfin.Extensions/StringExtensions.cs
+++ b/src/Jellyfin.Extensions/StringExtensions.cs
@@ -1,4 +1,8 @@
using System;
+using System.Diagnostics;
+using System.Globalization;
+using System.Text;
+using System.Text.RegularExpressions;
namespace Jellyfin.Extensions
{
@@ -7,6 +11,44 @@ namespace Jellyfin.Extensions
/// </summary>
public static class StringExtensions
{
+ // Matches non-conforming UTF-16 input: a high surrogate not followed by a low surrogate, a low surrogate not preceded by a high surrogate, or the replacement character U+FFFD.
+ // Pattern source: https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
+ private static readonly Regex _nonConformingUnicode = new Regex("([\ud800-\udbff](?![\udc00-\udfff]))|((?<![\ud800-\udbff])[\udc00-\udfff])|(\ufffd)");
+
+ /// <summary>
+ /// Removes diacritics from the string: strips non-conforming unicode chars, decomposes to FormD, drops every non-spacing mark and recomposes to FormC.
+ /// </summary>
+ /// <param name="text">The string to act on.</param>
+ /// <returns>The string without diacritics, normalized to <see cref="NormalizationForm.FormC"/>.</returns>
+ public static string RemoveDiacritics(this string text)
+ {
+ string withDiactritics = _nonConformingUnicode
+ .Replace(text, string.Empty) // strip everything the regex matches (lone surrogates, U+FFFD) before normalizing
+ .Normalize(NormalizationForm.FormD); // decompose so combining marks become separate chars
+
+ var withoutDiactritics = new StringBuilder();
+ foreach (char c in withDiactritics)
+ {
+ UnicodeCategory uc = CharUnicodeInfo.GetUnicodeCategory(c);
+ if (uc != UnicodeCategory.NonSpacingMark) // keep every char except non-spacing (combining) marks
+ {
+ withoutDiactritics.Append(c);
+ }
+ }
+
+ return withoutDiactritics.ToString().Normalize(NormalizationForm.FormC); // recompose what is left
+ }
+
+ /// <summary>
+ /// Checks whether or not the specified string has diacritics in it, i.e. whether <see cref="RemoveDiacritics"/> returns a different string.
+ /// </summary>
+ /// <param name="text">The string to check.</param>
+ /// <returns>True if the string differs (ordinal comparison) from its <see cref="RemoveDiacritics"/> result, false otherwise.</returns>
+ public static bool HasDiacritics(this string text)
+ {
+ return !string.Equals(text, text.RemoveDiacritics(), StringComparison.Ordinal);
+ }
+
/// <summary>
/// Counts the number of occurrences of [needle] in the string.
/// </summary>