// From 40442f887ba717ae47620b152315f21b252fe049 Mon Sep 17 00:00:00 2001
// From: Luke Pulverenti
// Date: Wed, 9 Aug 2017 15:56:38 -0400
// Subject: consolidate emby.server.core into emby.server.implementations
// ---
// .../Localization/TextLocalizer.cs | 63 ++++++++++++++++++++++
// 1 file changed, 63 insertions(+)
// create mode 100644 Emby.Server.Implementations/Localization/TextLocalizer.cs
// (limited to 'Emby.Server.Implementations/Localization/TextLocalizer.cs')
// diff --git a/Emby.Server.Implementations/Localization/TextLocalizer.cs b/Emby.Server.Implementations/Localization/TextLocalizer.cs
// new file mode 100644
// index 000000000..5188a959e
// --- /dev/null
// +++ b/Emby.Server.Implementations/Localization/TextLocalizer.cs
// @@ -0,0 +1,63 @@
using System;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace Emby.Server.Implementations.Localization
{
    /// <summary>
    /// Text helper implementing <see cref="ITextLocalizer"/>.
    /// </summary>
    public class TextLocalizer : ITextLocalizer
    {
        /// <summary>
        /// Returns <paramref name="text"/> with every character whose Unicode category is
        /// <see cref="UnicodeCategory.NonSpacingMark"/> removed.
        /// </summary>
        /// <param name="text">The string to process.</param>
        /// <returns>The filtered string, re-normalized to form C.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public string RemoveDiacritics(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            // Form D is requested first so that the marks can be filtered out
            // as individual characters below.
            var decomposed = Normalize(text, NormalizationForm.FormD);

            var kept = new StringBuilder(decomposed.Length);
            foreach (var ch in decomposed)
            {
                if (CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
                {
                    kept.Append(ch);
                }
            }

            return Normalize(kept.ToString(), NormalizationForm.FormC);
        }

        /// <summary>
        /// Normalizes <paramref name="text"/> to <paramref name="form"/> without letting an
        /// <see cref="ArgumentException"/> from <see cref="string.Normalize(NormalizationForm)"/> escape.
        /// </summary>
        /// <param name="text">The string to normalize.</param>
        /// <param name="form">The target normalization form.</param>
        /// <param name="stripStringOnFailure">
        /// When true, a failed attempt is retried once on the result of
        /// <c>StripInvalidUnicodeCharacters</c>; when false, the input is returned as-is on failure.
        /// </param>
        /// <returns>
        /// The normalized string, or the un-normalized string of the last attempt if
        /// normalization kept failing.
        /// </returns>
        private static string Normalize(string text, NormalizationForm form, bool stripStringOnFailure = true)
        {
            try
            {
                return text.Normalize(form);
            }
            catch (ArgumentException)
            {
                // will throw if input contains invalid unicode chars
                // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
                if (!stripStringOnFailure)
                {
                    // no retry requested, return the original text
                    return text;
                }
            }

            var stripped = StripInvalidUnicodeCharacters(text);

            try
            {
                return stripped.Normalize(form);
            }
            catch (ArgumentException)
            {
                // if it still fails, return the (already stripped) text
                return stripped;
            }
        }

        private static string StripInvalidUnicodeCharacters(string str)
        {
            var
invalidCharactersRegex = new Regex("([\ud800-\udbff](?![\udc00-\udfff]))|((?