diff options
| author | Luke <luke.pulverenti@gmail.com> | 2017-08-09 15:59:26 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-08-09 15:59:26 -0400 |
| commit | c2996935c8a873662e6d301f139c88df8a542ed2 (patch) | |
| tree | cf405f91e893fe6a3fc20dfa1622f027666d4848 /Emby.Server.Implementations/Localization/TextLocalizer.cs | |
| parent | ab834f8fdffb64b562ece0512a53f361c62f7f6f (diff) | |
| parent | 7a74c705e584774534b74e11c1ab86144cb454c6 (diff) | |
Merge pull request #2800 from MediaBrowser/dev
Dev
Diffstat (limited to 'Emby.Server.Implementations/Localization/TextLocalizer.cs')
| -rw-r--r-- | Emby.Server.Implementations/Localization/TextLocalizer.cs | 63 |
1 files changed, 63 insertions, 0 deletions
using System;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace Emby.Server.Implementations.Localization
{
    /// <summary>
    /// Unicode text helpers: diacritic removal and compatibility decomposition.
    /// Both public methods tolerate strings containing unpaired UTF-16 surrogates.
    /// </summary>
    public class TextLocalizer : ITextLocalizer
    {
        // Matches unpaired UTF-16 surrogates: a high surrogate that is not followed by a
        // low surrogate, or a low surrogate that is not preceded by a high surrogate.
        // Built once and shared instead of being re-created on every call.
        private static readonly Regex InvalidCharactersRegex =
            new Regex("([\ud800-\udbff](?![\udc00-\udfff]))|((?<![\ud800-\udbff])[\udc00-\udfff])");

        /// <summary>
        /// Removes diacritics by decomposing the text (FormD), dropping every character
        /// whose Unicode category is <see cref="UnicodeCategory.NonSpacingMark"/>, and
        /// recomposing the remainder (FormC).
        /// </summary>
        /// <param name="text">The text to process.</param>
        /// <returns>The text without non-spacing marks.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public string RemoveDiacritics(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            var decomposed = Normalize(text, NormalizationForm.FormD);

            var withoutMarks = decomposed
                .Where(ch => CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
                .ToArray();

            return Normalize(new string(withoutMarks), NormalizationForm.FormC);
        }

        /// <summary>
        /// Normalizes the text to Unicode normalization form KD.
        /// </summary>
        /// <param name="text">The text to normalize.</param>
        /// <returns>The normalized text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public string NormalizeFormKD(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            // Use the same failure-tolerant path as RemoveDiacritics so that input with
            // invalid unicode chars does not surface an ArgumentException to the caller.
            return Normalize(text, NormalizationForm.FormKD);
        }

        /// <summary>
        /// Normalizes <paramref name="text"/> to <paramref name="form"/>. When that fails
        /// and <paramref name="stripStringOnFailure"/> is true, unpaired surrogates are
        /// stripped and normalization is attempted one more time.
        /// </summary>
        /// <param name="text">The text to normalize; must not be null.</param>
        /// <param name="form">The target normalization form.</param>
        /// <param name="stripStringOnFailure">Whether to strip and retry after a failure.</param>
        /// <returns>
        /// The normalized text, or the text as it was passed in to the last attempt
        /// (already stripped on the retry path) when normalization keeps failing.
        /// </returns>
        private static string Normalize(string text, NormalizationForm form, bool stripStringOnFailure = true)
        {
            try
            {
                return text.Normalize(form);
            }
            catch (ArgumentException)
            {
                // will throw if input contains invalid unicode chars
                // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/
                if (!stripStringOnFailure)
                {
                    // the retry failed as well: give the text back as it is
                    return text;
                }
            }

            return Normalize(StripInvalidUnicodeCharacters(text), form, false);
        }

        /// <summary>
        /// Removes every unpaired UTF-16 surrogate from <paramref name="str"/>.
        /// </summary>
        /// <param name="str">The string to clean.</param>
        /// <returns>The string with unpaired surrogates removed.</returns>
        private static string StripInvalidUnicodeCharacters(string str)
        {
            return InvalidCharactersRegex.Replace(str, "");
        }
    }
}
