Update to 3.5.2 and .net core 2.1

author: stefan <stefan@hegedues.at> 2018-09-12 19:26:21 +0200
committer: stefan <stefan@hegedues.at> 2018-09-12 19:26:21 +0200
commit: 48facb797ed912e4ea6b04b17d1ff190ac2daac4 (patch)
tree: 8dae77a31670a888d733484cb17dd4077d5444e8 /SocketHttpListener/Net/HttpListenerRequestUriBuilder.cs
parent: c32d8656382a0eacb301692e0084377fc433ae9b (diff)
1 files changed, 445 insertions, 0 deletions
diff --git a/SocketHttpListener/Net/HttpListenerRequestUriBuilder.cs b/SocketHttpListener/Net/HttpListenerRequestUriBuilder.cs
new file mode 100644
index 000000000..e61bde32e
--- /dev/null
+++ b/SocketHttpListener/Net/HttpListenerRequestUriBuilder.cs
@@ -0,0 +1,445 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Text;
+using System.Globalization;
+
+namespace SocketHttpListener.Net
+{
+    // We don't use the cooked URL because http.sys unescapes all percent-encoded values. However,
+    // we also can't just use the raw Uri, since http.sys supports not only Utf-8, but also ANSI/DBCS and
+    // Unicode code points. System.Uri only supports Utf-8.
+    // The purpose of this class is to convert all ANSI, DBCS, and Unicode code points into percent encoded
+    // Utf-8 characters.
+    internal sealed class HttpListenerRequestUriBuilder
+    {
+        private static readonly Encoding s_utf8Encoding = new UTF8Encoding(false, true);
+        private static readonly Encoding s_ansiEncoding = Encoding.GetEncoding(0, new EncoderExceptionFallback(), new DecoderExceptionFallback());
+
+        private readonly string _rawUri;
+        private readonly string _cookedUriScheme;
+        private readonly string _cookedUriHost;
+        private readonly string _cookedUriPath;
+        private readonly string _cookedUriQuery;
+
+        // This field is used to build the final request Uri string from the Uri parts passed to the ctor.
+        private StringBuilder _requestUriString;
+
+        // The raw path is parsed by looping through all characters from left to right. 'rawOctets'
+        // is used to store consecutive percent encoded octets as actual byte values: e.g. for path /pa%C3%84th%2F/
+        // rawOctets will be set to { 0xC3, 0x84 } when we reach character 't' and it will be { 0x2F } when
+        // we reach the final '/'. I.e. after a sequence of percent encoded octets ends, we use rawOctets as 
+        // input to the encoding and percent encode the resulting string into UTF-8 octets.
+        //
+        // When parsing ANSI (Latin 1) encoded path '/pa%C4th/', %C4 will be added to rawOctets and when
+        // we reach 't', the content of rawOctets { 0xC4 } will be fed into the ANSI encoding. The resulting 
+        // string 'Ä' will be percent encoded into UTF-8 octets and appended to requestUriString. The final
+        // path will be '/pa%C3%84th/', where '%C3%84' is the UTF-8 percent encoded character 'Ä'.
+        private List<byte> _rawOctets;
+        private string _rawPath;
+
+        // Holds the final request Uri.
+        private Uri _requestUri;
+
+        // Stores the raw request Uri and the individual cooked Uri parts for a later Build() call.
+        // The cooked path is normalized through AddSlashToAsteriskOnlyPath, and a null cooked query
+        // is stored as an empty string so it can be appended unconditionally.
+        private HttpListenerRequestUriBuilder(string rawUri, string cookedUriScheme, string cookedUriHost,
+            string cookedUriPath, string cookedUriQuery)
+        {
+            _cookedUriQuery = (cookedUriQuery == null) ? string.Empty : cookedUriQuery;
+            _cookedUriPath = AddSlashToAsteriskOnlyPath(cookedUriPath);
+            _cookedUriHost = cookedUriHost;
+            _cookedUriScheme = cookedUriScheme;
+            _rawUri = rawUri;
+        }
+
+        // Entry point: builds the request Uri from the raw request Uri string and the cooked
+        // scheme/host/path/query parts. Returns whatever Build() produces, which is null when
+        // neither the raw nor the cooked parts form a valid absolute Uri.
+        public static Uri GetRequestUri(string rawUri, string cookedUriScheme, string cookedUriHost,
+            string cookedUriPath, string cookedUriQuery)
+        {
+            return new HttpListenerRequestUriBuilder(
+                rawUri,
+                cookedUriScheme,
+                cookedUriHost,
+                cookedUriPath,
+                cookedUriQuery).Build();
+        }
+
+        // Tries the raw path first; only when that leaves _requestUri unset is the cooked path used.
+        // The result may still be null if both attempts fail.
+        private Uri Build()
+        {
+            BuildRequestUriUsingRawPath();
+            if (_requestUri != null)
+            {
+                return _requestUri;
+            }
+
+            BuildRequestUriUsingCookedPath();
+            return _requestUri;
+        }
+
+        // Fallback: concatenates the cooked scheme, host, path and query and stores the parsed
+        // result in _requestUri (which Uri.TryCreate sets to null on failure).
+        private void BuildRequestUriUsingCookedPath()
+        {
+            string cookedUri = string.Concat(
+                _cookedUriScheme,
+                Uri.SchemeDelimiter,
+                _cookedUriHost,
+                _cookedUriPath,
+                _cookedUriQuery);
+
+            // Creating a Uri from the cooked Uri should really always work: If not, we log at least.
+            if (!Uri.TryCreate(cookedUri, UriKind.Absolute, out _requestUri))
+            {
+                //if (NetEventSource.IsEnabled)
+                //    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_create_uri, _cookedUriScheme, _cookedUriHost, _cookedUriPath, _cookedUriQuery));
+            }
+        }
+
+        // Attempts to build _requestUri from the raw request Uri string.
+        //
+        // The path is extracted from the raw Uri and converted using the primary encoding first; only
+        // if that attempt reports an encoding error is the secondary encoding tried. On any failure
+        // _requestUri stays null, which makes Build() fall back to the cooked path.
+        private void BuildRequestUriUsingRawPath()
+        {
+            // Without a raw Uri there is nothing to parse (GetPath would index into an empty string).
+            // Leave _requestUri unset so that the cooked path is used instead.
+            if (string.IsNullOrEmpty(_rawUri))
+            {
+                return;
+            }
+
+            // Initialize 'rawPath' only if really needed; i.e. if we build the request Uri from the raw Uri.
+            _rawPath = GetPath(_rawUri);
+
+            // Try to check the raw path using first the primary encoding;
+            // if it fails with an encoding error try the secondary encoding.
+            ParsingResult result = BuildRequestUriUsingRawPath(GetEncoding(EncodingType.Primary));
+            if (result == ParsingResult.EncodingError)
+            {
+                result = BuildRequestUriUsingRawPath(GetEncoding(EncodingType.Secondary));
+            }
+
+            // Log that we weren't able to create a Uri from the raw string.
+            if (result != ParsingResult.Success)
+            {
+                //if (NetEventSource.IsEnabled)
+                //    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_create_uri, _cookedUriScheme, _cookedUriHost, _rawPath, _cookedUriQuery));
+            }
+        }
+
+        // Maps an EncodingType to the encoding instance used for decoding percent-encoded octets:
+        // Secondary yields the ANSI encoding, anything else yields the UTF-8 encoding.
+        private static Encoding GetEncoding(EncodingType type)
+        {
+            Debug.Assert((type == EncodingType.Primary) || (type == EncodingType.Secondary),
+                "Unknown 'EncodingType' value: " + type.ToString());
+
+            return (type == EncodingType.Secondary) ? s_ansiEncoding : s_utf8Encoding;
+        }
+
+        // Builds the request Uri string as scheme + delimiter + host + converted raw path + cooked query,
+        // interpreting percent-encoded octets of the raw path with the given encoding, and tries to
+        // create _requestUri from it.
+        //
+        // Returns the result of ParseRawPath if parsing failed, InvalidString if the assembled string
+        // is not a valid absolute Uri, and Success otherwise.
+        private ParsingResult BuildRequestUriUsingRawPath(Encoding encoding)
+        {
+            Debug.Assert(encoding != null, "'encoding' must be assigned.");
+            Debug.Assert(!string.IsNullOrEmpty(_rawPath), "'rawPath' must have at least one character.");
+
+            // Start every attempt from a clean state so a previous (failed) attempt leaves no traces.
+            _rawOctets = new List<byte>();
+            _requestUriString = new StringBuilder()
+                .Append(_cookedUriScheme)
+                .Append(Uri.SchemeDelimiter)
+                .Append(_cookedUriHost);
+
+            ParsingResult outcome = ParseRawPath(encoding);
+            if (outcome != ParsingResult.Success)
+            {
+                //if (NetEventSource.IsEnabled)
+                //    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_raw_path, _rawPath, encoding.EncodingName));
+                return outcome;
+            }
+
+            _requestUriString.Append(_cookedUriQuery);
+
+            Debug.Assert(_rawOctets.Count == 0,
+                "Still raw octets left. They must be added to the result path.");
+
+            if (Uri.TryCreate(_requestUriString.ToString(), UriKind.Absolute, out _requestUri))
+            {
+                return ParsingResult.Success;
+            }
+
+            // If we can't create a Uri from the string, this is an invalid string and it doesn't make
+            // sense to try another encoding.
+            //if (NetEventSource.IsEnabled)
+            //    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_raw_path, _rawPath, encoding.EncodingName));
+            return ParsingResult.InvalidString;
+        }
+
+        // Walks _rawPath from left to right and appends its converted form to _requestUriString.
+        //
+        // - "%XX" octets are collected in _rawOctets; a run of them is decoded with 'encoding' and
+        //   re-emitted as percent-encoded UTF-8 as soon as the run ends.
+        // - "%uXXXX" code points are emitted directly as percent-encoded UTF-8.
+        // - Every other character is copied unchanged.
+        //
+        // Returns EncodingError if collected octets can't be decoded with 'encoding' (the caller may
+        // retry with another encoding), InvalidString if an escape sequence is truncated or contains
+        // non-hex characters, and Success otherwise.
+        private ParsingResult ParseRawPath(Encoding encoding)
+        {
+            Debug.Assert(encoding != null, "'encoding' must be assigned.");
+
+            int index = 0;
+            char current = '\0';
+            while (index < _rawPath.Length)
+            {
+                current = _rawPath[index];
+                if (current == '%')
+                {
+                    // The raw path is not guaranteed to be well-formed: a '%' that is not followed by at
+                    // least two characters (e.g. a path ending in "%" or "%2") is rejected here instead
+                    // of indexing past the end of the string.
+                    if (index + 2 >= _rawPath.Length)
+                    {
+                        return ParsingResult.InvalidString;
+                    }
+
+                    index++;
+                    current = _rawPath[index];
+                    if (current == 'u' || current == 'U')
+                    {
+                        // We found "%u" which means, we have a Unicode code point of the form "%uXXXX".
+                        // Reject truncated forms such as "%u00" for the same reason as above.
+                        if (index + 4 >= _rawPath.Length)
+                        {
+                            return ParsingResult.InvalidString;
+                        }
+
+                        // Decode the content of rawOctets into percent encoded UTF-8 characters and append them
+                        // to requestUriString.
+                        if (!EmptyDecodeAndAppendRawOctetsList(encoding))
+                        {
+                            return ParsingResult.EncodingError;
+                        }
+                        if (!AppendUnicodeCodePointValuePercentEncoded(_rawPath.Substring(index + 1, 4)))
+                        {
+                            return ParsingResult.InvalidString;
+                        }
+
+                        // Skip 'u' plus the four hex digits.
+                        index += 5;
+                    }
+                    else
+                    {
+                        // We found '%', but not followed by 'u', i.e. we have a percent encoded octet: %XX
+                        if (!AddPercentEncodedOctetToRawOctetsList(encoding, _rawPath.Substring(index, 2)))
+                        {
+                            return ParsingResult.InvalidString;
+                        }
+
+                        // Skip the two hex digits.
+                        index += 2;
+                    }
+                }
+                else
+                {
+                    // We found a non-'%' character: decode the content of rawOctets into percent encoded
+                    // UTF-8 characters and append it to the result.
+                    if (!EmptyDecodeAndAppendRawOctetsList(encoding))
+                    {
+                        return ParsingResult.EncodingError;
+                    }
+                    // Append the current character to the result.
+                    _requestUriString.Append(current);
+                    index++;
+                }
+            }
+
+            // if the raw path ends with a sequence of percent encoded octets, make sure those get added to the
+            // result (requestUriString).
+            if (!EmptyDecodeAndAppendRawOctetsList(encoding))
+            {
+                return ParsingResult.EncodingError;
+            }
+
+            return ParsingResult.Success;
+        }
+
+        // Converts the hex digits of a "%uXXXX" escape into the corresponding character(s) and appends
+        // them to _requestUriString as percent-encoded UTF-8 octets.
+        //
+        // codePoint: the hex digits following "%u" (the caller passes exactly four characters).
+        // Returns false if the digits are not valid hex, if the value is not a valid code point
+        // (char.ConvertFromUtf32 rejects e.g. surrogate values), or if UTF-8 encoding fails.
+        private bool AppendUnicodeCodePointValuePercentEncoded(string codePoint)
+        {
+            // http.sys only supports %uXXXX (4 hex-digits), even though unicode code points could have up to
+            // 6 hex digits. Therefore we parse always 4 characters after %u and convert them to an int.
+            //
+            // AllowHexSpecifier (rather than HexNumber) is used on purpose: HexNumber additionally accepts
+            // leading and trailing whitespace, which must not be treated as part of a valid escape.
+            int codePointValue;
+            if (!int.TryParse(codePoint, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out codePointValue))
+            {
+                //if (NetEventSource.IsEnabled)
+                //    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_percent_value, codePoint));
+                return false;
+            }
+
+            try
+            {
+                string unicodeString = char.ConvertFromUtf32(codePointValue);
+                AppendOctetsPercentEncoded(_requestUriString, s_utf8Encoding.GetBytes(unicodeString));
+
+                return true;
+            }
+            catch (ArgumentOutOfRangeException)
+            {
+                // char.ConvertFromUtf32() rejected the value.
+                //if (NetEventSource.IsEnabled)
+                //    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_percent_value, codePoint));
+            }
+            catch (EncoderFallbackException)
+            {
+                // If utf8Encoding.GetBytes() fails
+                //if (NetEventSource.IsEnabled) NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_to_utf8, unicodeString, e.Message));
+            }
+
+            return false;
+        }
+
+        // Parses the two hex digits of a "%XX" escape and adds the resulting byte to _rawOctets.
+        //
+        // encoding: not used by this method; kept so existing call sites remain valid.
+        // escapedCharacter: the characters following '%' (the caller passes exactly two).
+        // Returns false, without modifying _rawOctets, if the characters are not valid hex digits.
+        private bool AddPercentEncodedOctetToRawOctetsList(Encoding encoding, string escapedCharacter)
+        {
+            // AllowHexSpecifier (rather than HexNumber) is used on purpose: HexNumber additionally accepts
+            // leading and trailing whitespace, so e.g. "% A" would silently be accepted as octet 0x0A.
+            byte encodedValue;
+            if (!byte.TryParse(escapedCharacter, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out encodedValue))
+            {
+                //if (NetEventSource.IsEnabled) NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_percent_value, escapedCharacter));
+                return false;
+            }
+
+            _rawOctets.Add(encodedValue);
+
+            return true;
+        }
+
+        // Flushes _rawOctets: decodes the collected octets with 'encoding', appends them to
+        // _requestUriString as percent-encoded UTF-8 octets and clears the list.
+        //
+        // Returns true if the list was empty or the octets were converted successfully. Returns false,
+        // leaving _rawOctets untouched, if decoding with 'encoding' or re-encoding as UTF-8 throws a
+        // fallback exception (both encodings declared in this class are set up to throw on invalid data).
+        private bool EmptyDecodeAndAppendRawOctetsList(Encoding encoding)
+        {
+            if (_rawOctets.Count == 0)
+            {
+                return true;
+            }
+
+            try
+            {
+                // Copy the list once; the same array serves for validation and for output.
+                byte[] octets = _rawOctets.ToArray();
+
+                // If the encoding can get a string out of the byte array, this is a valid string in the
+                // 'encoding' encoding.
+                string decodedString = encoding.GetString(octets);
+
+                if (encoding == s_utf8Encoding)
+                {
+                    // The octets are already valid UTF-8: emit them unchanged.
+                    AppendOctetsPercentEncoded(_requestUriString, octets);
+                }
+                else
+                {
+                    // Transcode: decoded string -> UTF-8 octets.
+                    AppendOctetsPercentEncoded(_requestUriString, s_utf8Encoding.GetBytes(decodedString));
+                }
+
+                _rawOctets.Clear();
+
+                return true;
+            }
+            catch (DecoderFallbackException)
+            {
+                // The octets are not valid in 'encoding'.
+                //if (NetEventSource.IsEnabled) NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_bytes, GetOctetsAsString(_rawOctets), e.Message));
+            }
+            catch (EncoderFallbackException)
+            {
+                // If utf8Encoding.GetBytes() fails
+                //if (NetEventSource.IsEnabled) NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_to_utf8, decodedString, e.Message));
+            }
+
+            return false;
+        }
+
+        // Appends every octet to 'target' as '%' followed by two upper-case hex digits (e.g. 0xC3 -> "%C3").
+        private static void AppendOctetsPercentEncoded(StringBuilder target, IEnumerable<byte> octets)
+        {
+            foreach (byte value in octets)
+            {
+                string hex = value.ToString("X2", CultureInfo.InvariantCulture);
+                target.Append('%').Append(hex);
+            }
+        }
+
+        // Formats the octets as two-digit upper-case hex values separated by single spaces
+        // (e.g. { 0xC3, 0x84 } -> "C3 84"). An empty sequence yields an empty string.
+        private static string GetOctetsAsString(IEnumerable<byte> octets)
+        {
+            StringBuilder text = new StringBuilder();
+
+            foreach (byte value in octets)
+            {
+                // Anything already in the builder means this is not the first octet.
+                if (text.Length > 0)
+                {
+                    text.Append(' ');
+                }
+
+                text.Append(value.ToString("X2", CultureInfo.InvariantCulture));
+            }
+
+            return text.ToString();
+        }
+
+        // Extracts the path component from a raw request Uri string.
+        //
+        // uriString may be an absolute path ("/a/b?x=1"), an absolute http/https Uri
+        // ("http://server/a/b?x=1") or "*". The returned path excludes the query; it is empty when an
+        // absolute Uri has no path, and "*" is returned as "/*". Anything else that does not start
+        // with '/' is treated as a path by prepending '/'.
+        private static string GetPath(string uriString)
+        {
+            Debug.Assert(uriString != null, "uriString must not be null");
+            Debug.Assert(uriString.Length > 0, "uriString must not be empty");
+
+            int pathStartIndex = 0;
+
+            // Perf. improvement: nearly all strings are relative Uris. So just look if the
+            // string starts with '/'. If so, we have a relative Uri and the path starts at position 0.
+            // (http.sys already trimmed leading whitespaces)
+            if (uriString[0] != '/')
+            {
+                // We can't check against cookedUriScheme, since http.sys allows for request http://myserver/ to
+                // use a request line 'GET https://myserver/' (note http vs. https). Therefore check if the
+                // Uri starts with either http:// or https://.
+                int authorityStartIndex = 0;
+                if (uriString.StartsWith("http://", StringComparison.OrdinalIgnoreCase))
+                {
+                    authorityStartIndex = 7;
+                }
+                else if (uriString.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
+                {
+                    authorityStartIndex = 8;
+                }
+
+                if (authorityStartIndex > 0)
+                {
+                    // we have an absolute Uri. Find out where the authority ends and the path begins:
+                    // look for the first '/' after the authority. (A '/' that only occurs inside the
+                    // query is handled below.)
+                    pathStartIndex = uriString.IndexOf('/', authorityStartIndex);
+                    if (pathStartIndex == -1)
+                    {
+                        // e.g. for request lines like: 'GET http://myserver' (no final '/')
+                        pathStartIndex = uriString.Length;
+                    }
+                }
+                else
+                {
+                    // RFC2616: Request-URI = "*" | absoluteURI | abs_path | authority
+                    // 'authority' can only be used with CONNECT which is never received by HttpListener.
+                    // I.e. if we don't have an absolute path (must start with '/') and we don't have
+                    // an absolute Uri (must start with http:// or https://), then 'uriString' must be '*'.
+                    Debug.Assert((uriString.Length == 1) && (uriString[0] == '*'), "Unknown request Uri string format",
+                        "Request Uri string is not an absolute Uri, absolute path, or '*': {0}", uriString);
+
+                    // Should we ever get here, be consistent with 2.0/3.5 behavior: just add an initial
+                    // slash to the string and treat it as a path:
+                    uriString = "/" + uriString;
+                }
+            }
+
+            // Find end of path: The path is terminated by
+            // - the first '?' character
+            // - the first '#' character: This is never the case here, since http.sys won't accept
+            //   Uris containing fragments. Also, RFC2616 doesn't allow fragments in request Uris.
+            // - end of Uri string
+            int queryIndex = uriString.IndexOf('?');
+            if (queryIndex == -1)
+            {
+                queryIndex = uriString.Length;
+            }
+
+            // For a Uri like "http://server?query=value/1/2" the first '/' after the authority belongs
+            // to the query, i.e. the Uri has no path at all. Clamp the start so that the path is empty
+            // instead of calling Substring with a negative length.
+            if (pathStartIndex > queryIndex)
+            {
+                pathStartIndex = queryIndex;
+            }
+
+            // will always return a != null string.
+            return AddSlashToAsteriskOnlyPath(uriString.Substring(pathStartIndex, queryIndex - pathStartIndex));
+        }
+
+        // Returns "/*" when the path is exactly "*"; every other path is returned unchanged.
+        private static string AddSlashToAsteriskOnlyPath(string path)
+        {
+            Debug.Assert(path != null, "'path' must not be null");
+
+            // If a request like "OPTIONS * HTTP/1.1" is sent to the listener, then the request Uri
+            // should be "http[s]://server[:port]/*" to be compatible with pre-4.0 behavior.
+            return path.Equals("*", StringComparison.Ordinal) ? "/*" : path;
+        }
+
+        // Outcome of converting the raw path into a request Uri.
+        private enum ParsingResult
+        {
+            // The raw path was converted and a valid absolute Uri was created.
+            Success = 0,
+
+            // An escape sequence could not be parsed or the assembled string is not a valid Uri;
+            // no other encoding is tried in this case.
+            InvalidString = 1,
+
+            // Percent-encoded octets could not be converted with the encoding in use; after the
+            // primary encoding this triggers a retry with the secondary encoding.
+            EncodingError = 2
+        }
+
+        // Selects which encoding GetEncoding returns for decoding percent-encoded octets.
+        private enum EncodingType
+        {
+            // Tried first; GetEncoding maps it to the UTF-8 encoding.
+            Primary = 0,
+
+            // Tried after an encoding error; GetEncoding maps it to the ANSI encoding.
+            Secondary = 1
+        }
+    }
+}
author	stefan <stefan@hegedues.at>	2018-09-12 19:26:21 +0200
committer	stefan <stefan@hegedues.at>	2018-09-12 19:26:21 +0200
commit	48facb797ed912e4ea6b04b17d1ff190ac2daac4 (patch)
tree	8dae77a31670a888d733484cb17dd4077d5444e8 /SocketHttpListener/Net/HttpListenerRequestUriBuilder.cs
parent	c32d8656382a0eacb301692e0084377fc433ae9b (diff)