diff options
| author | Luke <luke.pulverenti@gmail.com> | 2016-07-27 15:32:07 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2016-07-27 15:32:07 -0400 |
| commit | 06b0cfb86fec5f1de83080f4fece2513dfa9cf6c (patch) | |
| tree | b94e74277d1b64d804acb12c6e4c482452381353 /MediaBrowser.Tests/ConsistencyTests/TextIndexing/IndexBuilder.cs | |
| parent | b785e919f375d1b876d2e82e0e377db9b6cfbc71 (diff) | |
| parent | 897e0566294e854395f61040a8b922c1d0166930 (diff) | |
Merge pull request #1991 from MediaBrowser/beta
Beta
Diffstat (limited to 'MediaBrowser.Tests/ConsistencyTests/TextIndexing/IndexBuilder.cs')
| -rw-r--r-- | MediaBrowser.Tests/ConsistencyTests/TextIndexing/IndexBuilder.cs | 54 |
1 files changed, 54 insertions, 0 deletions
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace MediaBrowser.Tests.ConsistencyTests.TextIndexing
{
    /// <summary>
    /// Builds a <see cref="WordIndex"/> from the words found in a set of text files.
    /// </summary>
    public class IndexBuilder
    {
        /// <summary>
        /// Minimum number of characters a token must have to be added to the index;
        /// shorter tokens are skipped. The identifier's spelling is kept unchanged
        /// because it is a public member.
        /// </summary>
        public const int MinumumWordLength = 4;

        /// <summary>
        /// Characters that are treated as part of a word: ASCII letters and digits.
        /// </summary>
        public static char[] WordChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890".ToCharArray();

        /// <summary>
        /// Reads every file line by line, splits each line into words and records every
        /// word of at least <see cref="MinumumWordLength"/> characters in a new index.
        /// </summary>
        /// <param name="wordFiles">The files to read. Each one is opened with <see cref="FileInfo.OpenText"/>.</param>
        /// <param name="rootFolderPath">
        /// Text that is removed from each file's full path to form the display file name.
        /// When null or empty, the full path is used as the display file name.
        /// </param>
        /// <returns>The index holding all recorded word occurrences.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="wordFiles"/> is null.</exception>
        public static WordIndex BuildIndexFromFiles(IEnumerable<FileInfo> wordFiles, string rootFolderPath)
        {
            if (wordFiles == null)
            {
                throw new ArgumentNullException("wordFiles");
            }

            var index = new WordIndex();
            var wordSeparators = BuildWordSeparators();

            foreach (var file in wordFiles)
            {
                // string.Replace throws for a null or empty search string, so fall back
                // to the unmodified full path when no root folder is supplied.
                var displayFileName = string.IsNullOrEmpty(rootFolderPath)
                    ? file.FullName
                    : file.FullName.Replace(rootFolderPath, string.Empty);

                var lineNumber = 1;
                using (var reader = file.OpenText())
                {
                    string line;

                    // ReadLine returns null at end of input, so Split is never called on null.
                    while ((line = reader.ReadLine()) != null)
                    {
                        var words = line.Split(wordSeparators, StringSplitOptions.RemoveEmptyEntries);

                        // Restarts at 1 on every line and only advances for words that pass
                        // the length filter, so it numbers the indexed words of the line.
                        var wordIndex = 1;
                        foreach (var word in words)
                        {
                            if (word.Length >= MinumumWordLength)
                            {
                                index.AddWordOccurrence(word, displayFileName, file.FullName, lineNumber, wordIndex++);
                            }
                        }

                        lineNumber++;
                    }
                }
            }

            return index;
        }

        /// <summary>
        /// Builds the separator set used to split a line into words: every character in
        /// the generated range that is not in <see cref="WordChars"/>, plus the tab character.
        /// </summary>
        private static char[] BuildWordSeparators()
        {
            // NOTE: Enumerable.Range takes (start, count), so this covers code points
            // 32..158 rather than 32..127. Kept as-is to leave tokenization unchanged.
            return Enumerable.Range(32, 127)
                .Select(e => Convert.ToChar(e))
                .Where(c => !WordChars.Contains(c))
                .Concat(new[] { '\t' }) // tab lies outside the generated range
                .ToArray();
        }
    }
}
