aboutsummaryrefslogtreecommitdiff
path: root/MediaBrowser.Tests/ConsistencyTests/TextIndexing/IndexBuilder.cs
diff options
context:
space:
mode:
authorLuke <luke.pulverenti@gmail.com>2016-07-27 15:32:07 -0400
committerGitHub <noreply@github.com>2016-07-27 15:32:07 -0400
commit06b0cfb86fec5f1de83080f4fece2513dfa9cf6c (patch)
treeb94e74277d1b64d804acb12c6e4c482452381353 /MediaBrowser.Tests/ConsistencyTests/TextIndexing/IndexBuilder.cs
parentb785e919f375d1b876d2e82e0e377db9b6cfbc71 (diff)
parent897e0566294e854395f61040a8b922c1d0166930 (diff)
Merge pull request #1991 from MediaBrowser/beta
Beta
Diffstat (limited to 'MediaBrowser.Tests/ConsistencyTests/TextIndexing/IndexBuilder.cs')
-rw-r--r--MediaBrowser.Tests/ConsistencyTests/TextIndexing/IndexBuilder.cs54
1 files changed, 54 insertions, 0 deletions
diff --git a/MediaBrowser.Tests/ConsistencyTests/TextIndexing/IndexBuilder.cs b/MediaBrowser.Tests/ConsistencyTests/TextIndexing/IndexBuilder.cs
new file mode 100644
index 000000000..07c0df86c
--- /dev/null
+++ b/MediaBrowser.Tests/ConsistencyTests/TextIndexing/IndexBuilder.cs
@@ -0,0 +1,54 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace MediaBrowser.Tests.ConsistencyTests.TextIndexing
+{
+ /// <summary>
+ /// Builds a <see cref="WordIndex"/> from the words found in a set of text files.
+ /// </summary>
+ public class IndexBuilder
+ {
+     /// <summary>
+     /// Minimum number of characters a token must have to be added to the index.
+     /// </summary>
+     public const int MinumumWordLength = 4;
+
+     /// <summary>
+     /// Characters that are treated as part of a word: ASCII letters and digits.
+     /// </summary>
+     public static char[] WordChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890".ToCharArray();
+
+     /// <summary>
+     /// Reads every file line by line, splits each line into words and records each word
+     /// that is at least <see cref="MinumumWordLength"/> characters long in a new index.
+     /// </summary>
+     /// <param name="wordFiles">The files to read.</param>
+     /// <param name="rootFolderPath">
+     /// Text that is removed from each file's full path to form the display name handed to the index.
+     /// When null or empty, the full path is used unchanged.
+     /// </param>
+     /// <returns>The index holding every recorded word occurrence.</returns>
+     /// <exception cref="ArgumentNullException"><paramref name="wordFiles"/> is null.</exception>
+     public static WordIndex BuildIndexFromFiles(IEnumerable<FileInfo> wordFiles, string rootFolderPath)
+     {
+         if (wordFiles == null)
+         {
+             throw new ArgumentNullException("wordFiles");
+         }
+
+         var index = new WordIndex();
+         var wordSeparators = BuildWordSeparators();
+
+         foreach (var file in wordFiles)
+         {
+             // String.Replace throws for a null or empty search string, so only strip
+             // the root when there is actually something to strip.
+             var displayFileName = string.IsNullOrEmpty(rootFolderPath)
+                 ? file.FullName
+                 : file.FullName.Replace(rootFolderPath, string.Empty);
+
+             using (var reader = file.OpenText())
+             {
+                 var lineNumber = 1;
+                 string line;
+                 while ((line = reader.ReadLine()) != null)
+                 {
+                     // Counts only the words that get indexed; tokens shorter than the
+                     // minimum length do not consume a position.
+                     var wordIndex = 1;
+                     foreach (var word in line.Split(wordSeparators, StringSplitOptions.RemoveEmptyEntries))
+                     {
+                         if (word.Length >= MinumumWordLength)
+                         {
+                             index.AddWordOccurrence(word, displayFileName, file.FullName, lineNumber, wordIndex++);
+                         }
+                     }
+
+                     lineNumber++;
+                 }
+             }
+         }
+
+         return index;
+     }
+
+     /// <summary>
+     /// Builds the set of characters on which lines are split into words.
+     /// </summary>
+     private static char[] BuildWordSeparators()
+     {
+         // Enumerable.Range takes (start, count), so this covers code points 32 through 158;
+         // every one of them that is not in WordChars becomes a separator. Tab (9) lies
+         // outside that range and is appended explicitly.
+         return Enumerable.Range(32, 127)
+             .Select(e => Convert.ToChar(e))
+             .Where(c => !WordChars.Contains(c))
+             .Concat(new[] { '\t' })
+             .ToArray();
+     }
+ }
+}