diff --git a/mwmbl/indexer/index.py b/mwmbl/indexer/index.py
index d8aef72..8bd0dc9 100644
--- a/mwmbl/indexer/index.py
+++ b/mwmbl/indexer/index.py
@@ -12,8 +12,6 @@ import pandas as pd
 # PAGE_SIZE = 512
 from mwmbl.tinysearchengine.indexer import TinyIndexer, Document, TokenizedDocument
 
-NUM_INITIAL_TOKENS = 50
-
 HTTP_START = 'http://'
 HTTPS_START = 'https://'
 BATCH_SIZE = 100
@@ -27,8 +25,10 @@ def is_content_token(nlp, token):
 def tokenize(nlp, input_text):
     cleaned_text = input_text.encode('utf8', 'replace').decode('utf8')
     tokens = nlp.tokenizer(cleaned_text)
-    content_tokens = [token for token in tokens[:NUM_INITIAL_TOKENS]
-                      if is_content_token(nlp, token)]
+    if input_text.endswith('…'):
+        # Discard the last two tokens since there will likely be a word cut in two
+        tokens = tokens[:-2]
+    content_tokens = [token for token in tokens if is_content_token(nlp, token)]
     lowered = {nlp.vocab[token.orth].text.lower() for token in content_tokens}
     return lowered
 
diff --git a/mwmbl/tinysearchengine/create_app.py b/mwmbl/tinysearchengine/create_app.py
index 4de776c..b6e0eac 100644
--- a/mwmbl/tinysearchengine/create_app.py
+++ b/mwmbl/tinysearchengine/create_app.py
@@ -31,9 +31,9 @@ def create(tiny_index: TinyIndex):
     def search(s: str):
         results, terms = get_results(s)
 
+        pattern = get_query_regex(terms)
         formatted_results = []
         for result in results:
-            pattern = get_query_regex(terms)
             formatted_result = {}
             for content_type, content in [('title', result.title), ('extract', result.extract)]:
                 matches = re.finditer(pattern, content, re.IGNORECASE)