diff --git a/analyse/index_local.py b/analyse/index_local.py
index 9db9dbf..24628a5 100644
--- a/analyse/index_local.py
+++ b/analyse/index_local.py
@@ -8,6 +8,7 @@ import logging
 import os
 import sys
 from pathlib import Path
+from datetime import datetime
 
 import spacy
 
@@ -41,10 +42,16 @@ def run():
     TinyIndex.create(item_factory=Document, index_path=EVALUATE_INDEX_PATH, num_pages=NUM_PAGES, page_size=PAGE_SIZE)
 
     batches = get_batches()
+
+    start = datetime.now()
     with Database() as db:
         nlp = spacy.load("en_core_web_sm")
         url_db = URLDatabase(db.connection)
         index_batches(batches, EVALUATE_INDEX_PATH, nlp, url_db)
+    end = datetime.now()
+
+    total_time = (end - start).total_seconds()
+    print("total_seconds:", total_time)
 
 
 if __name__ == '__main__':
diff --git a/mwmbl/indexer/index.py b/mwmbl/indexer/index.py
index 45793ee..67ae164 100644
--- a/mwmbl/indexer/index.py
+++ b/mwmbl/indexer/index.py
@@ -16,20 +16,35 @@ HTTPS_START = 'https://'
 BATCH_SIZE = 100
 
 
-def is_content_token(nlp, token):
-    lexeme = nlp.vocab[token.orth]
-    return (lexeme.is_alpha or lexeme.is_digit) and not token.is_stop
+STOPWORDS = set("0,1,2,3,4,5,6,7,8,9,a,A,about,above,across,after,again,against,all,almost,alone,along,already,also," \
+                "although,always,am,among,an,and,another,any,anyone,anything,anywhere,are,aren't,around,as,at,b,B,back," \
+                "be,became,because,become,becomes,been,before,behind,being,below,between,both,but,by,c,C,can,cannot,can't," \
+                "could,couldn't,d,D,did,didn't,do,does,doesn't,doing,done,don't,down,during,e,E,each,either,enough,even," \
+                "ever,every,everyone,everything,everywhere,f,F,few,find,first,for,four,from,full,further,g,G,get,give,go," \
+                "h,H,had,hadn't,has,hasn't,have,haven't,having,he,he'd,he'll,her,here,here's,hers,herself,he's,him," \
+                "himself,his,how,however,how's,i,I,i'd,if,i'll,i'm,in,interest,into,is,isn't,it,it's,its,itself,i've," \
+                "j,J,k,K,keep,l,L,last,least,less,let's,m,M,made,many,may,me,might,more,most,mostly,much,must,mustn't," \
+                "my,myself,n,N,never,next,no,nobody,noone,nor,not,nothing,now,nowhere,o,O,of,off,often,on,once,one,only," \
+                "or,other,others,ought,our,ours,ourselves,out,over,own,p,P,part,per,perhaps,put,q,Q,r,R,rather,s,S,same," \
+                "see,seem,seemed,seeming,seems,several,shan't,she,she'd,she'll,she's,should,shouldn't,show,side,since,so," \
+                "some,someone,something,somewhere,still,such,t,T,take,than,that,that's,the,their,theirs,them,themselves," \
+                "then,there,therefore,there's,these,they,they'd,they'll,they're,they've,this,those,though,three,through," \
+                "thus,to,together,too,toward,two,u,U,under,until,up,upon,us,v,V,very,w,W,was,wasn't,we,we'd,we'll,well," \
+                "we're,were,weren't,we've,what,what's,when,when's,where,where's,whether,which,while,who,whole,whom,who's," \
+                "whose,why,why's,will,with,within,without,won't,would,wouldn't,x,X,y,Y,yet,you,you'd,you'll,your,you're," \
+                "yours,yourself,yourselves,you've,z,Z".split(','))
 
 
-def tokenize(nlp, input_text):
+def tokenize(input_text):
     cleaned_text = input_text.encode('utf8', 'replace').decode('utf8')
-    tokens = nlp.tokenizer(cleaned_text)
+    tokens = cleaned_text.lower().replace('.', ' ').replace(',', ' ').split()
+    # tokens = nlp.tokenizer(cleaned_text)
     if input_text.endswith('…'):
         # Discard the last two tokens since there will likely be a word cut in two
         tokens = tokens[:-2]
-    content_tokens = [token for token in tokens if is_content_token(nlp, token)]
-    lowered = {nlp.vocab[token.orth].text.lower() for token in content_tokens}
-    return lowered
+    content_tokens = [token for token in tokens if not token in STOPWORDS]
+    # lowered = {nlp.vocab[token.orth].text.lower() for token in content_tokens}
+    return content_tokens
 
 
 def prepare_url_for_tokenizing(url: str):
@@ -53,12 +68,12 @@ def get_pages(nlp, titles_urls_and_extracts, link_counts) -> Iterable[TokenizedD
 
 
 def tokenize_document(url, title_cleaned, extract, score, nlp):
-    title_tokens = tokenize(nlp, title_cleaned)
+    title_tokens = tokenize(title_cleaned)
     prepared_url = prepare_url_for_tokenizing(unquote(url))
-    url_tokens = tokenize(nlp, prepared_url)
-    extract_tokens = tokenize(nlp, extract)
+    url_tokens = tokenize(prepared_url)
+    extract_tokens = tokenize(extract)
     # print("Extract tokens", extract_tokens)
-    tokens = title_tokens | url_tokens | extract_tokens
+    tokens = set(title_tokens) | set(url_tokens) | set(extract_tokens)
     document = TokenizedDocument(tokens=list(tokens), url=url, title=title_cleaned, extract=extract, score=score)
     return document
 
diff --git a/mwmbl/tinysearchengine/indexer.py b/mwmbl/tinysearchengine/indexer.py
index 405cc17..253faab 100644
--- a/mwmbl/tinysearchengine/indexer.py
+++ b/mwmbl/tinysearchengine/indexer.py
@@ -1,6 +1,6 @@
 import json
 import os
-from dataclasses import astuple, dataclass, asdict
+from dataclasses import dataclass, asdict
 from io import UnsupportedOperation, BytesIO
 from logging import getLogger
 from mmap import mmap, PROT_READ, PROT_WRITE
@@ -20,6 +20,10 @@ PAGE_SIZE = 4096
 logger = getLogger(__name__)
 
 
+def astuple(dc):
+    return tuple(dc.__dict__.values())
+
+
 @dataclass
 class Document:
     title: str
diff --git a/mwmbl/tinysearchengine/rank.py b/mwmbl/tinysearchengine/rank.py
index 75246d8..68562d7 100644
--- a/mwmbl/tinysearchengine/rank.py
+++ b/mwmbl/tinysearchengine/rank.py
@@ -5,6 +5,7 @@ from logging import getLogger
 from operator import itemgetter
 from urllib.parse import urlparse
 
+from mwmbl.indexer.index import tokenize
 from mwmbl.tinysearchengine.completer import Completer
 from mwmbl.hn_top_domains_filtered import DOMAINS
 from mwmbl.tinysearchengine.indexer import TinyIndex, Document
@@ -171,7 +172,7 @@ class Ranker:
         return [q, urls + completed]
 
     def get_results(self, q):
-        terms = [x.lower() for x in q.replace('.', ' ').split()]
+        terms = tokenize(q)
         is_complete = q.endswith(' ')
         if len(terms) > 0 and not is_complete:
             completions = self.completer.complete(terms[-1])
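
A minimal usage sketch (not part of the patch) of the new spaCy-free tokenize(), which Ranker.get_results now calls as well; the sample sentence and expected output are illustrative assumptions:

    from mwmbl.indexer.index import tokenize

    # Lowercases, replaces '.' and ',' with spaces, splits on whitespace,
    # then drops anything in STOPWORDS.
    print(tokenize("Build a search engine, learn how to crawl the web."))
    # ['build', 'search', 'engine', 'learn', 'crawl', 'web']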