performance.py

  1. """
  2. Test the performance of the search in terms of compression and speed.
  3. """
  4. import json
  5. import os
  6. from datetime import datetime
  7. from itertools import islice
  8. from spacy.lang.en import English
  9. from starlette.testclient import TestClient
  10. from app import app, complete
  11. from index import Indexer, index_titles_and_urls
  12. from paths import TEST_INDEX_PATH
  13. from wiki import get_wiki_titles_and_urls
  14. NUM_PAGES = 500
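

# Query the /complete endpoint with each of the first NUM_PAGES titles and
# report how many responses contain the expected URL, plus mean query time.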
def query_test():
    titles_and_urls = get_wiki_titles_and_urls()
    client = TestClient(app)

    start = datetime.now()
    hits = 0
    for title, url in islice(titles_and_urls, NUM_PAGES):
        result = client.get('/complete', params={'q': title})
        assert result.status_code == 200
        data = result.content.decode('utf8')
        # Alternative: call complete() in-process, bypassing the HTTP layer.
        # data = json.dumps(complete(title))
        if url in data:
            hits += 1
    end = datetime.now()

    print("Hits:", hits)
    print("Query time:", (end - start).total_seconds() / NUM_PAGES)
def performance_test():
    nlp = English()
    # Start from a clean slate so the size and timing numbers are comparable.
    try:
        os.remove(TEST_INDEX_PATH)
    except FileNotFoundError:
        print("No test index found, creating")
    indexer = Indexer(TEST_INDEX_PATH)
    titles_and_urls = get_wiki_titles_and_urls()
    titles_and_urls_slice = islice(titles_and_urls, NUM_PAGES)

    start_time = datetime.now()
    index_titles_and_urls(indexer, nlp, titles_and_urls_slice)
    stop_time = datetime.now()
    index_time = (stop_time - start_time).total_seconds()
    index_size = os.path.getsize(TEST_INDEX_PATH)

    print("Indexed pages:", NUM_PAGES)
    print("Index time:", index_time)
    print("Index size:", index_size)
    print("Num tokens:", indexer.get_num_tokens())

    query_test()


if __name__ == '__main__':
    performance_test()