Prechádzať zdrojové kódy

Merge pull request #95 from mwmbl/rishabh-unit-testing-with-ci

Add pytest dependency + unit tests for completer.py + GitHub Actions CI for running unit tests
Rishabh Singh Ahluwalia pred 2 rokmi
rodič
commit
2aa61a5121

+ 57 - 0
.github/workflows/ci.yml

@@ -0,0 +1,57 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      #----------------------------------------------
+      #       check-out repo and set-up python
+      #----------------------------------------------
+      - name: Check out repository
+        uses: actions/checkout@v3
+      - name: Set up python
+        id: setup-python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      #----------------------------------------------
+      #  -----  install & configure poetry  -----
+      #----------------------------------------------
+      - name: Install Poetry
+        uses: snok/install-poetry@v1.3.3
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+
+      #----------------------------------------------
+      #       load cached venv if cache exists
+      #----------------------------------------------
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v3
+        with:
+          path: .venv
+          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
+      #----------------------------------------------
+      # install dependencies if cache does not exist
+      #----------------------------------------------
+      - name: Install dependencies
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+        run: poetry install --no-interaction --no-root
+      #----------------------------------------------
+      # install your root project, if required
+      #----------------------------------------------
+      - name: Install project
+        run: poetry install --no-interaction
+      #----------------------------------------------
+      #              run test suite
+      #----------------------------------------------
+      - name: Run tests
+        run: |
+          poetry run pytest

+ 4 - 1
mwmbl/tinysearchengine/completer.py

@@ -10,13 +10,16 @@ TERMS_PATH = Path(__file__).parent.parent / 'resources' / 'mwmbl-crawl-terms.csv
 class Completer:
     def __init__(self, num_matches: int = 3):
         # Load term data
-        terms = pd.read_csv(TERMS_PATH)
+        terms = self.get_terms()
 
         terms_dict = terms.sort_values('term').set_index('term')['count'].to_dict()
         self.terms = list(terms_dict.keys())
         self.counts = list(terms_dict.values())
         self.num_matches = num_matches
         print("Terms", self.terms[:100], self.counts[:100])
+        
+    def get_terms(self):
+        return pd.read_csv(TERMS_PATH)
 
     def complete(self, term) -> list[str]:
         term_length = len(term)

Rozdielové dáta súboru neboli zobrazené, pretože súbor je príliš veľký
+ 265 - 895
poetry.lock


+ 2 - 0
pyproject.toml

@@ -18,6 +18,8 @@ boto3 = "^1.20.37"
 requests = "^2.27.1"
 psycopg2-binary = "^2.9.3"
 spacy = "==3.2.1"
+pytest = "^7.2.1"
+pytest-mock = "^3.10.0"
 
 # Optional dependencies do not get installed by default. Look under tool.poetry.extras section
 # to see which extras to use.

+ 78 - 0
test/test_completer.py

@@ -0,0 +1,78 @@
+import mwmbl.tinysearchengine.completer
+import pytest
+import pandas as pd
+
+def mockCompleterData(mocker, data):
+    testDataFrame = pd.DataFrame(data, columns=['','term','count'])
+    mocker.patch('mwmbl.tinysearchengine.completer.Completer.get_terms', 
+                 return_value = testDataFrame)
+
+def test_correctCompletions(mocker):
+    # Mock completer with custom data
+    testdata = [
+        [0, 'build', 4],
+        [1, 'builder', 3],
+        [2, 'announce', 2],
+        [3, 'buildings', 1]]
+    mockCompleterData(mocker, testdata)
+    
+    completer = mwmbl.tinysearchengine.completer.Completer()
+    completion = completer.complete('build')
+    assert ['build', 'builder', 'buildings'] == completion
+
+def test_correctSortOrder(mocker):
+    # Mock completer with custom data
+    testdata = [
+        [0, 'build', 4],
+        [1, 'builder', 1],
+        [2, 'announce', 2],
+        [3, 'buildings', 3]]
+    mockCompleterData(mocker, testdata)
+    
+    completer = mwmbl.tinysearchengine.completer.Completer()
+    completion = completer.complete('build')
+    assert ['build', 'buildings', 'builder'] == completion
+    
+def test_noCompletions(mocker):
+    # Mock completer with custom data
+    testdata = [
+        [0, 'build', 4],
+        [1, 'builder', 3],
+        [2, 'announce', 2],
+        [3, 'buildings', 1]]
+    mockCompleterData(mocker, testdata)
+    
+    completer = mwmbl.tinysearchengine.completer.Completer()
+    completion = completer.complete('test')
+    assert [] == completion
+    
+def test_singleCompletions(mocker):
+    # Mock completer with custom data
+    testdata = [
+        [0, 'build', 4],
+        [1, 'builder', 3],
+        [2, 'announce', 2],
+        [3, 'buildings', 1]]
+    mockCompleterData(mocker, testdata)
+    
+    completer = mwmbl.tinysearchengine.completer.Completer()
+    completion = completer.complete('announce')
+    assert ['announce'] == completion
+    
+def test_idempotencyWithSameScoreCompletions(mocker):
+    # Mock completer with custom data
+    testdata = [
+        [0, 'build', 1],
+        [1, 'builder', 1],
+        [2, 'announce', 1],
+        [3, 'buildings', 1]]
+    mockCompleterData(mocker, testdata)
+    
+    completer = mwmbl.tinysearchengine.completer.Completer()
+    for i in range(3):
+        print(f"iteration: {i}")
+        completion = completer.complete('build')
+        # All counts are equal, so matches are expected in reverse alphabetical order
+        expected = ['buildings','builder','build']
+        assert expected == completion
+    

Niektoré súbory nie sú zobrazené, pretože tento rozdiel obsahuje príliš veľa zmenených súborov