Use new server
This commit is contained in:
parent
aaca8b2b6e
commit
14107acc75
1 changed file with 29 additions and 6 deletions
|
@@ -12,21 +12,44 @@ import requests
|
|||
from mwmbl.indexer.paths import CRAWL_GLOB
|
||||
|
||||
|
||||
# URL that receives the historical crawl batches via HTTP POST.
# Only the effective value is kept: the earlier localhost assignment was
# overwritten immediately and therefore never used.
API_ENDPOINT = "http://95.216.215.29/batches/historical"
|
||||
|
||||
|
||||
def total_num_batches():
    """Return the number of paths that currently match ``CRAWL_GLOB``."""
    matching_paths = glob.glob(CRAWL_GLOB)
    return len(matching_paths)
|
||||
|
||||
|
||||
def get_batches():
    """Yield the decoded JSON content of every file matching ``CRAWL_GLOB``.

    Paths are visited in sorted order so repeated runs process the batches
    in the same sequence. Each file is opened with ``gzip.open`` and closed
    again before its content is yielded, so no file handle stays open while
    the caller is working on a batch.

    Yields:
        The object produced by ``json.load`` for each gzip-compressed file.
    """
    for path in sorted(glob.glob(CRAWL_GLOB)):
        # The context manager closes the handle even if decoding fails.
        with gzip.open(path) as batch_file:
            hashed_batch = json.load(batch_file)
        yield hashed_batch
|
||||
|
||||
|
||||
def convert_item(item):
    """Reshape a flat crawl item into the nested layout sent to the server.

    Args:
        item: Mapping with the keys ``url``, ``timestamp``, ``title``,
            ``extract`` and ``links``.

    Returns:
        A new dict with ``url``, a fixed ``status`` of 200, ``timestamp``
        and a nested ``content`` dict holding ``title``, ``extract`` and
        ``links``.

    Raises:
        KeyError: If ``item`` lacks one of the keys listed above.
    """
    url = item['url']
    timestamp = item['timestamp']
    content = {field: item[field] for field in ('title', 'extract', 'links')}
    return {
        'url': url,
        'status': 200,
        'timestamp': timestamp,
        'content': content,
    }
|
||||
|
||||
|
||||
|
||||
def run():
    """Upload every local crawl batch to ``API_ENDPOINT``.

    Each batch yielded by ``get_batches`` is rebuilt with its items passed
    through ``convert_item`` and sent as a JSON body in an HTTP POST. The
    response object is printed together with a progress counter.
    """
    total_batches = total_num_batches()
    # ``get_batches`` is a generator and can be consumed only once, so all
    # work happens in this single loop; an earlier loop over the same
    # generator would leave nothing for the conversion step.
    # Counting from 1 makes the final line read "N of N".
    for i, hashed_batch in enumerate(get_batches(), start=1):
        new_batch = {
            'user_id_hash': hashed_batch['user_id_hash'],
            'timestamp': hashed_batch['timestamp'],
            'items': [convert_item(item) for item in hashed_batch['items']],
        }
        response = requests.post(API_ENDPOINT, json=new_batch)
        print(f"Response {i} of {total_batches}", response)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Add table
Reference in a new issue