From ba45d950efb79a3fb7c6b039e8905b397085e4b1 Mon Sep 17 00:00:00 2001 From: Daoud Clarke Date: Sun, 25 Apr 2021 11:41:44 +0100 Subject: [PATCH] Catch connection errors --- domains/domain_titles.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/domains/domain_titles.py b/domains/domain_titles.py index 29a866a..09fd8be 100644 --- a/domains/domain_titles.py +++ b/domains/domain_titles.py @@ -1,14 +1,12 @@ """ Retrieve titles for each domain in the list of top domains """ -import csv -import gzip import pickle from urllib.parse import urlsplit, urlunsplit import bs4 import requests -from persistqueue import SQLiteQueue, SQLiteAckQueue +from persistqueue import SQLiteAckQueue from paths import DOMAINS_QUEUE_PATH, DOMAINS_TITLES_QUEUE_PATH @@ -50,7 +48,7 @@ def retrieve_title(domain): result = get_redirect_no_cookies(original_url) status = result.status_code url = result.url - except RecursionError as e: + except (RecursionError, requests.exceptions.ConnectionError) as e: print("Error retrieving URL", str(e)) status = None url = None