spamassassin_utils.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. """Inspired from
  2. https://github.com/petermat/spamassassin_client
  3. """
  4. import logging
  5. import re
  6. import select
  7. import socket
  8. from io import BytesIO
  9. from app.log import LOG
  10. divider_pattern = re.compile(br"^(.*?)\r?\n(.*?)\r?\n\r?\n", re.DOTALL)
  11. first_line_pattern = re.compile(br"^SPAMD/[^ ]+ 0 EX_OK$")
  12. class SpamAssassin(object):
  13. def __init__(self, message, timeout=20, host="127.0.0.1", spamd_user="spamd"):
  14. self.score = None
  15. self.symbols = None
  16. self.spamd_user = spamd_user
  17. # Connecting
  18. client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  19. client.settimeout(timeout)
  20. client.connect((host, 783))
  21. # Sending
  22. client.sendall(self._build_message(message))
  23. client.shutdown(socket.SHUT_WR)
  24. # Reading
  25. resfp = BytesIO()
  26. while True:
  27. ready = select.select([client], [], [], timeout)
  28. if ready[0] is None:
  29. # Kill with Timeout!
  30. logging.info("[SpamAssassin] - Timeout ({0}s)!".format(str(timeout)))
  31. break
  32. data = client.recv(4096)
  33. if data == b"":
  34. break
  35. resfp.write(data)
  36. # Closing
  37. client.close()
  38. client = None
  39. self._parse_response(resfp.getvalue())
  40. def _build_message(self, message):
  41. reqfp = BytesIO()
  42. data_len = str(len(message)).encode()
  43. reqfp.write(b"REPORT SPAMC/1.2\r\n")
  44. reqfp.write(b"Content-Length: " + data_len + b"\r\n")
  45. reqfp.write(f"User: {self.spamd_user}\r\n\r\n".encode())
  46. reqfp.write(message)
  47. return reqfp.getvalue()
  48. def _parse_response(self, response):
  49. if response == b"":
  50. logging.info("[SPAM ASSASSIN] Empty response")
  51. return None
  52. match = divider_pattern.match(response)
  53. if not match:
  54. logging.error("[SPAM ASSASSIN] Response error:")
  55. logging.error(response)
  56. return None
  57. first_line = match.group(1)
  58. headers = match.group(2)
  59. body = response[match.end(0) :]
  60. # Checking response is good
  61. match = first_line_pattern.match(first_line)
  62. if not match:
  63. logging.error("[SPAM ASSASSIN] invalid response:")
  64. logging.error(first_line)
  65. return None
  66. report_list = [
  67. s.strip() for s in body.decode("utf-8", errors="ignore").strip().split("\n")
  68. ]
  69. linebreak_num = report_list.index([s for s in report_list if "---" in s][0])
  70. tablelists = [s for s in report_list[linebreak_num + 1 :]]
  71. self.report_fulltext = "\n".join(report_list)
  72. # join line when current one is only wrap of previous
  73. tablelists_temp = []
  74. if tablelists:
  75. for counter, tablelist in enumerate(tablelists):
  76. if len(tablelist) > 1:
  77. if (tablelist[0].isnumeric() or tablelist[0] == "-") and (
  78. tablelist[1].isnumeric() or tablelist[1] == "."
  79. ):
  80. tablelists_temp.append(tablelist)
  81. else:
  82. if tablelists_temp:
  83. tablelists_temp[-1] += " " + tablelist
  84. tablelists = tablelists_temp
  85. # create final json
  86. self.report_json = dict()
  87. for tablelist in tablelists:
  88. wordlist = re.split("\s+", tablelist)
  89. try:
  90. self.report_json[wordlist[1]] = {
  91. "partscore": float(wordlist[0]),
  92. "description": " ".join(wordlist[1:]),
  93. }
  94. except ValueError:
  95. LOG.warning("Cannot parse %s %s", wordlist[0], wordlist)
  96. headers = (
  97. headers.decode("utf-8")
  98. .replace(" ", "")
  99. .replace(":", ";")
  100. .replace("/", ";")
  101. .split(";")
  102. )
  103. self.score = float(headers[2])
  104. def get_report_json(self):
  105. return self.report_json
  106. def get_score(self):
  107. return self.score
  108. def is_spam(self, level=5):
  109. return self.score is None or self.score > level
  110. def get_fulltext(self):
  111. return self.report_fulltext