From c553167a24c66c9ab849f87eedaea3ce8e332bfb Mon Sep 17 00:00:00 2001 From: Stef Date: Fri, 18 Apr 2025 14:22:42 +0200 Subject: [PATCH] Separated part of the message processing into separate functions --- functions.py | 12 ++++++++++++ run.py | 19 +++---------------- 2 files changed, 15 insertions(+), 16 deletions(-) create mode 100644 functions.py diff --git a/functions.py b/functions.py new file mode 100644 index 0000000..6a4715b --- /dev/null +++ b/functions.py @@ -0,0 +1,12 @@ +from re import split, sub, match + + +def processRawMessages(chat: str): + temp = split(r"\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{2}", chat) + + temp = [sub(r"([.,?!*()])", "", message) for message in temp] + temp = [sub(r"\n", " ", message) for message in temp] + temp = [sub(r"[^\x00-\x7F]", "", message) for message in temp] + temp = [msg for msg in temp if msg != ""] + + return [s[3:] for s in temp if match(r" - [^ ]+?: ", s)] diff --git a/run.py b/run.py index 5ede2da..69ce22b 100644 --- a/run.py +++ b/run.py @@ -1,15 +1,7 @@ -from re import split, sub, match +from re import split, sub from wordcloud import WordCloud # type: ignore from os import makedirs - - -def cleanupMessages(messages: list[str]) -> list[str]: - # Remove "", \n, and symbols like , and . - temp = [sub(r"([.,?!*()])", "", message) for message in messages] - temp = [sub(r"\n", " ", message) for message in temp] - temp = [sub(r"[^\x00-\x7F]", "", message) for message in temp] - temp = [msg for msg in temp if msg != ""] - return temp +from functions import processRawMessages # Open and read the chats from the '/data/_chat.txt' file exported by Whatsapp @@ -21,12 +13,7 @@ except FileNotFoundError: print("Sorry, the file /data/_chat.txt does not exist.") exit() -messages = cleanupMessages( - split(r"\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{2}", chat) -) - -messages = [s[3:] for s in messages if match(r" - [^ ]+?: ", s)] - +messages = processRawMessages(chat) author_words: dict[str, list[str]] = {}