Separated part of the message processing into separate functions

This commit is contained in:
2025-04-18 14:22:42 +02:00
parent 2c82589a10
commit c553167a24
2 changed files with 15 additions and 16 deletions

12
functions.py Normal file
View File

@@ -0,0 +1,12 @@
from re import split, sub, match
def processRawMessages(chat: str) -> list[str]:
    """Split a raw chat export into cleaned ``"author: text"`` strings.

    The export is cut at every timestamp of the form ``d/d/dd, h:mm``
    (one or two digits where a single letter is shown). Each resulting
    chunk is cleaned, and only chunks that start with ``" - <author>: "``
    are kept.

    Args:
        chat: The complete text of the chat export.

    Returns:
        One string per kept message, with the leading ``" - "`` removed,
        so each entry begins with the author name followed by ``": "``.
        Chunks without an author prefix are dropped.
    """
    messages: list[str] = []
    for chunk in split(r"\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{2}", chat):
        # The three substitutions touch disjoint character sets, so their
        # relative order does not affect the result.
        chunk = sub(r"([.,?!*()])", "", chunk)  # drop common punctuation
        chunk = sub(r"\n", " ", chunk)  # join multi-line messages
        chunk = sub(r"[^\x00-\x7F]", "", chunk)  # strip non-ASCII characters
        # NOTE(review): the author part may not contain a space, so chunks
        # whose sender name has a space are dropped here - confirm that this
        # is intended. Empty chunks can never match, so no separate
        # empty-string filter is needed.
        if match(r" - [^ ]+?: ", chunk):
            messages.append(chunk[3:])  # cut the leading " - "
    return messages

19
run.py
View File

@@ -1,15 +1,7 @@
from re import split, sub, match
from re import split, sub
from wordcloud import WordCloud # type: ignore
from os import makedirs
def cleanupMessages(messages: list[str]) -> list[str]:
    """Return cleaned copies of *messages*, dropping those that end up empty.

    For every message, the characters ``. , ? ! * ( )`` are removed, each
    newline is replaced by a single space, and every non-ASCII character is
    removed. Messages that are the empty string after cleaning are left out.

    Args:
        messages: Raw message strings.

    Returns:
        A new list with the cleaned, non-empty messages in their original
        order. The input list is not modified.
    """
    cleaned: list[str] = []
    for message in messages:
        message = sub(r"([.,?!*()])", "", message)  # drop common punctuation
        message = sub(r"\n", " ", message)  # newlines become spaces
        message = sub(r"[^\x00-\x7F]", "", message)  # strip non-ASCII characters
        # Only truly empty strings are dropped; whitespace-only ones are kept.
        if message:
            cleaned.append(message)
    return cleaned
from functions import processRawMessages
# Open and read the chats from the '/data/_chat.txt' file exported by WhatsApp
@@ -21,12 +13,7 @@ except FileNotFoundError:
print("Sorry, the file /data/_chat.txt does not exist.")
exit()
# Cut the raw export at every timestamp and clean each resulting chunk.
messages = cleanupMessages(
split(r"\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{2}", chat)
)
# Keep only chunks that start with " - <author>: " and drop the leading " - ".
messages = [s[3:] for s in messages if match(r" - [^ ]+?: ", s)]
# NOTE(review): this assignment overwrites the result of the two statements
# above; presumably they are the removed and added sides of the same diff
# hunk - confirm which version is meant to remain.
messages = processRawMessages(chat)
# Maps a string key to a list of strings; presumably author name -> words
# that author used - verify against the code that fills it.
author_words: dict[str, list[str]] = {}