mirror of
https://github.com/StefBuwalda/whatsapp-wordcloud.git
synced 2025-11-01 12:19:57 +00:00
Separated part of the message processing into separate functions
This commit is contained in:
12
functions.py
Normal file
12
functions.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from re import split, sub, match
|
||||||
|
|
||||||
|
|
||||||
|
def processRawMessages(chat: str):
    """Turn a raw chat export string into a list of "author: text" entries.

    The export is cut apart at every timestamp of the form
    ``D/M/YY, H:MM``. Each resulting piece then has the characters
    ``. , ? ! * ( )`` deleted, newlines turned into single spaces, and all
    non-ASCII characters deleted. Only pieces that begin with
    ``" - <author>: "`` (author containing no spaces) are kept, and the
    leading ``" - "`` is dropped from each of them.
    """
    entries = []
    for piece in split(r"\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{2}", chat):
        piece = sub(r"([.,?!*()])", "", piece)
        piece = sub(r"\n", " ", piece)
        piece = sub(r"[^\x00-\x7F]", "", piece)
        # Pieces that ended up empty can never match the author prefix,
        # so they are dropped by the same check as non-message lines.
        if match(r" - [^ ]+?: ", piece) is None:
            continue
        # Skip the " - " separator that followed the timestamp.
        entries.append(piece[3:])
    return entries
|
||||||
19
run.py
19
run.py
@@ -1,15 +1,7 @@
|
|||||||
from re import split, sub, match
|
from re import split, sub
|
||||||
from wordcloud import WordCloud # type: ignore
|
from wordcloud import WordCloud # type: ignore
|
||||||
from os import makedirs
|
from os import makedirs
|
||||||
|
from functions import processRawMessages
|
||||||
|
|
||||||
def cleanupMessages(messages: list[str]) -> list[str]:
    """Strip punctuation, newlines and non-ASCII characters from messages.

    For every message the characters ``. , ? ! * ( )`` are deleted, each
    newline is replaced by a single space, and every character outside the
    ASCII range is deleted. Messages that are empty after these steps are
    left out of the result.
    """
    cleaned: list[str] = []
    for raw in messages:
        text = sub(r"([.,?!*()])", "", raw)
        text = sub(r"\n", " ", text)
        text = sub(r"[^\x00-\x7F]", "", text)
        if text != "":
            cleaned.append(text)
    return cleaned
|
|
||||||
|
|
||||||
|
|
||||||
# Open and read the chats from the '/data/_chat.txt' file exported by Whatsapp
|
# Open and read the chats from the '/data/_chat.txt' file exported by Whatsapp
|
||||||
@@ -21,12 +13,7 @@ except FileNotFoundError:
|
|||||||
print("Sorry, the file /data/_chat.txt does not exist.")
|
print("Sorry, the file /data/_chat.txt does not exist.")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
messages = cleanupMessages(
|
messages = processRawMessages(chat)
|
||||||
split(r"\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{2}", chat)
|
|
||||||
)
|
|
||||||
|
|
||||||
messages = [s[3:] for s in messages if match(r" - [^ ]+?: ", s)]
|
|
||||||
|
|
||||||
|
|
||||||
author_words: dict[str, list[str]] = {}
|
author_words: dict[str, list[str]] = {}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user