Replace inline per-author word counting with processRawMessages2/processMessageList and move the WordCloud settings into config.py

2026-02-04 04:04:49 +00:00 · 2025-04-18 17:04:29 +02:00
parent c553167a24
commit 8b648639f4
3 changed files with 76 additions and 42 deletions
--- a/config.py
+++ b/config.py
@@ -0,0 +1,16 @@
 from wordcloud import WordCloud  # type: ignore
 # Shared, pre-configured WordCloud instance; run.py imports it as `wordcloud`
 # and calls generate_from_frequencies() on it once per author.
 wordcloud = WordCloud(
    width=800,
    height=400,
    background_color="black",  # e.g. 'white', or any HTML color
    colormap="viridis",  # matplotlib colormap ('plasma', 'cool', 'inferno')
    # font_path="path/to/font.ttf",  # Use a custom font
    max_words=100,  # Max number of words to include
    min_font_size=10,
    max_font_size=100,
    prefer_horizontal=1,  # Between 0 (all vertical) and 1 (all horizontal)
    scale=2,  # Higher = better resolution
    contour_color="steelblue",  # Outline color (when using contour_width)
    contour_width=1,  # Outline width. NOTE(review): no mask is passed here; the wordcloud docs suggest the contour is only drawn with a mask - confirm
 )
--- a/functions.py
+++ b/functions.py
@@ -1,4 +1,47 @@
 from re import split, sub, match
 from regex import sub as sub2
 def processRawMessages2(chat: str) -> dict[str, list[str]]:
    """Group the messages of an exported chat by author.

    Each line of ``chat`` is classified as one of:

    * a message line, ``<d>/<m>/<y>, <h>:<m> - <author>: <text>``, which
      starts a new message for ``<author>``;
    * an action line, which has the timestamp prefix but no ``author: ``
      part, and is ignored;
    * anything else, which is treated as the continuation of the most
      recent message and appended to it.

    Args:
        chat: Full text of the chat export, lines separated by ``\\n``.

    Returns:
        Mapping of author name to that author's messages, in the order
        they appear. Multi-line messages keep their internal newlines.
    """
    output: dict[str, list[str]] = {}
    author = ""
    for segment in chat.split("\n"):
        re_match = match(r"\d+/\d+/\d+, \d+:\d+ - ([^:]+): (.*)", segment)
        if re_match:
            # New message: drop date and time, keep author and text
            author = re_match.group(1)
            output.setdefault(author, []).append(re_match.group(2))
            continue

        if match(r"\d+/\d+/\d+, \d+:\d+ - ", segment):
            # Timestamped line without "author: " is an action, ignore
            continue

        # Neither message nor action: continuation of the previous message
        segmentList = output.get(author)
        if not segmentList:
            # Continuation text before any message was seen
            print("ERROR functions.py line 24")
            print(segment)
            continue
        if segment:
            # Splitting removed the newline; put it back so the last word
            # of one line and the first word of the next are not fused.
            # Empty segments (e.g. from a trailing newline) are skipped so
            # the stored message is left untouched.
            segmentList[-1] += "\n" + segment
    return output
 def processMessageList(messages: list[str]) -> list[str]:
    """Turn a list of messages into a flat list of lowercase words.

    For every message, http(s) links are removed, then every run of
    characters matching the Unicode Emoji property or one of ``? ! : , .``
    is removed. A message that equals ``<Media omitted>`` after this
    cleaning contributes nothing; any other message is lowercased and
    split on whitespace.

    Args:
        messages: Message texts to process.

    Returns:
        All resulting words, in message order.
    """
    words: list[str] = []
    for raw in messages:
        # Strip links first, then emojis and punctuation
        without_links = sub(r"https?://(?:www\.)?\S+", "", raw)
        cleaned = sub2(r"[\p{Emoji}?!:,.]+", "", without_links)
        if cleaned == "<Media omitted>":
            # Placeholder for attached media, not real text
            continue
        words.extend(cleaned.lower().split())
    return words
 def processRawMessages(chat: str):
--- a/run.py
+++ b/run.py
@@ -1,7 +1,11 @@
-from re import split, sub
+from config import wordcloud  # type: ignore
 from wordcloud import WordCloud  # type: ignore
 from os import makedirs
-from functions import processRawMessages
+from functions import (
    processRawMessages,
    processRawMessages2,
    processMessageList,
 )
 from collections import Counter
 # Open and read the chats from the '/data/_chat.txt' file exported by WhatsApp
@@ -13,44 +17,15 @@ except FileNotFoundError:
    print("Sorry, the file /data/_chat.txt does not exist.")
    exit()
 messages = processRawMessages(chat)
 author_words: dict[str, list[str]] = {}
 for message in messages:
    message = sub(":", "", message)
    author, words = split(r" ", message, maxsplit=1)
    words = [word for word in words.split() if word and word != " "]
    for word in words:
        if author not in author_words:
            author_words[author] = []
        author_words[author].append(word.lower())
 word_count_dicts: dict[str, int] = {}
 wordcloud = WordCloud(
    width=800,
    height=400,
    background_color="black",  # or 'black', or any HTML color
    colormap="viridis",  # matplotlib colormap ('plasma', 'cool', 'inferno')
    # font_path="path/to/font.ttf",  # Use a custom font
    max_words=100,  # Max number of words to include
    min_font_size=10,
    max_font_size=100,
    prefer_horizontal=0.9,  # Between 0 (all vertical) and 1 (all horizontal)
    scale=2,  # Higher = better resolution
    contour_color="steelblue",  # Outline color (when using contour_width)
    contour_width=1,  # For consistent layout between runs
 )
 makedirs("output", exist_ok=True)
-for author in author_words.keys():
+test = processRawMessages2(chat)
-    words = author_words.get(author)
+
-    if words:
+for author in test.keys():
-        for word in words:
+    messageList = test.get(author)
-            word_count_dicts[word] = word_count_dicts.get(word, 0) + 1
+    if messageList:
-    test = wordcloud.generate_from_frequencies(  # type: ignore
+        wordList = processMessageList(messageList)
-        word_count_dicts
+        freq_dict = Counter(wordList)
-    )
+        image = wordcloud.generate_from_frequencies(freq_dict)  # type: ignore
-    test.to_file("output/" + author + ".png")  # type: ignore
+        image.to_file(f"output/{author}.png")  # type: ignore
 messages = processRawMessages(chat)