From 2bf664e25fa6855c4817fb512214f9007fa3fa12 Mon Sep 17 00:00:00 2001
From: Stef <stbuwalda@gmail.com>
Date: Fri, 18 Apr 2025 18:21:50 +0200
Subject: [PATCH] Set up the data to generate a pie chart

---
 backend/process_data.py | 13 ++++++++++---
 chatgpt.py              |  4 ++--
 piechart.py             | 12 ++++++++++++
 word_cloud.py           |  6 +++---
 4 files changed, 27 insertions(+), 8 deletions(-)
 create mode 100644 piechart.py

diff --git a/backend/process_data.py b/backend/process_data.py
index 6adaaba..aef1f32 100644
--- a/backend/process_data.py
+++ b/backend/process_data.py
@@ -19,10 +19,17 @@ makedirs("output", exist_ok=True)
 
 test = processRawMessages(chat)
 
-frequency_dictionary: dict[str, dict[str, int]] = {}
+total_frequency = 0  # grand total of all counts, across authors
+total_frequency_dict: dict[str, int] = Counter()  # author -> summed counts
+word_frequency_dict: dict[str, dict[str, int]] = {}  # author -> item counts
+
 
 for author in test:
-    frequency_dictionary[author] = {}
+    word_frequency_dict[author] = {}
     messageList = test.get(author)
     if messageList:
-        frequency_dictionary[author] = Counter(processMessageList(messageList))
+        wordList = Counter(processMessageList(messageList))
+        word_frequency_dict[author] = wordList
+        if authorTotal := sum(wordList.values()):  # no entry for empty authors
+            total_frequency_dict[author] += authorTotal  # Counter: missing key is 0
+            total_frequency += authorTotal
diff --git a/chatgpt.py b/chatgpt.py
index e2ee1cd..82a7955 100644
--- a/chatgpt.py
+++ b/chatgpt.py
@@ -1,11 +1,11 @@
 import matplotlib.pyplot as plt
 import seaborn as sns
 import pandas as pd
-from backend.process_data import frequency_dictionary
+from backend.process_data import word_frequency_dict
 from collections import Counter
 
 # Example dictionary
-data = frequency_dictionary
+data = word_frequency_dict
 
 # Choose how many top words to show
 TOP_N = 5
diff --git a/piechart.py b/piechart.py
new file mode 100644
index 0000000..829d4c7
--- /dev/null
+++ b/piechart.py
@@ -0,0 +1,12 @@
+from backend.process_data import (
+    total_frequency_dict as freq_dict,
+    total_frequency as total,
+)
+
+for author, count in freq_dict.items():  # one summary row per author
+    fraction = round(count / total * 100, 1)  # percentage of the overall total
+    print(
+        f"{author + ':':<30}",
+        f"{str(count) + '/' + str(total):>15}",
+        f"({fraction:>4}%)",
+    )
diff --git a/word_cloud.py b/word_cloud.py
index 42c62ca..a223455 100644
--- a/word_cloud.py
+++ b/word_cloud.py
@@ -1,5 +1,5 @@
 from wordcloud import WordCloud  # type: ignore
-from backend.process_data import frequency_dictionary
+from backend.process_data import word_frequency_dict
 
 
 wordcloud = WordCloud(
@@ -17,7 +17,7 @@ wordcloud = WordCloud(
     contour_width=1,  # For consistent layout between runs
 )
 
-for author in frequency_dictionary:
-    freq_dict = frequency_dictionary.get(author)
+for author in word_frequency_dict:  # one word-cloud image per author
+    freq_dict = word_frequency_dict.get(author)  # this author's frequency mapping
     image = wordcloud.generate_from_frequencies(freq_dict)  # type: ignore
     image.to_file(f"output/{author}.png")  # type: ignore