From 2bf664e25fa6855c4817fb512214f9007fa3fa12 Mon Sep 17 00:00:00 2001 From: Stef Date: Fri, 18 Apr 2025 18:21:50 +0200 Subject: [PATCH] setting up the data to generate a piechart --- backend/process_data.py | 13 ++++++++++--- chatgpt.py | 4 ++-- piechart.py | 12 ++++++++++++ word_cloud.py | 6 +++--- 4 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 piechart.py diff --git a/backend/process_data.py b/backend/process_data.py index 6adaaba..aef1f32 100644 --- a/backend/process_data.py +++ b/backend/process_data.py @@ -19,10 +19,17 @@ makedirs("output", exist_ok=True) test = processRawMessages(chat) -frequency_dictionary: dict[str, dict[str, int]] = {} +total_frequency = 0 +total_frequency_dict: dict[str, int] = Counter() +word_frequency_dict: dict[str, dict[str, int]] = {} + for author in test: - frequency_dictionary[author] = {} + word_frequency_dict[author] = {} messageList = test.get(author) if messageList: - frequency_dictionary[author] = Counter(processMessageList(messageList)) + wordList = Counter(processMessageList(messageList)) + word_frequency_dict[author] = wordList + for count in wordList.items(): + total_frequency_dict.update({author: count[1]}) + total_frequency += count[1] diff --git a/chatgpt.py b/chatgpt.py index e2ee1cd..82a7955 100644 --- a/chatgpt.py +++ b/chatgpt.py @@ -1,11 +1,11 @@ import matplotlib.pyplot as plt import seaborn as sns import pandas as pd -from backend.process_data import frequency_dictionary +from backend.process_data import word_frequency_dict from collections import Counter # Example dictionary -data = frequency_dictionary +data = word_frequency_dict # Choose how many top words to show TOP_N = 5 diff --git a/piechart.py b/piechart.py new file mode 100644 index 0000000..829d4c7 --- /dev/null +++ b/piechart.py @@ -0,0 +1,12 @@ +from backend.process_data import ( + total_frequency_dict as freq_dict, + total_frequency as total, +) + +for author in freq_dict: + fraction = round(freq_dict[author] / total * 100, 1) + print( + f"{author + ":":<30}", + f"{str(freq_dict.get(author))+"/"+str(total):>15}", + f"({fraction:>4}%)", + ) diff --git a/word_cloud.py b/word_cloud.py index 42c62ca..a223455 100644 --- a/word_cloud.py +++ b/word_cloud.py @@ -1,5 +1,5 @@ from wordcloud import WordCloud # type: ignore -from backend.process_data import frequency_dictionary +from backend.process_data import word_frequency_dict wordcloud = WordCloud( @@ -17,7 +17,7 @@ wordcloud = WordCloud( contour_width=1, # For consistent layout between runs ) -for author in frequency_dictionary: - freq_dict = frequency_dictionary.get(author) +for author in word_frequency_dict: + freq_dict = word_frequency_dict.get(author) image = wordcloud.generate_from_frequencies(freq_dict) # type: ignore image.to_file(f"output/{author}.png") # type: ignore