# Patch summary (free text before the first "diff --git" header).
#
# backend/process_data.py
#   * total_frequency_dict starts as a plain dict ({}) instead of Counter().
#   * Each author's messages are read with test[author] instead of
#     test.get(author).
#   * The per-author total is now sum(wordFreqList.values()), replacing the
#     inner loop that called total_frequency_dict.update(...) and incremented
#     total_frequency once per word.
#   * total_frequency is computed once, after the loop, as the sum of
#     total_frequency_dict.values().
#
# piechart.py
#   * Adds "import matplotlib.pyplot as plt".
#   * Appends code that builds labels from freq_dict's keys and percentage
#     sizes from its values, then draws them with plt.pie() and calls
#     plt.show() at module level.
#
# NOTE(review): in this copy every line break of the patch has been collapsed
#   into a space, and the hunk headers (e.g. -20,16 +20,15) declare more lines
#   than are visible, so blank context lines were presumably lost as well.
#   Regenerate the patch from the repository before trying to apply it.
# NOTE(review): both "freq_dict[author] / total" and "v / total" raise
#   ZeroDivisionError if total is 0 while freq_dict is non-empty -- confirm
#   whether processMessageList can return an empty list for an author.
# NOTE(review): the context line f"{str(freq_dict.get(author))+"/"+str(total):>15}"
#   reuses double quotes inside a double-quoted f-string, which only parses
#   on Python 3.12+ (PEP 701) -- confirm the supported interpreter version.
diff --git a/backend/process_data.py b/backend/process_data.py index aef1f32..7e7b5d7 100644 --- a/backend/process_data.py +++ b/backend/process_data.py @@ -20,16 +20,15 @@ makedirs("output", exist_ok=True) test = processRawMessages(chat) total_frequency = 0 -total_frequency_dict: dict[str, int] = Counter() +total_frequency_dict: dict[str, int] = {} word_frequency_dict: dict[str, dict[str, int]] = {} for author in test: word_frequency_dict[author] = {} - messageList = test.get(author) + messageList = test[author] if messageList: - wordList = Counter(processMessageList(messageList)) - word_frequency_dict[author] = wordList - for count in wordList.items(): - total_frequency_dict.update({author: count[1]}) - total_frequency += count[1] + wordFreqList = Counter(processMessageList(messageList)) + word_frequency_dict[author] = wordFreqList + total_frequency_dict[author] = sum(wordFreqList.values()) +total_frequency = sum(total_frequency_dict.values()) diff --git a/piechart.py b/piechart.py index 829d4c7..b8e876f 100644 --- a/piechart.py +++ b/piechart.py @@ -2,6 +2,7 @@ from backend.process_data import ( total_frequency_dict as freq_dict, total_frequency as total, ) +import matplotlib.pyplot as plt for author in freq_dict: fraction = round(freq_dict[author] / total * 100, 1) @@ -10,3 +11,16 @@ for author in freq_dict: f"{str(freq_dict.get(author))+"/"+str(total):>15}", f"({fraction:>4}%)", ) + +# ChatGPT generated +# Prepare labels and sizes +labels = list(freq_dict.keys()) +sizes = [v / total * 100 for v in freq_dict.values()] # percentages + +# Plot +plt.figure(figsize=(8, 8)) # type: ignore +plt.pie(sizes, labels=labels, autopct="%.1f%%", startangle=90) # type: ignore +plt.title("Word Frequency Distribution by Author") # type: ignore +plt.axis("equal") # type: ignore # Equal aspect ratio for a perfect circle +plt.tight_layout() +plt.show() # type: ignore