Set up the data to generate a pie chart

This commit is contained in:
2025-04-18 18:21:50 +02:00
parent e7cfc81146
commit 2bf664e25f
4 changed files with 27 additions and 8 deletions

View File

@@ -19,10 +19,17 @@ makedirs("output", exist_ok=True)
test = processRawMessages(chat)
frequency_dictionary: dict[str, dict[str, int]] = {}
total_frequency = 0
total_frequency_dict: dict[str, int] = Counter()
word_frequency_dict: dict[str, dict[str, int]] = {}
for author in test:
frequency_dictionary[author] = {}
word_frequency_dict[author] = {}
messageList = test.get(author)
if messageList:
frequency_dictionary[author] = Counter(processMessageList(messageList))
wordList = Counter(processMessageList(messageList))
word_frequency_dict[author] = wordList
for count in wordList.items():
total_frequency_dict.update({author: count[1]})
total_frequency += count[1]

View File

@@ -1,11 +1,11 @@
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from backend.process_data import frequency_dictionary
from backend.process_data import word_frequency_dict
from collections import Counter
# Example dictionary
data = frequency_dictionary
data = word_frequency_dict
# Choose how many top words to show
TOP_N = 5

12
piechart.py Normal file
View File

@@ -0,0 +1,12 @@
"""Print each author's share of the total word count as an aligned table."""
from backend.process_data import (
    total_frequency_dict as freq_dict,
    total_frequency as total,
)

for author, count in freq_dict.items():
    # Percentage of all counted words that belong to this author.
    fraction = round(count / total * 100, 1)
    # Build the cell text first so the f-strings below contain no nested
    # double quotes; reusing the outer quote character inside a replacement
    # field is a SyntaxError on Python versions before 3.12.
    label = f"{author}:"
    ratio = f"{count}/{total}"
    print(
        f"{label:<30}",
        f"{ratio:>15}",
        f"({fraction:>4}%)",
    )

View File

@@ -1,5 +1,5 @@
from wordcloud import WordCloud # type: ignore
from backend.process_data import frequency_dictionary
from backend.process_data import word_frequency_dict
wordcloud = WordCloud(
@@ -17,7 +17,7 @@ wordcloud = WordCloud(
contour_width=1, # For consistent layout between runs
)
for author in frequency_dictionary:
freq_dict = frequency_dictionary.get(author)
for author in word_frequency_dict:
freq_dict = word_frequency_dict.get(author)
image = wordcloud.generate_from_frequencies(freq_dict) # type: ignore
image.to_file(f"output/{author}.png") # type: ignore