# whatsapp-wordcloud/chatgpt.py

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from backend.process_data import word_frequency_dict
from collections import Counter

# Per-author word counts imported from the backend. The code below relies on
# this being a mapping of author -> {word: count}.
data = word_frequency_dict

# Choose how many top words to show
TOP_N = 5

# Rank words by their total count summed over all authors.
combined = Counter()  # type: ignore
for author_counts in data.values():  # type: ignore
    combined.update(author_counts)  # type: ignore

# most_common() yields the words in descending order of total count.
top_words = [word for word, _ in combined.most_common(TOP_N)]  # type: ignore

# Rows are the top words, columns are the authors.
# reindex() (rather than an index intersection) keeps the rows in ranking
# order, most frequent first, and drops every other word before the fill and
# cast run. A word an author never used shows up as NaN, hence fillna(0); the
# integer cast is needed because the heatmap annotations use fmt="d".
df = pd.DataFrame(data).reindex(top_words).fillna(0).astype(int)  # type: ignore

if df.empty:
    # No authors or no words: there are no values to derive a color scale
    # from, so report it instead of handing an empty frame to seaborn.
    print("No word frequencies to plot.")
else:
    plt.figure(figsize=(8, 5))  # type: ignore
    sns.heatmap(  # type: ignore
        df, annot=True, fmt="d", cmap="YlGnBu", cbar_kws={"format": "%.0f"}
    )
    plt.title(f"Top {TOP_N} Word Frequencies")  # type: ignore
    plt.xlabel("Author")  # type: ignore
    plt.ylabel("Word")  # type: ignore
    plt.tight_layout()
    plt.show()  # type: ignore