import json
import pickle
import time

from collections import Counter
def create_anagram_map(dictionary_path='words.txt'):
    """
    Reads a dictionary file and creates a map of anagram groups.

    words.txt from https://apiacoa.org/publications/teaching/datasets/google-10000-english.txt

    Every line is stripped of surrounding whitespace and lower-cased. Lines
    starting with "#!" and lines that are not purely alphabetic are skipped.
    The remaining words are grouped under a canonical key: the word's
    characters sorted and joined into a string. A word that occurs more than
    once in the file (including case variants such as "Word" / "word") is
    recorded only once, so a repeated word can never pass for an anagram
    group on its own.

    Args:
        dictionary_path (str): The path to the dictionary file.

    Returns:
        A dictionary mapping sorted character strings to lists of words (in
        file order). Only groups with at least two distinct words are kept.
        Returns None if the dictionary file does not exist.
    """
    anagram_map = {}
    # Words already recorded; guards against duplicate lines in the file.
    seen_words = set()
    print(f"Starting preprocessing of '{dictionary_path}'...")

    try:
        with open(dictionary_path, 'r', encoding='utf-8') as f:
            for line in f:
                # Clean the word: remove whitespace and convert to lowercase
                cleaned_word = line.strip().lower()
                if cleaned_word.startswith("#!"):
                    continue
                # Only process alphabetic words that were not seen before
                if not cleaned_word.isalpha() or cleaned_word in seen_words:
                    continue
                seen_words.add(cleaned_word)
                # The sorted string is the canonical key
                sorted_key = "".join(sorted(cleaned_word))
                anagram_map.setdefault(sorted_key, []).append(cleaned_word)
    except FileNotFoundError:
        print(f"Error: Dictionary file not found at '{dictionary_path}'.")
        print("Please download a dictionary file (e.g., from GitHub) and save it as 'words.txt'.")
        return None

    # Remove keys that only have one word, as they have no anagrams
    # This also reduces the size of the map
    final_map = {key: words for key, words in anagram_map.items() if len(words) > 1}

    print(f"Preprocessing complete. Found {len(final_map)} unique anagram groups.")
    return final_map


def save_data_json(data, output_path='anagram_data.json'):
    """Write ``data`` to ``output_path`` as UTF-8 encoded JSON.

    When ``data`` is None nothing is written; a notice is printed instead.

    Args:
        data: A JSON-serializable object, or None.
        output_path (str): Destination file path; the file is overwritten.
    """
    if data is not None:
        with open(output_path, 'w', encoding='utf-8') as out_file:
            json.dump(data, out_file)
        print(f"Anagram map successfully saved to '{output_path}'.")
    else:
        print("No data to save.")

def analyze_anagram_frequencies(anagram_map):
    """Print how many anagram groups exist for each group size.

    Args:
        anagram_map (dict): Maps a canonical key to the list of words in
            that anagram group. A falsy value (None or empty) only prints
            a notice.
    """
    if not anagram_map:
        print("Anagram map is empty. No frequencies to analyze.")
        return

    # Tally group sizes directly: size -> number of groups with that size
    size_tally = Counter(len(group) for group in anagram_map.values())

    print("\n--- Anagram Group Size Frequencies ---")
    # Report in ascending order of group size
    for group_size in sorted(size_tally):
        print(f"Groups with {group_size} anagrams: {size_tally[group_size]}")
    print("--------------------------------------\n")

if __name__ == '__main__':
    # Script entry point: build the anagram map from 'words.txt', report
    # group-size frequencies, write the map to JSON, and print the elapsed time.
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    # Create the map from the dictionary file
    processed_data = create_anagram_map('words.txt')

    # Save the processed data for the main app to use
    # (skipped when the file was missing or no anagram groups were found)
    if processed_data:
        # Analyze and display frequencies before saving
        analyze_anagram_frequencies(processed_data)
        save_data_json(processed_data)

    end_time = time.perf_counter()
    print(f"Total preprocessing time: {end_time - start_time:.2f} seconds.")