Source code for votekit.ballot_generator.std_generator.impartial_anon_culture

"""
Generate ranked preference profiles using the Impartial Anonymous Culture (IAC) model.

The main API functions in this module are:

- `iac_profile_generator`: Generates a single preference profile using the IAC distribution.
"""

import math
import numpy as np
import random
from collections import Counter
from functools import lru_cache
from typing import Optional, Sequence

from votekit.pref_profile import RankProfile
from votekit.utils import index_to_lexicographic_ballot, build_df_from_ballot_samples

# ====================================================
# ================= Helper Functions =================
# ====================================================


@lru_cache
def _total_num_ballots(n_candidates: int, max_ballot_length: int) -> int:
    """
    Calculate the total number of possible ballots given the number of candidates and the maximum
    ballot length.

    Args:
        n_candidates (int): Number of candidates.
        max_ballot_length (int): Maximum length of each ballot.

    Returns:
        int: Total number of possible ballots.
    """
    return sum(
        math.comb(n_candidates, i) * math.factorial(i)
        for i in range(1, max_ballot_length + 1)
    )


# =================================================
# ================= API Functions =================
# =================================================


[docs] def iac_profile_generator( candidates: Sequence[str], number_of_ballots: int, max_ballot_length: Optional[int] = None, ) -> RankProfile: """ Generate a profile according to the Impartial Anonymous Culture (IAC) model where each profile is equally likely. Args: candidates (Sequence[str]): List of candidate strings. number_of_ballots (int): Number of ballots to generate. max_ballot_length (Optional[int]): Maximum length of each ballot. If None, defaults to the number of candidates. Returns: RankProfile: Generated rank profile """ if max_ballot_length is None: max_ballot_length = len(candidates) # choose index as sampled 0 to N, do this n! times num_cands = len(candidates) num_gaps = number_of_ballots # + 1 gap_freq = np.zeros(num_gaps, dtype=int) # record the number of gaps in stars/bars # rather than iterate n! times, we perform multiple # multinomial experiments num_valid_ballots = _total_num_ballots(num_cands, max_ballot_length) for _ in range(num_valid_ballots): gap_freq[random.randint(0, num_gaps) - 1] += 1 # TODO: Double check possible off by 1 errors here and in `gap_freq` ballot_indices = np.cumsum(gap_freq) - 1 ballots_as_cand_ind = [ tuple( index_to_lexicographic_ballot( ballot_ind, num_cands, max_ballot_length, _total_num_ballots, always_use_total_valid_ballots_method=False, ) ) for ballot_ind in ballot_indices ] ballots_as_counter = Counter(ballots_as_cand_ind) pp_df = build_df_from_ballot_samples(dict(ballots_as_counter), candidates) pp_df.index.name = "Ballot Index" return RankProfile( df=pp_df, max_ranking_length=max_ballot_length, candidates=candidates, )