Source code for votekit.utils

from typing import Sequence, Optional, Literal, Union
from itertools import permutations
import math
import random
from votekit.ballot import Ballot, RankBallot
from votekit.pref_profile import RankProfile, ScoreProfile
import pandas as pd
import numpy as np
from numpy.typing import NDArray

COLOR_LIST = [
    "#0099cd",
    "#ffca5d",
    "#00cd99",
    "#99cd00",
    "#cd0099",
    "#9900cd",
    "#8dd3c7",
    "#bebada",
    "#fb8072",
    "#80b1d3",
    "#fdb462",
    "#b3de69",
    "#fccde5",
    "#bc80bd",
    "#ccebc5",
    "#ffed6f",
    "#ffffb3",
    "#a6cee3",
    "#1f78b4",
    "#b2df8a",
    "#33a02c",
    "#fb9a99",
    "#e31a1c",
    "#fdbf6f",
    "#ff7f00",
    "#cab2d6",
    "#6a3d9a",
    "#b15928",
    "#64ffda",
    "#00B8D4",
    "#A1887F",
    "#76FF03",
    "#DCE775",
    "#B388FF",
    "#FF80AB",
    "#D81B60",
    "#26A69A",
    "#FFEA00",
    "#6200EA",
]


[docs] def ballots_by_first_cand(profile: RankProfile) -> dict[str, list[RankBallot]]: """ Partitions the profile by first place candidate. Assumes there are no ties within first place positions of ballots. Args: profile (RankProfile): Profile to partititon. Returns: dict[str, list[RankBallot]]: A dictionary whose keys are candidates and values are lists of ballots that have that candidate first. """ if not isinstance(profile, RankProfile): raise TypeError("Ballots must have rankings.") df = profile.df assert profile.max_ranking_length is not None ranking_cols = [f"Ranking_{i}" for i in range(1, profile.max_ranking_length + 1)] rank_arr = df[ranking_cols].to_numpy() weights = df["Weight"].to_numpy() voter_sets = df["Voter Set"].to_numpy() cand_dict: dict[str, list[RankBallot]] = {c: [] for c in profile.candidates} tilde = frozenset({"~"}) for row, w, voter_set in zip(rank_arr, weights, voter_sets): first = row[0] if len(first) > 1: raise ValueError( f"Ballot " f"{RankBallot(ranking=tuple(c_set for c_set in row if c_set != tilde), weight=float(w))} " "has a tie for first." ) cand = next(iter(first)) if cand == "~": continue clean_ranking = tuple(s for s in row if s != tilde) cand_dict[cand].append( RankBallot(ranking=clean_ranking, weight=float(w), voter_set=voter_set) ) return cand_dict
[docs] def add_missing_cands(profile: RankProfile) -> RankProfile: """ Add any candidates from `profile.candidates` that are not listed on a ballot as tied in last place. Args: profile (RankProfile): Input profile. Returns: RankProfile """ if not isinstance(profile, RankProfile): raise TypeError("Profile must be of type RankProfile.") new_ballots = [RankBallot()] * len(profile.ballots) candidates = set(profile.candidates) for i, ballot in enumerate(profile.ballots): if ballot.ranking is None: raise TypeError("Ballots must have rankings.") else: b_cands = [c for s in ballot.ranking for c in s] missing_cands = candidates.difference(b_cands) new_ranking = ( list(ballot.ranking) + [missing_cands] if len(missing_cands) > 0 else ballot.ranking ) new_ballots[i] = RankBallot( weight=ballot.weight, voter_set=ballot.voter_set, ranking=tuple([frozenset(s) for s in new_ranking]), ) return RankProfile(ballots=tuple(new_ballots), candidates=tuple(candidates))
[docs] def validate_score_vector(score_vector: Sequence[float]): """ Validator function for score vectors. Vectors should be non-increasing and non-negative. Args: score_vector (Sequence[float]): Score vector. Raises: ValueError: If any score is negative. ValueError: If score vector is increasing at any point. """ for i, score in enumerate(score_vector): # if score is negative if score < 0: raise ValueError("Score vector must be non-negative.") if i > 0: # if the current score is bigger than prev if score > score_vector[i - 1]: raise ValueError("Score vector must be non-increasing.")
def _score_dict_from_rankings_df_no_ties( profile: RankProfile, score_vector: Sequence[float], ) -> dict[str, float]: validate_score_vector(score_vector) if not isinstance(profile, RankProfile): raise TypeError("Profile must only contain ranked ballots.") assert profile.max_ranking_length is not None max_len = profile.max_ranking_length if len(score_vector) < max_len: score_vector = list(score_vector) + [0] * (max_len - len(score_vector)) df = profile.df cand_frznst = [frozenset({c}) for c in profile.candidates_cast] all_frznst = cand_frznst + [frozenset({"~"}), frozenset()] n_buckets = len(all_frznst) idx_of_empty = all_frznst.index(frozenset()) # Pull out weights and score_vector as NumPy arrays: weights = df["Weight"].to_numpy(dtype=float) score_arr = np.array(score_vector, dtype=float).reshape(1, -1) rank_cols = [f"Ranking_{i}" for i in range(1, max_len + 1)] arr = df[rank_cols].to_numpy(dtype=object) flat_arr = arr.ravel() # Slick way of converting frozensets to integer codes: codes_flat: NDArray[np.int64] = pd.Categorical( flat_arr, categories=all_frznst ).codes.astype(np.int64) # Take care of error codes (-1) if (codes_flat == -1).any(): codes_flat = np.where(codes_flat == -1, idx_of_empty, codes_flat) weight_matrix = weights[:, None] * score_arr weights_flat = weight_matrix.ravel() bucket_sums = np.bincount(codes_flat, weights=weights_flat, minlength=n_buckets) return {next(iter(k)): bucket_sums[idx] for idx, k in enumerate(cand_frznst)}
[docs] def score_dict_from_score_vector( profile: RankProfile, score_vector: Sequence[float], tie_convention: Literal["high", "average", "low"] = "low", ) -> dict[str, float]: """ Score the candidates based on a score vector. For example, the vector (1,0,...) would return the first place votes for each candidate. Vectors should be non-increasing and non-negative. Vector should be as long as ``max_ranking_length`` in the profile. If it is shorter, we add 0s. Candidates who are not mentioned in any ranking do not appear in the dictionary. Args: profile (RankProfile): Profile to score. score_vector (Sequence[float]): Score vector. Should be non-increasing and non-negative. Vector should be as long as ``max_ranking_length`` in the profile. If it is shorter, we add 0s. tie_convention (Literal["high", "average", "low"], optional): How to award points for tied rankings. Defaults to "low", where any candidates tied receive the lowest possible points for their position, eg three people tied for 3rd would each receive the points for 5th. "high" awards the highest possible points, so in the previous example, they would each receive the points for 3rd. "average" averages the points, so they would each receive the points for 4th place. Returns: dict[str, float]: Dictionary mapping candidates to scores. """ validate_score_vector(score_vector) if not isinstance(profile, RankProfile): raise TypeError("Profile must only contain ranked ballots.") assert profile.max_ranking_length is not None max_length = profile.max_ranking_length if len(score_vector) < max_length: score_vector = list(score_vector) + [0] * (max_length - len(score_vector)) scores = {c: 0.0 for c in profile.candidates_cast} try: ranking_cols = [f"Ranking_{i}" for i in range(1, max_length + 1)] ranking_mat = profile.df[ranking_cols].to_numpy() except KeyError as e: raise TypeError("Ballots must have rankings.") from e weights = profile.df["Weight"].to_numpy(dtype=float) if tie_convention not in ["high", "average", "low"]: raise ValueError( ( "tie_convention must be one of 'high', 'low', 'average', " f"not {tie_convention}" ) ) tilde = frozenset({"~"}) for idx in range(len(ranking_mat)): current_ind = 0 ranking = ranking_mat[idx] wt = weights[idx] for s in ranking: position_size = len(s) if position_size == 0: raise TypeError( f"Ballot {Ballot(ranking=ranking.tolist(), weight=wt)} has an empty ranking " "position." ) if s == tilde: continue local_score_vector: Sequence[float | int] = score_vector[ current_ind : current_ind + position_size ] if tie_convention == "high": allocation = max(local_score_vector) elif tie_convention == "low": allocation = min(local_score_vector) else: allocation = sum(local_score_vector) / position_size for c in s: scores[c] += allocation * wt current_ind += position_size return scores
def _first_place_votes_from_df_no_ties( profile: RankProfile, ) -> dict[str, float]: """ Computes first place votes for all candidates_cast in a ``RankProfile``. Intended to be much faster than first_place_votes, but does not handle ties in ballots. Args: profile (RankProfile): The profile to compute first place votes for. Returns: dict[str, float]: Dictionary mapping candidates to number of first place votes. """ # equiv to score vector of (1,0,0,...) assert profile.max_ranking_length is not None return _score_dict_from_rankings_df_no_ties( profile, [1] + [0] * (profile.max_ranking_length - 1) )
[docs] def first_place_votes( profile: RankProfile, tie_convention: Literal["high", "average", "low"] = "average", ) -> dict[str, float]: """ Computes first place votes for all candidates_cast in a ``RankProfile``. Args: profile (RankProfile): The profile to compute first place votes for. tie_convention (Literal["high", "average", "low"], optional): How to award points for tied first place votes. Defaults to "average", where if n candidates are tied for first, each receives 1/n points. "high" would award them each one point, and "low" 0. Returns: dict[str, float]: Dictionary mapping candidates to number of first place votes. """ # equiv to score vector of (1,0,0,...) if not isinstance(profile, RankProfile): raise TypeError("Profile must be of type RankProfile.") assert profile.max_ranking_length is not None return score_dict_from_score_vector( profile, [1] + [0] * (profile.max_ranking_length - 1), tie_convention )
[docs] def mentions( profile: RankProfile, ) -> dict[str, float]: """ Calculates total mentions for all candidates in a ``RankProfile``. Args: profile (RankProfile): RankProfile of ballots. Returns: dict[str, float]: Dictionary mapping candidates to mention totals (values). """ mentions = {c: 0.0 for c in profile.candidates} if not isinstance(profile, RankProfile): raise TypeError("Profile must be of type RankProfile.") for ballot in profile.ballots: if ballot.ranking is None: raise TypeError("Ballots must have rankings.") else: for s in ballot.ranking: for cand in s: mentions[cand] += ballot.weight return mentions
[docs] def borda_scores( profile: RankProfile, borda_max: Optional[int] = None, tie_convention: Literal["high", "average", "low"] = "low", ) -> dict[str, float]: r""" Calculates Borda scores for candidates_cast in a ``RankProfile``. The Borda vector is :math:`(n,n-1,\dots,1)` where :math:`n` is the ``borda_max`. Args: profile (RankProfile): ``RankProfile`` of ballots. borda_max (int, optional): The maximum value of the Borda vector. Defaults to the length of the longest allowable ballot in the profile. tie_convention (Literal["high", "average", "low"], optional): How to award points for tied rankings. Defaults to "low", where any candidates tied receive the lowest possible points for their position, eg three people tied for 3rd would each receive the points for 5th. "high" awards the highest possible points, so in the previous example, they would each receive the points for 3rd. "average" averages the points, so they would each receive the points for 4th place. Returns: dict[str, float]: Dictionary mapping candidates to Borda scores. """ if not isinstance(profile, RankProfile): raise TypeError("Profile must be of type RankProfile.") if borda_max is None: assert profile.max_ranking_length is not None borda_max = profile.max_ranking_length score_vector = list(range(borda_max, 0, -1)) return score_dict_from_score_vector(profile, score_vector, tie_convention)
[docs] def tiebreak_set( r_set: frozenset[str], profile: Optional[RankProfile] = None, tiebreak: str = "random", scoring_tie_convention: Literal["high", "average", "low"] = "low", backup_tiebreak_convention: Optional[str] = None, ) -> tuple[frozenset[str], ...]: """ Break a single set of candidates into multiple sets each with a single candidate according to a tiebreak rule. Rule 1: random. Rule 2: first-place votes; break the tie based on first-place votes in the profile. Rule 3: borda; break the tie based on Borda points in the profile. Rule 4: lex/lexicographic/alph/alphabetical; break the tie alphabetically. Args: r_set (frozenset[str]): Set of candidates on which to break tie. profile (RankProfile, optional): Profile used to break ties in first-place votes or Borda setting. Defaults to None, which implies a random tiebreak. tiebreak (str): Tiebreak method to use. Options are "random", "first_place", and "borda". Defaults to "random". scoring_tie_convention (Literal["high", "average", "low"]): How to award points for tied rankings. Defaults to "low", where any candidates tied receive the lowest possible points for their position, eg three people tied for 3rd would each receive the points for 5th. "high" awards the highest possible points, so in the previous example, they would each receive the points for 3rd. "average" averages the points, so they would each receive the points for 4th place. backup_tiebreak_convention (str, optional): If the initial tiebreak does not resolve all ties, this convention is used to break any remaining ties. Options are "random" and "lex/lexicographic/alph/alphabetical". Defaults to None which sets the backup to "lex" if the initial tiebreak is alphabetical, and "random" otherwise. Returns: tuple[frozenset[str],...]: tiebroken ranking """ if tiebreak in ["alphabetical", "lexicographic", "alph", "lex"]: sorted_cands = sorted([c for c in r_set]) new_ranking = tuple(map(lambda c: frozenset({c}), sorted_cands)) elif tiebreak == "random": new_ranking = tuple( frozenset({c}) for c in random.sample(list(r_set), k=len(r_set)) ) elif (tiebreak == "first_place" or tiebreak == "borda") and profile: if tiebreak == "borda": tiebreak_scores = borda_scores( profile, tie_convention=scoring_tie_convention ) else: tiebreak_scores = first_place_votes( profile, tie_convention=scoring_tie_convention ) tiebreak_scores = { c: score for c, score in tiebreak_scores.items() if c in r_set } new_ranking = score_dict_to_ranking(tiebreak_scores) elif profile is None: raise ValueError("Method of tiebreak requires profile.") else: raise ValueError("Invalid tiebreak code was provided") if any(len(s) > 1 for s in new_ranking): if backup_tiebreak_convention is None: if tiebreak in [ "alphabetical", "lexicographic", "alph", "lex", ]: backup_tiebreak_convention = "lex" else: backup_tiebreak_convention = "random" if backup_tiebreak_convention in [ "alphabetical", "lexicographic", "alph", "lex", ]: print("Initial tiebreak was unsuccessful, performing alphabetic tiebreak") new_ranking, _ = tiebroken_ranking( new_ranking, profile=profile, tiebreak="lex" ) elif backup_tiebreak_convention == "random": print("Initial tiebreak was unsuccessful, performing random tiebreak") new_ranking, _ = tiebroken_ranking( new_ranking, profile=profile, tiebreak="random" ) else: raise ValueError("Invalid backup tiebreak code was provided") return new_ranking
[docs] def tiebroken_ranking( ranking: tuple[frozenset[str], ...], profile: Optional[RankProfile] = None, tiebreak: str = "random", ) -> tuple[ tuple[frozenset[str], ...], dict[frozenset[str], tuple[frozenset[str], ...]] ]: """ Breaks ties in a list-of-sets ranking according to a given scheme. Args: ranking (list[set[str]]): A list-of-set ranking of candidates. profile (RankProfile, optional): Profile used to break ties in first-place votes or Borda setting. Defaults to None, which implies a random tiebreak. tiebreak (str, optional): Method of tiebreak, currently supports 'random', 'borda', 'first_place'. Defaults to random. Returns: tuple[tuple[frozenset[str], ...], dict[frozenset[str], tuple[frozenset[str],...]]]: The first entry of the tuple is a list-of-set ranking of candidates (broken down to one candidate sets). The second entry is a dictionary that maps tied sets to their resolution. """ new_ranking: list[frozenset[str]] = [frozenset()] * len( [c for s in ranking for c in s] ) i = 0 tied_dict = {} for s in ranking: if len(s) > 1: tiebroken = tiebreak_set(s, profile, tiebreak) tied_dict[s] = tiebroken else: tiebroken = (s,) new_ranking[i : (i + len(tiebroken))] = tiebroken i += len(tiebroken) return (tuple(new_ranking), tied_dict)
[docs] def score_dict_to_ranking( score_dict: dict[str, float], sort_high_low: bool = True ) -> tuple[frozenset[str], ...]: """ Sorts candidates into a tuple of frozensets ranking based on a scoring dictionary. Args: score_dict (dict[str, float]): Dictionary between candidates and their score. sort_high_low (bool, optional): How to sort candidates based on scores. True sorts from high to low. Defaults to True. Returns: tuple[frozenset[str],...]: Candidate rankings in a list-of-sets form. """ score_to_cand: dict[float, list[str]] = {s: [] for s in score_dict.values()} for c, score in score_dict.items(): score_to_cand[score].append(c) if len(score_to_cand) > 0: return tuple( [ frozenset(c_list) for _, c_list in sorted( score_to_cand.items(), key=lambda x: x[0], reverse=sort_high_low ) ] ) else: return (frozenset(),)
[docs] def elect_cands_from_set_ranking( ranking: Sequence[Union[frozenset[str], set[str]]], m: int, profile: Optional[RankProfile] = None, tiebreak: Optional[str] = None, ) -> tuple[ tuple[frozenset[str], ...], tuple[frozenset[str], ...], Optional[tuple[frozenset[str], tuple[frozenset[str], ...]]], ]: """ Given a ranking, elect the top m candidates in the ranking. If a tie set overlaps the desired number of seats, it breaks the tie with the provided method or raises a ValueError if tiebreak is set to None. Returns a tuple of elected candidates, remaining candidates, and a tuple whose first entry is a tie set and whose second entry is the resolution of the tie. Args: ranking (tuple[frozenset[str],...]): A list-of-set ranking of candidates. m (int): Number of seats to elect. profile (RankProfile, optional): Profile used to break ties in first-place votes or Borda setting. Defaults to None, which implies a random tiebreak. tiebreak (str, optional): Method of tiebreak, currently supports 'random', 'borda', 'first_place'. Defaults to None, which does not break ties. Returns: tuple[tuple[frozenset[str]]], list[tuple[frozenset[str]], Optional[tuple[frozenset[str], tuple[frozenset[str], ...]]]: A list-of-sets of elected candidates, a list-of-sets of remaining candidates, and a tuple whose first entry is a tie set and whose second entry is the resolution of the tie. If no ties were broken, the tuple returns None. """ if m < 1: raise ValueError("m must be strictly positive") if m > len([c for s in ranking for c in s]): raise ValueError("m must be no more than the number of candidates.") ranking_fs: tuple[frozenset[str], ...] = tuple( s if isinstance(s, frozenset) else frozenset(s) for s in ranking ) num_elected = 0 elected: list[frozenset[str]] = [] i = 0 tiebreak_ranking: Optional[tuple[frozenset[str], tuple[frozenset[str], ...]]] = None while num_elected < m: elected.append(ranking_fs[i]) num_elected += len(ranking_fs[i]) if num_elected > m: if tiebreak is None: raise ValueError( "Cannot elect correct number of candidates without breaking ties." ) # back out the overfill elected.pop() num_elected -= len(ranking_fs[i]) tiebroken = tiebreak_set(frozenset(ranking_fs[i]), profile, tiebreak) elected += tiebroken[: (m - num_elected)] remaining: list[frozenset[str]] = list(tiebroken[(m - num_elected) :]) if i < len(ranking_fs): remaining += list(ranking_fs[(i + 1) :]) return ( tuple(elected), tuple(remaining), (ranking_fs[i], tiebroken), ) i += 1 return (tuple(elected), ranking_fs[i:], tiebreak_ranking)
[docs] def expand_tied_ballot(ballot: RankBallot) -> list[RankBallot]: """ Fix tie(s) in a ballot by returning all possible permutations of the tie(s), and divide the weight of the original ballot equally among the new ballots. Args: ballot (RankBallot): Ballot to expand tie sets on. Returns: list[v]: All possible permutations of the tie(s). """ if not isinstance(ballot, RankBallot): raise TypeError("Ballot must be of type RankBallot.") if ballot.ranking is None: raise TypeError("Ballot must have ranking.") if all(len(s) == 1 for s in ballot.ranking): return [ballot] else: for i, s in enumerate(ballot.ranking): if len(s) > 1: new_ballots = [ RankBallot( weight=ballot.weight / math.factorial(len(s)), voter_set=ballot.voter_set, ranking=tuple(ballot.ranking[:i]) + tuple([frozenset({c}) for c in order]) + tuple(ballot.ranking[(i + 1) :]), ) for order in permutations(s) ] return [b for new_b in new_ballots for b in expand_tied_ballot(new_b)] assert False # mypy
[docs] def resolve_profile_ties(profile: RankProfile) -> RankProfile: """ Takes in a PeferenceProfile with potential ties in ballots. Replaces ballots with ties with fractionally weighted ballots corresponding to all permutations of the tied ranking. Args: profile (RankProfile): Input profile with potentially tied rankings. Returns: RankProfile: A RankProfile with resolved ties. """ new_ballots = tuple( [b for ballot in profile.ballots for b in expand_tied_ballot(ballot)] ) return RankProfile(ballots=new_ballots)
[docs] def score_profile_from_ballot_scores( profile: ScoreProfile, ) -> dict[str, float]: """ Score the candidates based on the ``scores`` parameter of the ballots. All ballots must have a ``scores`` parameter; note that a ``scores`` dictionary with no non-zero scores will raise the same error. Args: profile (ScoreProfile): Profile to score. Returns: dict[str, float]: Dictionary mapping candidates to scores. """ scores = {c: 0.0 for c in profile.candidates} if not isinstance(profile, ScoreProfile): raise TypeError("Profile must be of type ScoreProfile.") for ballot in profile.ballots: if ballot.scores is None: raise TypeError(f"Ballot {ballot} has no scores.") else: for c, score in ballot.scores.items(): scores[c] += score * ballot.weight return scores
[docs] def ballot_lengths(profile: RankProfile) -> dict[int, float]: """ Compute the frequency of ballot lengths in the profile. Includes all lengths from 1 to ``max_ranking_length`` as keys. Ballots must have rankings. Args: profile (RankProfile): Profile to compute ballot lengths. Returns: dict[int, float]: Dictionary of ballot length frequency. Raises: TypeError: All ballots must have rankings. """ if not isinstance(profile, RankProfile): raise TypeError("Profile must be of type RankProfile.") assert profile.max_ranking_length is not None ballot_lengths = {i: 0.0 for i in range(1, profile.max_ranking_length + 1)} for ballot in profile.ballots: if ballot.ranking is None: raise TypeError("All ballots must have rankings.") length = len(ballot.ranking) ballot_lengths[length] += ballot.weight return ballot_lengths
[docs] def index_to_lexicographic_ballot( ballot_index: int, n_candidates: int, max_length: int, total_valid_ballots_method, always_use_total_valid_ballots_method: bool = True, ) -> list[int]: """ Convert an index to one ballot with candidates taken from the list range(n_candidates), and where the ballot has length at most max_length. The ordering of the ballots is lexicographic, i.e., the first ballot is the lexicographically smallest ballot and continues in that order: (0,), (0,1), (0,1,2), ... (0,2), (0,2,1), ... (n-1, n-2, ..., n-l) where n is the number of candidates and l is the maximum ballot length. Args: ballot_index (int): The index to convert. n_candidates (int): The number of candidates. max_length (int): The maximum allowed ballot rank. total_valid_ballots_method: ((n_candidates, max_length) -> integer) a method which returns the total number of allowed ballots. always_use_total_valid_ballots_method (bool): a flag indicating whether the given total valid ballots method should be used when computing b_n. If total_valid_ballots_method is using a cache then this should be True for maximum runtime efficiency. Returns: list[int]: A list representing the ballot corresponding to index. """ total_valid_ballots = total_valid_ballots_method(n_candidates, max_length) if ballot_index >= total_valid_ballots: raise Exception( f"Given ballot index {ballot_index} out of range. Max index: {total_valid_ballots}" ) def chunk_size(n_cands: int, ballot_len: int) -> int: return total_valid_ballots_method(n_cands, ballot_len) // n_cands candidates = list(range(n_candidates)) out = [] if always_use_total_valid_ballots_method: bn = total_valid_ballots_method(n_candidates + 1, max_length + 1) // ( n_candidates + 1 ) else: bn = chunk_size(n_candidates + 1, max_length + 1) for i in range(n_candidates, 0, -1): if always_use_total_valid_ballots_method: bn = total_valid_ballots_method( n_candidates=i, max_ballot_length=max_length ) // (i) else: bn = (bn - 1) // i # Perform Euclidean division of index by bn section = ballot_index // bn remaining = ballot_index % bn out.append(candidates.pop(section)) if remaining == 0: # Cut off the ballot here break ballot_index = remaining - 1 return out
[docs] def build_df_from_ballot_samples( ballots_freq_dict: dict[tuple[int, ...], int], candidates: Sequence[str] ): """ Helper function which creates a pandas df to instantiate a RankProfile args: ballots_freq_dict: dictionary mapping ballots to sampled frequency. The keys should be in candidate id form candidates : list of candidates in the profile returns: pandas df """ df_data = [] n_cands = len(candidates) for ballot in ballots_freq_dict.keys(): ballot_as_frozenset_entries = tuple( [frozenset([candidates[i]]) for i in ballot] ) completed_ballot = ( ballot_as_frozenset_entries + tuple( [frozenset(["~"]) for _ in range(n_cands - len(ballot))] ) # padding short ballots + tuple([ballots_freq_dict[ballot], set()]) ) # weight, voter set df_data.append(completed_ballot) return pd.DataFrame( df_data, columns=[f"Ranking_{i}" for i in range(1, n_cands + 1)] + ["Weight", "Voter Set"], )