Source code for votekit.ballot_generator.std_generator.spacial

"""
Generate ranked preference profiles using spatial models.

The main API functions in this module are:

- `onedim_spacial_profile_generator`: Generates a single preference profile using a one-dimensional
    spatial model.
- `spacial_profile_and_positions_generator`: Generates a single preference profile using a
    multi-dimensional spatial model with voters and candidates distributed according to specified
    distributions. Also returns the sampled candidate and voter positions.
- `clustered_spacial_profile_and_positions_generator`: Generates a single preference profile using a
    clustered multi-dimensional spatial model where voters are clustered around candidates. Also
    returns the sampled candidate and voter positions.
"""

import numpy as np
from numpy.typing import NDArray
import pandas as pd
from typing import Optional, Tuple, Callable, Dict, Any, Sequence

from votekit.metrics import euclidean_dist
from votekit.pref_profile import RankProfile

# =================================================
# ================= API Functions =================
# =================================================


def onedim_spacial_profile_generator(
    candidates: Sequence[str],
    number_of_ballots: int,
) -> RankProfile:
    """
    Build a ranked profile from positions sampled on a one-dimensional line.

    Every candidate and every voter receives a position drawn from a standard
    normal distribution (mean 0, standard deviation 1). Each voter then ranks
    all candidates from nearest to farthest by absolute distance. Candidates
    at equal distance keep the order in which they appear in ``candidates``.

    Args:
        candidates (Sequence[str]): Names of the candidates to be ranked.
        number_of_ballots (int): The number of ballots to generate.

    Returns:
        RankProfile: A ranked preference profile with one full-length ballot
        per voter, each with weight 1 and an empty voter set.
    """
    # Sampling order matters for seeded reproducibility: one draw per
    # candidate first, then all voters in a single vectorised draw.
    cand_locations = {name: np.random.normal(0, 1) for name in candidates}
    voter_locations = np.random.normal(0, 1, number_of_ballots)
    num_cands = len(candidates)

    # Object-dtype grid of singleton frozensets; "~" is only a placeholder
    # that every row overwrites below.
    rankings = np.full(
        (number_of_ballots, num_cands), frozenset("~"), dtype=object
    )
    for row, voter_loc in enumerate(voter_locations):
        gaps = [
            (name, abs(cand_loc - voter_loc))
            for name, cand_loc in cand_locations.items()
        ]
        # list.sort is stable, so ties preserve candidate insertion order.
        gaps.sort(key=lambda pair: pair[1])
        rankings[row] = np.array([frozenset({pair[0]}) for pair in gaps])

    frame = pd.DataFrame(rankings)
    frame.index.name = "Ballot Index"
    frame.columns = [f"Ranking_{pos + 1}" for pos in range(num_cands)]  # type: ignore[misc]
    frame["Weight"] = 1
    frame["Voter Set"] = [frozenset()] * len(frame)

    return RankProfile(
        candidates=candidates,
        df=frame,
        max_ranking_length=num_cands,
    )
def spacial_profile_and_positions_generator(
    number_of_ballots: int,
    candidates: list[str],
    voter_dist: Callable[..., np.ndarray] = np.random.uniform,
    voter_dist_kwargs: Optional[Dict[str, Any]] = None,
    candidate_dist: Callable[..., np.ndarray] = np.random.uniform,
    candidate_dist_kwargs: Optional[Dict[str, Any]] = None,
    distance: Callable[[np.ndarray, np.ndarray], float] = euclidean_dist,
) -> Tuple[RankProfile, dict[str, np.ndarray], np.ndarray]:
    """
    Sample voter and candidate positions and rank candidates by distance.

    Samples a metric position for each of ``number_of_ballots`` voters from
    the voter distribution and a metric position for each candidate from the
    candidate distribution. With the sampled positions, this function creates
    a ranked RankProfile in which each voter's preferences are consistent with
    their distances to the candidates in the metric space (closest first;
    ties keep the order of ``candidates``).

    Args:
        number_of_ballots (int): The number of ballots to generate.
        candidates (list[str]): Names of the candidates to be ranked.
        voter_dist (Callable[..., np.ndarray]): Sampler called as
            ``voter_dist(**voter_dist_kwargs)`` to produce one voter position.
            Defaults to ``np.random.uniform``.
        voter_dist_kwargs (Optional[Dict[str, Any]]): Keyword arguments for
            ``voter_dist``. If None and ``voter_dist`` is
            ``np.random.uniform``, defaults to
            ``{"low": 0.0, "high": 1.0, "size": 2}``; if None otherwise, no
            keyword arguments are passed.
        candidate_dist (Callable[..., np.ndarray]): Sampler called as
            ``candidate_dist(**candidate_dist_kwargs)`` to produce one
            candidate position. Defaults to ``np.random.uniform``.
        candidate_dist_kwargs (Optional[Dict[str, Any]]): Keyword arguments
            for ``candidate_dist``, with the same defaulting rule as
            ``voter_dist_kwargs``.
        distance (Callable[[np.ndarray, np.ndarray], float]): Function taking
            a voter position and a candidate position and returning their
            distance. Defaults to ``euclidean_dist``.

    Returns:
        Tuple[RankProfile, dict[str, numpy.ndarray], numpy.ndarray]:
            A tuple containing the preference profile object,
            a dictionary with each candidate's position in the metric
            space, and a matrix where each row is a single voter's position
            in the metric space.

    Raises:
        TypeError: If either distribution rejects its keyword arguments, or
            if ``distance`` cannot be applied to a sampled voter/candidate
            pair.
    """
    if voter_dist_kwargs is None:
        if voter_dist is np.random.uniform:
            # ``size`` must be an int: NumPy rejects a float size with a
            # TypeError, which made the default configuration unusable.
            voter_dist_kwargs = {"low": 0.0, "high": 1.0, "size": 2}
        else:
            voter_dist_kwargs = {}

    # The probe draws below validate the configuration up front. They do
    # consume random numbers, so their order is kept stable for callers that
    # seed the global RNG.
    try:
        voter_dist(**voter_dist_kwargs)
    except TypeError as err:
        raise TypeError("Invalid kwargs for the voter distribution.") from err

    if candidate_dist_kwargs is None:
        if candidate_dist is np.random.uniform:
            candidate_dist_kwargs = {"low": 0.0, "high": 1.0, "size": 2}
        else:
            candidate_dist_kwargs = {}

    try:
        candidate_dist(**candidate_dist_kwargs)
    except TypeError as err:
        raise TypeError(
            "Invalid kwargs for the candidate distribution."
        ) from err

    try:
        v = voter_dist(**voter_dist_kwargs)
        c = candidate_dist(**candidate_dist_kwargs)
        distance(v, c)
    except TypeError as err:
        raise TypeError(
            "Distance function is invalid or incompatible "
            "with voter/candidate distributions."
        ) from err

    candidate_position_dict = {
        c: candidate_dist(**candidate_dist_kwargs) for c in candidates
    }
    voter_positions = np.array(
        [voter_dist(**voter_dist_kwargs) for _ in range(number_of_ballots)]
    )

    n_candidates = len(candidates)
    # Explicit object dtype, matching the other generators in this module;
    # "~" is only a placeholder that every row overwrites below.
    ballot_pool = np.full(
        (number_of_ballots, n_candidates), frozenset("~"), dtype=object
    )

    for i, v_position in enumerate(voter_positions):
        distance_tuples = [
            (c, distance(v_position, c_position))
            for c, c_position in candidate_position_dict.items()
        ]
        # sorted() is stable, so equidistant candidates keep input order.
        ballot_pool[i] = np.array(
            [
                frozenset({name})
                for name, _ in sorted(distance_tuples, key=lambda x: x[1])
            ]
        )

    df = pd.DataFrame(ballot_pool)
    df.index.name = "Ballot Index"
    df.columns = [f"Ranking_{i + 1}" for i in range(n_candidates)]  # type: ignore[misc]
    df["Weight"] = 1
    df["Voter Set"] = [frozenset()] * len(df)

    return (
        RankProfile(
            candidates=candidates,
            df=df,
            max_ranking_length=n_candidates,
        ),
        candidate_position_dict,
        voter_positions,
    )
def clustered_spacial_profile_and_positions_generator(
    number_of_ballots: dict[str, int],
    candidates: list[str],
    voter_dist: Callable[..., np.ndarray] = np.random.normal,
    voter_dist_kwargs: Optional[Dict[str, Any]] = None,
    candidate_dist: Callable[..., np.ndarray] = np.random.uniform,
    candidate_dist_kwargs: Optional[Dict[str, Any]] = None,
    distance: Callable[[np.ndarray, np.ndarray], float] = euclidean_dist,
) -> Tuple[RankProfile, dict[str, np.ndarray], np.ndarray]:
    """
    Sample candidates, cluster voters around them, and rank by distance.

    Samples a metric position for each candidate from the input candidate
    distribution. For each candidate, then samples
    ``number_of_ballots[candidate]`` metric positions for voters, centered on
    that candidate by passing its position as the ``loc`` keyword of
    ``voter_dist``. With the sampled positions, this function creates a ranked
    RankProfile in which each voter's preferences are consistent with their
    distances to the candidates in the metric space (closest first; ties keep
    the order of ``candidates``).

    Args:
        number_of_ballots (dict[str, int]): The number of voters attributed
            to each candidate ``{candidate string: # voters}``. Must contain
            an entry for every name in ``candidates``; keys that are not in
            ``candidates`` contribute no voters.
        candidates (list[str]): Names of the candidates to be ranked.
        voter_dist (Callable[..., np.ndarray]): Sampler for voter positions.
            Its ``__name__`` must be one of ``"normal"``, ``"laplace"``,
            ``"logistic"`` or ``"gumbel"``, and it must accept a ``loc``
            keyword. Defaults to ``np.random.normal``.
        voter_dist_kwargs (Optional[Dict[str, Any]]): Keyword arguments for
            ``voter_dist``. Any ``loc`` entry is overridden with the
            candidate's position. If None and ``voter_dist`` is
            ``np.random.normal``, defaults to ``{"scale": 1.0, "size": 2}``;
            if None otherwise, only ``loc`` is passed. The caller's
            dictionary is not modified.
        candidate_dist (Callable[..., np.ndarray]): Sampler called as
            ``candidate_dist(**candidate_dist_kwargs)`` to produce one
            candidate position. Defaults to ``np.random.uniform``.
        candidate_dist_kwargs (Optional[Dict[str, Any]]): Keyword arguments
            for ``candidate_dist``. If None and ``candidate_dist`` is
            ``np.random.uniform``, defaults to
            ``{"low": 0.0, "high": 1.0, "size": 2}``; if None otherwise, no
            keyword arguments are passed.
        distance (Callable[[np.ndarray, np.ndarray], float]): Function taking
            a voter position and a candidate position and returning their
            distance. Defaults to ``euclidean_dist``.

    Returns:
        Tuple[RankProfile, dict[str, numpy.ndarray], numpy.ndarray]:
            A tuple containing the preference profile object,
            a dictionary with each candidate's position in the metric
            space, and a matrix where each row is a single voter's position
            in the metric space. Voter rows are grouped by candidate, in the
            order of ``candidates``.

    Raises:
        ValueError: If ``voter_dist`` is not one of the supported
            distributions.
        TypeError: If either distribution rejects its keyword arguments, or
            if ``distance`` cannot be applied to a sampled voter/candidate
            pair.
        KeyError: If a candidate has no entry in ``number_of_ballots``.
    """
    if voter_dist_kwargs is None:
        if voter_dist is np.random.normal:
            # np.random.normal takes ``scale`` (not ``std``) and an integer
            # ``size``; the previous defaults always raised TypeError.
            voter_dist_kwargs = {"loc": 0, "scale": 1.0, "size": 2}
        else:
            voter_dist_kwargs = {}

    # Only location-parameterised samplers can be re-centered on a candidate.
    # getattr keeps callables without __name__ on the documented ValueError.
    if getattr(voter_dist, "__name__", None) not in [
        "normal",
        "laplace",
        "logistic",
        "gumbel",
    ]:
        raise ValueError("Input voter distribution not supported.")

    # Work on a copy so that overriding ``loc`` never leaks into the
    # dictionary owned by the caller.
    voter_dist_kwargs = {**voter_dist_kwargs, "loc": 0}

    # The probe draws below validate the configuration up front. They do
    # consume random numbers, so their order is kept stable for callers that
    # seed the global RNG.
    try:
        voter_dist(**voter_dist_kwargs)
    except TypeError as err:
        raise TypeError("Invalid kwargs for the voter distribution.") from err

    if candidate_dist_kwargs is None:
        if candidate_dist is np.random.uniform:
            candidate_dist_kwargs = {"low": 0.0, "high": 1.0, "size": 2}
        else:
            candidate_dist_kwargs = {}

    try:
        candidate_dist(**candidate_dist_kwargs)
    except TypeError as err:
        raise TypeError(
            "Invalid kwargs for the candidate distribution."
        ) from err

    try:
        v = voter_dist(**voter_dist_kwargs)
        c = candidate_dist(**candidate_dist_kwargs)
        distance(v, c)
    except TypeError as err:
        raise TypeError(
            "Distance function is invalid or incompatible "
            "with voter/candidate distributions."
        ) from err

    candidate_position_dict: dict[str, NDArray] = {
        c: candidate_dist(**candidate_dist_kwargs) for c in candidates
    }

    # Collect positions as they are drawn instead of pre-filling hard-coded
    # 2-D placeholders, so the dimensionality always follows the sampler.
    voter_positions: list[np.ndarray] = []
    for c, c_position in candidate_position_dict.items():
        centered_kwargs = {**voter_dist_kwargs, "loc": c_position}
        for _ in range(number_of_ballots[c]):
            voter_positions.append(voter_dist(**centered_kwargs))

    n_voters = len(voter_positions)
    n_candidates = len(candidates)
    # "~" is only a placeholder that every row overwrites below.
    ballot_pool = np.full(
        (n_voters, n_candidates), frozenset("~"), dtype=object
    )

    for i, v_position in enumerate(voter_positions):
        distance_tuples = [
            (c, distance(v_position, c_position))
            for c, c_position in candidate_position_dict.items()
        ]
        # sorted() is stable, so equidistant candidates keep input order.
        ballot_pool[i] = np.array(
            [
                frozenset({name})
                for name, _ in sorted(distance_tuples, key=lambda x: x[1])
            ]
        )

    voter_positions_array = np.vstack(voter_positions)

    df = pd.DataFrame(ballot_pool)
    df.index.name = "Ballot Index"
    df.columns = [f"Ranking_{i + 1}" for i in range(n_candidates)]  # type: ignore[misc]
    df["Weight"] = 1
    df["Voter Set"] = [frozenset()] * len(df)

    return (
        RankProfile(
            candidates=candidates,
            df=df,
            max_ranking_length=n_candidates,
        ),
        candidate_position_dict,
        voter_positions_array,
    )