Source code for votekit.ballot_generator.std_generator.spacial

"""
Generate ranked preference profiles using spatial models.

The main API functions in this module are:

- `onedim_spacial_profile_generator`: Generates a single preference profile using a
    one-dimensional spatial model.
- `spacial_profile_and_positions_generator`: Generates a single preference profile using a
    multi-dimensional spatial model with voters and candidates distributed according to specified
    distributions.
- `clustered_spacial_profile_and_positions_generator`: Generates a single preference profile using a
    clustered multi-dimensional spatial model where voters are clustered around candidates.
"""

import numpy as np
from numpy.typing import NDArray
import pandas as pd
from typing import Optional, Tuple, Callable, Dict, Any, Sequence

from votekit.metrics import euclidean_dist
from votekit.pref_profile import RankProfile

# =================================================
# ================= API Functions =================
# =================================================



[docs]
def onedim_spacial_profile_generator(
    candidates: Sequence[str],
    number_of_ballots: int,
) -> RankProfile:
    """
    Build a ranked preference profile from positions on a one-dimensional line.

    Every candidate and every voter receives a position drawn from a normal
    distribution with mean 0 and standard deviation 1. Each voter then ranks
    all candidates from nearest to farthest; candidates at exactly the same
    distance keep the order in which they were given.

    Args:
        candidates (Sequence[str]): Names of the candidates to be ranked.
        number_of_ballots (int): The number of ballots to generate.

    Returns:
        RankProfile: A ranked preference profile with one complete ranking
            per generated ballot, each with weight 1.
    """
    num_cands = len(candidates)

    # Draw order matters for seeded reproducibility: candidates one at a time
    # first, then every voter in a single call.
    cand_locations = {}
    for name in candidates:
        cand_locations[name] = np.random.normal(0, 1)
    voter_locations = np.random.normal(0, 1, number_of_ballots)

    # Every cell starts as the "~" placeholder and is overwritten row by row.
    rankings = np.full((number_of_ballots, num_cands), frozenset("~"), dtype=object)

    for row, voter_loc in enumerate(voter_locations):
        # Stable sort on absolute distance to this voter.
        nearest_first = sorted(
            cand_locations.items(),
            key=lambda item: abs(item[1] - voter_loc),
        )
        rankings[row] = np.array([frozenset({name}) for name, _ in nearest_first])

    ranking_columns = [f"Ranking_{pos}" for pos in range(1, num_cands + 1)]
    profile_df = pd.DataFrame(rankings)
    profile_df.columns = ranking_columns  # type: ignore[misc]
    profile_df.index.name = "Ballot Index"
    profile_df["Weight"] = 1
    profile_df["Voter Set"] = [frozenset()] * len(profile_df)

    return RankProfile(
        candidates=candidates,
        df=profile_df,
        max_ranking_length=num_cands,
    )




[docs]
def spacial_profile_and_positions_generator(
    number_of_ballots: int,
    candidates: list[str],
    voter_dist: Callable[..., np.ndarray] = np.random.uniform,
    voter_dist_kwargs: Optional[Dict[str, Any]] = None,
    candidate_dist: Callable[..., np.ndarray] = np.random.uniform,
    candidate_dist_kwargs: Optional[Dict[str, Any]] = None,
    distance: Callable[[np.ndarray, np.ndarray], float] = euclidean_dist,
) -> Tuple[RankProfile, dict[str, np.ndarray], np.ndarray]:
    """
    Samples a metric position for number_of_ballots voters from
    the voter distribution. Samples a metric position for each candidate
    from the input candidate distribution. With sampled
    positions, this method then creates a ranked RankProfile in which
    voter's preferences are consistent with their distances to the candidates
    in the metric space.

    Args:
        number_of_ballots (int): The number of ballots to generate.
        candidates (list[str]): Names of the candidates to be ranked.
        voter_dist (Callable[..., numpy.ndarray]): Function called once per
            voter to sample that voter's position. Defaults to
            ``numpy.random.uniform``.
        voter_dist_kwargs (Optional[Dict[str, Any]]): Keyword arguments passed
            to ``voter_dist`` on every call. If None and ``voter_dist`` is
            ``numpy.random.uniform``, ``low=0.0, high=1.0, size=2`` is used;
            if None for any other distribution, no keyword arguments are passed.
        candidate_dist (Callable[..., numpy.ndarray]): Function called once per
            candidate to sample that candidate's position. Defaults to
            ``numpy.random.uniform``.
        candidate_dist_kwargs (Optional[Dict[str, Any]]): Keyword arguments
            passed to ``candidate_dist``; defaults are chosen the same way as
            for ``voter_dist_kwargs``.
        distance (Callable[[numpy.ndarray, numpy.ndarray], float]): Function
            called as ``distance(voter_position, candidate_position)``; smaller
            values rank a candidate higher. Defaults to ``euclidean_dist``.

    Returns:
        Tuple[RankProfile, dict[str, numpy.ndarray], numpy.ndarray]:
            A tuple containing the preference profile object,
            a dictionary with each candidate's position in the metric
            space, and a matrix where each row is a single voter's position
            in the metric space.

    Raises:
        TypeError: If either distribution rejects its keyword arguments, or if
            ``distance`` cannot be evaluated on a sampled voter/candidate pair.
    """
    if voter_dist_kwargs is None:
        if voter_dist is np.random.uniform:
            # ``size`` must be an integer: numpy raises TypeError for a float.
            voter_dist_kwargs = {"low": 0.0, "high": 1.0, "size": 2}
        else:
            voter_dist_kwargs = {}

    # Trial draw: surfaces bad kwargs with a clear message before any real work.
    try:
        voter_dist(**voter_dist_kwargs)
    except TypeError as err:
        raise TypeError("Invalid kwargs for the voter distribution.") from err

    if candidate_dist_kwargs is None:
        if candidate_dist is np.random.uniform:
            candidate_dist_kwargs = {"low": 0.0, "high": 1.0, "size": 2}
        else:
            candidate_dist_kwargs = {}

    try:
        candidate_dist(**candidate_dist_kwargs)
    except TypeError as err:
        raise TypeError("Invalid kwargs for the candidate distribution.") from err

    # Check that the distance function accepts what the distributions produce.
    try:
        v = voter_dist(**voter_dist_kwargs)
        c = candidate_dist(**candidate_dist_kwargs)
        distance(v, c)
    except TypeError as err:
        raise TypeError(
            "Distance function is invalid or incompatible "
            "with voter/candidate distributions."
        ) from err

    # Candidates are sampled before voters; keep this order so that seeded
    # runs remain reproducible.
    candidate_position_dict = {
        c: candidate_dist(**candidate_dist_kwargs) for c in candidates
    }
    voter_positions = np.array(
        [voter_dist(**voter_dist_kwargs) for _ in range(number_of_ballots)]
    )

    n_candidates = len(candidates)
    # Every cell starts as the "~" placeholder and is overwritten row by row.
    ballot_pool = np.full(
        (number_of_ballots, n_candidates), frozenset("~"), dtype=object
    )

    for i, v_position in enumerate(voter_positions):
        distance_tuples = [
            (c, distance(v_position, c_position))
            for c, c_position in candidate_position_dict.items()
        ]
        # Stable sort: candidates at equal distance keep their input order.
        candidate_ranking = np.array(
            [frozenset({t[0]}) for t in sorted(distance_tuples, key=lambda x: x[1])]
        )
        ballot_pool[i] = candidate_ranking

    df = pd.DataFrame(ballot_pool)
    df.index.name = "Ballot Index"
    df.columns = [f"Ranking_{i + 1}" for i in range(n_candidates)]  # type: ignore[misc]
    df["Weight"] = 1
    df["Voter Set"] = [frozenset()] * len(df)
    return (
        RankProfile(
            candidates=candidates,
            df=df,
            max_ranking_length=n_candidates,
        ),
        candidate_position_dict,
        voter_positions,
    )




[docs]
def clustered_spacial_profile_and_positions_generator(
    number_of_ballots: dict[str, int],
    candidates: list[str],
    voter_dist: Callable[..., np.ndarray] = np.random.normal,
    voter_dist_kwargs: Optional[Dict[str, Any]] = None,
    candidate_dist: Callable[..., np.ndarray] = np.random.uniform,
    candidate_dist_kwargs: Optional[Dict[str, Any]] = None,
    distance: Callable[[np.ndarray, np.ndarray], float] = euclidean_dist,
) -> Tuple[RankProfile, dict[str, np.ndarray], np.ndarray]:
    """
    Samples a metric position for each candidate
    from the input candidate distribution. For each candidate, then sample
    number_of_ballots[candidate] metric positions for voters
    which will be centered around the candidate.
    With sampled positions, this method then creates a ranked RankProfile in which
    voter's preferences are consistent with their distances to the candidates
    in the metric space.

    Args:
        number_of_ballots (dict[str, int]): The number of voters attributed
                    to each candidate {candidate string: # voters}.
        candidates (list[str]): Names of the candidates to be ranked.
        voter_dist (Callable[..., numpy.ndarray]): Function used to sample
            voter positions. Its ``__name__`` must be one of ``normal``,
            ``laplace``, ``logistic`` or ``gumbel``, and it is called with a
            ``loc`` keyword set to the position of the voter's candidate.
            Defaults to ``numpy.random.normal``.
        voter_dist_kwargs (Optional[Dict[str, Any]]): Keyword arguments passed
            to ``voter_dist``. A copy is used, so the caller's dictionary is
            not modified; any ``loc`` entry is overridden. If None and
            ``voter_dist`` is ``numpy.random.normal``, ``scale=1.0, size=2``
            is used.
        candidate_dist (Callable[..., numpy.ndarray]): Function called once per
            candidate to sample that candidate's position. Defaults to
            ``numpy.random.uniform``.
        candidate_dist_kwargs (Optional[Dict[str, Any]]): Keyword arguments
            passed to ``candidate_dist``. If None and ``candidate_dist`` is
            ``numpy.random.uniform``, ``low=0.0, high=1.0, size=2`` is used.
        distance (Callable[[numpy.ndarray, numpy.ndarray], float]): Function
            called as ``distance(voter_position, candidate_position)``; smaller
            values rank a candidate higher. Defaults to ``euclidean_dist``.

    Returns:
        Tuple[RankProfile, dict[str, numpy.ndarray], numpy.ndarray]:
            A tuple containing the preference profile object,
            a dictionary with each candidate's position in the metric
            space, and a matrix where each row is a single voter's position
            in the metric space.

    Raises:
        ValueError: If the voter distribution is not one of the supported
            ones, or if ``number_of_ballots`` assigns voters to a name that is
            not in ``candidates``.
        TypeError: If either distribution rejects its keyword arguments, or if
            ``distance`` cannot be evaluated on a sampled voter/candidate pair.
        KeyError: If a candidate has no entry in ``number_of_ballots``.
    """
    if voter_dist_kwargs is None:
        if voter_dist is np.random.normal:
            # numpy.random.normal takes ``scale`` (not ``std``) and needs an
            # integer ``size``.
            voter_dist_kwargs = {"loc": 0, "scale": 1.0, "size": 2}
        else:
            voter_dist_kwargs = {}
    else:
        # Work on a copy: "loc" is overwritten below and the caller's
        # dictionary must not be modified.
        voter_dist_kwargs = dict(voter_dist_kwargs)

    if getattr(voter_dist, "__name__", None) not in [
        "normal",
        "laplace",
        "logistic",
        "gumbel",
    ]:
        raise ValueError("Input voter distribution not supported.")

    # Trial draw centered at 0 to surface bad kwargs with a clear message.
    voter_dist_kwargs["loc"] = 0
    try:
        voter_dist(**voter_dist_kwargs)
    except TypeError as err:
        raise TypeError("Invalid kwargs for the voter distribution.") from err

    if candidate_dist_kwargs is None:
        if candidate_dist is np.random.uniform:
            # ``size`` must be an integer: numpy raises TypeError for a float.
            candidate_dist_kwargs = {"low": 0.0, "high": 1.0, "size": 2}
        else:
            candidate_dist_kwargs = {}

    try:
        candidate_dist(**candidate_dist_kwargs)
    except TypeError as err:
        raise TypeError("Invalid kwargs for the candidate distribution.") from err

    # Check that the distance function accepts what the distributions produce.
    try:
        v = voter_dist(**voter_dist_kwargs)
        c = candidate_dist(**candidate_dist_kwargs)
        distance(v, c)
    except TypeError as err:
        raise TypeError(
            "Distance function is invalid or incompatible "
            "with voter/candidate distributions."
        ) from err

    # Voters attributed to an unknown name have no cluster center to be
    # sampled around, so reject them instead of inventing positions.
    known_candidates = set(candidates)
    stray = [
        name
        for name, count in number_of_ballots.items()
        if count > 0 and name not in known_candidates
    ]
    if stray:
        raise ValueError(
            f"number_of_ballots contains names that are not candidates: {stray}"
        )

    candidate_position_dict: dict[str, NDArray] = {
        c: candidate_dist(**candidate_dist_kwargs) for c in candidates
    }

    # Sample each candidate's voters around that candidate's position, in
    # candidate order.
    voter_positions = []
    for c, c_position in candidate_position_dict.items():
        centered_kwargs = {**voter_dist_kwargs, "loc": c_position}
        for _ in range(number_of_ballots[c]):
            voter_positions.append(voter_dist(**centered_kwargs))

    n_voters = len(voter_positions)
    n_candidates = len(candidates)
    # Every cell starts as the "~" placeholder and is overwritten row by row.
    ballot_pool = np.full((n_voters, n_candidates), frozenset("~"), dtype=object)
    for i, v_position in enumerate(voter_positions):
        distance_tuples = [
            (c, distance(v_position, c_position))
            for c, c_position in candidate_position_dict.items()
        ]
        # Stable sort: candidates at equal distance keep their input order.
        candidate_ranking = np.array(
            [frozenset({t[0]}) for t in sorted(distance_tuples, key=lambda x: x[1])]
        )
        ballot_pool[i] = candidate_ranking

    voter_positions_array = np.vstack(voter_positions)

    df = pd.DataFrame(ballot_pool)
    df.index.name = "Ballot Index"
    df.columns = [f"Ranking_{i + 1}" for i in range(n_candidates)]  # type: ignore[misc]
    df["Weight"] = 1
    df["Voter Set"] = [frozenset()] * len(df)
    return (
        RankProfile(
            candidates=candidates,
            df=df,
            max_ranking_length=n_candidates,
        ),
        candidate_position_dict,
        voter_positions_array,
    )