from votekit.ballot import RankBallot
from votekit.pref_profile import RankProfile
import numpy as np
import itertools as it
from votekit.matrices._utils import _convert_dict_to_matrix
[docs]
def candidate_distance(i: str, j: str, ballot: RankBallot) -> float:
"""
Takes candidates i,j and returns distance r(j)-r(i) in ranking.
Returns numpy.nan if a candidate is not on ballot. Note that this is non-symmetric,
and that a positive value indicates that i is ranked higher than j.
Args:
i (str): Candidate.
j (str): Candidate.
ballot (RankBallot): RankBallot.
Returns:
float: Distance r(j)-r(i) in ranking.
"""
if not isinstance(ballot, RankBallot):
raise TypeError("Ballot must be of type RankBallot.")
if ballot.ranking is None:
raise TypeError("RankBallot must have a ranking.")
positions = {i: -1, j: -1}
for position, s in enumerate(ballot.ranking):
if i in s:
positions[i] = position
if j in s:
positions[j] = position
if -1 in positions.values():
return np.nan
else:
return positions[j] - positions[i]
[docs]
def candidate_distance_matrix(
pref_profile: RankProfile, candidates: list[str]
) -> np.ndarray:
"""
Takes a preference profile and converts to a matrix
where the i,j entry shows the average distance between i and j when i >= j on the same
ballot. Computations use ballot weight. Non-symmetric.
Uses numpy.nan for undefined entries.
Args:
pref_profile (RankProfile): Profile.
candidates (list[str]): List of candidates to use. Indexing of this list matches indexing of
output array.
Returns:
np.ndarray: Numpy array of average distances.
"""
dist_matrix = {c: {c: 0.0 for c in candidates} for c in candidates}
weight_matrix = {c: {c: 0.0 for c in candidates} for c in candidates}
avg_dist_matrix = {c: {c: 0.0 for c in candidates} for c in candidates}
for i, j in it.combinations_with_replacement(candidates, 2):
for ballot in pref_profile.ballots:
d = candidate_distance(i, j, ballot)
# i >= j
if d >= 0:
dist_matrix[i][j] += d * ballot.weight
weight_matrix[i][j] += ballot.weight
# i < j
elif d < 0:
dist_matrix[j][i] += (-d) * ballot.weight
weight_matrix[j][i] += ballot.weight
for c, row in dist_matrix.items():
for k, v in row.items():
avg_dist_matrix[c][k] = (
float(v / weight_matrix[c][k]) if weight_matrix[c][k] > 0 else np.nan
)
return _convert_dict_to_matrix(avg_dist_matrix)