From 58885f1e9b79cd6d1daf2115819773f6823751af Mon Sep 17 00:00:00 2001 From: Alexander Mattick Date: Sun, 25 Dec 2022 13:39:21 +0100 Subject: [PATCH] simple scoring system for prompts and ranks --- scripts/postprocessing/scoring.py | 132 +++++++++++++++++++++++++++--- 1 file changed, 121 insertions(+), 11 deletions(-) diff --git a/scripts/postprocessing/scoring.py b/scripts/postprocessing/scoring.py index 362a1c63..47d85f3e 100644 --- a/scripts/postprocessing/scoring.py +++ b/scripts/postprocessing/scoring.py @@ -1,9 +1,10 @@ # -*- coding: utf-8 -*- -from dataclasses import dataclass +from dataclasses import dataclass, replace from typing import Any import numpy as np import numpy.typing as npt +from scipy.stats import kendalltau @dataclass @@ -18,7 +19,15 @@ class Voter: uid: Any num_votes: int num_good_votes: int - total_points: int + num_prompts: int + num_good_prompts: int + num_rankings: int + num_good_rankings: int + + ##################### + voting_points: int + prompt_points: int + ranking_points: int def voter_quality(self): return self.num_good_votes / self.num_votes @@ -26,11 +35,22 @@ class Voter: def is_well_behaved(self, threshhold): return self.voter_quality() > threshhold + def total_points(self, voting_weight, prompt_weight, ranking_weight): + return ( + voting_weight * self.voting_points + + prompt_weight * self.prompt_points + + ranking_weight * self.ranking_points + ) -def score_update(new_vote: int, consensus: npt.ArrayLike, voter_data: Voter) -> Voter: + +def score_update_votes(new_vote: int, consensus: npt.ArrayLike, voter_data: Voter) -> Voter: """ - This function returns the new "quality score" and points for a voter. - I.e. a voter casts a vote on a question. + This function returns the new "quality score" and points for a voter, + after that voter cast a vote on a question. + + This function is only to be run when archiving a question + i.e. 
the question has had sufficiently many votes, or we cannot get more than "K" bits of information + The consensus is the array of all votes cast by all voters for that question We then update the voter data using the new information @@ -45,21 +65,111 @@ def score_update(new_vote: int, consensus: npt.ArrayLike, voter_data: Voter) -> # produces the ranking of votes, e.g. for [100,300,200] it returns [0, 2, 1], # since 100 is the lowest, 300 the highest and 200 the middle value consensus_ranking = np.argsort(np.argsort(consensus)) - new_points = consensus_ranking[new_vote] + voter_data.total_points + new_points = consensus_ranking[new_vote] + voter_data.voting_points + # we need to correct for 0 indexing, if you are closer to "right" than "wrong" of the conensus, # it's a good vote new_good_votes = int(consensus_ranking[new_vote] > (len(consensus) - 1) / 2) + voter_data.num_good_votes new_num_votes = voter_data.num_votes + 1 - return Voter(voter_data.uid, new_num_votes, new_good_votes, new_points) + return replace(voter_data, num_votes=new_num_votes, num_good_votes=new_good_votes, voting_points=new_points) + + +def score_update_prompts(consensus: npt.ArrayLike, voter_data: Voter) -> Voter: + """ + This function returns the gain of points for a given prompt's votes + + This function is only to be run when archiving a question + i.e. the question has had sufficiently many votes, or we cannot get more than "K" bits of information + + Parameters: + consensus (ArrayLike): all votes cast for this question + voter_data (Voter): a "Voter" object that represents the person that wrote the prompt + + Returns: + updated_voter (Voter): the new "quality score" and points for the voter + """ + # produces the ranking of votes, e.g. 
for [100,300,200] it returns [0, 2, 1], + # since 100 is the lowest, 300 the highest and 200 the middle value + consensus_ranking = np.argsort(np.argsort(consensus)) - len(consensus) // 2 + delta_votes = np.sum(consensus_ranking * consensus) + new_points = delta_votes + voter_data.prompt_points + + # the prompt counts as a good prompt when the rank-weighted sum of its votes + # (delta_votes) is positive + new_good_prompts = int(delta_votes > 0) + voter_data.num_good_prompts + new_num_prompts = voter_data.num_prompts + 1 + return replace( + voter_data, + num_prompts=new_num_prompts, + num_good_prompts=new_good_prompts, + prompt_points=new_points, + ) + + +def score_update_ranking(user_ranking: npt.ArrayLike, consensus_ranking: npt.ArrayLike, voter_data: Voter) -> Voter: + """ + This function returns the gain of points for a given ranking's votes + + This function is only to be run when archiving a question + i.e. the question has had sufficiently many votes, or we cannot get more than "K" bits of information + + we use the bubble-sort distance (or "kendall-tau" distance) to compare the two rankings + we use this over spearman correlation since: + "[Kendall's τ] approaches a normal distribution more rapidly than ρ, as N, the sample size, increases; + and τ is also more tractable mathematically, particularly when ties are present" + Gilpin, A. R. (1993). 
Table for conversion of Kendall's Tau to Spearman's + Rho within the context of measures of magnitude of effect for meta-analysis + + Further in + "research design and statistical analyses, second edition, 2003" + the authors note that, at least from a significance-test point of view, they will yield the same p-values + + Parameters: + user_ranking (ArrayLike): ranking produced by the user + consensus_ranking (ArrayLike): ranking produced after running the voting algorithm to merge into the consensus ranking + voter_data (Voter): a "Voter" object that represents the person that produced the ranking + + Returns: + updated_voter (Voter): the new "quality score" and points for the voter + """ + bubble_sort_distance, p_value = kendalltau(user_ranking, consensus_ranking) + # normalize kendall-tau from [-1,1] into [0,1] range + bubble_sort_distance = (1 + bubble_sort_distance) / 2 + new_points = bubble_sort_distance + voter_data.ranking_points + # it counts as a good ranking when the kendall-tau p-value is below 0.5, i.e. the + # user ranking and the consensus ranking are likely to be related + new_good_rankings = int(p_value < 0.5) + voter_data.num_good_rankings + new_num_rankings = voter_data.num_rankings + 1 + return replace( + voter_data, + num_rankings=new_num_rankings, + num_good_rankings=new_good_rankings, + ranking_points=new_points, + ) if __name__ == "__main__": - demo_voter = Voter("abc", 10, 2, 6) + demo_voter = Voter( + "abc", + num_votes=10, + num_good_votes=2, + num_prompts=10, + num_good_prompts=2, + num_rankings=10, + num_good_rankings=2, + voting_points=6, + prompt_points=0, + ranking_points=0, + ) new_vote = 3 consensus = np.array([200, 300, 100, 500]) print(demo_voter) - print("best vote", score_update(new_vote, consensus, demo_voter)) + print("best vote ", score_update_votes(new_vote, consensus, demo_voter)) new_vote = 2 - print("worst vote", score_update(new_vote, consensus, demo_voter)) + print("worst vote ", score_update_votes(new_vote, consensus, demo_voter)) new_vote = 1 - print("medium vote", score_update(new_vote, 
consensus, demo_voter)) + print("medium vote ", score_update_votes(new_vote, consensus, demo_voter)) + print("prompt writer", score_update_prompts(consensus, demo_voter)) + print("best rank ", score_update_ranking(np.array([0, 2, 1]), np.array([0, 2, 1]), demo_voter)) + print("medium rank ", score_update_ranking(np.array([2, 0, 1]), np.array([0, 2, 1]), demo_voter)) + print("worst rank ", score_update_ranking(np.array([1, 0, 2]), np.array([0, 2, 1]), demo_voter))