From 58885f1e9b79cd6d1daf2115819773f6823751af Mon Sep 17 00:00:00 2001
From: Alexander Mattick <alex.mattick@fau.de>
Date: Sun, 25 Dec 2022 13:39:21 +0100
Subject: [PATCH] simple scoring system for prompts and ranks

---
 scripts/postprocessing/scoring.py | 132 +++++++++++++++++++++++++++---
 1 file changed, 121 insertions(+), 11 deletions(-)

diff --git a/scripts/postprocessing/scoring.py b/scripts/postprocessing/scoring.py
index 362a1c63..47d85f3e 100644
--- a/scripts/postprocessing/scoring.py
+++ b/scripts/postprocessing/scoring.py
@@ -1,9 +1,10 @@
 # -*- coding: utf-8 -*-
-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 from typing import Any
 
 import numpy as np
 import numpy.typing as npt
+from scipy.stats import kendalltau
 
 
 @dataclass
@@ -18,7 +19,15 @@ class Voter:
     uid: Any
     num_votes: int
     num_good_votes: int
-    total_points: int
+    num_prompts: int
+    num_good_prompts: int
+    num_rankings: int
+    num_good_rankings: int
+
+    #####################
+    voting_points: int
+    prompt_points: int
+    ranking_points: int
 
     def voter_quality(self):
         return self.num_good_votes / self.num_votes
@@ -26,11 +35,22 @@ class Voter:
     def is_well_behaved(self, threshhold):
         return self.voter_quality() > threshhold
 
+    def total_points(self, voting_weight, prompt_weight, ranking_weight):
+        return (
+            voting_weight * self.voting_points
+            + prompt_weight * self.prompt_points
+            + ranking_weight * self.ranking_points
+        )
 
-def score_update(new_vote: int, consensus: npt.ArrayLike, voter_data: Voter) -> Voter:
+
+def score_update_votes(new_vote: int, consensus: npt.ArrayLike, voter_data: Voter) -> Voter:
     """
-    This function returns the new "quality score" and points for a voter.
-    I.e. a voter casts a vote on a question.
+    This function returns the new "quality score" and points for a voter,
+    after that voter cast a vote on a question.
+
+    This function is only to be run when archiving a question
+    i.e. the question has had sufficiently many votes, or we cann't get more than "K" bits of information
+
     The consensus is the array of all votes cast by all voters for that question
     We then update the voter data using the new information
 
@@ -45,21 +65,111 @@ def score_update(new_vote: int, consensus: npt.ArrayLike, voter_data: Voter) ->
     # produces the ranking of votes, e.g. for [100,300,200] it returns [0, 2, 1],
     # since 100 is the lowest, 300 the highest and 200 the middle value
     consensus_ranking = np.argsort(np.argsort(consensus))
-    new_points = consensus_ranking[new_vote] + voter_data.total_points
+    new_points = consensus_ranking[new_vote] + voter_data.voting_points
+
     # we need to correct for 0 indexing, if you are closer to "right" than "wrong" of the conensus,
     # it's a good vote
     new_good_votes = int(consensus_ranking[new_vote] > (len(consensus) - 1) / 2) + voter_data.num_good_votes
     new_num_votes = voter_data.num_votes + 1
-    return Voter(voter_data.uid, new_num_votes, new_good_votes, new_points)
+    return replace(voter_data, num_votes=new_num_votes, num_good_votes=new_good_votes, voting_points=new_points)
+
+
+def score_update_prompts(consensus: npt.ArrayLike, voter_data: Voter) -> Voter:
+    """
+    This function returns the gain of points for a given prompt's votes
+
+    This function is only to be run when archiving a question
+    i.e. the question has had sufficiently many votes, or we cann't get more than "K" bits of information
+
+    Parameters:
+            consensus (ArrayLike): all votes cast for this question
+            voter_data (Voter): a "Voter" object that represents the person that wrote the prompt
+
+        Returns:
+            updated_voter (Voter): the new "quality score" and points for the voter
+    """
+    # produces the ranking of votes, e.g. for [100,300,200] it returns [0, 2, 1],
+    # since 100 is the lowest, 300 the highest and 200 the middle value
+    consensus_ranking = np.argsort(np.argsort(consensus)) - len(consensus) // 2
+    delta_votes = np.sum(consensus_ranking * consensus)
+    new_points = delta_votes + voter_data.prompt_points
+
+    # we need to correct for 0 indexing, if you are closer to "right" than "wrong" of the conensus,
+    # it's a good vote
+    new_good_prompts = int(delta_votes > 0) + voter_data.num_good_prompts
+    new_num_prompts = voter_data.num_prompts + 1
+    return replace(
+        voter_data,
+        num_prompts=new_num_prompts,
+        num_good_prompts=new_good_prompts,
+        prompt_points=new_points,
+    )
+
+
+def score_update_ranking(user_ranking: npt.ArrayLike, consensus_ranking: npt.ArrayLike, voter_data: Voter) -> Voter:
+    """
+    This function returns the gain of points for a given ranking's votes
+
+    This function is only to be run when archiving a question
+    i.e. the question has had sufficiently many votes, or we cann't get more than "K" bits of information
+
+    we use the bubble-sort distance (or "kendall-tau" distance) to compare the two rankings
+    we use this over spearman correlation since:
+        "[Kendall's τ] approaches a normal distribution more rapidly than ρ, as N, the sample size, increases;
+            and τ is also more tractable mathematically, particularly when ties are present"
+    Gilpin, A. R. (1993). Table for conversion of Kendall's Tau to Spearman's
+     Rho within the context measures of magnitude of effect for meta-analysis
+
+    Further in
+        "research design and statistical analyses, second edition, 2003"
+    the authors note that at least from an significance test POV they will yield the same p-values
+
+    Parameters:
+            user_ranking (ArrayLike): ranking produced by the user
+            consensus (ArrayLike): ranking produced after running the voting algorithm to merge into the consensus ranking
+            voter_data (Voter): a "Voter" object that represents the person that wrote the prompt
+
+        Returns:
+            updated_voter (Voter): the new "quality score" and points for the voter
+    """
+    bubble_sort_distance, p_value = kendalltau(user_ranking, consensus_ranking)
+    # normalize kendall-tau from [-1,1] into [0,1] range
+    bubble_sort_distance = (1 + bubble_sort_distance) / 2
+    new_points = bubble_sort_distance + voter_data.ranking_points
+    # it's a good ranking if the likelihood that the consensus ranking and the
+    # user ranking are related
+    new_good_rankings = int(p_value < 0.5) + voter_data.num_good_rankings
+    new_num_rankings = voter_data.num_rankings + 1
+    return replace(
+        voter_data,
+        num_rankings=new_num_rankings,
+        num_good_rankings=new_good_rankings,
+        ranking_points=new_points,
+    )
 
 
 if __name__ == "__main__":
-    demo_voter = Voter("abc", 10, 2, 6)
+    demo_voter = Voter(
+        "abc",
+        num_votes=10,
+        num_good_votes=2,
+        num_prompts=10,
+        num_good_prompts=2,
+        num_rankings=10,
+        num_good_rankings=2,
+        voting_points=6,
+        prompt_points=0,
+        ranking_points=0,
+    )
     new_vote = 3
     consensus = np.array([200, 300, 100, 500])
     print(demo_voter)
-    print("best   vote", score_update(new_vote, consensus, demo_voter))
+    print("best   vote  ", score_update_votes(new_vote, consensus, demo_voter))
     new_vote = 2
-    print("worst  vote", score_update(new_vote, consensus, demo_voter))
+    print("worst  vote  ", score_update_votes(new_vote, consensus, demo_voter))
     new_vote = 1
-    print("medium vote", score_update(new_vote, consensus, demo_voter))
+    print("medium vote  ", score_update_votes(new_vote, consensus, demo_voter))
+    print("prompt writer", score_update_prompts(consensus, demo_voter))
+    print("best   rank  ", score_update_ranking(np.array([0, 2, 1]), np.array([0, 2, 1]), demo_voter))
+    print("medium rank  ", score_update_ranking(np.array([2, 0, 1]), np.array([0, 2, 1]), demo_voter))
+    print("worst  rank  ", score_update_ranking(np.array([1, 0, 2]), np.array([0, 2, 1]), demo_voter))