From e643db758273e9ac2cd4bde0ff27f76b46849341 Mon Sep 17 00:00:00 2001
From: lsabor
Date: Fri, 30 Aug 2024 16:04:11 -0700
Subject: [PATCH] tweak new peer leaderboard and remove bots from aggregates
 by default

---
 front_end/src/components/prediction_chip.tsx |  4 +--
 migrator/services/migrate_scoring.py         |  2 ++
 scoring/score_math.py                        | 35 +++++++++++++++-----
 scoring/utils.py                             | 16 +++++++--
 utils/the_math/community_prediction.py       | 12 +++++--
 5 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/front_end/src/components/prediction_chip.tsx b/front_end/src/components/prediction_chip.tsx
index 6db61def7..d0ac9167a 100644
--- a/front_end/src/components/prediction_chip.tsx
+++ b/front_end/src/components/prediction_chip.tsx
@@ -42,8 +42,8 @@ const PredictionChip: FC = ({
     locale
   );
 
-  const aggregate = question.aggregations.recency_weighted;
-  const lastUserForecast = aggregate.history[aggregate.history.length - 1];
+  const aggregate = question.aggregations?.recency_weighted;
+  const lastUserForecast = aggregate?.history[aggregate.history.length - 1];
 
   switch (status) {
     case PostStatus.PENDING:
diff --git a/migrator/services/migrate_scoring.py b/migrator/services/migrate_scoring.py
index 52e976a91..2b171a881 100644
--- a/migrator/services/migrate_scoring.py
+++ b/migrator/services/migrate_scoring.py
@@ -68,6 +68,7 @@ def migrate_archived_scores():
 
 
 def score_questions(qty: int | None = None, start_id: int = 0):
+    fab_questions = Leaderboard.objects.get(project__slug="aibq3").get_questions()
     questions = (
         Question.objects.filter(
             resolution__isnull=False,
@@ -115,6 +116,7 @@ def score_questions(qty: int | None = None, start_id: int = 0):
             question,
             question.resolution,
             score_types=score_types,
+            include_bots_in_aggregates=question in fab_questions,
         )
         print(
             f"\033[Kscoring question {i:>4}/{c} ID:{question.id:<4} forecasts:{f:<4} "
diff --git a/scoring/score_math.py b/scoring/score_math.py
index ae2050d92..8fa821310 100644
--- a/scoring/score_math.py
+++ b/scoring/score_math.py
@@ -19,17 +19,25 @@ class AggregationEntry:
 
 def get_geometric_means(
     forecasts: list[Forecast | AggregateForecast],
+    include_bots: bool = False,
 ) -> list[AggregationEntry]:
+    included_forecasts = forecasts
+    if not include_bots:
+        included_forecasts = [
+            f
+            for f in forecasts
+            if (isinstance(f, AggregateForecast) or f.author.is_bot is False)
+        ]
     geometric_means = []
     timesteps: set[datetime] = set()
-    for forecast in forecasts:
+    for forecast in included_forecasts:
         timesteps.add(forecast.start_time.timestamp())
         if forecast.end_time:
             timesteps.add(forecast.end_time.timestamp())
     for timestep in sorted(timesteps):
         prediction_values = [
             f.get_pmf()
-            for f in forecasts
+            for f in included_forecasts
             if f.start_time.timestamp() <= timestep
             and (f.end_time is None or f.end_time.timestamp() > timestep)
         ]
@@ -158,9 +166,12 @@ def evaluate_forecasts_peer_accuracy(
     forecast_horizon_end: float,
     question_type: str,
     geometric_means: list[AggregationEntry] | None = None,
+    include_bots_in_geometric_mean: bool = False,
 ) -> list[ForecastScore]:
     base_forecasts = base_forecasts or forecasts
-    geometric_mean_forecasts = geometric_means or get_geometric_means(base_forecasts)
+    geometric_mean_forecasts = geometric_means or get_geometric_means(
+        base_forecasts, include_bots_in_geometric_mean
+    )
     for gm in geometric_mean_forecasts:
         gm.timestamp = max(gm.timestamp, forecast_horizon_start)
     total_duration = forecast_horizon_end - forecast_horizon_start
@@ -216,9 +227,12 @@ def evaluate_forecasts_peer_spot_forecast(
     spot_forecast_timestamp: float,
     question_type: str,
     geometric_means: list[AggregationEntry] | None = None,
+    include_bots_in_geometric_mean: bool = False,
 ) -> list[ForecastScore]:
     base_forecasts = base_forecasts or forecasts
-    geometric_mean_forecasts = geometric_means or get_geometric_means(base_forecasts)
+    geometric_mean_forecasts = geometric_means or get_geometric_means(
+        base_forecasts, include_bots_in_geometric_mean
+    )
     g = None
     for gm in geometric_mean_forecasts[::-1]:
         if gm.timestamp < spot_forecast_timestamp:
@@ -254,7 +268,6 @@ def evaluate_forecasts_legacy_relative(
     resolution_bucket: int,
     forecast_horizon_start: float,
     actual_close_time: float,
-    forecast_horizon_end: float,
 ) -> list[ForecastScore]:
     baseline_forecasts = [
         AggregationEntry(
@@ -309,6 +322,7 @@ def evaluate_question(
     resolution_bucket: int,
     score_types: list[Score.ScoreTypes],
     spot_forecast_timestamp: float | None = None,
+    include_bots_in_aggregates: bool = False,
 ) -> list[Score]:
     forecast_horizon_start = question.open_time.timestamp()
     actual_close_time = question.actual_close_time.timestamp()
@@ -319,12 +333,15 @@ def evaluate_question(
         question,
         minimize=False,
         aggregation_method=AggregationMethod.RECENCY_WEIGHTED,
+        include_bots=include_bots_in_aggregates,
     )
 
     geometric_means: list[AggregationEntry] = []
     ScoreTypes = Score.ScoreTypes
     if ScoreTypes.PEER in score_types:
-        geometric_means = get_geometric_means(user_forecasts)
+        geometric_means = get_geometric_means(
+            user_forecasts, include_bots=include_bots_in_aggregates
+        )
 
     scores: list[Score] = []
     for score_type in score_types:
@@ -379,6 +396,7 @@ def evaluate_question(
                 forecast_horizon_end,
                 question.type,
                 geometric_means=geometric_means,
+                include_bots_in_geometric_mean=include_bots_in_aggregates,
             )
             community_scores = evaluate_forecasts_peer_accuracy(
                 community_forecasts,
@@ -389,6 +407,7 @@ def evaluate_question(
                 forecast_horizon_end,
                 question.type,
                 geometric_means=geometric_means,
+                include_bots_in_geometric_mean=include_bots_in_aggregates,
             )
         case ScoreTypes.SPOT_PEER:
             user_scores = evaluate_forecasts_peer_spot_forecast(
@@ -398,6 +417,7 @@ def evaluate_question(
                 spot_forecast_timestamp,
                 question.type,
                 geometric_means=geometric_means,
+                include_bots_in_geometric_mean=include_bots_in_aggregates,
             )
             community_scores = evaluate_forecasts_peer_spot_forecast(
                 community_forecasts,
@@ -406,6 +426,7 @@ def evaluate_question(
                 spot_forecast_timestamp,
                 question.type,
                 geometric_means=geometric_means,
+                include_bots_in_geometric_mean=include_bots_in_aggregates,
             )
         case ScoreTypes.RELATIVE_LEGACY:
             user_scores = evaluate_forecasts_legacy_relative(
@@ -414,7 +435,6 @@ def evaluate_question(
                 resolution_bucket,
                 forecast_horizon_start,
                 actual_close_time,
-                forecast_horizon_end,
             )
             community_scores = evaluate_forecasts_legacy_relative(
                 community_forecasts,
@@ -422,7 +442,6 @@ def evaluate_question(
                 resolution_bucket,
                 forecast_horizon_start,
                 actual_close_time,
-                forecast_horizon_end,
             )
         case other:
             raise NotImplementedError(f"Score type {other} not implemented")
diff --git a/scoring/utils.py b/scoring/utils.py
index 02423a716..1f452f0b5 100644
--- a/scoring/utils.py
+++ b/scoring/utils.py
@@ -28,6 +28,7 @@ def score_question(
     resolution: str,
     spot_forecast_time: float | None = None,
     score_types: list[str] | None = None,
+    include_bots_in_aggregates: bool = False,
 ):
     resolution_bucket = string_location_to_bucket_index(resolution, question)
     spot_forecast_time = spot_forecast_time or question.cp_reveal_time.timestamp()
@@ -37,7 +38,11 @@ def score_question(
         Score.objects.filter(question=question, score_type__in=score_types)
     )
     new_scores = evaluate_question(
-        question, resolution_bucket, score_types, spot_forecast_time
+        question,
+        resolution_bucket,
+        score_types,
+        spot_forecast_time,
+        include_bots_in_aggregates,
     )
     for new_score in new_scores:
         is_new = True
@@ -111,7 +116,7 @@ def generate_scoring_leaderboard_entries(
                 calculated_on=now,
             )
         entries[identifier].score += score.score
-        entries[identifier].coverage += score.coverage / maximum_coverage
+        entries[identifier].coverage += score.coverage
        entries[identifier].contribution_count += 1
    if leaderboard.score_type == Leaderboard.ScoreTypes.PEER_GLOBAL:
        for entry in entries.values():
@@ -121,6 +126,7 @@ def generate_scoring_leaderboard_entries(
             entry.score /= max(40, entry.contribution_count)
     elif leaderboard.score_type == Leaderboard.ScoreTypes.RELATIVE_LEGACY_TOURNAMENT:
         for entry in entries.values():
+            entry.coverage /= maximum_coverage
             entry.take = max(entry.coverage * np.exp(entry.score), 0)
         return sorted(entries.values(), key=lambda entry: entry.take, reverse=True)
     return sorted(entries.values(), key=lambda entry: entry.score, reverse=True)
@@ -290,7 +296,11 @@ def update_project_leaderboard(
     rank = 1
     prev_entry = None
     for entry in new_entries:
-        if (entry.user_id is None) or (entry.user_id in excluded_users):
+        if (
+            (entry.user_id is None)
+            or (entry.user_id in excluded_users)
+            or (entry.user.is_bot and "global" in leaderboard.score_type)
+        ):
             entry.excluded = True
             entry.medal = None
         entry.rank = rank
diff --git a/utils/the_math/community_prediction.py b/utils/the_math/community_prediction.py
index 8f0adfad9..b62379a8d 100644
--- a/utils/the_math/community_prediction.py
+++ b/utils/the_math/community_prediction.py
@@ -123,12 +123,15 @@ def get_aggregation_at_time(
     include_stats: bool = False,
     histogram: bool = False,
     aggregation_method: AggregationMethod = AggregationMethod.RECENCY_WEIGHTED,
+    include_bots: bool = False,
 ) -> AggregateForecast | None:
     """set include_stats to True if you want to include
     num_forecasters, q1s, medians, and q3s"""
     forecasts = question.user_forecasts.filter(
         Q(end_time__isnull=True) | Q(end_time__gt=time), start_time__lte=time
     ).order_by("start_time")
+    if not include_bots:
+        forecasts = forecasts.exclude(author__is_bot=True)
     if forecasts.count() == 0:
         return None
     forecast_set = ForecastSet(
@@ -203,9 +206,11 @@ def find_index_of_middle(forecasts: list[AggregateForecast]) -> int:
 
 
 def get_user_forecast_history(
-    question: Question, minimize: bool = False
+    question: Question, minimize: bool = False, include_bots: bool = False
 ) -> list[ForecastSet]:
     forecasts = question.user_forecasts.order_by("start_time").all()
+    if not include_bots:
+        forecasts = forecasts.exclude(author__is_bot=True)
     timestamps = set()
     for forecast in forecasts:
         timestamps.add(forecast.start_time)
@@ -242,10 +247,13 @@ def get_cp_history(
     aggregation_method: AggregationMethod = AggregationMethod.RECENCY_WEIGHTED,
     minimize: bool = True,
     include_stats: bool = True,
+    include_bots: bool = False,
 ) -> list[AggregateForecast]:
     full_summary: list[AggregateForecast] = []
 
-    forecast_history = get_user_forecast_history(question, minimize=minimize)
+    forecast_history = get_user_forecast_history(
+        question, minimize=minimize, include_bots=include_bots
+    )
     for i, forecast_set in enumerate(forecast_history):
         if aggregation_method == AggregationMethod.RECENCY_WEIGHTED:
             weights = generate_recency_weights(len(forecast_set.forecasts_values))
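-- 

A minimal usage sketch of the new flag, assuming only the signatures introduced
above plus a resolved Question instance: the boolean enters at score_question,
is threaded through evaluate_question into get_geometric_means and the
aggregation history, and defaults to False, so bot forecasts stay out of every
aggregate unless a caller opts in, as migrate_scoring.py does for questions on
the "aibq3" leaderboard.

    # Hypothetical call: rescore one question while keeping bot forecasts
    # in the aggregates (everywhere else the new default excludes them).
    score_question(
        question,
        question.resolution,
        score_types=[Score.ScoreTypes.PEER],
        include_bots_in_aggregates=True,
    )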