tweak new peer leaderboard and remove bots from aggregates by default by lsabor · Pull Request #359 · Metaculus/metaculus · GitHub

tweak new peer leaderboard and remove bots from aggregates by default #359


Merged · 1 commit · Aug 30, 2024
4 changes: 2 additions & 2 deletions front_end/src/components/prediction_chip.tsx
@@ -42,8 +42,8 @@ const PredictionChip: FC<Props> = ({
     locale
   );
 
-  const aggregate = question.aggregations.recency_weighted;
-  const lastUserForecast = aggregate.history[aggregate.history.length - 1];
+  const aggregate = question.aggregations?.recency_weighted;
+  const lastUserForecast = aggregate?.history[aggregate.history.length - 1];
 
   switch (status) {
     case PostStatus.PENDING:
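The rewritten lines add a null guard: `question.aggregations` or its `recency_weighted` entry can be missing before any forecasts exist, so both reads are optional-chained and `lastUserForecast` becomes `undefined` instead of throwing. The same defensive pattern, sketched in Python terms for readers following the rest of this PR (stand-in attribute names, illustrative only):

# Illustrative Python analogue of the optional-chaining guard above.
# `aggregations` and `history` are stand-ins for the TSX fields, not real model fields.
aggregate = getattr(question.aggregations, "recency_weighted", None)
last_user_forecast = aggregate.history[-1] if aggregate and aggregate.history else None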
2 changes: 2 additions & 0 deletions migrator/services/migrate_scoring.py
@@ -68,6 +68,7 @@ def migrate_archived_scores():
 
 
 def score_questions(qty: int | None = None, start_id: int = 0):
+    fab_questions = Leaderboard.objects.get(project__slug="aibq3").get_questions()
     questions = (
         Question.objects.filter(
             resolution__isnull=False,
@@ -115,6 +116,7 @@ def score_questions(qty: int | None = None, start_id: int = 0):
             question,
             question.resolution,
             score_types=score_types,
+            include_bots_in_aggregates=question in fab_questions,
         )
         print(
             f"\033[Kscoring question {i:>4}/{c} ID:{question.id:<4} forecasts:{f:<4} "
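One caveat on the gating above: if `get_questions()` returns a queryset, `question in fab_questions` can issue a database query on every loop iteration. A precomputed ID set keeps the membership test in memory; a sketch, assuming `get_questions()` supports `values_list` (not verified against this codebase):

# Hypothetical variant: materialize the AI-benchmarking question IDs once,
# then test membership against an in-memory set inside the scoring loop.
fab_question_ids = set(
    Leaderboard.objects.get(project__slug="aibq3")
    .get_questions()
    .values_list("id", flat=True)
)
# ... per question in the loop:
include_bots_in_aggregates = question.id in fab_question_ids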
35 changes: 27 additions & 8 deletions scoring/score_math.py
@@ -19,17 +19,25 @@ class AggregationEntry:
 
 def get_geometric_means(
     forecasts: list[Forecast | AggregateForecast],
+    include_bots: bool = False,
 ) -> list[AggregationEntry]:
+    included_forecasts = forecasts
+    if not include_bots:
+        included_forecasts = [
+            f
+            for f in forecasts
+            if (isinstance(f, AggregateForecast) or f.author.is_bot is False)
+        ]
     geometric_means = []
     timesteps: set[datetime] = set()
-    for forecast in forecasts:
+    for forecast in included_forecasts:
         timesteps.add(forecast.start_time.timestamp())
         if forecast.end_time:
             timesteps.add(forecast.end_time.timestamp())
     for timestep in sorted(timesteps):
         prediction_values = [
             f.get_pmf()
-            for f in forecasts
+            for f in included_forecasts
             if f.start_time.timestamp() <= timestep
             and (f.end_time is None or f.end_time.timestamp() > timestep)
         ]
@@ -158,9 +166,12 @@ def evaluate_forecasts_peer_accuracy(
     forecast_horizon_end: float,
     question_type: str,
     geometric_means: list[AggregationEntry] | None = None,
+    include_bots_in_geometric_mean: bool = False,
 ) -> list[ForecastScore]:
     base_forecasts = base_forecasts or forecasts
-    geometric_mean_forecasts = geometric_means or get_geometric_means(base_forecasts)
+    geometric_mean_forecasts = geometric_means or get_geometric_means(
+        base_forecasts, include_bots_in_geometric_mean
+    )
     for gm in geometric_mean_forecasts:
         gm.timestamp = max(gm.timestamp, forecast_horizon_start)
     total_duration = forecast_horizon_end - forecast_horizon_start
@@ -216,9 +227,12 @@ def evaluate_forecasts_peer_spot_forecast(
     spot_forecast_timestamp: float,
     question_type: str,
     geometric_means: list[AggregationEntry] | None = None,
+    include_bots_in_geometric_mean: bool = False,
 ) -> list[ForecastScore]:
     base_forecasts = base_forecasts or forecasts
-    geometric_mean_forecasts = geometric_means or get_geometric_means(base_forecasts)
+    geometric_mean_forecasts = geometric_means or get_geometric_means(
+        base_forecasts, include_bots_in_geometric_mean
+    )
     g = None
     for gm in geometric_mean_forecasts[::-1]:
         if gm.timestamp < spot_forecast_timestamp:
@@ -254,7 +268,6 @@ def evaluate_forecasts_legacy_relative(
     resolution_bucket: int,
     forecast_horizon_start: float,
     actual_close_time: float,
-    forecast_horizon_end: float,
 ) -> list[ForecastScore]:
     baseline_forecasts = [
         AggregationEntry(
@@ -309,6 +322,7 @@ def evaluate_question(
     resolution_bucket: int,
     score_types: list[Score.ScoreTypes],
     spot_forecast_timestamp: float | None = None,
+    include_bots_in_aggregates: bool = False,
 ) -> list[Score]:
     forecast_horizon_start = question.open_time.timestamp()
     actual_close_time = question.actual_close_time.timestamp()
@@ -319,12 +333,15 @@
         question,
         minimize=False,
         aggregation_method=AggregationMethod.RECENCY_WEIGHTED,
+        include_bots=include_bots_in_aggregates,
     )
     geometric_means: list[AggregationEntry] = []
 
     ScoreTypes = Score.ScoreTypes
     if ScoreTypes.PEER in score_types:
-        geometric_means = get_geometric_means(user_forecasts)
+        geometric_means = get_geometric_means(
+            user_forecasts, include_bots=include_bots_in_aggregates
+        )
 
     scores: list[Score] = []
     for score_type in score_types:
@@ -379,6 +396,7 @@ def evaluate_question(
                     forecast_horizon_end,
                     question.type,
                     geometric_means=geometric_means,
+                    include_bots_in_geometric_mean=include_bots_in_aggregates,
                 )
                 community_scores = evaluate_forecasts_peer_accuracy(
                     community_forecasts,
@@ -389,6 +407,7 @@
                     forecast_horizon_end,
                     question.type,
                     geometric_means=geometric_means,
+                    include_bots_in_geometric_mean=include_bots_in_aggregates,
                 )
             case ScoreTypes.SPOT_PEER:
                 user_scores = evaluate_forecasts_peer_spot_forecast(
@@ -398,6 +417,7 @@
                     spot_forecast_timestamp,
                     question.type,
                     geometric_means=geometric_means,
+                    include_bots_in_geometric_mean=include_bots_in_aggregates,
                 )
                 community_scores = evaluate_forecasts_peer_spot_forecast(
                     community_forecasts,
@@ -406,6 +426,7 @@
                     spot_forecast_timestamp,
                     question.type,
                     geometric_means=geometric_means,
+                    include_bots_in_geometric_mean=include_bots_in_aggregates,
                 )
             case ScoreTypes.RELATIVE_LEGACY:
                 user_scores = evaluate_forecasts_legacy_relative(
@@ -414,15 +435,13 @@
                     resolution_bucket,
                     forecast_horizon_start,
                     actual_close_time,
-                    forecast_horizon_end,
                 )
                 community_scores = evaluate_forecasts_legacy_relative(
                     community_forecasts,
                     community_forecasts,
                     resolution_bucket,
                     forecast_horizon_start,
                     actual_close_time,
-                    forecast_horizon_end,
                 )
             case other:
                 raise NotImplementedError(f"Score type {other} not implemented")
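The core of the change is in `get_geometric_means`: peer scores compare each forecaster against the geometric mean of all included forecasts, and with `include_bots=False` bot forecasts are now dropped before that mean is computed. A self-contained sketch of the filter-then-average step, using simplified stand-in types rather than the real `Forecast` model:

import numpy as np
from dataclasses import dataclass

@dataclass
class Fc:  # simplified stand-in for a Forecast
    is_bot: bool
    pmf: list[float]  # probability mass over outcome buckets

def geometric_mean_pmf(forecasts: list[Fc], include_bots: bool = False) -> np.ndarray:
    included = forecasts if include_bots else [f for f in forecasts if not f.is_bot]
    # exp(mean(log p)) per bucket: the baseline the peer score is measured against
    return np.exp(np.mean(np.log([f.pmf for f in included]), axis=0))

human = Fc(is_bot=False, pmf=[0.2, 0.8])
bot = Fc(is_bot=True, pmf=[0.9, 0.1])
print(geometric_mean_pmf([human, bot]))                     # -> [0.2 0.8]
print(geometric_mean_pmf([human, bot], include_bots=True))  # -> approx [0.42 0.28]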
16 changes: 13 additions & 3 deletions scoring/utils.py
@@ -28,6 +28,7 @@ def score_question(
     resolution: str,
     spot_forecast_time: float | None = None,
     score_types: list[str] | None = None,
+    include_bots_in_aggregates: bool = False,
 ):
     resolution_bucket = string_location_to_bucket_index(resolution, question)
     spot_forecast_time = spot_forecast_time or question.cp_reveal_time.timestamp()
@@ -37,7 +38,11 @@
         Score.objects.filter(question=question, score_type__in=score_types)
     )
     new_scores = evaluate_question(
-        question, resolution_bucket, score_types, spot_forecast_time
+        question,
+        resolution_bucket,
+        score_types,
+        spot_forecast_time,
+        include_bots_in_aggregates,
     )
     for new_score in new_scores:
         is_new = True
@@ -111,7 +116,7 @@ def generate_scoring_leaderboard_entries(
                 calculated_on=now,
             )
         entries[identifier].score += score.score
-        entries[identifier].coverage += score.coverage / maximum_coverage
+        entries[identifier].coverage += score.coverage
         entries[identifier].contribution_count += 1
     if leaderboard.score_type == Leaderboard.ScoreTypes.PEER_GLOBAL:
         for entry in entries.values():
@@ -121,6 +126,7 @@
             entry.score /= max(40, entry.contribution_count)
     elif leaderboard.score_type == Leaderboard.ScoreTypes.RELATIVE_LEGACY_TOURNAMENT:
         for entry in entries.values():
+            entry.coverage /= maximum_coverage
            entry.take = max(entry.coverage * np.exp(entry.score), 0)
         return sorted(entries.values(), key=lambda entry: entry.take, reverse=True)
     return sorted(entries.values(), key=lambda entry: entry.score, reverse=True)
@@ -290,7 +296,11 @@ def update_project_leaderboard(
     rank = 1
     prev_entry = None
     for entry in new_entries:
-        if (entry.user_id is None) or (entry.user_id in excluded_users):
+        if (
+            (entry.user_id is None)
+            or (entry.user_id in excluded_users)
+            or (entry.user.is_bot and "global" in leaderboard.score_type)
+        ):
             entry.excluded = True
             entry.medal = None
             entry.rank = rank
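The coverage edit relocates normalization: entries now accumulate raw `score.coverage`, and only the legacy-relative tournament branch divides by `maximum_coverage` before computing the take, `take = max(coverage / maximum_coverage * exp(score), 0)`. A small worked example with made-up numbers:

import numpy as np

# Illustrative only: (summed score, summed raw coverage) per user.
entries = {"alice": (1.2, 3.5), "bob": (-0.4, 4.0)}
maximum_coverage = 4.0

takes = {
    user: max((coverage / maximum_coverage) * np.exp(score), 0)
    for user, (score, coverage) in entries.items()
}
print(takes)  # {'alice': 2.90..., 'bob': 0.67...}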
12 changes: 10 additions & 2 deletions utils/the_math/community_prediction.py
@@ -123,12 +123,15 @@ def get_aggregation_at_time(
     include_stats: bool = False,
     histogram: bool = False,
     aggregation_method: AggregationMethod = AggregationMethod.RECENCY_WEIGHTED,
+    include_bots: bool = False,
 ) -> AggregateForecast | None:
     """set include_stats to True if you want to include num_forecasters, q1s, medians,
     and q3s"""
     forecasts = question.user_forecasts.filter(
         Q(end_time__isnull=True) | Q(end_time__gt=time), start_time__lte=time
     ).order_by("start_time")
+    if not include_bots:
+        forecasts = forecasts.exclude(forecaster__is_bot=True)
     if forecasts.count() == 0:
         return None
     forecast_set = ForecastSet(
@@ -203,9 +206,11 @@ def find_index_of_middle(forecasts: list[AggregateForecast]) -> int:
 
 
 def get_user_forecast_history(
-    question: Question, minimize: bool = False
+    question: Question, minimize: bool = False, include_bots: bool = False
 ) -> list[ForecastSet]:
     forecasts = question.user_forecasts.order_by("start_time").all()
+    if not include_bots:
+        forecasts = forecasts.exclude(author__is_bot=True)
     timestamps = set()
     for forecast in forecasts:
         timestamps.add(forecast.start_time)
@@ -242,10 +247,13 @@ def get_cp_history(
     aggregation_method: AggregationMethod = AggregationMethod.RECENCY_WEIGHTED,
     minimize: bool = True,
     include_stats: bool = True,
+    include_bots: bool = False,
 ) -> list[AggregateForecast]:
     full_summary: list[AggregateForecast] = []
 
-    forecast_history = get_user_forecast_history(question, minimize=minimize)
+    forecast_history = get_user_forecast_history(
+        question, minimize=minimize, include_bots=include_bots
+    )
     for i, forecast_set in enumerate(forecast_history):
         if aggregation_method == AggregationMethod.RECENCY_WEIGHTED:
             weights = generate_recency_weights(len(forecast_set.forecasts_values))
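With the flag threaded through `get_cp_history` and `get_user_forecast_history`, bot forecasts stay out of the community prediction unless a caller opts in. A hypothetical call site (the tournament check mirrors migrate_scoring.py and is an assumption, not code from this PR):

# Hypothetical caller: only bot-tournament questions opt back in;
# every other call now relies on the new default include_bots=False.
history = get_cp_history(
    question,
    aggregation_method=AggregationMethod.RECENCY_WEIGHTED,
    minimize=False,
    include_stats=True,
    include_bots=question in fab_questions,
)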