androiddevnotesforks · pull · Jun 10, 2025 · Jun 6, 2025
diff --git a/hle_eval/run_model_predictions.py b/hle_eval/run_model_predictions.py
@@ -8,14 +8,9 @@
 
 client = AsyncOpenAI(timeout=600.0, max_retries=1)
 
-SYSTEM_EXACT_ANSWER = "Your response should be in the following format:\nExplanation: {your explanation for your final answer}\nExact Answer: {your succinct, final answer}\nConfidence: {your confidence score between 0% and 100% for your answer}"
-
-SYSTEM_MC = "Your response should be in the following format:\nExplanation: {your explanation for your answer choice}\nAnswer: {your chosen answer}\nConfidence: {your confidence score between 0% and 100% for your answer}"
-
+SYSTEM_PROMPT = "Your response should be in the following format:\nExplanation: {your explanation for your answer choice}\nAnswer: {your chosen answer}\nConfidence: {your confidence score between 0% and 100% for your answer}"
 
 def format_message(question):
-    answer_type = question['answer_type']
-    system_prompt = SYSTEM_EXACT_ANSWER if answer_type == 'exact_match' else SYSTEM_MC
     question_text = question['question']
 
     text_content = dict(type="text", text=question_text)
@@ -27,7 +22,7 @@ def format_message(question):
 
     system_role = "user" if "o1" in args.model else "system" # o1 no sys prompt
     messages = [
-        {"role": system_role, "content": system_prompt}, 
+        {"role": system_role, "content": SYSTEM_PROMPT}, 
         {"role": "user", "content": content}
     ]
     return messages