Spaces:

alfulanny
/

huggingface_ai_final

Running

App Files Files Community

alfulanny committed 7 days ago

Commit

04bceee

verified ·

1 Parent(s): 4374e5c

Update evaluate_and_submit.py

Browse files

Files changed (1) hide show

evaluate_and_submit.py +103 -0

evaluate_and_submit.py CHANGED Viewed

	@@ -0,0 +1,103 @@

+"""
+Script to run the agent on Unit 4 questions and optionally submit results to the course scoring API.
+Usage:
+  # Dry run (no submit) on first 5 questions:
+  python evaluate_and_submit.py --limit 5
+  # Submit results (requires username and Space URL):
+  python evaluate_and_submit.py --submit --username YOUR_HF_USERNAME \\
+    --agent-code-url https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
+Notes:
+- The scoring API expects exact-match answers, so make sure the agent returns only
+  the final answer without extra formatting.
+- Ensure you've run `huggingface-cli login` before submission.
+"""
+import argparse
+import os
+import sys
+import time
+from typing import Any, Dict, List
+from evaluation_client import ScoringAPIClient
+from code_agent import run_agent
+def extract_prompt_from_question(q: Dict[str, Any]) -> str:
+    """Return the prompt text carried by a question dict.
+
+    The keys "question", "prompt", "input", "text" and "task" are tried in
+    that order and the first one whose value is a ``str`` is returned. When
+    none of them qualifies, ``str(q)`` is returned instead.
+    """
+    candidate_keys = ("question", "prompt", "input", "text", "task")
+    matches = (
+        q[name]
+        for name in candidate_keys
+        if name in q and isinstance(q[name], str)
+    )
+    found = next(matches, None)
+    # Only fall back to the dict's string form when no key held a string.
+    return str(q) if found is None else found
+def main(argv: List[str]):
+    """Run the agent over the course questions and optionally submit the answers.
+
+    Args:
+        argv: Command-line arguments without the program name. Recognised
+            options are --limit, --submit, --username and --agent-code-url;
+            the last two default to the HF_USERNAME and AGENT_CODE_URL
+            environment variables.
+
+    Exits with status 1 when --submit is given without a username or an
+    agent code URL, when the API returns no questions, or when the
+    submission call raises.
+    """
+    parser = argparse.ArgumentParser(description="Evaluate agent on course questions and optionally submit.")
+    parser.add_argument("--limit", type=int, default=0, help="Max questions to process (0=all)")
+    parser.add_argument("--submit", action="store_true", help="Submit answers to scoring API")
+    parser.add_argument("--username", type=str, default=os.environ.get("HF_USERNAME"), help="HF username for submission")
+    parser.add_argument("--agent-code-url", type=str, default=os.environ.get("AGENT_CODE_URL"), help="Public Space URL for your agent")
+    args = parser.parse_args(argv)
+    # Check the submission prerequisites up front so a missing option is
+    # reported before any question is fetched or the agent is run.
+    if args.submit:
+        if not args.username:
+            print("ERROR: --submit requires --username (or set HF_USERNAME env var)")
+            sys.exit(1)
+        if not args.agent_code_url:
+            print("ERROR: --submit requires --agent-code-url (or set AGENT_CODE_URL env var)")
+            sys.exit(1)
+    client = ScoringAPIClient()
+    print("Fetching questions from scoring API...")
+    questions = client.get_questions()
+    if not questions:
+        print("ERROR: No questions returned by the API.")
+        sys.exit(1)
+    if args.limit > 0:
+        questions = questions[:args.limit]
+    print(f"Processing {len(questions)} questions...")
+    answers = []
+    for idx, q in enumerate(questions, 1):
+        # The task identifier is looked up under several possible key names.
+        task_id = q.get("task_id") or q.get("id") or q.get("taskId")
+        prompt = extract_prompt_from_question(q)
+        print(f"\n[{idx}/{len(questions)}] Task {task_id}")
+        print(f"  Prompt: {_preview(prompt, 100)}")
+        try:
+            ans = run_agent(prompt)
+            ans = ans.strip()
+            answers.append({"task_id": task_id, "submitted_answer": ans})
+            print(f"  Answer: {_preview(ans, 80)}")
+        except Exception as e:
+            print(f"  ERROR: {type(e).__name__}: {str(e)[:100]}")
+            # Still add an error answer to maintain alignment
+            answers.append({"task_id": task_id, "submitted_answer": f"(error) {type(e).__name__}"})
+        # Polite pacing to avoid rate limits
+        time.sleep(0.5)
+    print(f"\n✓ Prepared answers for {len(answers)} tasks")
+    if args.submit:
+        print(f"\nSubmitting {len(answers)} answers as user '{args.username}'...")
+        print(f"Agent Code URL: {args.agent_code_url}")
+        try:
+            resp = client.submit(username=args.username, agent_code=args.agent_code_url, answers=answers)
+            print("✓ Submission successful!")
+            print(f"Response: {resp}")
+        except Exception as e:
+            print(f"ERROR: Submission failed: {e}")
+            sys.exit(1)
+    else:
+        print("\nDry run complete. To submit, re-run with:")
+        print("  python evaluate_and_submit.py --submit --username YOUR_USERNAME \\")
+        print("    --agent-code-url https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE")
+def _preview(text: str, limit: int) -> str:
+    """Return text cut to limit characters, adding "..." only when it was cut."""
+    # An unconditional ellipsis would make a complete short answer look truncated.
+    return text if len(text) <= limit else f"{text[:limit]}..."
+if __name__ == "__main__":
+    # Drop the program name: main() expects only the command-line options.
+    cli_args = sys.argv[1:]
+    main(cli_args)