xy63 committed on
Commit
7627f62
·
verified ·
1 Parent(s): b0ea783

Upload 3 files

Browse files
Files changed (2) hide show
  1. app.py +39 -352
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import spaces
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, pipeline
4
  import torch
5
  from threading import Thread
6
 
@@ -13,12 +13,11 @@ from marker.settings import settings
13
  from marker.logger import configure_logging
14
  from surya.settings import settings as surya_settings
15
  import traceback
16
- import re
17
 
18
 
19
  # marker
20
  configure_logging()
21
- MAX_PAGES = 30
22
  MIN_LENGTH=200
23
  settings.EXTRACT_IMAGES = False
24
  settings.DEBUG = False
@@ -38,130 +37,12 @@ model = AutoModelForCausalLM.from_pretrained(
38
  device_map="auto"
39
  )
40
  tokenizer = AutoTokenizer.from_pretrained(model_name)
41
- # Set pad_token to eos_token if not set (common for Llama models)
42
- if tokenizer.pad_token is None:
43
- tokenizer.pad_token = tokenizer.eos_token
44
- tokenizer.pad_token_id = tokenizer.eos_token_id
45
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, decode_kwargs=dict(skip_special_tokens=True))
46
 
47
- # Initialize AI classifier for paper detection
48
- # Using a lightweight model for text classification
49
- try:
50
- paper_classifier = pipeline(
51
- "text-classification",
52
- model="fabriceyhc/bert-base-uncased-arxiv-classification", # You can use other models
53
- device=0 if torch.cuda.is_available() else -1,
54
- truncation=True,
55
- max_length=512
56
- )
57
- AI_CLASSIFIER_AVAILABLE = True
58
- except Exception as e:
59
- print(f"Warning: Could not load AI classifier: {e}")
60
- print("Falling back to rule-based detection only")
61
- paper_classifier = None
62
- AI_CLASSIFIER_AVAILABLE = False
63
-
64
- # Alternative: Use zero-shot classification (more flexible but slower)
65
- def init_zero_shot_classifier():
66
- try:
67
- from transformers import pipeline
68
- classifier = pipeline(
69
- "zero-shot-classification",
70
- model="facebook/bart-large-mnli",
71
- device=0 if torch.cuda.is_available() else -1
72
- )
73
- return classifier
74
- except Exception as e:
75
- print(f"Could not initialize zero-shot classifier: {e}")
76
- return None
77
-
78
- # If primary classifier fails, try zero-shot
79
- if not AI_CLASSIFIER_AVAILABLE:
80
- zero_shot_classifier = init_zero_shot_classifier()
81
- if zero_shot_classifier:
82
- AI_CLASSIFIER_AVAILABLE = True
83
- else:
84
- zero_shot_classifier = None
85
-
86
- def ai_check_paper(text):
87
- """
88
- Use AI model to check if text is a research paper
89
- Returns (is_paper, confidence, ai_reason)
90
- """
91
- if not AI_CLASSIFIER_AVAILABLE:
92
- return None, 0, "AI classifier not available"
93
-
94
- # Truncate text for AI model (keep beginning and end which are most informative)
95
- max_chars = 2000
96
- if len(text) > max_chars * 2:
97
- text_sample = text[:max_chars] + "\n...\n" + text[-max_chars:]
98
- else:
99
- text_sample = text[:max_chars*2]
100
-
101
- try:
102
- if zero_shot_classifier and not paper_classifier:
103
- # Use zero-shot classification
104
- labels = [
105
- "academic research paper",
106
- "scientific article",
107
- "technical report",
108
- "business document",
109
- "news article",
110
- "blog post",
111
- "other document"
112
- ]
113
-
114
- result = zero_shot_classifier(
115
- text_sample,
116
- candidate_labels=labels,
117
- hypothesis_template="This text is a {}."
118
- )
119
-
120
- # Check if top labels are paper-related
121
- top_label = result['labels'][0]
122
- top_score = result['scores'][0]
123
-
124
- paper_labels = {"academic research paper", "scientific article", "technical report"}
125
-
126
- if top_label in paper_labels:
127
- if top_score > 0.7:
128
- return True, top_score, f"AI detected: {top_label} (confidence: {top_score:.2f})"
129
- elif top_score > 0.5:
130
- return True, 0.6, f"AI detected: likely {top_label} (confidence: {top_score:.2f})"
131
- else:
132
- return False, top_score, f"AI detected: uncertain document type"
133
- else:
134
- return False, 1-top_score, f"AI detected: {top_label}, not a research paper"
135
-
136
- elif paper_classifier:
137
- # Use pre-trained classifier
138
- result = paper_classifier(text_sample)[0]
139
-
140
- # Check if the label indicates it's a paper
141
- # This depends on the specific model used
142
- label = result['label'].lower()
143
- score = result['score']
144
-
145
- # Adjust based on your chosen model's labels
146
- paper_keywords = ['cs', 'math', 'physics', 'eess', 'econ', 'stat', 'q-bio']
147
- is_paper = any(keyword in label for keyword in paper_keywords)
148
-
149
- if is_paper:
150
- return True, score, f"AI detected: {label} paper (confidence: {score:.2f})"
151
- else:
152
- return False, 1-score, f"AI detected: not a research paper"
153
-
154
- except Exception as e:
155
- print(f"AI classification error: {e}")
156
- return None, 0, "AI classification failed"
157
-
158
- return None, 0, "AI check not performed"
159
-
160
  # Define prompts
161
  SYSTEM_PROMPT_TEMPLATE = """You are an expert reviewer for AI conferences. You follow best practices and review papers according to the reviewer guidelines.
162
-
163
  Reviewer guidelines:
164
- 1. Read the paper: It's important to carefully read through the entire paper, and to look up any related work and citations that will help you comprehensively evaluate it. Be sure to give yourself sufficient time for this step.
165
  2. While reading, consider the following:
166
  - Objective of the work: What is the goal of the paper? Is it to better address a known application or problem, draw attention to a new application or problem, or to introduce and/or explain a new theoretical finding? A combination of these? Different objectives will require different considerations as to potential value and impact.
167
  - Strong points: is the submission clear, technically correct, experimentally rigorous, reproducible, does it present novel findings (e.g. theoretically, algorithmically, etc.)?
@@ -179,18 +60,13 @@ Reviewer guidelines:
179
  - Provide supporting arguments for your recommendation.
180
  - Ask questions you would like answered by the authors to help you clarify your understanding of the paper and provide the additional evidence you need to be confident in your assessment.
181
  - Provide additional feedback with the aim to improve the paper. Make it clear that these points are here to help, and not necessarily part of your decision assessment.
182
-
183
  Your write reviews in markdown format. Your reviews contain the following sections:
184
-
185
  # Review
186
-
187
  {review_fields}
188
-
189
  Your response must only contain the review in markdown format with sections as defined above.
190
  """
191
 
192
  USER_PROMPT_TEMPLATE = """Review the following paper:
193
-
194
  {paper_text}
195
  """
196
 
@@ -309,126 +185,7 @@ This should be a holistic assessment, not a repetition of individual sections.
309
 
310
  """
311
 
312
- # Enhanced function that combines rule-based and AI checks
313
- def is_research_paper(text, use_ai=True):
314
- """
315
- Check if the given text appears to be a research paper.
316
- Combines rule-based detection with AI classification.
317
- Returns (is_paper, confidence, reason)
318
- """
319
- if not text or len(text) < MIN_LENGTH:
320
- return False, 0, "Text is too short to be a research paper"
321
-
322
- text_lower = text.lower()
323
-
324
- # Academic paper indicators (must have multiple)
325
- indicators = {
326
- 'abstract': bool(re.search(r'\babstract\b', text_lower)),
327
- 'introduction': bool(re.search(r'\bintroduction\b', text_lower)),
328
- 'conclusion': bool(re.search(r'\bconclusion\b', text_lower)),
329
- 'references': bool(re.search(r'\b(references|bibliography)\b', text_lower)),
330
- 'methodology': bool(re.search(r'\b(method|methodology|approach|algorithm|model)\b', text_lower)),
331
- 'results': bool(re.search(r'\b(results|experiments|evaluation|analysis)\b', text_lower)),
332
- 'citations': bool(re.search(r'\[[\d,\s]+\]|\(\w+,?\s*\d{4}\)', text)), # [1,2,3] or (Author, 2024)
333
- 'figures_tables': bool(re.search(r'\b(figure\s*\d+|table\s*\d+|fig\.\s*\d+)\b', text_lower)),
334
- 'academic_terms': bool(re.search(r'\b(propose|present|demonstrate|evaluate|contribution|novel|state-of-the-art)\b', text_lower))
335
- }
336
-
337
- # Count how many indicators are present
338
- indicator_count = sum(indicators.values())
339
-
340
- # Check for non-paper content with context
341
- # Only flag as non-paper if these terms appear WITHOUT academic context
342
- non_paper_indicators = []
343
-
344
- # Check for invoice/receipt patterns (multiple commercial terms together)
345
- if re.search(r'\b(invoice|receipt)\b', text_lower) and re.search(r'\b(total|amount|payment|billing)\b', text_lower):
346
- non_paper_indicators.append(True)
347
-
348
- # Check for purchase order specifically (not just "purchase")
349
- if re.search(r'\bpurchase order\b', text_lower):
350
- non_paper_indicators.append(True)
351
-
352
- # Check for letter format
353
- if re.search(r'\b(dear\s+(sir|madam|customer)|sincerely|best regards|yours truly)\b', text_lower):
354
- non_paper_indicators.append(True)
355
-
356
- # Check for textbook structure
357
- if re.search(r'\b(chapter\s+\d+|lesson\s+\d+|exercise\s+\d+)\b', text_lower) and indicator_count < 3:
358
- non_paper_indicators.append(True)
359
-
360
- # Check for HTML/web content
361
- if re.search(r'<html|<body|<div|<script|<!DOCTYPE', text_lower):
362
- non_paper_indicators.append(True)
363
-
364
- # Check for recipe/cooking content
365
- if re.search(r'\b(ingredients|recipe|preparation|cooking time|servings)\b', text_lower) and not re.search(r'\b(algorithm|method|experiment)\b', text_lower):
366
- non_paper_indicators.append(True)
367
-
368
- # If we have strong non-paper indicators AND weak academic indicators, it's not a paper
369
- if any(non_paper_indicators) and indicator_count < 6:
370
- return False, 0, "Content appears to be a non-academic document"
371
-
372
- # Get AI assessment if available and requested
373
- ai_result = None
374
- ai_confidence = 0
375
- ai_reason = ""
376
-
377
- if use_ai and AI_CLASSIFIER_AVAILABLE:
378
- ai_result, ai_confidence, ai_reason = ai_check_paper(text)
379
-
380
- # Combine rule-based and AI assessments
381
- # Rule-based decision logic
382
- if indicator_count == 9:
383
- rule_decision = True
384
- rule_confidence = 0.9
385
- rule_reason = f"Found all {indicator_count}/9 academic paper indicators"
386
- elif indicator_count >= 6:
387
- rule_decision = True
388
- rule_confidence = 0.6
389
- missing = [k for k, v in indicators.items() if not v]
390
- rule_reason = f"Found only {indicator_count}/9 indicators. Missing: {', '.join(missing)}"
391
- else:
392
- rule_decision = False
393
- rule_confidence = 0
394
- missing = [k for k, v in indicators.items() if not v]
395
- rule_reason = f"Found only {indicator_count}/9 indicators. Missing: {', '.join(missing[:4])}"
396
-
397
- # Combine decisions
398
- if ai_result is not None:
399
- # Both methods available - combine them
400
- # Weight: 60% rule-based, 40% AI
401
- combined_confidence = (rule_confidence * 0.6) + (ai_confidence * 0.4)
402
-
403
- # Decision logic
404
- if rule_decision and ai_result:
405
- # Both agree it's a paper
406
- if combined_confidence >= 0.9:
407
- return True, 0.9, f"High confidence: {rule_reason}. {ai_reason}"
408
- else:
409
- return True, 0.6, f"Warning: {rule_reason}. {ai_reason}"
410
- elif not rule_decision and not ai_result:
411
- # Both agree it's not a paper
412
- return False, 0, f"Not a research paper. {rule_reason}. {ai_reason}"
413
- else:
414
- # Disagreement - use weighted decision
415
- if combined_confidence >= 0.5:
416
- return True, 0.6, f"Mixed signals: {rule_reason}. {ai_reason}"
417
- else:
418
- return False, 0, f"Likely not a research paper. {rule_reason}. {ai_reason}"
419
- else:
420
- # Only rule-based available
421
- if rule_decision:
422
- if rule_confidence >= 0.9:
423
- return True, 0.9, f"High confidence: {rule_reason}"
424
- else:
425
- return True, 0.6, f"Warning: {rule_reason}"
426
- else:
427
- return False, 0, f"Does not appear to be a research paper. {rule_reason}"
428
-
429
- # Rest of the code remains exactly the same...
430
- # (create_messages, convert_file, process_file, generate functions remain unchanged)
431
-
432
  def create_messages(review_fields, paper_text):
433
  messages = [
434
  {"role": "system", "content": SYSTEM_PROMPT_TEMPLATE.format(review_fields=review_fields)},
@@ -461,51 +218,28 @@ def process_file(file):
461
  raise ValueError()
462
  except spaces.zero.gradio.HTMLError as e:
463
  print(e)
464
- return "Error. GPU quota exceeded. Please return later.", False
 
 
 
 
465
  except Exception as e:
466
  print(traceback.format_exc())
467
  print(f"Error converting {filepath}: {e}")
468
- return "Error processing pdf", False
469
-
470
- # Check if it's a research paper (with AI)
471
- is_paper, confidence, reason = is_research_paper(paper_text, use_ai=True)
472
- if not is_paper:
473
- return f"⚠️ **Not a Research Paper**\n\nThe uploaded document does not appear to be a research paper.\n\nReason: {reason}\n\nPlease upload a proper academic/research paper with sections like Abstract, Introduction, Methodology, Results, and References.", False
474
-
475
- # If confidence is low (6-8 indicators), add a warning
476
- if confidence < 0.9:
477
- paper_text = f"⚠️ **Warning**: {reason}. \n\nThe document may be incomplete or missing key sections. Proceeding with review generation...\n\n---\n\n{paper_text}"
478
-
479
- return paper_text, True
480
-
481
- @spaces.GPU(duration=190)
482
  def generate(paper_text, review_template):
483
- # Final check before generation
484
- is_paper, confidence, reason = is_research_paper(paper_text, use_ai=False) # Quick check without AI
485
- if not is_paper:
486
- return f"⚠️ Cannot generate review: {reason}"
487
-
488
  messages = create_messages(review_template, paper_text)
489
  input_ids = tokenizer.apply_chat_template(
490
  messages,
491
  add_generation_prompt=True,
492
  return_tensors='pt'
493
  ).to(model.device)
494
-
495
- # Create attention mask
496
- attention_mask = torch.ones_like(input_ids)
497
-
498
  print(f"input_ids shape: {input_ids.shape}")
499
- generation_kwargs = dict(
500
- input_ids=input_ids,
501
- attention_mask=attention_mask,
502
- streamer=streamer,
503
- max_new_tokens=4096,
504
- do_sample=True,
505
- temperature=0.6,
506
- top_p=0.9,
507
- pad_token_id=tokenizer.pad_token_id
508
- )
509
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
510
  thread.start()
511
  generated_text = ""
@@ -513,97 +247,50 @@ def generate(paper_text, review_template):
513
  generated_text += new_text
514
  yield generated_text.replace("<|eot_id|>", "")
515
 
516
- # UI code remains exactly the same...
 
517
  # ui
 
 
 
 
 
 
 
518
  title = """<h1 align="center">OpenReviewer</h1>
519
  <div align="center">Using <a href="https://huggingface.co/maxidl/Llama-OpenReviewer-8B" target="_blank"><code>Llama-OpenReviewer-8B</code></a> - Built with Llama</div>
520
  """
521
 
522
  description = """This is an online demo featuring [Llama-OpenReviewer-8B](https://huggingface.co/maxidl/Llama-OpenReviewer-8B), a large language model that generates high-quality reviews for machine learning and AI papers.
523
-
524
  ## Demo Guidelines
525
-
526
- 1. Upload your paper as a PDF file. Alternatively you can paste the full text of your paper in markdown format below. We do **not** store your data. User data is kept in ephemeral storage during processing.
527
-
528
- 2. Once you upload a PDF, it will be converted to markdown and **validated to ensure it's a research paper**. This takes some time as it runs multiple transformer models to parse the layout and extract text and tables. Check out [marker](https://github.com/VikParuchuri/marker/tree/master) for details.
529
-
530
- 3. Having obtained a markdown version of your paper and confirmed it's a valid research paper, you can now click *Generate Review*.
531
-
532
  Take a look at the Review Template to properly interpret the generated review. You can also change the review template before generating in case you want to generate a review with a different schema and aspects.
533
-
534
  To obtain more than one review, just generate again.
535
-
536
  **GPU quota:** If exceeded, either sign in with your HF account or come back later. Your quota has a half-life of 2 hours.
537
-
538
  """
539
 
540
  theme = gr.themes.Default(primary_hue="gray", secondary_hue="blue", neutral_hue="slate")
541
  with gr.Blocks(theme=theme) as demo:
542
  title = gr.HTML(title)
543
  description = gr.Markdown(description)
544
-
545
- # Add paper validation status
546
- with gr.Row():
547
- file_input = gr.File(file_types=[".pdf"], file_count="single")
548
- validation_status = gr.Markdown("", visible=False)
549
-
550
- paper_text_field = gr.Textbox("Upload a pdf or paste the full text of your paper in markdown format here.", label="Paper Text", lines=20, max_lines=20, autoscroll=False)
551
-
552
  with gr.Accordion("Review Template", open=False):
553
  review_template_description = gr.Markdown("We use the ICLR 2025 review template by default, but you can modify the template below as you like.")
554
  review_template_field = gr.Textbox(label=" ",lines=20, max_lines=20, autoscroll=False, value=REVIEW_FIELDS)
555
-
556
- generate_button = gr.Button("Generate Review", interactive=False)
557
-
558
- def handle_file_upload(file):
559
- if file is None:
560
- return "", gr.update(visible=False), gr.update(interactive=False)
561
- text, is_valid = process_file(file)
562
- if is_valid:
563
- # Check confidence level for appropriate message
564
- is_paper, confidence, reason = is_research_paper(text, use_ai=False) # Quick check for display
565
- if confidence >= 0.9:
566
- status_msg = "✅ **Document validated**: This appears to be a complete research paper."
567
- else:
568
- status_msg = f"⚠️ **Warning**: {reason}\n\nThe document may be incomplete or missing key sections of a research paper."
569
- return text, gr.update(value=status_msg, visible=True), gr.update(interactive=True)
570
- else:
571
- return text, gr.update(value="❌ **Validation failed**: Please upload a research paper.", visible=True), gr.update(interactive=False)
572
-
573
- def handle_text_change(text):
574
- if not text or len(text) < 200:
575
- return gr.update(interactive=False), gr.update(visible=False)
576
-
577
- is_paper, confidence, reason = is_research_paper(text, use_ai=True)
578
- if is_paper:
579
- if confidence >= 0.9:
580
- status = "✅ **Text validated**: This appears to be a complete research paper."
581
- else:
582
- # confidence < 0.9 means warning (6-8 indicators)
583
- status = f"⚠️ **Warning**: {reason}\n\nThe document may be incomplete or missing key sections."
584
- return gr.update(interactive=True), gr.update(value=status, visible=True)
585
- else:
586
- return gr.update(interactive=False), gr.update(value=f"❌ **Not a research paper**: {reason}", visible=True)
587
-
588
- file_input.upload(handle_file_upload, file_input, [paper_text_field, validation_status, generate_button])
589
- paper_text_field.change(handle_text_change, paper_text_field, [generate_button, validation_status])
590
-
591
  review_field = gr.Markdown("\n\n\n\n\n", label="Review")
592
- generate_button.click(
593
- fn=lambda: gr.update(interactive=False),
594
- inputs=None,
595
- outputs=generate_button
596
- ).then(
597
- generate,
598
- [paper_text_field, review_template_field],
599
- review_field
600
- ).then(
601
- fn=lambda: gr.update(interactive=True),
602
- inputs=None,
603
- outputs=generate_button
604
- )
605
-
606
  demo.title = "OpenReviewer"
607
 
 
 
 
608
  if __name__ == "__main__":
609
- demo.launch()
 
1
  import gradio as gr
2
  import spaces
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
  import torch
5
  from threading import Thread
6
 
 
13
  from marker.logger import configure_logging
14
  from surya.settings import settings as surya_settings
15
  import traceback
 
16
 
17
 
18
  # marker
19
  configure_logging()
20
+ MAX_PAGES = 20
21
  MIN_LENGTH=200
22
  settings.EXTRACT_IMAGES = False
23
  settings.DEBUG = False
 
37
  device_map="auto"
38
  )
39
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
 
 
40
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, decode_kwargs=dict(skip_special_tokens=True))
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # Define prompts
43
  SYSTEM_PROMPT_TEMPLATE = """You are an expert reviewer for AI conferences. You follow best practices and review papers according to the reviewer guidelines.
 
44
  Reviewer guidelines:
45
+ 1. Read the paper: It's important to carefully read through the entire paper, and to look up any related work and citations that will help you comprehensively evaluate it. Be sure to give yourself sufficient time for this step.
46
  2. While reading, consider the following:
47
  - Objective of the work: What is the goal of the paper? Is it to better address a known application or problem, draw attention to a new application or problem, or to introduce and/or explain a new theoretical finding? A combination of these? Different objectives will require different considerations as to potential value and impact.
48
  - Strong points: is the submission clear, technically correct, experimentally rigorous, reproducible, does it present novel findings (e.g. theoretically, algorithmically, etc.)?
 
60
  - Provide supporting arguments for your recommendation.
61
  - Ask questions you would like answered by the authors to help you clarify your understanding of the paper and provide the additional evidence you need to be confident in your assessment.
62
  - Provide additional feedback with the aim to improve the paper. Make it clear that these points are here to help, and not necessarily part of your decision assessment.
 
63
  Your write reviews in markdown format. Your reviews contain the following sections:
 
64
  # Review
 
65
  {review_fields}
 
66
  Your response must only contain the review in markdown format with sections as defined above.
67
  """
68
 
69
  USER_PROMPT_TEMPLATE = """Review the following paper:
 
70
  {paper_text}
71
  """
72
 
 
185
 
186
  """
187
 
188
+ # functions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  def create_messages(review_fields, paper_text):
190
  messages = [
191
  {"role": "system", "content": SYSTEM_PROMPT_TEMPLATE.format(review_fields=review_fields)},
 
218
  raise ValueError()
219
  except spaces.zero.gradio.HTMLError as e:
220
  print(e)
221
+ return "Error. GPU quota exceeded. Please return later."
222
+ # except gradio.exceptions.Error as e:
223
+ # if 'GPU task aborted' in str(e):
224
+ # print(e)
225
+ # return 'GPU task aborted'
226
  except Exception as e:
227
  print(traceback.format_exc())
228
  print(f"Error converting {filepath}: {e}")
229
+ return "Error processing pdf"
230
+ return paper_text
231
+
232
+
233
+ @spaces.GPU(duration=90)
 
 
 
 
 
 
 
 
 
234
  def generate(paper_text, review_template):
 
 
 
 
 
235
  messages = create_messages(review_template, paper_text)
236
  input_ids = tokenizer.apply_chat_template(
237
  messages,
238
  add_generation_prompt=True,
239
  return_tensors='pt'
240
  ).to(model.device)
 
 
 
 
241
  print(f"input_ids shape: {input_ids.shape}")
242
+ generation_kwargs = dict(input_ids=input_ids, streamer=streamer, max_new_tokens=4096, do_sample=True, temperature=0.6, top_p=0.9)
 
 
 
 
 
 
 
 
 
243
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
244
  thread.start()
245
  generated_text = ""
 
247
  generated_text += new_text
248
  yield generated_text.replace("<|eot_id|>", "")
249
 
250
+
251
+
252
  # ui
253
+ #8C1B13 red
254
+ #4D8093 blue
255
+ #767676 med grey
256
+ #EFECE3 light grey
257
+ #DDDDDD silver below red
258
+ #FFFDFA white
259
+
260
  title = """<h1 align="center">OpenReviewer</h1>
261
  <div align="center">Using <a href="https://huggingface.co/maxidl/Llama-OpenReviewer-8B" target="_blank"><code>Llama-OpenReviewer-8B</code></a> - Built with Llama</div>
262
  """
263
 
264
  description = """This is an online demo featuring [Llama-OpenReviewer-8B](https://huggingface.co/maxidl/Llama-OpenReviewer-8B), a large language model that generates high-quality reviews for machine learning and AI papers.
 
265
  ## Demo Guidelines
266
+ 1. Upload your paper as a pdf file. Alternatively you can paste the full text of your paper in markdown format below. We do **not** store your data. User data is kept in ephemeral storage during processing.
267
+ 2. Once you upload a pdf, it will be converted to markdown. This takes some time as it runs multiple transformer models to parse the layout and extract text and tables. Check out [marker](https://github.com/VikParuchuri/marker/tree/master) for details.
268
+ 3. Having obtained a markdown version of your paper, you can now click *Generate Review*.
 
 
 
 
269
  Take a look at the Review Template to properly interpret the generated review. You can also change the review template before generating in case you want to generate a review with a different schema and aspects.
 
270
  To obtain more than one review, just generate again.
 
271
  **GPU quota:** If exceeded, either sign in with your HF account or come back later. Your quota has a half-life of 2 hours.
 
272
  """
273
 
274
  theme = gr.themes.Default(primary_hue="gray", secondary_hue="blue", neutral_hue="slate")
275
  with gr.Blocks(theme=theme) as demo:
276
  title = gr.HTML(title)
277
  description = gr.Markdown(description)
278
+ file_input = gr.File(file_types=[".pdf"], file_count="single")
279
+ paper_text_field= gr.Textbox("Upload a pdf or paste the full text of your paper in markdown format here.", label="Paper Text", lines=20, max_lines=20, autoscroll=False)
 
 
 
 
 
 
280
  with gr.Accordion("Review Template", open=False):
281
  review_template_description = gr.Markdown("We use the ICLR 2025 review template by default, but you can modify the template below as you like.")
282
  review_template_field = gr.Textbox(label=" ",lines=20, max_lines=20, autoscroll=False, value=REVIEW_FIELDS)
283
+ generate_button = gr.Button("Generate Review", interactive=not paper_text_field)
284
+ file_input.upload(process_file, file_input, paper_text_field)
285
+ paper_text_field.change(lambda text: gr.update(interactive=True) if len(text) > 200 else gr.update(interactive=False), paper_text_field, generate_button)
286
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  review_field = gr.Markdown("\n\n\n\n\n", label="Review")
288
+ generate_button.click(fn=lambda: gr.update(interactive=False), inputs=None, outputs=generate_button).then(generate, [paper_text_field, review_template_field], review_field).then(fn=lambda: gr.update(interactive=True), inputs=None, outputs=generate_button)
289
+
 
 
 
 
 
 
 
 
 
 
 
 
290
  demo.title = "OpenReviewer"
291
 
292
+
293
+
294
+
295
  if __name__ == "__main__":
296
+ demo.launch()
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- gradio==5.8.0
2
  transformers==4.45.2
3
  accelerate
4
  marker-pdf @ git+https://github.com/maxidl/marker-arena.git@ffeb6ee6c1092f1e008000cb8d1d6240a7baeb52
 
 
1
  transformers==4.45.2
2
  accelerate
3
  marker-pdf @ git+https://github.com/maxidl/marker-arena.git@ffeb6ee6c1092f1e008000cb8d1d6240a7baeb52