Diomedes Git committed on
Commit
9ea1ddc
·
1 Parent(s): 75e240b

tweaky tweaks

Browse files
src/cluas_mcp/academic/arxiv_client.py ADDED
File without changes
src/cluas_mcp/academic/pubmed_client.py CHANGED
@@ -1,18 +1,19 @@
1
- from common.http import fetch_with_retry
2
- import requests
3
  import xml.etree.ElementTree as ET
4
  import urllib.parse
5
  from typing import List, Optional
6
 
 
 
7
  class PubMedClient:
 
8
 
9
  @staticmethod
10
  def parse_id_list(xml: str) -> List[str]:
11
- """Parse XML and return a list of PubMed IDs."""
12
  try:
13
  root = ET.fromstring(xml)
14
  except ET.ParseError:
15
- return [] # invalid XML or rate limit page
16
 
17
  id_list = root.find(".//IdList")
18
  if id_list is None:
@@ -25,33 +26,36 @@ class PubMedClient:
25
  keywords: List[str],
26
  extra_terms: Optional[List[str]] = None,
27
  retmax: int = 20,
 
 
28
  ) -> List[str]:
29
- """
30
- Search PubMed for (keywords OR ...) AND (extra_terms OR ...).
31
- Returns PubMed IDs.
32
- """
33
- # building grouped OR clauses
34
- base = "(" + " OR ".join(keywords) + ")"
35
  if extra_terms:
36
- base = f"{base} AND ({' OR '.join(extra_terms)})"
37
 
38
- # URL-encode the full term string
39
  term = urllib.parse.quote(base)
40
 
 
41
  url = (
42
  "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
43
  f"?db=pubmed&term={term}&retmax={retmax}&retmode=xml"
 
44
  )
 
 
45
 
 
46
  try:
47
  response = fetch_with_retry(url)
48
- response.raise_for_status()
49
  return PubMedClient.parse_id_list(response.text)
50
-
51
- except requests.exceptions.RequestException:
52
- # log instead of print, lol
53
  return []
54
-
55
 
56
-
57
 
 
 
 
 
 
 
1
  import xml.etree.ElementTree as ET
2
  import urllib.parse
3
  from typing import List, Optional
4
 
5
+ from cluas_mcp.common.http import fetch_with_retry
6
+
7
  class PubMedClient:
8
+ """Lightweight PubMed search client (ID only)."""
9
 
10
  @staticmethod
11
  def parse_id_list(xml: str) -> List[str]:
12
+ """Parse PubMed ESearch XML and return a list of IDs."""
13
  try:
14
  root = ET.fromstring(xml)
15
  except ET.ParseError:
16
+ return []
17
 
18
  id_list = root.find(".//IdList")
19
  if id_list is None:
 
26
  keywords: List[str],
27
  extra_terms: Optional[List[str]] = None,
28
  retmax: int = 20,
29
+ email: Optional[str] = None, # add an email later - sort the forwarding first
30
+ tool: str = "cluas_mcp",
31
  ) -> List[str]:
32
+ """Search PubMed for (keywords OR ...) AND (extra_terms OR ...)."""
33
+
34
+ # 1. build query
35
+ base = f"({' OR '.join(keywords)})"
 
 
36
  if extra_terms:
37
+ base += f" AND ({' OR '.join(extra_terms)})"
38
 
 
39
  term = urllib.parse.quote(base)
40
 
41
+ # 2. build URL
42
  url = (
43
  "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
44
  f"?db=pubmed&term={term}&retmax={retmax}&retmode=xml"
45
+ f"&tool={tool}"
46
  )
47
+ if email:
48
+ url += f"&email={urllib.parse.quote(email)}"
49
 
50
+ # 3. fetch + parse
51
  try:
52
  response = fetch_with_retry(url)
 
53
  return PubMedClient.parse_id_list(response.text)
54
+ except Exception:
55
+ # shift to logging soon
 
56
  return []
 
57
 
 
58
 
59
+
60
+ # # Example usage:
61
+ # ids = PubMedClient.pubmed_search(["corvid", "crow"], ["mating"])
src/cluas_mcp/academic/semantic_scholar_client.py ADDED
File without changes