VibecoderMcSwaggins committed on
Commit
de1af88
·
unverified ·
1 Parent(s): 2c5db87

feat(clinicaltrials): SPEC_14 Outcome Measures (#100)

Browse files

* feat: add outcome measures to ClinicalTrials.gov tool

- Add OutcomesModule and HasResults to FIELDS constant
- Extract primary outcomes with measure and timeFrame
- Show results availability status with posted date
- Boost relevance for trials with results (0.90 vs 0.85)
- Add 4 unit tests per SPEC_14

Closes #95

* fix: address CodeRabbit review feedback for SPEC_14

- Fix unconditional ellipsis: Only add "..." when summary > 400 chars
- Refine primary outcome truncation: Build full string before truncating
- Extract _extract_primary_outcome() helper to reduce statement count
- Add identificationModule to test mock for completeness

Addresses review comments on PR #100.

* test: add pytest.skip for API flakiness resilience

Add defensive skip if ClinicalTrials.gov API returns no results,
making the integration test more resilient to external API changes.

Addresses CodeRabbit review feedback on PR #100.

src/tools/clinicaltrials.py CHANGED
@@ -30,6 +30,9 @@ class ClinicalTrialsTool:
30
  "InterventionName",
31
  "StartDate",
32
  "BriefSummary",
 
 
 
33
  ]
34
 
35
  # Status filter: Only active/completed studies with potential data
@@ -89,6 +92,20 @@ class ClinicalTrialsTool:
89
  except requests.RequestException as e:
90
  raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  def _study_to_evidence(self, study: dict[str, Any]) -> Evidence:
93
  """Convert a clinical trial study to Evidence."""
94
  # Navigate nested structure
@@ -99,6 +116,7 @@ class ClinicalTrialsTool:
99
  design_module = protocol.get("designModule", {})
100
  conditions_module = protocol.get("conditionsModule", {})
101
  arms_module = protocol.get("armsInterventionsModule", {})
 
102
 
103
  nct_id = id_module.get("nctId", "Unknown")
104
  title = id_module.get("briefTitle", "Untitled Study")
@@ -121,14 +139,42 @@ class ClinicalTrialsTool:
121
  # Get summary
122
  summary = desc_module.get("briefSummary", "No summary available.")
123
 
 
 
 
 
 
 
 
 
 
 
 
124
  # Build content with key trial info
125
- content = (
126
- f"{summary[:500]}... "
127
- f"Trial Phase: {phase}. "
128
- f"Status: {status}. "
129
- f"Conditions: {conditions_str}. "
130
- f"Interventions: {interventions_str}."
131
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  return Evidence(
134
  content=content[:2000],
@@ -139,5 +185,5 @@ class ClinicalTrialsTool:
139
  date=start_date,
140
  authors=[], # Trials don't have traditional authors
141
  ),
142
- relevance=0.85, # Trials are highly relevant for repurposing
143
  )
 
30
  "InterventionName",
31
  "StartDate",
32
  "BriefSummary",
33
+ # NEW: Outcome measures
34
+ "OutcomesModule",
35
+ "HasResults",
36
  ]
37
 
38
  # Status filter: Only active/completed studies with potential data
 
92
  except requests.RequestException as e:
93
  raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e
94
 
95
+ def _extract_primary_outcome(self, outcomes_module: dict[str, Any]) -> str:
96
+ """Extract and format primary outcome from outcomes module."""
97
+ primary_outcomes = outcomes_module.get("primaryOutcomes", [])
98
+ if not primary_outcomes:
99
+ return ""
100
+ # Get first primary outcome measure and timeframe
101
+ first = primary_outcomes[0]
102
+ measure = first.get("measure", "")
103
+ timeframe = first.get("timeFrame", "")
104
+ # Build full outcome string first, then truncate
105
+ result = f"{measure} (measured at {timeframe})" if timeframe else measure
106
+ # Truncate long outcome descriptions with ellipsis
107
+ return result[:197] + "..." if len(result) > 200 else result
108
+
109
  def _study_to_evidence(self, study: dict[str, Any]) -> Evidence:
110
  """Convert a clinical trial study to Evidence."""
111
  # Navigate nested structure
 
116
  design_module = protocol.get("designModule", {})
117
  conditions_module = protocol.get("conditionsModule", {})
118
  arms_module = protocol.get("armsInterventionsModule", {})
119
+ outcomes_module = protocol.get("outcomesModule", {})
120
 
121
  nct_id = id_module.get("nctId", "Unknown")
122
  title = id_module.get("briefTitle", "Untitled Study")
 
139
  # Get summary
140
  summary = desc_module.get("briefSummary", "No summary available.")
141
 
142
+ # Extract outcome measures
143
+ primary_outcome_str = self._extract_primary_outcome(outcomes_module)
144
+ secondary_count = len(outcomes_module.get("secondaryOutcomes", []))
145
+
146
+ # Check if results are available (hasResults is TOP-LEVEL, not in protocol!)
147
+ has_results = study.get("hasResults", False)
148
+
149
+ # Results date is in statusModule (nested inside date struct)
150
+ results_date_struct = status_module.get("resultsFirstPostDateStruct", {})
151
+ results_date = results_date_struct.get("date", "")
152
+
153
  # Build content with key trial info
154
+ summary_text = summary[:400] + "..." if len(summary) > 400 else summary
155
+ content_parts = [
156
+ summary_text,
157
+ f"Trial Phase: {phase}.",
158
+ f"Status: {status}.",
159
+ f"Conditions: {conditions_str}.",
160
+ f"Interventions: {interventions_str}.",
161
+ ]
162
+
163
+ if primary_outcome_str:
164
+ content_parts.append(f"Primary Outcome: {primary_outcome_str}.")
165
+
166
+ if secondary_count > 0:
167
+ content_parts.append(f"Secondary Outcomes: {secondary_count} additional endpoints.")
168
+
169
+ if has_results:
170
+ results_info = "Results Available: Yes"
171
+ if results_date:
172
+ results_info += f" (posted {results_date})"
173
+ content_parts.append(results_info + ".")
174
+ else:
175
+ content_parts.append("Results Available: Not yet posted.")
176
+
177
+ content = " ".join(content_parts)
178
 
179
  return Evidence(
180
  content=content[:2000],
 
185
  date=start_date,
186
  authors=[], # Trials don't have traditional authors
187
  ),
188
+ relevance=0.90 if has_results else 0.85, # Boost relevance for trials with results
189
  )
tests/unit/tools/test_clinicaltrials.py CHANGED
@@ -128,6 +128,150 @@ class TestClinicalTrialsTool:
128
  assert results == []
129
 
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  @pytest.mark.integration
132
  class TestClinicalTrialsIntegration:
133
  """Integration tests with real API."""
@@ -150,3 +294,21 @@ class TestClinicalTrialsIntegration:
150
  or "phase" in all_content
151
  )
152
  assert has_intervention
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  assert results == []
129
 
130
 
131
+ @pytest.mark.unit
132
+ class TestClinicalTrialsOutcomes:
133
+ """Tests for outcome measure extraction."""
134
+
135
+ @pytest.fixture
136
+ def tool(self) -> ClinicalTrialsTool:
137
+ return ClinicalTrialsTool()
138
+
139
+ @pytest.mark.asyncio
140
+ async def test_extracts_primary_outcome(self, tool: ClinicalTrialsTool) -> None:
141
+ """Test that primary outcome is extracted from response."""
142
+ mock_study = {
143
+ "protocolSection": {
144
+ "identificationModule": {"nctId": "NCT12345678", "briefTitle": "Test"},
145
+ "statusModule": {"overallStatus": "COMPLETED", "startDateStruct": {"date": "2023"}},
146
+ "descriptionModule": {"briefSummary": "Summary"},
147
+ "designModule": {"phases": ["PHASE3"]},
148
+ "conditionsModule": {"conditions": ["ED"]},
149
+ "armsInterventionsModule": {"interventions": []},
150
+ "outcomesModule": {
151
+ "primaryOutcomes": [
152
+ {"measure": "Change in IIEF-EF score", "timeFrame": "Week 12"}
153
+ ]
154
+ },
155
+ },
156
+ "hasResults": True,
157
+ }
158
+
159
+ mock_response = MagicMock()
160
+ mock_response.json.return_value = {"studies": [mock_study]}
161
+ mock_response.raise_for_status = MagicMock()
162
+
163
+ with patch("requests.get", return_value=mock_response):
164
+ results = await tool.search("test", max_results=1)
165
+
166
+ assert len(results) == 1
167
+ assert "Primary Outcome" in results[0].content
168
+ assert "IIEF-EF" in results[0].content
169
+ assert "Week 12" in results[0].content
170
+
171
+ @pytest.mark.asyncio
172
+ async def test_includes_results_status(self, tool: ClinicalTrialsTool) -> None:
173
+ """Test that results availability is shown."""
174
+ mock_study = {
175
+ "protocolSection": {
176
+ "identificationModule": {"nctId": "NCT12345678", "briefTitle": "Test"},
177
+ "statusModule": {
178
+ "overallStatus": "COMPLETED",
179
+ "startDateStruct": {"date": "2023"},
180
+ # Note: resultsFirstPostDateStruct, not resultsFirstSubmitDate
181
+ "resultsFirstPostDateStruct": {"date": "2024-06-15"},
182
+ },
183
+ "descriptionModule": {"briefSummary": "Summary"},
184
+ "designModule": {"phases": ["PHASE3"]},
185
+ "conditionsModule": {"conditions": ["ED"]},
186
+ "armsInterventionsModule": {"interventions": []},
187
+ "outcomesModule": {},
188
+ },
189
+ "hasResults": True, # Note: hasResults is TOP-LEVEL
190
+ }
191
+
192
+ mock_response = MagicMock()
193
+ mock_response.json.return_value = {"studies": [mock_study]}
194
+ mock_response.raise_for_status = MagicMock()
195
+
196
+ with patch("requests.get", return_value=mock_response):
197
+ results = await tool.search("test", max_results=1)
198
+
199
+ assert "Results Available: Yes" in results[0].content
200
+ assert "2024-06-15" in results[0].content
201
+
202
+ @pytest.mark.asyncio
203
+ async def test_shows_no_results_when_missing(self, tool: ClinicalTrialsTool) -> None:
204
+ """Test that missing results are indicated."""
205
+ mock_study = {
206
+ "protocolSection": {
207
+ "identificationModule": {
208
+ "nctId": "NCT99999999",
209
+ "briefTitle": "Test Study",
210
+ },
211
+ "statusModule": {
212
+ "overallStatus": "RECRUITING",
213
+ "startDateStruct": {"date": "2024"},
214
+ },
215
+ "descriptionModule": {"briefSummary": "Summary"},
216
+ "designModule": {"phases": ["PHASE2"]},
217
+ "conditionsModule": {"conditions": ["ED"]},
218
+ "armsInterventionsModule": {"interventions": []},
219
+ "outcomesModule": {},
220
+ },
221
+ "hasResults": False,
222
+ }
223
+
224
+ mock_response = MagicMock()
225
+ mock_response.json.return_value = {"studies": [mock_study]}
226
+ mock_response.raise_for_status = MagicMock()
227
+
228
+ with patch("requests.get", return_value=mock_response):
229
+ results = await tool.search("test", max_results=1)
230
+
231
+ assert "Results Available: Not yet posted" in results[0].content
232
+
233
+ @pytest.mark.asyncio
234
+ async def test_boosts_relevance_for_results(self, tool: ClinicalTrialsTool) -> None:
235
+ """Trials with results should have higher relevance score."""
236
+ with_results = {
237
+ "protocolSection": {
238
+ "identificationModule": {"nctId": "NCT11111111", "briefTitle": "With Results"},
239
+ "statusModule": {"overallStatus": "COMPLETED", "startDateStruct": {"date": "2023"}},
240
+ "descriptionModule": {"briefSummary": "Summary"},
241
+ "designModule": {"phases": []},
242
+ "conditionsModule": {"conditions": []},
243
+ "armsInterventionsModule": {"interventions": []},
244
+ "outcomesModule": {},
245
+ },
246
+ "hasResults": True,
247
+ }
248
+ without_results = {
249
+ "protocolSection": {
250
+ "identificationModule": {"nctId": "NCT22222222", "briefTitle": "No Results"},
251
+ "statusModule": {
252
+ "overallStatus": "RECRUITING",
253
+ "startDateStruct": {"date": "2024"},
254
+ },
255
+ "descriptionModule": {"briefSummary": "Summary"},
256
+ "designModule": {"phases": []},
257
+ "conditionsModule": {"conditions": []},
258
+ "armsInterventionsModule": {"interventions": []},
259
+ "outcomesModule": {},
260
+ },
261
+ "hasResults": False,
262
+ }
263
+
264
+ mock_response = MagicMock()
265
+ mock_response.json.return_value = {"studies": [with_results, without_results]}
266
+ mock_response.raise_for_status = MagicMock()
267
+
268
+ with patch("requests.get", return_value=mock_response):
269
+ results = await tool.search("test", max_results=2)
270
+
271
+ assert results[0].relevance == 0.90 # With results
272
+ assert results[1].relevance == 0.85 # Without results
273
+
274
+
275
  @pytest.mark.integration
276
  class TestClinicalTrialsIntegration:
277
  """Integration tests with real API."""
 
294
  or "phase" in all_content
295
  )
296
  assert has_intervention
297
+
298
+ @pytest.mark.asyncio
299
+ async def test_real_completed_trial_has_outcome(self) -> None:
300
+ """Real completed Phase 3 trials should have outcome measures."""
301
+ tool = ClinicalTrialsTool()
302
+
303
+ # Search for completed Phase 3 ED trials (likely to have outcomes)
304
+ results = await tool.search(
305
+ "sildenafil erectile dysfunction Phase 3 COMPLETED", max_results=3
306
+ )
307
+
308
+ # Skip if API returns no results (external dependency)
309
+ if not results:
310
+ pytest.skip("API returned no results for this query")
311
+
312
+ # At least one should have primary outcome
313
+ has_outcome = any("Primary Outcome" in r.content for r in results)
314
+ assert has_outcome, "No completed trials with outcome measures found"