Spaces:

DataQuests
/

DeepCritical

Running

App Files Files Community

VibecoderMcSwaggins committed 14 days ago

Commit

316dc7d

unverified ·

1 Parent(s): 9286db5

feat: implement async-safe rate limiting (Phase 17) (#40)

Browse files

- Add 'limits' library dependency
- Implement async RateLimiter with finer-grained polling (0.01s)
- Refactor PubMedTool to use shared singleton limiter
- Add comprehensive unit tests and demo script

Files changed (6) hide show

examples/rate_limiting_demo.py +82 -0
pyproject.toml +1 -0
src/tools/pubmed.py +5 -9
src/tools/rate_limiter.py +121 -0
tests/unit/tools/test_rate_limiting.py +104 -0
uv.lock +16 -0

examples/rate_limiting_demo.py ADDED Viewed

	@@ -0,0 +1,82 @@

+#!/usr/bin/env python3
+"""Demo script to verify rate limiting works correctly."""
+import asyncio
+import time
+from src.tools.pubmed import PubMedTool
+from src.tools.rate_limiter import RateLimiter, get_pubmed_limiter, reset_pubmed_limiter
+async def test_basic_limiter():
+    """Time six sequential acquires against a standalone 3/second limiter.
+    Prints the demo banner, the elapsed time at which each request is admitted,
+    and the total elapsed time for all six requests.
+    """
+    print("=" * 60)
+    print("Rate Limiting Demo")
+    print("=" * 60)
+    # Test 1: Basic limiter
+    print("\n[Test 1] Testing 3/second limiter...")
+    limiter = RateLimiter("3/second")
+    start = time.monotonic()
+    for i in range(6):
+        await limiter.acquire()
+        elapsed = time.monotonic() - start
+        print(f"  Request {i+1} at {elapsed:.2f}s")
+    total = time.monotonic() - start
+    # Moving window: requests 1-3 are admitted at once, and requests 4-6 are
+    # admitted together as soon as the first three age out of the 1-second
+    # window, so the whole run takes about one second (two bursts), not two.
+    print(f"  Total time for 6 requests: {total:.2f}s (expected ~1s)")
+async def test_pubmed_limiter():
+    """Print the shared PubMed limiter's rate and whether two tools reuse one limiter."""
+    print("\n[Test 2] Testing PubMed limiter (shared)...")
+    # Drop any previously created shared limiter so this demo builds its own.
+    reset_pubmed_limiter()
+    # Requested without an API key, so a freshly built limiter uses the keyless rate.
+    keyless_limiter = get_pubmed_limiter(api_key=None)
+    print(f"  Rate without key: {keyless_limiter.rate}")
+    # Two independently constructed tools, compared by the identity of the
+    # limiter object each one holds.
+    first_tool = PubMedTool()
+    second_tool = PubMedTool()
+    shares_limiter = first_tool._limiter is second_tool._limiter
+    print(f"  Tools share limiter: {shares_limiter}")
+async def test_concurrent_requests():
+    """Fire ten concurrent acquires at a 5/second limiter and print when each was admitted.
+    All ten coroutines contend for the same limiter; the sorted admission times
+    (relative to launch) and the time of the last admission are printed.
+    """
+    print("\n[Test 3] Testing concurrent request limiting...")
+    limiter = RateLimiter("5/second")
+    async def make_request(i: int) -> float:
+        # Return the monotonic timestamp at which this request was admitted.
+        await limiter.acquire()
+        return time.monotonic()
+    start = time.monotonic()
+    # Launch 10 concurrent requests
+    tasks = [make_request(i) for i in range(10)]
+    times = await asyncio.gather(*tasks)
+    # Calculate distribution
+    relative_times = [t - start for t in times]
+    print(f"  Request times: {[f'{t:.2f}s' for t in sorted(relative_times)]}")
+    total = max(relative_times)
+    # Moving window: five requests are admitted immediately and the other five
+    # as soon as the first burst leaves the 1-second window, i.e. after ~1s.
+    print(f"  All 10 requests completed in {total:.2f}s (expected ~1s)")
+async def main():
+    """Run the three demos one after another, then print the closing banner."""
+    # Awaited sequentially so the demos' output and timings do not interleave.
+    for demo in (test_basic_limiter, test_pubmed_limiter, test_concurrent_requests):
+        await demo()
+    print("\n" + "=" * 60)
+    print("Demo complete!")
+if __name__ == "__main__":
+    asyncio.run(main())

pyproject.toml CHANGED Viewed

@@ -24,6 +24,7 @@ dependencies = [
     "tenacity>=8.2", # Retry logic
     "structlog>=24.1", # Structured logging
     "requests>=2.32.5", # ClinicalTrials.gov (httpx blocked by WAF)
 ]
 [project.optional-dependencies]

     "tenacity>=8.2", # Retry logic
     "structlog>=24.1", # Structured logging
     "requests>=2.32.5", # ClinicalTrials.gov (httpx blocked by WAF)
+    "limits>=3.0", # Rate limiting
 ]
 [project.optional-dependencies]

src/tools/pubmed.py CHANGED Viewed

@@ -1,6 +1,5 @@
 """PubMed search tool using NCBI E-utilities."""
-import asyncio
 from typing import Any
 import httpx
@@ -8,6 +7,7 @@ import xmltodict
 from tenacity import retry, stop_after_attempt, wait_exponential
 from src.tools.query_utils import preprocess_query
 from src.utils.config import settings
 from src.utils.exceptions import RateLimitError, SearchError
 from src.utils.models import Citation, Evidence
@@ -17,7 +17,6 @@ class PubMedTool:
     """Search tool for PubMed/NCBI."""
     BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
-    RATE_LIMIT_DELAY = 0.34  # ~3 requests/sec without API key
     HTTP_TOO_MANY_REQUESTS = 429
     def __init__(self, api_key: str | None = None) -> None:
@@ -25,7 +24,9 @@ class PubMedTool:
         # Ignore placeholder values from .env.example
         if self.api_key == "your-ncbi-key-here":
             self.api_key = None
-        self._last_request_time = 0.0
     @property
     def name(self) -> str:
@@ -33,12 +34,7 @@ class PubMedTool:
     async def _rate_limit(self) -> None:
         """Enforce NCBI rate limiting."""
-        loop = asyncio.get_running_loop()
-        now = loop.time()
-        elapsed = now - self._last_request_time
-        if elapsed < self.RATE_LIMIT_DELAY:
-            await asyncio.sleep(self.RATE_LIMIT_DELAY - elapsed)
-        self._last_request_time = loop.time()
     def _build_params(self, **kwargs: Any) -> dict[str, Any]:
         """Build request params with optional API key."""

 """PubMed search tool using NCBI E-utilities."""
 from typing import Any
 import httpx
 from tenacity import retry, stop_after_attempt, wait_exponential
 from src.tools.query_utils import preprocess_query
+from src.tools.rate_limiter import get_pubmed_limiter
 from src.utils.config import settings
 from src.utils.exceptions import RateLimitError, SearchError
 from src.utils.models import Citation, Evidence
     """Search tool for PubMed/NCBI."""
     BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
     HTTP_TOO_MANY_REQUESTS = 429
     def __init__(self, api_key: str | None = None) -> None:
         # Ignore placeholder values from .env.example
         if self.api_key == "your-ncbi-key-here":
             self.api_key = None
+        # Use shared rate limiter
+        self._limiter = get_pubmed_limiter(self.api_key)
     @property
     def name(self) -> str:
     async def _rate_limit(self) -> None:
         """Enforce NCBI rate limiting."""
+        await self._limiter.acquire()
     def _build_params(self, **kwargs: Any) -> dict[str, Any]:
         """Build request params with optional API key."""

src/tools/rate_limiter.py ADDED Viewed

	@@ -0,0 +1,121 @@

+"""Rate limiting utilities using the limits library."""
+import asyncio
+from typing import ClassVar
+from limits import RateLimitItem, parse
+from limits.storage import MemoryStorage
+from limits.strategies import MovingWindowRateLimiter
+class RateLimiter:
+    """
+    Asyncio-friendly wrapper around the limits library's moving-window strategy.
+    Each instance owns its own in-memory storage and records every request under
+    one fixed identity, so all users of an instance draw from the same budget.
+    """
+    def __init__(self, rate: str) -> None:
+        """
+        Build a limiter for the given rate.
+        Args:
+            rate: Rate string passed to ``limits.parse``, e.g. "3/second" or "10/second"
+        """
+        self.rate = rate
+        self._storage = MemoryStorage()
+        self._limiter = MovingWindowRateLimiter(self._storage)
+        self._rate_limit: RateLimitItem = parse(rate)
+        self._identity = "default"  # One shared bucket for every caller
+    async def acquire(self, wait: bool = True) -> bool:
+        """
+        Ask for permission to make one request.
+        Polls the limiter and, while the request is refused, yields to the event
+        loop between attempts so other coroutines keep running.
+        Args:
+            wait: If True, keep polling until admitted. If False, try exactly once.
+        Returns:
+            True once admitted; False only when wait is False and the single
+            attempt was refused.
+        """
+        # hit() is a synchronous call: it checks the limit and, when the request
+        # is admitted, counts it against the window.
+        while not self._limiter.hit(self._rate_limit, self._identity):
+            if not wait:
+                return False
+            # asyncio.sleep() rather than time.sleep(): the wait must not block
+            # the event loop. 0.01s keeps the polling granularity fine.
+            await asyncio.sleep(0.01)
+        return True
+    def reset(self) -> None:
+        """Clear this limiter's storage (for testing)."""
+        self._storage.reset()
+# Singleton limiter for PubMed/NCBI
+_pubmed_limiter: RateLimiter | None = None
+def get_pubmed_limiter(api_key: str | None = None) -> RateLimiter:
+    """
+    Return the process-wide PubMed rate limiter, creating it on first use.
+    The rate is chosen only when the limiter is created:
+    - Without key: 3 requests/second
+    - With key: 10 requests/second
+    Later calls return the existing instance unchanged, whatever api_key they pass.
+    NOTE(review): a keyless caller arriving after a keyed one therefore gets the
+    10/second limiter - confirm this is acceptable for NCBI's keyless limit.
+    Args:
+        api_key: NCBI API key (optional); only its truthiness is used
+    Returns:
+        Shared RateLimiter instance
+    """
+    global _pubmed_limiter
+    # Fast path: the shared limiter already exists, so api_key is not consulted.
+    if _pubmed_limiter is not None:
+        return _pubmed_limiter
+    _pubmed_limiter = RateLimiter("10/second" if api_key else "3/second")
+    return _pubmed_limiter
+def reset_pubmed_limiter() -> None:
+    """
+    Discard the shared PubMed limiter (for testing).
+    Only the module-level reference is cleared: the next get_pubmed_limiter()
+    call builds a fresh instance, while objects that already hold the old
+    limiter keep using it.
+    """
+    global _pubmed_limiter
+    _pubmed_limiter = None
+# Factory for other APIs
+class RateLimiterFactory:
+    """Registry that hands out one shared RateLimiter per API name."""
+    # Class-level cache: api_name -> limiter, shared by every caller.
+    _limiters: ClassVar[dict[str, RateLimiter]] = {}
+    @classmethod
+    def get(cls, api_name: str, rate: str) -> RateLimiter:
+        """
+        Return the limiter registered for an API, creating it on first request.
+        Args:
+            api_name: Unique identifier for the API
+            rate: Rate limit string (e.g., "10/second"); used only when the
+                limiter for api_name does not exist yet
+        Returns:
+            RateLimiter instance (shared for same api_name)
+        """
+        existing = cls._limiters.get(api_name)
+        if existing is not None:
+            return existing
+        created = RateLimiter(rate)
+        cls._limiters[api_name] = created
+        return created
+    @classmethod
+    def reset_all(cls) -> None:
+        """Forget every registered limiter (for testing)."""
+        cls._limiters.clear()

tests/unit/tools/test_rate_limiting.py ADDED Viewed

	@@ -0,0 +1,104 @@

+"""Tests for rate limiting functionality."""
+import asyncio
+import time
+import pytest
+from src.tools.rate_limiter import RateLimiter, get_pubmed_limiter, reset_pubmed_limiter
+class TestRateLimiter:
+    """Tests for RateLimiter construction and its waiting behaviour."""
+    def test_create_limiter_without_api_key(self) -> None:
+        """Should keep the "3/second" rate string it was built with (the keyless PubMed rate)."""
+        limiter = RateLimiter(rate="3/second")
+        assert limiter.rate == "3/second"
+    def test_create_limiter_with_api_key(self) -> None:
+        """Should keep the "10/second" rate string it was built with (the keyed PubMed rate)."""
+        limiter = RateLimiter(rate="10/second")
+        assert limiter.rate == "10/second"
+    @pytest.mark.asyncio
+    async def test_limiter_allows_requests_under_limit(self) -> None:
+        """Should allow requests under the rate limit."""
+        limiter = RateLimiter(rate="10/second")
+        # 3 requests against a budget of 10 should each be admitted
+        for _ in range(3):
+            allowed = await limiter.acquire()
+            assert allowed is True
+    @pytest.mark.asyncio
+    async def test_limiter_blocks_when_exceeded(self) -> None:
+        """Should wait when rate limit exceeded."""
+        limiter = RateLimiter(rate="2/second")
+        # First 2 fit in the budget and should be instant
+        await limiter.acquire()
+        await limiter.acquire()
+        # Third exceeds the budget and has to wait
+        start = time.monotonic()
+        await limiter.acquire()
+        elapsed = time.monotonic() - start
+        # Moving window of 2 per 1-second window: the third call is held until
+        # the earlier hits age out (~1s). 0.3s is only a loose lower bound.
+        assert elapsed >= 0.3
+    @pytest.mark.asyncio
+    async def test_limiter_resets_after_window(self) -> None:
+        """Rate limit should reset after time window."""
+        limiter = RateLimiter(rate="5/second")
+        # Use up the limit
+        for _ in range(5):
+            await limiter.acquire()
+        # Sleep past the 1-second window so the earlier hits no longer count
+        await asyncio.sleep(1.1)
+        # Should be allowed again without waiting
+        start = time.monotonic()
+        await limiter.acquire()
+        elapsed = time.monotonic() - start
+        assert elapsed < 0.1  # Should be nearly instant
+class TestGetPubmedLimiter:
+    """Tests for the shared PubMed limiter accessor."""
+    @pytest.fixture(autouse=True)
+    def setup_teardown(self):
+        """Reset limiter before and after each test."""
+        reset_pubmed_limiter()
+        yield
+        reset_pubmed_limiter()
+    def test_limiter_without_api_key(self) -> None:
+        """Should return 3/sec limiter without key."""
+        limiter = get_pubmed_limiter(api_key=None)
+        # Exact match: a substring check for "3" would also accept e.g. "30/second".
+        assert limiter.rate == "3/second"
+    def test_limiter_with_api_key(self) -> None:
+        """Should return 10/sec limiter with key."""
+        limiter = get_pubmed_limiter(api_key="my-api-key")
+        # Exact match: a substring check for "10" would also accept e.g. "100/second".
+        assert limiter.rate == "10/second"
+    def test_limiter_is_singleton(self) -> None:
+        """Same API key should return same limiter instance."""
+        limiter1 = get_pubmed_limiter(api_key="key1")
+        limiter2 = get_pubmed_limiter(api_key="key1")
+        assert limiter1 is limiter2
+    def test_different_keys_different_limiters(self) -> None:
+        """Different API keys should still share one limiter instance.
+        Despite the test's name, the limiter is shared on purpose: every key
+        is limited against the same NCBI API.
+        """
+        limiter1 = get_pubmed_limiter(api_key="key1")
+        limiter2 = get_pubmed_limiter(api_key="key2")
+        assert limiter1 is limiter2  # Shared NCBI rate limit

uv.lock CHANGED Viewed

@@ -1066,6 +1066,7 @@ dependencies = [
     { name = "gradio", extra = ["mcp"] },
     { name = "httpx" },
     { name = "huggingface-hub" },
     { name = "openai" },
     { name = "pydantic" },
     { name = "pydantic-ai" },
@@ -1116,6 +1117,7 @@ requires-dist = [
     { name = "gradio", extras = ["mcp"], specifier = ">=6.0.0" },
     { name = "httpx", specifier = ">=0.27" },
     { name = "huggingface-hub", specifier = ">=0.20.0" },
     { name = "llama-index", marker = "extra == 'modal'", specifier = ">=0.11.0" },
     { name = "llama-index-embeddings-openai", marker = "extra == 'modal'" },
     { name = "llama-index-llms-openai", marker = "extra == 'modal'" },
@@ -2259,6 +2261,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ca/ec/65f7d563aa4a62dd58777e8f6aa882f15db53b14eb29aba0c28a20f7eb26/kubernetes-34.1.0-py2.py3-none-any.whl", hash = "sha256:bffba2272534e224e6a7a74d582deb0b545b7c9879d2cd9e4aae9481d1f2cc2a", size = 2008380 },
 ]
 [[package]]
 name = "llama-cloud"
 version = "0.1.35"

     { name = "gradio", extra = ["mcp"] },
     { name = "httpx" },
     { name = "huggingface-hub" },
+    { name = "limits" },
     { name = "openai" },
     { name = "pydantic" },
     { name = "pydantic-ai" },
     { name = "gradio", extras = ["mcp"], specifier = ">=6.0.0" },
     { name = "httpx", specifier = ">=0.27" },
     { name = "huggingface-hub", specifier = ">=0.20.0" },
+    { name = "limits", specifier = ">=3.0" },
     { name = "llama-index", marker = "extra == 'modal'", specifier = ">=0.11.0" },
     { name = "llama-index-embeddings-openai", marker = "extra == 'modal'" },
     { name = "llama-index-llms-openai", marker = "extra == 'modal'" },
     { url = "https://files.pythonhosted.org/packages/ca/ec/65f7d563aa4a62dd58777e8f6aa882f15db53b14eb29aba0c28a20f7eb26/kubernetes-34.1.0-py2.py3-none-any.whl", hash = "sha256:bffba2272534e224e6a7a74d582deb0b545b7c9879d2cd9e4aae9481d1f2cc2a", size = 2008380 },
 ]
+[[package]]
+name = "limits"
+version = "5.6.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "deprecated" },
+    { name = "packaging" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/bb/e5/c968d43a65128cd54fb685f257aafb90cd5e4e1c67d084a58f0e4cbed557/limits-5.6.0.tar.gz", hash = "sha256:807fac75755e73912e894fdd61e2838de574c5721876a19f7ab454ae1fffb4b5", size = 182984 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/40/96/4fcd44aed47b8fcc457653b12915fcad192cd646510ef3f29fd216f4b0ab/limits-5.6.0-py3-none-any.whl", hash = "sha256:b585c2104274528536a5b68864ec3835602b3c4a802cd6aa0b07419798394021", size = 60604 },
+]
 [[package]]
 name = "llama-cloud"
 version = "0.1.35"