| # |
| |
| # ARG TARGETPLATFORM |
| # ARG BUILDPLATFORM |
| |
| # |
| # ARG PYTHON_VERSION=3.10 |
| |
| # |
| # FROM python:${PYTHON_VERSION}-slim as base |
| |
| # |
| # ARG INSTALL_TYPE=basic |
| # ARG ENABLE_GPU=false |
| |
| # |
| # LABEL maintainer="unclecode" |
| # LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper" |
| # LABEL version="1.0" |
| |
| # |
| # ENV PYTHONUNBUFFERED=1 \ |
| |
| |
| |
| |
| |
| |
| # |
| # RUN apt-get update && apt-get install -y --no-install-recommends \ |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| # |
| # RUN apt-get update && apt-get install -y --no-install-recommends \ |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| # |
| # RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \ |
| |
| |
| |
| |
| |
| |
| |
| # |
| # WORKDIR /app |
| |
| # |
| # COPY . . |
| |
| # |
| # RUN pip install --no-cache-dir -r requirements.txt |
| |
| # |
| # RUN pip install fastapi uvicorn psutil |
| |
| # |
| # RUN if [ "$INSTALL_TYPE" = "all" ] ; then \ |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| # |
| # RUN if [ "$INSTALL_TYPE" = "all" ] ; then \ |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| # |
| # RUN pip install --no-cache-dir \ |
| |
| |
| |
| |
| |
| # |
| # RUN mkdocs build |
| |
| # |
| # RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ |
| |
| |
| |
| |
| |
| # |
| # EXPOSE 8000 11235 9222 8080 |
| |
| # |
| # CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"] |
| |
| # syntax=docker/dockerfile:1.4 |
|
|
| # Platform build arguments. Declared before the first FROM they are only |
| # visible to FROM lines; a stage that needs them in RUN must redeclare them. |
| ARG TARGETPLATFORM |
| ARG BUILDPLATFORM |
| |
| # Python version used to pick the base image tag |
| ARG PYTHON_VERSION=3.10 |
| |
| # Base stage with system dependencies |
| FROM python:${PYTHON_VERSION}-slim AS base |
| |
| # Stage-scoped build arguments: |
| #   INSTALL_TYPE - "basic" (default), "all", "torch" or "transformer" |
| #   ENABLE_GPU   - set to "true" to request the CUDA toolkit install |
| ARG INSTALL_TYPE=basic |
| ARG ENABLE_GPU=false |
| |
| # Image metadata |
| LABEL maintainer="unclecode" \ |
|       description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper" \ |
|       version="1.0" |
| |
| # Runtime environment for Python and pip |
| ENV PYTHONUNBUFFERED=1 \ |
|     PYTHONDONTWRITEBYTECODE=1 \ |
|     PIP_NO_CACHE_DIR=1 \ |
|     PIP_DISABLE_PIP_VERSION_CHECK=1 \ |
|     PIP_DEFAULT_TIMEOUT=100 |
| |
| # Build-time only: keeps apt non-interactive during RUN steps without |
| # persisting the setting into the environment of running containers. |
| ARG DEBIAN_FRONTEND=noninteractive |
| |
| # System packages (installed while still root): compiler toolchain, download |
| # and VCS tools, and the Python / JPEG / PNG development headers. |
| # The apt lists are removed in the same layer so they never reach the image. |
| RUN apt-get update \ |
|     && apt-get install -y --no-install-recommends \ |
|         build-essential \ |
|         cmake \ |
|         curl \ |
|         git \ |
|         gnupg \ |
|         libjpeg-dev \ |
|         libpng-dev \ |
|         pkg-config \ |
|         python3-dev \ |
|         wget \ |
|     && rm -rf /var/lib/apt/lists/* |
| |
| # Shared libraries required by the Playwright browser on Linux. |
| # The apt lists are removed in the same layer so they never reach the image. |
| RUN apt-get update \ |
|     && apt-get install -y --no-install-recommends \ |
|         libasound2 \ |
|         libatk-bridge2.0-0 \ |
|         libatk1.0-0 \ |
|         libatspi2.0-0 \ |
|         libcairo2 \ |
|         libcups2 \ |
|         libdbus-1-3 \ |
|         libdrm2 \ |
|         libgbm1 \ |
|         libglib2.0-0 \ |
|         libnspr4 \ |
|         libnss3 \ |
|         libpango-1.0-0 \ |
|         libx11-6 \ |
|         libxcb1 \ |
|         libxcomposite1 \ |
|         libxdamage1 \ |
|         libxext6 \ |
|         libxfixes3 \ |
|         libxkbcommon0 \ |
|         libxrandr2 \ |
|     && rm -rf /var/lib/apt/lists/* |
| |
| # GPU support if enabled and architecture is supported. |
| # TARGETPLATFORM is redeclared here because the declaration before FROM is not |
| # visible to RUN; without it the platform test below can never be true. |
| # NOTE(review): confirm nvidia-cuda-toolkit is available from the apt sources |
| # configured in the base image before building with ENABLE_GPU=true. |
| ARG TARGETPLATFORM |
| RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \ |
|         apt-get update && apt-get install -y --no-install-recommends \ |
|             nvidia-cuda-toolkit \ |
|         && rm -rf /var/lib/apt/lists/* ; \ |
|     else \ |
|         echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \ |
|     fi |
| |
| # Create an unprivileged account (UID 1000, with a home directory) and run |
| # every following instruction as that account. |
| RUN useradd --create-home --uid 1000 user |
| USER user |
| # Put the account's ~/.local/bin first on PATH |
| ENV PATH="/home/user/.local/bin:${PATH}" |
| |
| # Create and set working directory |
| WORKDIR /app |
| |
| # Copy only the dependency manifest first, so the install layers below stay |
| # cached until requirements.txt itself changes instead of on every source edit. |
| # NOTE(review): assumes requirements.txt does not reference other files in the |
| # build context - confirm. |
| COPY --chown=user requirements.txt . |
| |
| # Install base requirements |
| RUN pip install --no-cache-dir -r requirements.txt |
| |
| # Install the packages used to serve the API |
| RUN pip install --no-cache-dir fastapi uvicorn psutil |
| |
| # Copy the entire project with correct ownership |
| COPY --chown=user . . |
| |
| # Optional ML stack: installed only when INSTALL_TYPE is "all", followed by a |
| # download of the NLTK "punkt" and "stopwords" data. Any other INSTALL_TYPE |
| # matches no case arm and the step does nothing. |
| RUN case "$INSTALL_TYPE" in \ |
|         all) \ |
|             pip install --no-cache-dir \ |
|                 torch \ |
|                 torchvision \ |
|                 torchaudio \ |
|                 scikit-learn \ |
|                 nltk \ |
|                 transformers \ |
|                 tokenizers \ |
|             && python -m nltk.downloader punkt stopwords \ |
|             ;; \ |
|     esac |
| |
| # Install the project from /app with the extras selected by INSTALL_TYPE. |
| # The "all" and "transformer" variants also run crawl4ai.model_loader once the |
| # install succeeds; any unrecognised value falls back to the plain install. |
| RUN case "$INSTALL_TYPE" in \ |
|         all) \ |
|             pip install ".[all]" && \ |
|             python -m crawl4ai.model_loader ;; \ |
|         torch) \ |
|             pip install ".[torch]" ;; \ |
|         transformer) \ |
|             pip install ".[transformer]" && \ |
|             python -m crawl4ai.model_loader ;; \ |
|         *) \ |
|             pip install "." ;; \ |
|     esac |
| |
| # Documentation toolchain: MkDocs and its add-on packages |
| RUN pip install --no-cache-dir mkdocs mkdocs-material mkdocs-terminal pymdown-extensions |
| |
| # Generate the static documentation site from the project sources |
| RUN mkdocs build |
| |
| # Install the Chromium browser used by Playwright. |
| # Installed unconditionally: the same command applies on every target platform, |
| # so no per-platform branching (and no second install) is needed. |
| RUN python -m playwright install chromium |
| |
| # Ports the container may listen on (documentation only; does not publish them) |
| EXPOSE 8000 11235 9222 8080 |
| |
| # Start the FastAPI server |
| CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"] |