# Start with the official Python 3.11 image
FROM python:3.11

# Set the working directory inside the container
WORKDIR /app

# Install system dependencies needed to build wheels for libraries like llama-cpp-python
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file first so the dependency layer is cached when only application code changes
COPY requirements.txt requirements.txt

# Disable the CUDA, ROCm (HIP), and Metal backends so llama-cpp-python compiles a CPU-only wheel during installation
ENV CMAKE_ARGS="-DLLAMA_CUBLAS=OFF -DLLAMA_HIPBLAS=OFF -DLLAMA_METAL=OFF"
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the rest of the application code
COPY . .

# Expose the port the app will run on (Hugging Face Spaces uses 7860)
EXPOSE 7860

# Run uvicorn through a shell so ${PORT} is expanded at runtime, falling back to 7860 if it is unset
CMD ["/bin/sh", "-c", "uvicorn api.main:app --host 0.0.0.0 --port ${PORT:-7860}"]
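
# A minimal local smoke test, assuming Docker is installed; the image tag "llama-api" is only an
# illustrative name, and overriding PORT demonstrates why the shell-form CMD above is needed:
#   docker build -t llama-api .
#   docker run --rm -p 7860:7860 llama-api
#   docker run --rm -e PORT=8080 -p 8080:8080 llama-api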