Deploying a CV API
Building a production-ready image classification API with FastAPI
Objectives
By the end of this practical work, you will be able to:
- Create a FastAPI application for image classification
- Implement proper image preprocessing and validation
- Add confidence thresholds and error handling
- Containerize the application with Docker
- Test the API with various clients
Prerequisites
- Completed Practical Work 4 (trained model available)
- Python 3.8+ with pip
- Docker installed (optional but recommended)
- Basic understanding of REST APIs
Install required packages:
pip install fastapi uvicorn python-multipart pillow torch torchvision
Instructions
Step 1: Project Structure
Create the following project structure:
cv-api/
├── app/
│ ├── __init__.py
│ ├── main.py
│ ├── model.py
│ ├── schemas.py
│ └── config.py
├── models/
│ └── best_model.pth
├── tests/
│ └── test_api.py
├── Dockerfile
├── requirements.txt
└── README.md
Step 2: Create Configuration
Set up configuration management:
# app/config.py
from typing import Set

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application settings, overridable via environment variables or .env."""

    app_name: str = "CV Classification API"
    # Checkpoint loaded once at startup by the lifespan handler.
    model_path: str = "models/best_model.pth"
    # Predictions below this probability are flagged as not confident.
    confidence_threshold: float = 0.7
    max_image_size: int = 10 * 1024 * 1024  # 10MB upload limit
    # Typed as Set[str] (was bare `set`) so pydantic validates element types.
    allowed_extensions: Set[str] = {"jpg", "jpeg", "png", "webp"}

    # pydantic-settings v2 configuration (replaces the deprecated inner
    # `class Config`).
    model_config = SettingsConfigDict(env_file=".env")


settings = Settings()
Step 3: Create Data Schemas
Define request and response models:
# app/schemas.py
from pydantic import BaseModel
from typing import Dict, Optional


class PredictionResult(BaseModel):
    """Classification prediction result."""

    predicted_class: str  # label of the top-scoring class
    confidence: float  # probability of the predicted class, in [0, 1]
    probabilities: Dict[str, float]  # full per-class softmax distribution
    is_confident: bool  # True when confidence >= the configured threshold


class HealthResponse(BaseModel):
    """Health check response."""

    status: str  # "healthy" when the service is up
    model_loaded: bool  # whether the classifier singleton is initialized
    version: str  # API version string


class ErrorResponse(BaseModel):
    """Error response."""

    error: str  # short error identifier
    detail: Optional[str] = None  # optional human-readable detail
Step 4: Create Model Service
Implement the model loading and inference:
# app/model.py
import logging
from io import BytesIO
from typing import Dict, Optional, Tuple

import torch
import torch.nn as nn
from PIL import Image
from torchvision import models, transforms

logger = logging.getLogger(__name__)
class ImageClassifier:
    """Image classification model wrapper.

    Loads a fine-tuned ResNet-50 checkpoint and runs single-image inference.
    The checkpoint must contain "classes" (ordered label list) and
    "model_state_dict" entries, matching what training produced.
    """

    def __init__(self, model_path: str, device: Optional[str] = None):
        """Load the model from ``model_path``.

        Args:
            model_path: Path to the ``.pth`` checkpoint file.
            device: Torch device string; auto-selects CUDA when available
                if not given.
        """
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.model = None
        self.classes = None
        self.transform = self._get_transform()
        self._load_model(model_path)

    def _get_transform(self) -> transforms.Compose:
        """Return the ImageNet-style preprocessing pipeline (resize, crop, normalize)."""
        return transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            # ImageNet mean/std -- must match the normalization used in training.
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ])

    def _load_model(self, model_path: str):
        """Load class names and weights from the checkpoint file.

        Raises:
            FileNotFoundError: If the checkpoint does not exist.
            KeyError: If a required checkpoint entry is missing.
        """
        logger.info(f"Loading model from {model_path}")
        # SECURITY: torch.load unpickles arbitrary objects -- only load
        # checkpoints from trusted sources (consider weights_only=True on
        # torch >= 2.0 if the checkpoint contains only tensors/primitives).
        checkpoint = torch.load(model_path, map_location=self.device)
        self.classes = checkpoint["classes"]
        # Recreate the architecture used in training, then load the weights.
        self.model = models.resnet50(weights=None)
        num_features = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(num_features, len(self.classes))
        )
        self.model.load_state_dict(checkpoint["model_state_dict"])
        self.model.to(self.device)
        self.model.eval()  # disable dropout / BN updates for inference
        logger.info(f"Model loaded. Classes: {self.classes}")

    def preprocess(self, image_bytes: bytes) -> torch.Tensor:
        """Decode raw image bytes into a normalized (1, 3, 224, 224) batch tensor.

        Raises:
            PIL.UnidentifiedImageError: If the bytes are not a decodable image.
        """
        image = Image.open(BytesIO(image_bytes)).convert("RGB")
        tensor = self.transform(image).unsqueeze(0)
        return tensor.to(self.device)

    def predict(self, image_bytes: bytes) -> Tuple[str, float, Dict[str, float]]:
        """Classify an image; return (label, confidence, per-class probabilities)."""
        input_tensor = self.preprocess(image_bytes)
        with torch.no_grad():  # inference only: skip autograd bookkeeping
            outputs = self.model(input_tensor)
            probabilities = torch.softmax(outputs, dim=1)
            confidence, predicted = probabilities.max(1)
        prob_dict = {
            cls: prob.item()
            for cls, prob in zip(self.classes, probabilities[0])
        }
        return (
            self.classes[predicted.item()],
            confidence.item(),
            prob_dict
        )
# Module-level singleton so the model is loaded exactly once per process.
# Annotated Optional (was `ImageClassifier = None`, a type error): it is
# None until init_classifier() runs during application startup.
classifier: Optional["ImageClassifier"] = None


def get_classifier() -> Optional["ImageClassifier"]:
    """Return the process-wide classifier instance, or None if not initialized."""
    # Read-only access: no `global` declaration needed.
    return classifier


def init_classifier(model_path: str):
    """Create the singleton classifier from the checkpoint at ``model_path``."""
    global classifier
    classifier = ImageClassifier(model_path)
Step 5: Create the FastAPI Application
Build the main API application:
# app/main.py
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import logging
from .config import settings
from .schemas import PredictionResult, HealthResponse, ErrorResponse
from .model import init_classifier, get_classifier
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler.

    Loads the model once at startup so the first request does not pay the
    model-loading cost; code after ``yield`` runs at shutdown.
    """
    # Startup
    logger.info("Starting application...")
    init_classifier(settings.model_path)
    logger.info("Model loaded successfully")
    yield
    # Shutdown
    logger.info("Shutting down...")
# FastAPI application; `lifespan` wires model loading into startup.
app = FastAPI(
    title=settings.app_name,
    description="Image classification API using transfer learning",
    version="1.0.0",
    lifespan=lifespan
)

# Add CORS middleware
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# disallowed by the CORS spec (browsers reject wildcard origins when
# credentials are included) -- restrict origins or drop credentials before
# production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report service liveness and whether the model singleton is loaded."""
    model_ready = get_classifier() is not None
    return HealthResponse(
        status="healthy",
        model_loaded=model_ready,
        version="1.0.0",
    )
@app.post("/predict", response_model=PredictionResult)
async def predict(file: UploadFile = File(...)):
    """Classify an uploaded image.

    Validates the file name, extension, and size before running inference.

    Raises:
        HTTPException(400): Missing filename, disallowed extension, or
            oversized upload.
        HTTPException(503): Model not loaded yet.
        HTTPException(500): Image could not be decoded or classified.
    """
    # Validate file extension. UploadFile.filename may be None or lack an
    # extension -- the original `.split(".")[-1]` would crash on None and
    # surface as a 500 instead of a 400.
    if not file.filename or "." not in file.filename:
        raise HTTPException(
            status_code=400,
            detail=f"File type not allowed. Allowed: {settings.allowed_extensions}"
        )
    extension = file.filename.rsplit(".", 1)[-1].lower()
    if extension not in settings.allowed_extensions:
        raise HTTPException(
            status_code=400,
            detail=f"File type not allowed. Allowed: {settings.allowed_extensions}"
        )
    # Read and validate file size
    contents = await file.read()
    if len(contents) > settings.max_image_size:
        raise HTTPException(
            status_code=400,
            detail=f"File too large. Max size: {settings.max_image_size} bytes"
        )
    # Resolve the model outside the try-block so "not loaded" maps to a
    # 503 rather than being swallowed by the generic 500 handler below.
    classifier = get_classifier()
    if classifier is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    # Run prediction
    try:
        predicted_class, confidence, probabilities = classifier.predict(contents)
    except Exception as e:
        # Decoding/inference failures -> generic 500; details go to the log only.
        logger.error(f"Prediction error: {e}")
        raise HTTPException(
            status_code=500,
            detail="Error processing image"
        )
    return PredictionResult(
        predicted_class=predicted_class,
        confidence=confidence,
        probabilities=probabilities,
        is_confident=confidence >= settings.confidence_threshold
    )
@app.get("/classes")
async def get_classes():
    """Return the list of class labels the model can predict.

    Raises:
        HTTPException(503): If the model has not been loaded yet.
    """
    classifier = get_classifier()
    if classifier is None:
        # Avoid an AttributeError (500) when called before startup completes.
        raise HTTPException(status_code=503, detail="Model not loaded")
    return {"classes": classifier.classes}
Step 6: Create Requirements File
List all dependencies:
# requirements.txt
fastapi==0.109.0
uvicorn[standard]==0.27.0
python-multipart==0.0.6
pillow==10.2.0
torch==2.1.2
torchvision==0.16.2
pydantic-settings==2.1.0
Step 7: Run and Test Locally
Start the server:
# Run the server
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
Test with curl:
# Health check
curl http://localhost:8000/health
# Classify an image
curl -X POST "http://localhost:8000/predict" \
-H "accept: application/json" \
-F "file=@test_image.jpg"
# Get available classes
curl http://localhost:8000/classes
Or test with Python:
# test_client.py
# Manual smoke-test script; expects the API to be running on localhost:8000.
import requests

# Health check
response = requests.get("http://localhost:8000/health")
print("Health:", response.json())

# Classify image -- sent as multipart/form-data under the "file" field,
# matching the UploadFile parameter name on the /predict endpoint.
with open("test_image.jpg", "rb") as f:
    response = requests.post(
        "http://localhost:8000/predict",
        files={"file": ("image.jpg", f, "image/jpeg")}
    )
print("Prediction:", response.json())
Step 8: Create Dockerfile
Containerize the application:
# Dockerfile
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies (shared libraries needed by image handling)
# NOTE(review): container runs as root; consider adding a non-root USER
# before deploying to production.
RUN apt-get update && apt-get install -y \
    libgl1-mesa-glx \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install first, so this layer stays cached when
# only application code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app/ ./app/
COPY models/ ./models/

# Expose port
EXPOSE 8000

# Run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
Build and run:
# Build the image
docker build -t cv-api .
# Run the container
docker run -p 8000:8000 cv-api
Step 9: Add API Tests
Create automated tests:
# tests/test_api.py
import pytest
from fastapi.testclient import TestClient
from app.main import app
from io import BytesIO
from PIL import Image
client = TestClient(app)
def create_test_image() -> bytes:
    """Create a simple in-memory JPEG test image (224x224, solid red)."""
    img = Image.new("RGB", (224, 224), color="red")
    buffer = BytesIO()
    img.save(buffer, format="JPEG")
    # getvalue() returns the whole buffer without the seek(0)+read() dance.
    return buffer.getvalue()
def test_health_check():
    """The health endpoint reports a healthy service with the model loaded."""
    resp = client.get("/health")
    assert resp.status_code == 200
    payload = resp.json()
    assert payload["status"] == "healthy"
    assert payload["model_loaded"] is True
def test_get_classes():
    """The classes endpoint responds with a payload containing the class list."""
    resp = client.get("/classes")
    assert resp.status_code == 200
    assert "classes" in resp.json()
def test_predict_success():
    """A valid JPEG upload yields a well-formed prediction payload."""
    resp = client.post(
        "/predict",
        files={"file": ("test.jpg", create_test_image(), "image/jpeg")},
    )
    assert resp.status_code == 200
    body = resp.json()
    for key in ("predicted_class", "confidence", "probabilities"):
        assert key in body
def test_predict_invalid_file_type():
    """Uploads with a disallowed extension are rejected with HTTP 400."""
    resp = client.post(
        "/predict",
        files={"file": ("test.txt", b"not an image", "text/plain")},
    )
    assert resp.status_code == 400
Run tests:
pip install pytest
pytest tests/ -v
Expected Output
After completing this practical work, you should have:
- A working FastAPI application serving your trained model
- Interactive API documentation at `/docs`
- Docker container ready for deployment
- Passing test suite
- Successful predictions via curl or Python client
Deliverables
- Complete project folder with all files
- Working Docker image
- Screenshot of the API documentation (`/docs`)
- Example API responses (JSON) for at least 3 test images
- Brief documentation on how to run and use the API
Bonus Challenges
- Challenge 1: Add request logging with timestamps and response times
- Challenge 2: Implement a batch prediction endpoint for multiple images
- Challenge 3: Add Prometheus metrics for monitoring
- Challenge 4: Deploy to a cloud platform (Railway, Render, or AWS)