Production architectures, optimization, and monitoring
| Factor | Cloud | Edge |
|---|---|---|
| Latency needs | 100ms+ acceptable | Real-time required |
| Processing type | Batch processing | Streaming/continuous |
| Model complexity | Large models OK | Must be lightweight |
| Connectivity | Reliable internet | Offline/remote |
| Data sensitivity | Can send to cloud | Must stay local |
| Scale | Variable workloads | Predictable volume |
Synchronous REST API — best for: low-to-medium traffic, simple request/response operations
Tools: FastAPI, Flask, AWS Lambda, Cloud Run
Asynchronous queue-based processing — best for: batch processing, variable loads, long-running tasks
Tools: RabbitMQ, AWS SQS, Redis Queue, Celery
Hybrid edge–cloud strategy: edge handles ~80% of simple cases locally, cloud handles the complex ones
from fastapi import FastAPI, File, UploadFile, HTTPException
import torch
from torchvision import transforms
from PIL import Image
import io
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# FastAPI application object; /docs (Swagger UI) and the OpenAPI schema
# are generated automatically from this metadata.
app = FastAPI(
    title="Product Classification API",
    description="CV API for product classification",
    version="1.0.0"
)

# Global model state — populated once by load_model() at startup; each
# stays None until then, so handlers must not run before startup completes.
model = None      # TorchScript module loaded from disk
classes = None    # class-name strings, index-aligned with model output logits
transform = None  # torchvision preprocessing pipeline applied to every upload
# NOTE(review): @app.on_event is deprecated in newer FastAPI releases in
# favor of a lifespan handler — confirm the installed version before upgrading.
@app.on_event("startup")
async def load_model():
    """Load the TorchScript model, class labels, and preprocessing
    pipeline into module-level globals at application startup.

    Loading once here (instead of per-request) avoids paying model
    deserialization cost on every call.
    """
    global model, classes, transform
    logger.info("Loading model...")
    # Load TorchScript model from the working directory.
    model = torch.jit.load("product_classifier.pt")
    model.eval()  # disable dropout/batch-norm training behavior
    classes = ["electronics", "clothing", "furniture", "food"]
    # Standard ImageNet-style preprocessing: resize, center-crop to
    # 224x224, then normalize with ImageNet channel means/stds.
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
    logger.info("Model loaded successfully!")
@app.get("/health")
async def health_check():
    """Health check endpoint for load balancers.

    Reports whether the startup hook has finished loading the model.
    """
    is_loaded = model is not None
    return {"status": "healthy", "model_loaded": is_loaded, "version": "1.0.0"}
@app.get("/")
async def root():
    """API documentation redirect.

    Points clients at the interactive docs and the health probe.
    """
    landing = {
        "message": "Product Classification API",
        "docs": "/docs",
        "health": "/health",
    }
    return landing
@app.post("/classify")
async def classify_image(file: UploadFile = File(...)):
    """Classify a single product image.

    Accepts a JPEG/PNG upload, runs it through the global model, and
    returns the predicted class with its softmax confidence.
    """
    # Reject unsupported content types before reading the body.
    if file.content_type not in ("image/jpeg", "image/png"):
        raise HTTPException(400, "Invalid image format")
    try:
        raw_bytes = await file.read()
        pil_image = Image.open(io.BytesIO(raw_bytes)).convert("RGB")
        batch = transform(pil_image).unsqueeze(0)  # add batch dimension
        with torch.no_grad():
            logits = model(batch)
            probabilities = torch.softmax(logits, dim=1)
            confidence, predicted = probabilities.max(1)
        return {
            "class": classes[predicted.item()],
            "confidence": round(confidence.item(), 4),
        }
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        raise HTTPException(500, str(e))
from typing import List
@app.post("/batch_classify")
async def batch_classify(files: List[UploadFile] = File(...)):
    """Classify multiple product images.

    Processes uploads one by one; a failure on any single file is
    recorded as an error entry rather than failing the whole batch.
    """
    results = []
    for upload in files:
        try:
            payload = await upload.read()
            img = Image.open(io.BytesIO(payload)).convert("RGB")
            tensor = transform(img).unsqueeze(0)  # add batch dimension
            with torch.no_grad():
                logits = model(tensor)
                probabilities = torch.softmax(logits, dim=1)
                confidence, predicted = probabilities.max(1)
            entry = {
                "filename": upload.filename,
                "class": classes[predicted.item()],
                "confidence": round(confidence.item(), 4),
            }
        except Exception as e:
            entry = {"filename": upload.filename, "error": str(e)}
        results.append(entry)
    return {"results": results}
# Development: auto-reload restarts the server on code changes (single process).
uvicorn app:app --reload --host 0.0.0.0 --port 8000
# Production: 4 worker processes share the port for concurrency.
uvicorn app:app --host 0.0.0.0 --port 8000 --workers 4
# Or with Gunicorn as the process manager (worker timeouts, graceful restarts).
gunicorn app:app -w 4 -k uvicorn.workers.UvicornWorker
| Technique | Speedup | Trade-off |
|---|---|---|
| Quantization | 2-4x | Minor accuracy loss (0.5-2%) |
| Model Pruning | 1.5-3x | May need fine-tuning |
| Knowledge Distillation | 2-10x | Requires training smaller model |
| TensorRT/ONNX | 2-5x | Hardware-specific |
| Batching | 2-8x | Adds latency for single requests |
import torch.quantization

# Dynamic quantization (easiest approach): weights are stored as int8 and
# activations are quantized on the fly; only nn.Linear layers are converted.
quantized_model = torch.quantization.quantize_dynamic(
    model,
    {torch.nn.Linear},
    dtype=torch.qint8
)

# Check size reduction.
# NOTE(review): get_model_size is not defined in this snippet — presumably a
# helper that reports serialized size in MB; confirm against the full project.
print(f"Original: {get_model_size(model)}MB")
print(f"Quantized: {get_model_size(quantized_model)}MB")
# Export to ONNX format.
# Tracing input: a batch of one 3x224x224 image (matches the service's
# preprocessing pipeline).
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(
    model,
    dummy_input,
    "model.onnx",
    opset_version=11,
    input_names=["input"],
    output_names=["output"],
    # Mark dim 0 dynamic so any batch size works at inference time.
    dynamic_axes={"input": {0: "batch"}, "output": {0: "batch"}}
)

# Run with ONNX Runtime.
import onnxruntime as ort
session = ort.InferenceSession("model.onnx")
# NOTE(review): input_array is not defined here — assumed to be a float32
# numpy array shaped (N, 3, 224, 224); confirm in the full project.
outputs = session.run(None, {"input": input_array})
| Category | Metrics | Alert Threshold |
|---|---|---|
| Performance | p95 latency | > 500ms |
| Errors | Error rate | > 1% |
| Quality | Avg confidence | < 0.7 |
| System | GPU memory | > 90% |
| Queue | Queue depth | > 1000 |
Tools: Prometheus, Grafana, DataDog, CloudWatch
import numpy as np
from scipy import stats
class DriftDetector:
    """Detect distribution drift in model predictions.

    Buffers recent predictions and, once a full window is collected,
    compares them against a frozen baseline sample using the two-sample
    Kolmogorov-Smirnov test.
    """

    def __init__(self, baseline, threshold=0.05, window=100):
        """
        Args:
            baseline: Reference sample of predictions (array-like;
                converted to a numpy array so plain lists work too).
            threshold: KS-test p-value below which drift is flagged.
            window: Number of predictions to accumulate per check
                (the original hard-coded 100).
        """
        self.baseline = np.asarray(baseline)
        self.threshold = threshold
        self.window = window
        self.recent = []

    def add_prediction(self, pred):
        """Buffer one prediction; run a drift check every `window` samples."""
        self.recent.append(pred)
        if len(self.recent) >= self.window:
            self._check_drift()
            self.recent = []

    def _check_drift(self):
        """Run the KS test against the baseline.

        Returns:
            True if drift was detected (p-value below threshold),
            False otherwise.
        """
        recent = np.array(self.recent)
        # Two-sample Kolmogorov-Smirnov test on the flattened distributions.
        stat, p_value = stats.ks_2samp(
            self.baseline.flatten(),
            recent.flatten()
        )
        if p_value < self.threshold:
            # Bug fix: the original called an undefined alert() helper,
            # which raised NameError the first time drift was detected.
            # Log through the stdlib instead so monitoring never crashes.
            logging.getLogger(__name__).warning(
                "DRIFT DETECTED! ks_stat=%.4f p_value=%.4g", stat, p_value
            )
            return True
        return False
def classify_with_threshold(model, image, threshold=0.7):
    """Route low-confidence predictions for review.

    Predictions at or above `threshold` are auto-accepted; anything
    below is flagged for human review with the model's suggestion.
    """
    confidence, prediction = model.predict(image)
    if confidence >= threshold:
        return {
            "status": "auto_accepted",
            "class": prediction,
            "confidence": confidence,
        }
    return {
        "status": "review_required",
        "confidence": confidence,
        "suggestion": prediction,
    }
# Tune threshold based on:
# - Cost of errors vs cost of review
# - Acceptable error rate
# - Review capacity
from dataclasses import dataclass
from enum import Enum
class ImageQuality(Enum):
EXCELLENT = "excellent"
GOOD = "good"
ACCEPTABLE = "acceptable"
REJECTED = "rejected"
@dataclass
class QualityAssessment:
    """Result of a full (technical + visual) image quality assessment."""
    quality: ImageQuality  # overall grade derived from the averaged scores
    issues: list           # human-readable problems from both check stages
    recommendations: list  # suggested fixes from the AI visual check
    scores: dict  # {"technical": 0.9, "visual": 0.8}
class ProductImageChecker:
    """Two-stage product-image quality checker.

    Combines cheap deterministic checks (resolution, aspect ratio, file
    size) with an AI-powered visual assessment, then averages the two
    scores into an overall ImageQuality grade.
    """

    def __init__(self, api_key: str):
        """
        Args:
            api_key: Anthropic API key used for the visual-assessment calls.
        """
        self.client = anthropic.Anthropic(api_key=api_key)
        # Thresholds consumed by _check_technical. Kept as data so callers
        # can tune them after construction. (The original defined this dict
        # but then hard-coded the same numbers in the checks — now fixed.)
        self.criteria = {
            "resolution": {"min_width": 800, "min_height": 800},
            "aspect_ratio": {"min": 0.8, "max": 1.2}
        }

    def _check_technical(self, image_path: str) -> dict:
        """Check technical aspects without AI.

        Returns:
            dict with "issues" (list of strings) and "score" (0.0-1.0,
            starting at 1.0 with deductions per problem found).
        """
        import os
        from PIL import Image

        issues = []
        score = 1.0
        # Context manager closes the underlying file handle (the original
        # leaked it by never closing the Image).
        with Image.open(image_path) as img:
            width, height = img.size
        # Resolution check — thresholds come from self.criteria.
        res = self.criteria["resolution"]
        if width < res["min_width"] or height < res["min_height"]:
            issues.append(f"Resolution too low: {width}x{height}")
            score -= 0.3
        # Aspect ratio check — bounds come from self.criteria.
        bounds = self.criteria["aspect_ratio"]
        ratio = width / height
        if ratio < bounds["min"] or ratio > bounds["max"]:
            issues.append(f"Aspect ratio {ratio:.2f} not ideal")
            score -= 0.1
        # File size check (rough proxy for over-compression).
        file_size = os.path.getsize(image_path)
        if file_size < 50000:  # 50KB
            issues.append("Image may be over-compressed")
            score -= 0.2
        return {"issues": issues, "score": max(0, score)}

    def _check_visual(self, image_path: str) -> dict:
        """AI-powered visual quality assessment.

        Returns the model's JSON verdict as a dict. Raises ValueError
        (via json.loads) if the model does not return valid JSON.
        """
        import json

        # NOTE(review): encode_image is not defined in this snippet —
        # presumably a base64 file-encoding helper; confirm in the project.
        image_data = encode_image(image_path)
        response = self.client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=500,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image", "source": {
                        "type": "base64", "media_type": "image/jpeg",
                        "data": image_data}},
                    {"type": "text", "text": """Rate this product image:
{"lighting": 0-1, "background": 0-1,
"focus": 0-1, "composition": 0-1,
"issues": [], "recommendations": []}
Return JSON only."""}
                ]
            }]
        )
        return json.loads(response.content[0].text)

    def check_image(self, image_path: str) -> QualityAssessment:
        """Run the complete quality assessment on one image file.

        Returns:
            QualityAssessment combining both stages; overall grade is the
            mean of the technical score and the average of the four
            visual sub-scores.
        """
        tech = self._check_technical(image_path)
        visual = self._check_visual(image_path)
        # Average the four visual sub-scores; KeyError here means the
        # model's JSON was missing a required field.
        visual_avg = sum([visual["lighting"], visual["background"],
                          visual["focus"], visual["composition"]]) / 4
        overall = (tech["score"] + visual_avg) / 2
        # Map the combined score onto a quality grade.
        if overall >= 0.9:
            quality = ImageQuality.EXCELLENT
        elif overall >= 0.7:
            quality = ImageQuality.GOOD
        elif overall >= 0.5:
            quality = ImageQuality.ACCEPTABLE
        else:
            quality = ImageQuality.REJECTED
        return QualityAssessment(
            quality=quality,
            issues=tech["issues"] + visual.get("issues", []),
            recommendations=visual.get("recommendations", []),
            scores={"technical": tech["score"], "visual": visual_avg}
        )
def main():
    """Demo: assess one product image and print a human-readable report."""
    checker = ProductImageChecker(api_key="your-key")
    assessment = checker.check_image("product.jpg")
    print(f"Quality: {assessment.quality.value}")
    print(f"Scores: {assessment.scores}")
    # Print each non-empty section with its bulleted items.
    report_sections = (
        ("\nIssues:", assessment.issues),
        ("\nRecommendations:", assessment.recommendations),
    )
    for header, items in report_sections:
        if items:
            print(header)
            for item in items:
                print(f"  - {item}")
# Output:
# Quality: good
# Scores: {'technical': 0.9, 'visual': 0.75}
# Issues:
# - Background slightly cluttered
# Recommendations:
# - Use solid white background
Deliverable: Complete project with API and documentation