# FastAPI app
FastAPI is a modern, fast web framework for building APIs. Flyte provides `FastAPIAppEnvironment`, which makes it straightforward to deploy FastAPI applications.
## Basic FastAPI app
Here’s a simple FastAPI app:
```python
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "flyte>=2.0.0b52",
#     "fastapi",
# ]
# ///
"""A basic FastAPI app example."""

import pathlib

from fastapi import FastAPI

import flyte
from flyte.app.extras import FastAPIAppEnvironment

app = FastAPI(
    title="My API",
    description="A simple FastAPI application",
    version="1.0.0",
)

env = FastAPIAppEnvironment(
    name="my-fastapi-app",
    app=app,
    image=flyte.Image.from_debian_base(python_version=(3, 12)).with_pip_packages(
        "fastapi",
        "uvicorn",
    ),
    resources=flyte.Resources(cpu=1, memory="512Mi"),
    requires_auth=False,
)


@app.get("/")
async def root():
    return {"message": "Hello, World!"}


@app.get("/health")
async def health_check():
    return {"status": "healthy"}


if __name__ == "__main__":
    flyte.init_from_config(root_dir=pathlib.Path(__file__).parent)
    app_deployment = flyte.deploy(env)
    print(f"Deployed: {app_deployment[0].summary_repr()}")
```
Once deployed, you can:
- Access the API at the generated URL
- View interactive API docs at `/docs` (Swagger UI)
- View alternative docs at `/redoc`
## Serving a machine learning model
Here’s an example of serving a scikit-learn model:
```python
import os
from contextlib import asynccontextmanager

import joblib
from fastapi import FastAPI
from pydantic import BaseModel

import flyte
from flyte.app.extras import FastAPIAppEnvironment


# Define request/response models
class PredictionRequest(BaseModel):
    feature1: float
    feature2: float
    feature3: float


class PredictionResponse(BaseModel):
    prediction: float
    probability: float


# Load model (you would typically load this from storage)
model = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    global model
    model_path = os.getenv("MODEL_PATH", "/app/models/model.joblib")
    # In production, load from your storage
    if os.path.exists(model_path):
        with open(model_path, "rb") as f:
            model = joblib.load(f)
    yield


app = FastAPI(title="ML Model API", lifespan=lifespan)


@app.post("/predict", response_model=PredictionResponse)
async def predict(request: PredictionRequest):
    # Make prediction
    # prediction = model.predict([[request.feature1, request.feature2, request.feature3]])
    # Dummy prediction for demo
    prediction = 0.85
    probability = 0.92
    return PredictionResponse(
        prediction=prediction,
        probability=probability,
    )


env = FastAPIAppEnvironment(
    name="ml-model-api",
    app=app,
    image=flyte.Image.from_debian_base(python_version=(3, 12)).with_pip_packages(
        "fastapi",
        "uvicorn",
        "scikit-learn",
        "pydantic",
        "joblib",
    ),
    parameters=[
        flyte.app.Parameter(
            name="model_file",
            value=flyte.io.File("s3://bucket/models/model.joblib"),
            mount="/app/models",
            env_var="MODEL_PATH",
        ),
    ],
    resources=flyte.Resources(cpu=2, memory="2Gi"),
    requires_auth=False,
)
```
## Accessing Swagger documentation
FastAPI automatically generates interactive API documentation. Once deployed:
- Swagger UI: Access at `{app_url}/docs`
- ReDoc: Access at `{app_url}/redoc`
- OpenAPI JSON: Access at `{app_url}/openapi.json`
The Swagger UI provides an interactive interface where you can:
- See all available endpoints
- Test API calls directly from the browser
- View request/response schemas
- See example payloads
## Example: REST API with multiple endpoints
```python
from typing import List

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

import flyte
from flyte.app.extras import FastAPIAppEnvironment

app = FastAPI(title="Product API")


# Data models
class Product(BaseModel):
    id: int
    name: str
    price: float


class ProductCreate(BaseModel):
    name: str
    price: float


# In-memory database (use a real database in production)
products_db = []


@app.get("/products", response_model=List[Product])
async def get_products():
    return products_db


@app.get("/products/{product_id}", response_model=Product)
async def get_product(product_id: int):
    product = next((p for p in products_db if p["id"] == product_id), None)
    if not product:
        raise HTTPException(status_code=404, detail="Product not found")
    return product


@app.post("/products", response_model=Product)
async def create_product(product: ProductCreate):
    new_product = {
        "id": len(products_db) + 1,
        "name": product.name,
        "price": product.price,
    }
    products_db.append(new_product)
    return new_product


env = FastAPIAppEnvironment(
    name="product-api",
    app=app,
    image=flyte.Image.from_debian_base(python_version=(3, 12)).with_pip_packages(
        "fastapi",
        "uvicorn",
    ),
    resources=flyte.Resources(cpu=1, memory="512Mi"),
    requires_auth=False,
)
```
## Multi-file FastAPI app
Here’s an example of a multi-file FastAPI app:
```python
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "flyte>=2.0.0b52",
#     "fastapi",
# ]
# ///
"""Multi-file FastAPI app example."""

import pathlib

from fastapi import FastAPI

import flyte
from flyte.app.extras import FastAPIAppEnvironment
from module import function  # Import from another file

app = FastAPI(title="Multi-file FastAPI Demo")

app_env = FastAPIAppEnvironment(
    name="fastapi-multi-file",
    app=app,
    image=flyte.Image.from_debian_base(python_version=(3, 12)).with_pip_packages(
        "fastapi",
        "uvicorn",
    ),
    resources=flyte.Resources(cpu=1, memory="512Mi"),
    requires_auth=False,
    # FastAPIAppEnvironment automatically includes necessary files,
    # but you can also specify them explicitly:
    # include=["app.py", "module.py"],
)


@app.get("/")
async def root():
    return function()  # Uses function from module.py


if __name__ == "__main__":
    flyte.init_from_config(root_dir=pathlib.Path(__file__).parent)
    app_deployment = flyte.deploy(app_env)
    print(f"Deployed: {app_deployment[0].summary_repr()}")
```
The helper module:
```python
def function():
    """Helper function used by the FastAPI app."""
    return {"message": "Hello from module.py!"}
```
See Multi-script apps for more details on building FastAPI apps with multiple files.
## Local-to-remote model serving
A common ML pattern: train a model with a Flyte pipeline, then serve predictions from it. During local development, the app loads the model from a local file (e.g. `model.pt` saved by your training pipeline). When deployed remotely, Flyte's `Parameter` system automatically resolves the model from the latest training run output.
```python
import os
from contextlib import asynccontextmanager
from pathlib import Path

from fastapi import FastAPI

import flyte
from flyte.app import Parameter, RunOutput
from flyte.app.extras import FastAPIAppEnvironment

MODEL_PATH_ENV = "MODEL_PATH"


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load model on startup, either local file or remote run output."""
    model_path = Path(os.environ.get(MODEL_PATH_ENV, "model.pt"))
    model = load_model(model_path)  # your own loading helper, e.g. wrapping torch.load
    app.state.model = model
    yield


app = FastAPI(title="MNIST Predictor", lifespan=lifespan)

serving_env = FastAPIAppEnvironment(
    name="mnist-predictor",
    app=app,
    parameters=[
        # Remote: resolves model from the latest train run and sets MODEL_PATH
        Parameter(
            name="model",
            value=RunOutput(task_name="ml_pipeline.pipeline", type="file", getter=(1,)),
            download=True,
            env_var=MODEL_PATH_ENV,
        ),
    ],
    image=flyte.Image.from_debian_base(python_version=(3, 12)).with_pip_packages(
        "fastapi", "uvicorn", "torch", "torchvision",
    ),
    resources=flyte.Resources(cpu=1, memory="4Gi"),
)


@app.get("/predict")
async def predict(index: int = 0) -> dict:
    return {"prediction": app.state.model(index)}


if __name__ == "__main__":
    # Local: skip RunOutput resolution, lifespan falls back to local model.pt
    serving_env.parameters = []
    local_app = flyte.with_servecontext(mode="local").serve(serving_env)
    local_app.activate(wait=True)
```

Locally, the app loads `model.pt` from disk:

```shell
python serve_model.py
```

Remotely, Flyte resolves the model from the latest training run:

```shell
flyte deploy serve_model.py serving_env
```

The key idea: `Parameter` with `RunOutput` bridges the gap between local and remote. Locally, the app falls back to a local file. Remotely, Flyte resolves the model artifact from the latest pipeline run automatically.
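Stripped of the Flyte and FastAPI pieces, the fallback is just an environment-variable lookup. This standalone sketch (the `resolve_model_path` helper is illustrative, not Flyte API) shows the two resolution paths:

```python
import os
from pathlib import Path

MODEL_PATH_ENV = "MODEL_PATH"


def resolve_model_path(default: str = "model.pt") -> Path:
    """Remote: Flyte's Parameter/RunOutput machinery exports MODEL_PATH.
    Local: the variable is unset, so we fall back to the local file."""
    return Path(os.environ.get(MODEL_PATH_ENV, default))


# Local run: no env var, so the local training output is used
os.environ.pop(MODEL_PATH_ENV, None)
print(resolve_model_path())  # model.pt

# Remote run: Flyte downloads the artifact and exports its path
os.environ[MODEL_PATH_ENV] = "/tmp/flyte/inputs/model.pt"
print(resolve_model_path())  # /tmp/flyte/inputs/model.pt
```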
## Best practices
- Use Pydantic models: Define request/response models for type safety and automatic validation
- Handle errors: Use HTTPException for proper error responses
- Async operations: Use async/await for I/O operations
- Environment variables: Use environment variables for configuration
- Logging: Add proper logging for debugging and monitoring
- Health checks: Always include a `/health` endpoint
- API documentation: FastAPI auto-generates docs, but add descriptions to your endpoints
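The first practice is worth seeing in isolation: Pydantic validates and coerces request payloads before your handler runs, and FastAPI turns validation failures into 422 responses. A minimal sketch of the validation step itself (uses Pydantic directly, which FastAPI installs as a dependency):

```python
from pydantic import BaseModel, ValidationError


class PredictionRequest(BaseModel):
    feature1: float
    feature2: float
    feature3: float


# Valid payloads are coerced into typed objects (the string "2.5" becomes a float)
ok = PredictionRequest(feature1=1.0, feature2="2.5", feature3=3)
print(ok.feature2)  # 2.5

# Invalid payloads raise ValidationError; FastAPI converts this into a 422 response
try:
    PredictionRequest(feature1="not a number", feature2=2.0, feature3=3.0)
except ValidationError as exc:
    print(len(exc.errors()), "validation error(s)")
```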
## Advanced features
FastAPI supports many features that work with Flyte:
- Dependencies: Use FastAPI’s dependency injection system
- Background tasks: Run background tasks with BackgroundTasks
- WebSockets: See WebSocket-based patterns for details
- Authentication: Add authentication middleware (see secret-based authentication)
- CORS: Configure CORS for cross-origin requests
- Rate limiting: Add rate limiting middleware
## Troubleshooting
App not starting:
- Check that uvicorn can find your app module
- Verify all dependencies are installed in the image
- Check container logs for startup errors
Import errors:
- Ensure all imported modules are available
- Use the `include` parameter if you have custom modules
- Check that file paths are correct
API not accessible:
- Verify the `requires_auth` setting
- Check that the app is listening on the correct port (8080)
- Review network/firewall settings