RT-DETR object detection
Code available here.
This tutorial fine-tunes RT-DETRv2 on a custom COCO-format dataset from HuggingFace. The pipeline downloads and splits the data, fine-tunes the detector with live training charts in Flyte reports, evaluates COCO mAP on a validation split, and renders a side-by-side inference demo with ground-truth and predicted bounding boxes.
Flyte highlights:
- Cached dataset preparation so re-runs skip the HuggingFace download.
- Live training reports with loss curves and optional periodic mAP checkpoints.
- GPU evaluation and demo tasks that stream annotated images into the UI.
Define the task environments
# Container image shared by both task environments, built from this script
# file. NOTE(review): `from_uv_script` presumably installs the dependencies
# listed in the inline `# /// script` metadata block, and `pre=True`
# presumably allows pre-release packages — confirm against the Flyte docs.
main_img = flyte.Image.from_uv_script(__file__, name="detr-object-detection", pre=True)
# GPU environment: 4 CPUs, 24Gi of memory, and one GPU.
# NOTE(review): presumably used by the train / evaluate / inference_demo
# tasks defined elsewhere in this file — verify.
gpu_env = flyte.TaskEnvironment(
    name="detr-object-detection-gpu",
    image=main_img,
    resources=flyte.Resources(cpu=4, memory="24Gi", gpu=1),
)
# CPU-only environment (2 CPUs, 6Gi of memory); the `pipeline` task is
# registered on it. It declares `gpu_env` as a dependency.
# NOTE(review): presumably so both environments are deployed together — confirm.
cpu_env = flyte.TaskEnvironment(
    name="detr-object-detection-cpu",
    image=main_img,
    resources=flyte.Resources(cpu=2, memory="6Gi"),
    depends_on=[gpu_env],
)
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "flyte>=2.4.0",
# "torch>=2.9.0",
# "transformers>=4.49.0",
# "albumentations>=1.4.0",
# "torchmetrics>=1.4.0",
# ...
# ]
# ///
Orchestrate the pipeline
The pipeline task prepares data, fine-tunes RT-DETR, evaluates mAP, and renders an inference demo.
@cpu_env.task(report=True)
async def pipeline(
    model_name: str = "PekingU/rtdetr_v2_r18vd",
    dataset_repo: str = "sagecodes/union_flyte_swag_object_detection",
    annotations_path: str = "swag/train.json",
    images_subdir: str = "swag/images",
    epochs: int = 30,
    lr: float = 5e-5,
    batch_size: int = 4,
    val_fraction: float = 0.2,
    threshold: float = 0.5,
    demo_images: int = 8,
    eval_every_n_epochs: int | None = None,
) -> tuple[flyte.io.Dir, str]:
    """Run the RT-DETRv2 workflow: prepare data, train, evaluate, demo.

    The task awaits four sub-tasks in sequence and replaces its own report
    with a progress header before each one:

    1. ``prepare_data`` -- fetch the COCO-format dataset and split train/val.
    2. ``train`` -- fine-tune the model on the prepared data.
    3. ``evaluate`` -- compute COCO mAP; the result is parsed as JSON.
    4. ``inference_demo`` -- render bounding boxes on validation images.

    Once all stages finish, the report is replaced by a summary showing the
    validation image count, the fine-tuned mAP / mAP@50, and the run
    configuration.

    Args:
        model_name: Model identifier forwarded to ``train``.
        dataset_repo: HuggingFace dataset repository given to ``prepare_data``.
        annotations_path: Annotation file path given to ``prepare_data``.
        images_subdir: Image sub-directory given to ``prepare_data``.
        epochs: Forwarded to ``train``.
        lr: Forwarded to ``train``.
        batch_size: Forwarded to ``train``.
        val_fraction: Forwarded to ``prepare_data``.
        threshold: Forwarded to both ``evaluate`` and ``inference_demo``.
        demo_images: Forwarded to ``inference_demo``.
        eval_every_n_epochs: Forwarded to ``train``; ``None`` by default.

    Returns:
        The fine-tuned model directory and a JSON string with two keys:
        ``"metrics"`` (parsed evaluation output) and ``"demo"`` (parsed
        inference-demo output).
    """
    log.info(f"Pipeline: {model_name} | dataset={dataset_repo}")

    stage_names = ("Preparing Data", "Fine-tuning", "Evaluating", "Inference Demo")

    def _stage_icon(position: int, current: int) -> str:
        """Pick the marker for a 1-based stage relative to the current one."""
        if position < current:
            # Stage already finished.
            return '<span style="color:#06d6a0;">✓</span>'
        if position == current:
            # Stage currently running.
            return '<span style="color:#e94560;">●</span>'
        # Stage not started yet.
        return '<span style="color:#adb5bd;">○</span>'

    def _pipeline_progress(step: int, label: str) -> str:
        """Build the progress header HTML for the given 1-based step."""
        dots = "".join(
            f"<span style='margin:0 8px;'>{_stage_icon(position, step)} {stage}</span>"
            for position, stage in enumerate(stage_names, start=1)
        )
        return (
            "\n"
            "<h2>RT-DETRv2 Object Detection Pipeline</h2>\n"
            f"<p><b>Model:</b> {model_name} | <b>Dataset:</b> {dataset_repo}</p>\n"
            f'<div class="card" style="text-align:center;">{dots}</div>\n'
            f"<p>{label}</p>\n"
        )

    async def _show_stage(step: int, label: str) -> None:
        """Replace the task report with the progress header and flush it."""
        await flyte.report.replace.aio(
            _wrap_report(_pipeline_progress(step, label)),
            do_flush=True,
        )

    # Stage 1: dataset download and train/val split.
    await _show_stage(1, "Downloading and splitting dataset...")
    data_dir = await prepare_data(
        dataset_repo=dataset_repo,
        annotations_path=annotations_path,
        images_subdir=images_subdir,
        val_fraction=val_fraction,
    )

    # Stage 2: fine-tuning.
    await _show_stage(2, "Fine-tuning model...")
    finetuned_dir = await train(
        model_name,
        data_dir,
        epochs,
        lr,
        batch_size,
        eval_every_n_epochs=eval_every_n_epochs,
    )

    # Stage 3: evaluation; the task hands back its metrics as a JSON string.
    await _show_stage(3, "Running COCO mAP evaluation...")
    metrics_json = await evaluate(finetuned_dir, data_dir, threshold)
    metrics = json.loads(metrics_json)

    # Stage 4: annotated inference demo.
    await _show_stage(4, "Rendering bounding box demo...")
    demo_json = await inference_demo(
        finetuned_dir,
        data_dir,
        threshold,
        demo_images,
        metrics_json=metrics_json,
    )

    # Missing mAP entries fall back to 0 so the summary can still render.
    map_overall = metrics["finetuned"].get("map", 0)
    map_at_50 = metrics["finetuned"].get("map_50", 0)
    num_val_images = metrics["num_val_images"]

    final_html = (
        "\n"
        "<h2>Pipeline Complete</h2>\n"
        f"<h3>{model_name}</h3>\n"
        '<div class="stat-grid">\n'
        f'<div class="stat"><div class="value">{num_val_images}</div>'
        '<div class="label">Val Images</div></div>\n'
        f'<div class="stat"><div class="value highlight">{map_overall:.3f}</div>'
        '<div class="label">mAP</div></div>\n'
        f'<div class="stat"><div class="value highlight">{map_at_50:.3f}</div>'
        '<div class="label">mAP@50</div></div>\n'
        "</div>\n"
        '<div class="card">\n'
        f"<b>Configuration:</b> {epochs} epochs | LR {lr} | Batch size {batch_size} |\n"
        f"Val fraction {val_fraction} | Threshold {threshold}\n"
        "</div>\n"
    )
    await flyte.report.replace.aio(_wrap_report(final_html), do_flush=True)

    log.info(f"Pipeline complete. Fine-tuned mAP: {map_overall:.3f}")

    summary = {"metrics": metrics, "demo": json.loads(demo_json)}
    return finetuned_dir, json.dumps(summary)
Run the workflow
From the example directory:
cd v2/tutorials/detr_object_detection
uv run --script detr_object_detection.py
Quick local smoke test with one epoch:
flyte run detr_object_detection.py pipeline --epochs 1 --batch_size 2
This workflow needs a GPU. Check the train, evaluate, and inference_demo task reports for charts and annotated images.