Track Objects
Enhance your video analysis capabilities with Supervision's advanced features by seamlessly tracking objects recognized by object detection, segmentation, and keypoint models. This comprehensive guide walks you through running inference with a YOLOv8 model, via either the Inference or the Ultralytics package. After that, you will learn how to track those objects efficiently and annotate your video content for deeper analysis.
Object Detection & Segmentation
To make it easier to follow along with our tutorial, download the video we will use as an example. You can do this with the supervision[assets] extra.
from supervision.assets import download_assets, VideoAssets
download_assets(VideoAssets.PEOPLE_WALKING)
Run Inference
First, you need to obtain predictions from your object detection or segmentation model. In this tutorial we use a YOLOv8 model as an example, but Supervision is flexible and compatible with a wide range of models. See this link for guidance on how to integrate other models.
We will define a callback function that processes each frame of the video, obtains the model's predictions, and then annotates the frame based on those predictions. This callback function will be essential in the later steps of the tutorial, as we extend it to include tracking, labels, and traces.
Tip

Both object detection and segmentation models are supported. Try it with yolov8n.pt or yolov8n-640-seg!
Ultralytics

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    # Run the model on a single frame and convert the result
    # into a Supervision Detections object.
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)
    # Draw bounding boxes on a copy of the frame.
    return box_annotator.annotate(frame.copy(), detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
Inference

import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model.infer(frame)[0]
    detections = sv.Detections.from_inference(results)
    return box_annotator.annotate(frame.copy(), detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
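If you run a segmentation checkpoint instead, as the tip above suggests, you can render the masks themselves with sv.MaskAnnotator. A minimal sketch, assuming the Ultralytics yolov8n-seg.pt weights:

import numpy as np
import supervision as sv
from ultralytics import YOLO

# Assumption: a segmentation checkpoint, so detections carry masks.
model = YOLO("yolov8n-seg.pt")
mask_annotator = sv.MaskAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)
    # MaskAnnotator fills each detected instance with a colored mask.
    return mask_annotator.annotate(frame.copy(), detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)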
Tracking
After running inference and obtaining predictions, the next step is to track the detected objects throughout the video. Using Supervision's sv.ByteTrack functionality, each detected object is assigned a unique tracker ID, enabling continuous following of each object's motion path across frames.
Ultralytics

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)
    detections = tracker.update_with_detections(detections)
    return box_annotator.annotate(frame.copy(), detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
Inference

import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model.infer(frame)[0]
    detections = sv.Detections.from_inference(results)
    detections = tracker.update_with_detections(detections)
    return box_annotator.annotate(frame.copy(), detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
Annotate Video with Tracking IDs
Annotating the video with tracker IDs helps distinguish and follow each object. With Supervision's sv.LabelAnnotator, we can overlay each detection with its tracker ID and class label, clearly showing each object's class and unique identifier.
Ultralytics

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)
    detections = tracker.update_with_detections(detections)
    labels = [
        f"#{tracker_id} {class_name}"
        for class_name, tracker_id
        in zip(detections.data["class_name"], detections.tracker_id)
    ]
    annotated_frame = box_annotator.annotate(
        frame.copy(), detections=detections)
    return label_annotator.annotate(
        annotated_frame, detections=detections, labels=labels)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
Inference

import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model.infer(frame)[0]
    detections = sv.Detections.from_inference(results)
    detections = tracker.update_with_detections(detections)
    labels = [
        f"#{tracker_id} {class_name}"
        for class_name, tracker_id
        in zip(detections.data["class_name"], detections.tracker_id)
    ]
    annotated_frame = box_annotator.annotate(
        frame.copy(), detections=detections)
    return label_annotator.annotate(
        annotated_frame, detections=detections, labels=labels)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
Annotate Video with Traces
Adding traces to the video overlays the historical path of each detected object. This capability, provided by sv.TraceAnnotator, visualizes object trajectories and helps in understanding movement patterns and interactions between objects in the video.
Ultralytics

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)
    detections = tracker.update_with_detections(detections)
    labels = [
        f"#{tracker_id} {class_name}"
        for class_name, tracker_id
        in zip(detections.data["class_name"], detections.tracker_id)
    ]
    annotated_frame = box_annotator.annotate(
        frame.copy(), detections=detections)
    annotated_frame = label_annotator.annotate(
        annotated_frame, detections=detections, labels=labels)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
Inference

import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model.infer(frame)[0]
    detections = sv.Detections.from_inference(results)
    detections = tracker.update_with_detections(detections)
    labels = [
        f"#{tracker_id} {class_name}"
        for class_name, tracker_id
        in zip(detections.data["class_name"], detections.tracker_id)
    ]
    annotated_frame = box_annotator.annotate(
        frame.copy(), detections=detections)
    annotated_frame = label_annotator.annotate(
        annotated_frame, detections=detections, labels=labels)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
Keypoints
Models are not limited to object detection and segmentation. Keypoint detection enables detailed analysis of body joints and connections, which is especially valuable for applications such as pose estimation. This section covers keypoint tracking. We will walk through annotating keypoints, converting them into bounding-box detections compatible with ByteTrack, and applying detection smoothing for added stability.
To make it easier to follow along, let's download the sample video we will use. You can do this with the supervision[assets] extra.
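A minimal download snippet, mirroring the one above; here we assume the clip ships as VideoAssets.SKIING, which saves skiing.mp4 to the working directory:

from supervision.assets import download_assets, VideoAssets

# Assumption: the skiing clip is available as VideoAssets.SKIING.
download_assets(VideoAssets.SKIING)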
Keypoint Detection
First, you need to obtain predictions from your keypoint detection model. In this tutorial we use a YOLOv8 model as an example, but Supervision is flexible and compatible with a wide range of models. See this link for guidance on how to integrate other models.
We will define a callback function that processes each frame of the video, obtains the model's predictions, and then annotates the frame based on those predictions. Let's visualize the results right away with our EdgeAnnotator and VertexAnnotator.
Ultralytics

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8m-pose.pt")
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model(frame)[0]
    key_points = sv.KeyPoints.from_ultralytics(results)
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    return vertex_annotator.annotate(
        annotated_frame, key_points=key_points)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
Inference

import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(
    model_id="yolov8m-pose-640", api_key=<ROBOFLOW API KEY>)
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model.infer(frame)[0]
    key_points = sv.KeyPoints.from_inference(results)
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    return vertex_annotator.annotate(
        annotated_frame, key_points=key_points)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
Convert to Detections
Keypoint tracking is currently supported by converting KeyPoints into Detections. This is done with the KeyPoints.as_detections() function. Let's convert the keypoints to detections and visualize the result with our BoxAnnotator.
Tip

You can use the selected_keypoint_indices argument to specify a subset of keypoints to convert. This is useful when some keypoints may be occluded: for example, a person may swing their arms, so the elbows are sometimes hidden behind the torso. See the sketch below.
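As an illustration, here is a hedged fragment, assuming the COCO keypoint ordering that YOLOv8 pose models use (indices 5/6 for the shoulders, 11/12 for the hips); it would replace the as_detections() call in the examples that follow:

# Assumption: COCO ordering, 5/6 = shoulders, 11/12 = hips.
# Boxes are then built from torso keypoints only, which are
# rarely occluded.
detections = key_points.as_detections(
    selected_keypoint_indices=[5, 6, 11, 12]
)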
Ultralytics

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8m-pose.pt")
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model(frame)[0]
    key_points = sv.KeyPoints.from_ultralytics(results)
    detections = key_points.as_detections()
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    return box_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
Inference

import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(
    model_id="yolov8m-pose-640", api_key=<ROBOFLOW API KEY>)
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model.infer(frame)[0]
    key_points = sv.KeyPoints.from_inference(results)
    detections = key_points.as_detections()
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    return box_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
Keypoint Tracking
Now that we have a Detections object, we can track it across the video. Using Supervision's sv.ByteTrack functionality, each detected object is assigned a unique tracker ID, enabling continuous following of each object's motion path across frames. We will visualize the result with TraceAnnotator.
Ultralytics

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8m-pose.pt")
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()
tracker = sv.ByteTrack()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model(frame)[0]
    key_points = sv.KeyPoints.from_ultralytics(results)
    detections = key_points.as_detections()
    detections = tracker.update_with_detections(detections)
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    annotated_frame = box_annotator.annotate(
        annotated_frame, detections=detections)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
Inference

import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(
    model_id="yolov8m-pose-640", api_key=<ROBOFLOW API KEY>)
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()
tracker = sv.ByteTrack()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model.infer(frame)[0]
    key_points = sv.KeyPoints.from_inference(results)
    detections = key_points.as_detections()
    detections = tracker.update_with_detections(detections)
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    annotated_frame = box_annotator.annotate(
        annotated_frame, detections=detections)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
Bonus: Smoothing
At this point we have successfully tracked the objects detected by the keypoint model, and we could stop here. However, we can improve the stability of the bounding boxes further by applying a DetectionsSmoother. This tool smooths box coordinates across frames and is very easy to use:
Ultralytics

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8m-pose.pt")
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()
tracker = sv.ByteTrack()
smoother = sv.DetectionsSmoother()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model(frame)[0]
    key_points = sv.KeyPoints.from_ultralytics(results)
    detections = key_points.as_detections()
    detections = tracker.update_with_detections(detections)
    detections = smoother.update_with_detections(detections)
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    annotated_frame = box_annotator.annotate(
        annotated_frame, detections=detections)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
Inference

import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(
    model_id="yolov8m-pose-640", api_key=<ROBOFLOW API KEY>)
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()
tracker = sv.ByteTrack()
smoother = sv.DetectionsSmoother()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    results = model.infer(frame)[0]
    key_points = sv.KeyPoints.from_inference(results)
    detections = key_points.as_detections()
    detections = tracker.update_with_detections(detections)
    detections = smoother.update_with_detections(detections)
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    annotated_frame = box_annotator.annotate(
        annotated_frame, detections=detections)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
This structured walkthrough should give you a detailed path to annotating your videos effectively with Supervision's various features, including object tracking and trace annotation.