FocoosModel

ExportableModel #

Bases: Module

A wrapper class for making models exportable to different formats.

This class wraps a BaseModelNN model to make it compatible with export formats like ONNX and TorchScript by handling the output formatting.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model | BaseModelNN | The base model to wrap for export. | required |
| device | | The device to move the model to. | 'cuda' |

Source code in focoos/models/focoos_model.py
class ExportableModel(torch.nn.Module):
    """A wrapper class for making models exportable to different formats.

    This class wraps a BaseModelNN model to make it compatible with export formats
    like ONNX and TorchScript by handling the output formatting.

    Args:
        model: The base model to wrap for export.
        device: The device to move the model to. Defaults to "cuda".
    """

    def __init__(self, model: BaseModelNN, device="cuda"):
        """Initialize the ExportableModel.

        Args:
            model: The base model to wrap for export.
            device: The device to move the model to. Defaults to "cuda".
        """
        super().__init__()
        self.model = model.eval().to(device)

    def forward(self, x):
        """Forward pass through the wrapped model.

        Args:
            x: Input tensor to pass through the model.

        Returns:
            Model output converted to tuple format for export compatibility.
        """
        return self.model(x).to_tuple()
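
A minimal usage sketch (illustrative, not part of the library source), assuming model is a BaseModelNN instance obtained elsewhere: the wrapper puts the model in eval mode on the target device, and because forward() returns a plain tuple of tensors instead of a structured output object, the wrapped model can be traced or exported directly.

import torch

# model is assumed to be a BaseModelNN instance obtained elsewhere.
exportable = ExportableModel(model, device="cpu")

# Tracing succeeds because forward() flattens the structured output
# into a tuple of tensors, which torch.jit.trace can handle.
dummy = torch.randn(1, 3, 640, 640)
traced = torch.jit.trace(exportable, dummy)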

__init__(model, device='cuda') #

Initialize the ExportableModel.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model | BaseModelNN | The base model to wrap for export. | required |
| device | | The device to move the model to. | 'cuda' |

Source code in focoos/models/focoos_model.py
def __init__(self, model: BaseModelNN, device="cuda"):
    """Initialize the ExportableModel.

    Args:
        model: The base model to wrap for export.
        device: The device to move the model to. Defaults to "cuda".
    """
    super().__init__()
    self.model = model.eval().to(device)

forward(x) #

Forward pass through the wrapped model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| x | | Input tensor to pass through the model. | required |

Returns:

Model output converted to tuple format for export compatibility.

Source code in focoos/models/focoos_model.py
def forward(self, x):
    """Forward pass through the wrapped model.

    Args:
        x: Input tensor to pass through the model.

    Returns:
        Model output converted to tuple format for export compatibility.
    """
    return self.model(x).to_tuple()

FocoosModel #

Main model class for Focoos computer vision models.

This class provides a high-level interface for training, testing, exporting, and running inference with Focoos models. It handles model configuration, weight loading, preprocessing, and postprocessing.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model | BaseModelNN | The underlying neural network model. | required |
| model_info | ModelInfo | Metadata and configuration information for the model. | required |
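
A usage sketch (the loading step is hypothetical; in practice a FocoosModel usually comes from the library's model loading utilities rather than being constructed by hand):

from PIL import Image

# model is assumed to be a loaded FocoosModel instance.
image = Image.open("sample.jpg")
detections = model(image)  # preprocessing + forward pass + postprocessing

print(model.name, model.task)  # metadata comes from the attached ModelInfo
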
Source code in focoos/models/focoos_model.py
class FocoosModel:
    """Main model class for Focoos computer vision models.

    This class provides a high-level interface for training, testing, exporting,
    and running inference with Focoos models. It handles model configuration,
    weight loading, preprocessing, and postprocessing.

    Args:
        model: The underlying neural network model.
        model_info: Metadata and configuration information for the model.
    """

    def __init__(self, model: BaseModelNN, model_info: ModelInfo):
        """Initialize the FocoosModel.

        Args:
            model: The underlying neural network model.
            model_info: Metadata and configuration information for the model.
        """
        self.model = model
        self.model_info = model_info
        self.processor = ProcessorManager.get_processor(self.model_info.model_family, self.model_info.config)
        if self.model_info.weights_uri:
            self._load_weights()
        else:
            logger.warning(f"⚠️ Model {self.model_info.name} has no pretrained weights")

    def __str__(self):
        """Return string representation of the model.

        Returns:
            String containing model name and family.
        """
        return f"{self.model_info.name} ({self.model_info.model_family.value})"

    def __repr__(self):
        """Return detailed string representation of the model.

        Returns:
            String containing model name and family.
        """
        return f"{self.model_info.name} ({self.model_info.model_family.value})"

    def _setup_model_for_training(self, train_args: TrainerArgs, data_train: MapDataset, data_val: MapDataset):
        """Set up the model and metadata for training.

        This method configures the model information with training parameters,
        device information, dataset metadata, and initializes training status.

        Args:
            train_args: Training configuration arguments.
            data_train: Training dataset.
            data_val: Validation dataset.
        """
        device = get_cpu_name()
        system_info = get_system_info()
        if system_info.gpu_info and system_info.gpu_info.devices and len(system_info.gpu_info.devices) > 0:
            device = system_info.gpu_info.devices[0].gpu_name
        self.model_info.ref = None

        self.model_info.train_args = train_args  # type: ignore
        self.model_info.val_dataset = data_val.dataset.metadata.name
        self.model_info.val_metrics = None
        self.model_info.classes = data_val.dataset.metadata.classes
        self.model_info.focoos_version = get_focoos_version()
        self.model_info.status = ModelStatus.TRAINING_STARTING
        self.model_info.updated_at = datetime.now().isoformat()
        self.model_info.latency = []
        self.model_info.metrics = None
        self.model_info.training_info = TrainingInfo(
            instance_device=device,
            main_status=ModelStatus.TRAINING_STARTING,
            start_time=datetime.now().isoformat(),
            status_transitions=[
                dict(
                    status=ModelStatus.TRAINING_STARTING,
                    timestamp=datetime.now().isoformat(),
                )
            ],
        )

        self.model_info.classes = data_train.dataset.metadata.classes
        self.model_info.config["num_classes"] = len(data_train.dataset.metadata.classes)
        self._reload_model()
        self.model_info.name = train_args.run_name.strip()
        self.processor = ProcessorManager.get_processor(self.model_info.model_family, self.model_info.config)
        assert self.model_info.task == data_train.dataset.metadata.task, "Task mismatch between model and dataset."

    def train(self, args: TrainerArgs, data_train: MapDataset, data_val: MapDataset, hub: Optional[FocoosHUB] = None):
        """Train the model on the provided datasets.

        This method handles both single-GPU and multi-GPU distributed training.
        It sets up the model for training, optionally syncs with Focoos Hub,
        and manages the training process.

        Args:
            args: Training configuration and hyperparameters.
            data_train: Training dataset containing images and annotations.
            data_val: Validation dataset for model evaluation.
            hub: Optional Focoos Hub instance for model syncing.

        Raises:
            AssertionError: If task mismatch between model and dataset.
            AssertionError: If number of classes mismatch between model and dataset.
            AssertionError: If num_gpus is 0 (GPU training is required).
            FileNotFoundError: If training artifacts are not found after completion.
        """
        from focoos.trainer.trainer import run_train

        self._setup_model_for_training(args, data_train, data_val)
        assert self.model_info.task == data_train.dataset.metadata.task, "Task mismatch between model and dataset."
        assert self.model_info.config["num_classes"] == data_train.dataset.metadata.num_classes, (
            "Number of classes mismatch between model and dataset."
        )
        remote_model = None
        if args.sync_to_hub:
            hub = hub or FocoosHUB()
            remote_model = hub.new_model(self.model_info)

            self.model_info.ref = remote_model.ref
            logger.info(f"Model {self.model_info.name} created in hub with ref {self.model_info.ref}")

        assert args.num_gpus, "Training without GPUs is not supported. num_gpus must be greater than 0"
        if args.num_gpus > 1:
            launch(
                run_train,
                args.num_gpus,
                dist_url="auto",
                args=(args, data_train, data_val, self.model, self.processor, self.model_info, remote_model),
            )

            logger.info("Training done, resuming main process.")
            # In DDP the main process is not updated, so restore the best model and config from disk
            final_folder = os.path.join(args.output_dir, args.run_name)
            model_path = os.path.join(final_folder, ArtifactName.WEIGHTS)
            metadata_path = os.path.join(final_folder, ArtifactName.INFO)

            if not os.path.exists(model_path):
                raise FileNotFoundError(f"Training did not end correctly, model file not found at {model_path}")
            if not os.path.exists(metadata_path):
                raise FileNotFoundError(f"Training did not end correctly, metadata file not found at {metadata_path}")
            self.model_info = ModelInfo.from_json(metadata_path)

            logger.info(f"Reloading weights from {self.model_info.weights_uri}")
            self._reload_model()
        else:
            run_train(args, data_train, data_val, self.model, self.processor, self.model_info, remote_model)

    def test(self, args: TrainerArgs, data_test: MapDataset):
        """Test the model on the provided test dataset.

        This method evaluates the model performance on a test dataset,
        supporting both single-GPU and multi-GPU testing.

        Args:
            args: Test configuration arguments.
            data_test: Test dataset for model evaluation.

        Raises:
            AssertionError: If task mismatch between model and dataset.
            AssertionError: If num_gpus is 0 (GPU testing is required).
        """
        from focoos.trainer.trainer import run_test

        self.model_info.val_dataset = data_test.dataset.metadata.name
        self.model_info.val_metrics = None
        self.model_info.classes = data_test.dataset.metadata.classes
        self.model_info.config["num_classes"] = data_test.dataset.metadata.num_classes
        assert self.model_info.task == data_test.dataset.metadata.task, "Task mismatch between model and dataset."

        assert args.num_gpus, "Testing without GPUs is not supported. num_gpus must be greater than 0"
        if args.num_gpus > 1:
            launch(
                run_test,
                args.num_gpus,
                dist_url="auto",
                args=(args, data_test, self.model, self.processor, self.model_info),
            )
            logger.info("Testing done, resuming main process.")
            # In DDP the main process is not updated, so restore the best model and config from disk
            final_folder = os.path.join(args.output_dir, args.run_name)
            metadata_path = os.path.join(final_folder, ArtifactName.INFO)
            self.model_info = ModelInfo.from_json(metadata_path)
        else:
            run_test(args, data_test, self.model, self.processor, self.model_info)

    @property
    def name(self):
        return self.model_info.name

    @property
    def device(self):
        """Get the device where the model is located.

        Returns:
            The device (CPU or CUDA) where the model is currently located.
        """
        return self.model.device

    @property
    def resolution(self):
        """Get the input resolution of the model.

        Returns:
            The input image resolution expected by the model.
        """
        return self.model_info.config["resolution"]

    @property
    def config(self) -> dict:
        """Get the model configuration.

        Returns:
            Dictionary containing the model configuration parameters.
        """
        return self.model_info.config

    @property
    def classes(self):
        """Get the class names the model can predict.

        Returns:
            List of class names that the model was trained to recognize.
        """
        return self.model_info.classes

    @property
    def task(self):
        """Get the computer vision task type.

        Returns:
            The type of computer vision task (e.g., detection, classification).
        """
        return self.model_info.task

    def export(
        self,
        runtime_type: RuntimeType = RuntimeType.TORCHSCRIPT_32,
        onnx_opset: int = 17,
        out_dir: Optional[str] = None,
        device: Literal["cuda", "cpu"] = "cuda",
        overwrite: bool = False,
        image_size: Optional[int] = None,
    ) -> InferModel:
        """Export the model to different runtime formats.

        This method exports the model to formats like ONNX or TorchScript
        for deployment and inference optimization.

        Args:
            runtime_type: Target runtime format for export.
            onnx_opset: ONNX opset version to use for ONNX export.
            out_dir: Output directory for exported model. If None, uses default location.
            device: Device to use for export ("cuda" or "cpu").
            overwrite: Whether to overwrite existing exported model files.
            image_size: Custom image size for export. If None, uses model's default size.

        Returns:
            InferModel instance for the exported model.

        Raises:
            ValueError: If unsupported PyTorch version or export format.
        """
        if device == "cuda" and not torch.cuda.is_available():
            device = "cpu"
            logger.warning("CUDA is not available. Using CPU for export.")
        if out_dir is None:
            out_dir = os.path.join(MODELS_DIR, self.model_info.ref or self.model_info.name)

        format = runtime_type.to_export_format()
        exportable_model = ExportableModel(self.model, device=device)
        os.makedirs(out_dir, exist_ok=True)
        if image_size is None:
            data = 128 * torch.randn(1, 3, self.model_info.im_size, self.model_info.im_size).to(device)
        else:
            data = 128 * torch.randn(1, 3, image_size, image_size).to(device)
            self.model_info.im_size = image_size

        export_model_name = ArtifactName.ONNX if format == ExportFormat.ONNX else ArtifactName.PT
        _out_file = os.path.join(out_dir, export_model_name)

        dynamic_axes = self.processor.get_dynamic_axes()

        # Hack to warm up the model and record the spatial shapes if needed
        self.model(data)

        if not overwrite and os.path.exists(_out_file):
            logger.info(f"Model file {_out_file} already exists. Set overwrite to True to overwrite.")
            return InferModel(model_dir=out_dir, runtime_type=runtime_type)

        if format == "onnx":
            with torch.no_grad():
                logger.info("🚀 Exporting ONNX model..")
                if TORCH_VERSION >= (2, 5):
                    exp_program = torch.onnx.export(
                        exportable_model,
                        (data,),
                        f=_out_file,
                        opset_version=onnx_opset,
                        verbose=False,
                        verify=True,
                        dynamo=False,
                        external_data=False,  # keep weights embedded in the model file
                        input_names=dynamic_axes.input_names,
                        output_names=dynamic_axes.output_names,
                        dynamic_axes=dynamic_axes.dynamic_axes,
                        do_constant_folding=True,
                        export_params=True,
                        # dynamic_shapes={
                        #    "x": {
                        #        0: torch.export.Dim("batch", min=1, max=64),
                        #        #2: torch.export.Dim("height", min=18, max=4096),
                        #        #3: torch.export.Dim("width", min=18, max=4096),
                        #    }
                        # },
                    )
                elif TORCH_VERSION >= (2, 0):
                    torch.onnx.export(
                        exportable_model,
                        (data,),
                        f=_out_file,
                        opset_version=onnx_opset,
                        verbose=False,
                        input_names=dynamic_axes.input_names,
                        output_names=dynamic_axes.output_names,
                        dynamic_axes=dynamic_axes.dynamic_axes,
                        do_constant_folding=True,
                        export_params=True,
                    )
                else:
                    raise ValueError(f"Unsupported Torch version: {TORCH_VERSION}. Install torch 2.x")
                # if exp_program is not None:
                #    exp_program.optimize()
                #    exp_program.save(_out_file)
                logger.info(f"✅ Exported {format} model to {_out_file}")

        elif format == "torchscript":
            with torch.no_grad():
                logger.info("🚀 Exporting TorchScript model..")
                exp_program = torch.jit.trace(exportable_model, data)
                if exp_program is not None:
                    _out_file = os.path.join(out_dir, ArtifactName.PT)
                    torch.jit.save(exp_program, _out_file)
                    logger.info(f"✅ Exported {format} model to {_out_file} ")
                else:
                    raise ValueError(f"Failed to export {format} model")

        # Fixme: this may override the model_info with the one from the exportable model
        self.model_info.dump_json(os.path.join(out_dir, ArtifactName.INFO))
        return InferModel(model_dir=out_dir, runtime_type=runtime_type)

    def __call__(
        self,
        inputs: Union[
            torch.Tensor,
            np.ndarray,
            Image.Image,
            list[Image.Image],
            list[np.ndarray],
            list[torch.Tensor],
        ],
        **kwargs,
    ) -> FocoosDetections:
        """Run inference on input images.

        This method performs end-to-end inference including preprocessing,
        model forward pass, and postprocessing to return detections.

        Args:
            inputs: Input images in various formats (PIL, numpy, torch tensor, or lists).
            **kwargs: Additional arguments passed to postprocessing.

        Returns:
            FocoosDetections containing the detection results.
        """
        model = self.model.eval()
        processor = self.processor.eval()
        try:
            model = model.cuda()
        except Exception:
            logger.warning("Unable to use CUDA")
        images, _ = processor.preprocess(
            inputs,
            device=model.device,
            dtype=model.dtype,
            image_size=self.model_info.im_size,
        )  # second output is targets that we're not using
        with torch.no_grad():
            try:
                with torch.autocast(device_type="cuda", dtype=torch.float16):
                    output = model.forward(images)
            except Exception:
                output = model.forward(images)
        class_names = self.model_info.classes
        output_fdet = processor.postprocess(output, inputs, class_names=class_names, **kwargs)

        # FIXME: we don't support batching yet
        return output_fdet[0]

    def _reload_model(self):
        """Reload the model with updated configuration.

        This method recreates the model instance with the current configuration
        and reloads the weights. Used when configuration changes during training.
        """
        from focoos.model_manager import ConfigManager  # here to avoid circular import

        torch.cuda.empty_cache()
        model_class = self.model.__class__
        # Without the next line, the inner config may not be a ModelConfig but a plain dict
        config = ConfigManager.from_dict(self.model_info.model_family, self.model_info.config)
        self.model_info.config = config
        model = model_class(config)
        self.model = model
        self._load_weights()

    def _load_weights(self) -> int:
        """Load model weights from the specified URI.

        This method loads the model weights from either a local path or a remote URL,
        depending on the value of `self.model_info.weights_uri`. If the weights are remote,
        they are downloaded to a local directory. The method then loads the weights into
        the model, allowing for missing or unexpected keys (non-strict loading).

        Returns:
            The total number of missing or unexpected keys encountered during loading.
            Returns 0 if no weights are loaded or an error occurs.

        Raises:
            FileNotFoundError: If the weights file cannot be found at the specified path.
        """
        if not self.model_info.weights_uri:
            logger.warning(f"⚠️ Model {self.model_info.name} has no pretrained weights")
            return 0

        # Determine if weights are remote or local
        parsed_uri = urlparse(self.model_info.weights_uri)
        is_remote = bool(parsed_uri.scheme and parsed_uri.netloc)

        # Get weights path
        if is_remote:
            logger.info(f"Downloading weights from remote URL: {self.model_info.weights_uri}")
            model_dir = Path(MODELS_DIR) / self.model_info.name
            weights_path = ApiClient().download_ext_file(
                self.model_info.weights_uri, str(model_dir), skip_if_exists=True
            )
        else:
            logger.info(f"Using weights from local path: {self.model_info.weights_uri}")
            weights_path = self.model_info.weights_uri

        try:
            if not os.path.exists(weights_path):
                raise FileNotFoundError(f"Weights file not found: {weights_path}")

            # Load weights and extract model state if needed
            state_dict = torch.load(weights_path, map_location="cpu", weights_only=True)
            weights_dict = state_dict.get("model", state_dict) if isinstance(state_dict, dict) else state_dict

        except Exception as e:
            logger.error(f"Error loading weights for {self.model_info.name}: {str(e)}")
            return 0

        incompatible = self.model.load_state_dict(weights_dict, strict=False)
        return len(incompatible.missing_keys) + len(incompatible.unexpected_keys)

    def benchmark(
        self,
        iterations: int = 50,
        size: Optional[Union[int, Tuple[int, int]]] = None,
        device: Literal["cuda", "cpu"] = "cuda",
    ) -> LatencyMetrics:
        """Benchmark the model's inference performance.

        This method measures the raw model inference latency without
        preprocessing and postprocessing overhead.

        Args:
            iterations: Number of iterations to run for benchmarking.
            size: Input image size. If None, uses model's default size.
            device: Device to run benchmarking on ("cuda" or "cpu").

        Returns:
            LatencyMetrics containing performance statistics.
        """
        self.model.eval()

        if size is None:
            size = self.model_info.im_size
        if isinstance(size, int):
            size = (size, size)
        model = self.model.to(device)
        metrics = model.benchmark(size=size, iterations=iterations)
        return metrics

    def end2end_benchmark(
        self, iterations: int = 50, size: Optional[int] = None, device: Literal["cuda", "cpu"] = "cuda"
    ) -> LatencyMetrics:
        """Benchmark the complete end-to-end inference pipeline.

        This method measures the full inference latency including preprocessing,
        model forward pass, and postprocessing steps.

        Args:
            iterations: Number of iterations to run for benchmarking.
            size: Input image size. If None, uses model's default size.
            device: Device to run benchmarking on ("cuda" or "cpu").

        Returns:
            LatencyMetrics containing end-to-end performance statistics.
        """
        if size is None:
            size = self.model_info.im_size

        model = self.model
        try:
            model = model.cuda()
        except Exception:
            logger.warning("Unable to use CUDA")
        logger.info(f"⏱️ Benchmarking latency on {model.device}, size: {size}x{size}..")
        # warmup
        data = 128 * torch.randn(1, 3, size, size).to(model.device)

        durations = []
        for _ in range(iterations):
            start = torch.cuda.Event(enable_timing=True)
            end = torch.cuda.Event(enable_timing=True)
            start.record()  # record on the current CUDA stream
            _ = self(data)
            end.record()
            torch.cuda.synchronize()
            durations.append(start.elapsed_time(end))

        durations = np.array(durations)
        metrics = LatencyMetrics(
            fps=int(1000 / durations.mean()),
            engine=f"torch.{self.model.device}",
            mean=round(durations.mean().astype(float), 3),
            max=round(durations.max().astype(float), 3),
            min=round(durations.min().astype(float), 3),
            std=round(durations.std().astype(float), 3),
            im_size=size,
            device=str(self.model.device),
        )
        logger.info(f"🔥 FPS: {metrics.fps} Mean latency: {metrics.mean} ms ")
        return metrics

classes property #

Get the class names the model can predict.

Returns:

List of class names that the model was trained to recognize.

config property #

Get the model configuration.

Returns:

dict: Dictionary containing the model configuration parameters.

device property #

Get the device where the model is located.

Returns:

The device (CPU or CUDA) where the model is currently located.

resolution property #

Get the input resolution of the model.

Returns:

The input image resolution expected by the model.

task property #

Get the computer vision task type.

Returns:

The type of computer vision task (e.g., detection, classification).
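
These properties are plain read-throughs to the attached ModelInfo and the underlying module; a short sketch, assuming model is a loaded FocoosModel:

# Each property is a cheap lookup; nothing is recomputed.
print(model.device)      # device of the underlying nn.Module
print(model.resolution)  # input resolution from the model config
print(model.classes)     # list of class names
print(model.task)        # e.g. detection or classification
print(model.config)      # raw configuration dictionary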

__call__(inputs, **kwargs) #

Run inference on input images.

This method performs end-to-end inference including preprocessing, model forward pass, and postprocessing to return detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| inputs | Union[Tensor, ndarray, Image, list[Image], list[ndarray], list[Tensor]] | Input images in various formats (PIL, numpy, torch tensor, or lists). | required |
| **kwargs | | Additional arguments passed to postprocessing. | {} |

Returns:

FocoosDetections containing the detection results.

Source code in focoos/models/focoos_model.py
def __call__(
    self,
    inputs: Union[
        torch.Tensor,
        np.ndarray,
        Image.Image,
        list[Image.Image],
        list[np.ndarray],
        list[torch.Tensor],
    ],
    **kwargs,
) -> FocoosDetections:
    """Run inference on input images.

    This method performs end-to-end inference including preprocessing,
    model forward pass, and postprocessing to return detections.

    Args:
        inputs: Input images in various formats (PIL, numpy, torch tensor, or lists).
        **kwargs: Additional arguments passed to postprocessing.

    Returns:
        FocoosDetections containing the detection results.
    """
    model = self.model.eval()
    processor = self.processor.eval()
    try:
        model = model.cuda()
    except Exception:
        logger.warning("Unable to use CUDA")
    images, _ = processor.preprocess(
        inputs,
        device=model.device,
        dtype=model.dtype,
        image_size=self.model_info.im_size,
    )  # second output is targets that we're not using
    with torch.no_grad():
        try:
            with torch.autocast(device_type="cuda", dtype=torch.float16):
                output = model.forward(images)
        except Exception:
            output = model.forward(images)
    class_names = self.model_info.classes
    output_fdet = processor.postprocess(output, inputs, class_names=class_names, **kwargs)

    # FIXME: we don't support batching yet
    return output_fdet[0]
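
An inference sketch, assuming model is a loaded FocoosModel. Batching is not supported yet, so only the detections for the first image are returned.

import numpy as np
from PIL import Image

image = Image.open("street.jpg")
detections = model(image)              # FocoosDetections for the image

# numpy arrays (and torch tensors) are accepted as well:
detections = model(np.asarray(image))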

__init__(model, model_info) #

Initialize the FocoosModel.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model | BaseModelNN | The underlying neural network model. | required |
| model_info | ModelInfo | Metadata and configuration information for the model. | required |

Source code in focoos/models/focoos_model.py
def __init__(self, model: BaseModelNN, model_info: ModelInfo):
    """Initialize the FocoosModel.

    Args:
        model: The underlying neural network model.
        model_info: Metadata and configuration information for the model.
    """
    self.model = model
    self.model_info = model_info
    self.processor = ProcessorManager.get_processor(self.model_info.model_family, self.model_info.config)
    if self.model_info.weights_uri:
        self._load_weights()
    else:
        logger.warning(f"⚠️ Model {self.model_info.name} has no pretrained weights")

__repr__() #

Return detailed string representation of the model.

Returns:

String containing model name and family.

Source code in focoos/models/focoos_model.py
def __repr__(self):
    """Return detailed string representation of the model.

    Returns:
        String containing model name and family.
    """
    return f"{self.model_info.name} ({self.model_info.model_family.value})"

__str__() #

Return string representation of the model.

Returns:

String containing model name and family.

Source code in focoos/models/focoos_model.py
def __str__(self):
    """Return string representation of the model.

    Returns:
        String containing model name and family.
    """
    return f"{self.model_info.name} ({self.model_info.model_family.value})"

benchmark(iterations=50, size=None, device='cuda') #

Benchmark the model's inference performance.

This method measures the raw model inference latency without preprocessing and postprocessing overhead.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| iterations | int | Number of iterations to run for benchmarking. | 50 |
| size | Optional[Union[int, Tuple[int, int]]] | Input image size. If None, uses the model's default size. | None |
| device | Literal['cuda', 'cpu'] | Device to run benchmarking on ("cuda" or "cpu"). | 'cuda' |

Returns:

LatencyMetrics containing performance statistics.

Source code in focoos/models/focoos_model.py
def benchmark(
    self,
    iterations: int = 50,
    size: Optional[Union[int, Tuple[int, int]]] = None,
    device: Literal["cuda", "cpu"] = "cuda",
) -> LatencyMetrics:
    """Benchmark the model's inference performance.

    This method measures the raw model inference latency without
    preprocessing and postprocessing overhead.

    Args:
        iterations: Number of iterations to run for benchmarking.
        size: Input image size. If None, uses model's default size.
        device: Device to run benchmarking on ("cuda" or "cpu").

    Returns:
        LatencyMetrics containing performance statistics.
    """
    self.model.eval()

    if size is None:
        size = self.model_info.im_size
    if isinstance(size, int):
        size = (size, size)
    model = self.model.to(device)
    metrics = model.benchmark(size=size, iterations=iterations)
    return metrics
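
A benchmarking sketch, assuming model is a loaded FocoosModel; the printed fields are those of the returned LatencyMetrics:

metrics = model.benchmark(iterations=100, size=640, device="cuda")
print(f"{metrics.fps} FPS, mean {metrics.mean} ms (std {metrics.std} ms)")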

end2end_benchmark(iterations=50, size=None, device='cuda') #

Benchmark the complete end-to-end inference pipeline.

This method measures the full inference latency including preprocessing, model forward pass, and postprocessing steps.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| iterations | int | Number of iterations to run for benchmarking. | 50 |
| size | Optional[int] | Input image size. If None, uses the model's default size. | None |
| device | Literal['cuda', 'cpu'] | Device to run benchmarking on ("cuda" or "cpu"). | 'cuda' |

Returns:

LatencyMetrics containing end-to-end performance statistics.

Source code in focoos/models/focoos_model.py
def end2end_benchmark(
    self, iterations: int = 50, size: Optional[int] = None, device: Literal["cuda", "cpu"] = "cuda"
) -> LatencyMetrics:
    """Benchmark the complete end-to-end inference pipeline.

    This method measures the full inference latency including preprocessing,
    model forward pass, and postprocessing steps.

    Args:
        iterations: Number of iterations to run for benchmarking.
        size: Input image size. If None, uses model's default size.
        device: Device to run benchmarking on ("cuda" or "cpu").

    Returns:
        LatencyMetrics containing end-to-end performance statistics.
    """
    if size is None:
        size = self.model_info.im_size

    model = self.model
    try:
        model = model.cuda()
    except Exception:
        logger.warning("Unable to use CUDA")
    logger.info(f"⏱️ Benchmarking latency on {model.device}, size: {size}x{size}..")
    # warmup
    data = 128 * torch.randn(1, 3, size, size).to(model.device)

    durations = []
    for _ in range(iterations):
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()  # record on the current CUDA stream
        _ = self(data)
        end.record()
        torch.cuda.synchronize()
        durations.append(start.elapsed_time(end))

    durations = np.array(durations)
    metrics = LatencyMetrics(
        fps=int(1000 / durations.mean()),
        engine=f"torch.{self.model.device}",
        mean=round(durations.mean().astype(float), 3),
        max=round(durations.max().astype(float), 3),
        min=round(durations.min().astype(float), 3),
        std=round(durations.std().astype(float), 3),
        im_size=size,
        device=str(self.model.device),
    )
    logger.info(f"🔥 FPS: {metrics.fps} Mean latency: {metrics.mean} ms ")
    return metrics
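
The end-to-end variant measures the full __call__ path, preprocessing and postprocessing included; a sketch under the same assumption that model is a loaded FocoosModel:

metrics = model.end2end_benchmark(iterations=50, size=640)
print(f"engine={metrics.engine} fps={metrics.fps} mean={metrics.mean} ms")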

export(runtime_type=RuntimeType.TORCHSCRIPT_32, onnx_opset=17, out_dir=None, device='cuda', overwrite=False, image_size=None) #

Export the model to different runtime formats.

This method exports the model to formats like ONNX or TorchScript for deployment and inference optimization.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| runtime_type | RuntimeType | Target runtime format for export. | TORCHSCRIPT_32 |
| onnx_opset | int | ONNX opset version to use for ONNX export. | 17 |
| out_dir | Optional[str] | Output directory for the exported model. If None, uses the default location. | None |
| device | Literal['cuda', 'cpu'] | Device to use for export ("cuda" or "cpu"). | 'cuda' |
| overwrite | bool | Whether to overwrite existing exported model files. | False |
| image_size | Optional[int] | Custom image size for export. If None, uses the model's default size. | None |

Returns:

InferModel instance for the exported model.

Raises:

ValueError: If the PyTorch version or export format is unsupported.

Source code in focoos/models/focoos_model.py
def export(
    self,
    runtime_type: RuntimeType = RuntimeType.TORCHSCRIPT_32,
    onnx_opset: int = 17,
    out_dir: Optional[str] = None,
    device: Literal["cuda", "cpu"] = "cuda",
    overwrite: bool = False,
    image_size: Optional[int] = None,
) -> InferModel:
    """Export the model to different runtime formats.

    This method exports the model to formats like ONNX or TorchScript
    for deployment and inference optimization.

    Args:
        runtime_type: Target runtime format for export.
        onnx_opset: ONNX opset version to use for ONNX export.
        out_dir: Output directory for exported model. If None, uses default location.
        device: Device to use for export ("cuda" or "cpu").
        overwrite: Whether to overwrite existing exported model files.
        image_size: Custom image size for export. If None, uses model's default size.

    Returns:
        InferModel instance for the exported model.

    Raises:
        ValueError: If unsupported PyTorch version or export format.
    """
    if device == "cuda" and not torch.cuda.is_available():
        device = "cpu"
        logger.warning("CUDA is not available. Using CPU for export.")
    if out_dir is None:
        out_dir = os.path.join(MODELS_DIR, self.model_info.ref or self.model_info.name)

    format = runtime_type.to_export_format()
    exportable_model = ExportableModel(self.model, device=device)
    os.makedirs(out_dir, exist_ok=True)
    if image_size is None:
        data = 128 * torch.randn(1, 3, self.model_info.im_size, self.model_info.im_size).to(device)
    else:
        data = 128 * torch.randn(1, 3, image_size, image_size).to(device)
        self.model_info.im_size = image_size

    export_model_name = ArtifactName.ONNX if format == ExportFormat.ONNX else ArtifactName.PT
    _out_file = os.path.join(out_dir, export_model_name)

    dynamic_axes = self.processor.get_dynamic_axes()

    # Hack to warm up the model and record the spatial shapes if needed
    self.model(data)

    if not overwrite and os.path.exists(_out_file):
        logger.info(f"Model file {_out_file} already exists. Set overwrite to True to overwrite.")
        return InferModel(model_dir=out_dir, runtime_type=runtime_type)

    if format == "onnx":
        with torch.no_grad():
            logger.info("🚀 Exporting ONNX model..")
            if TORCH_VERSION >= (2, 5):
                exp_program = torch.onnx.export(
                    exportable_model,
                    (data,),
                    f=_out_file,
                    opset_version=onnx_opset,
                    verbose=False,
                    verify=True,
                    dynamo=False,
                    external_data=False,  # keep weights embedded in the model file
                    input_names=dynamic_axes.input_names,
                    output_names=dynamic_axes.output_names,
                    dynamic_axes=dynamic_axes.dynamic_axes,
                    do_constant_folding=True,
                    export_params=True,
                    # dynamic_shapes={
                    #    "x": {
                    #        0: torch.export.Dim("batch", min=1, max=64),
                    #        #2: torch.export.Dim("height", min=18, max=4096),
                    #        #3: torch.export.Dim("width", min=18, max=4096),
                    #    }
                    # },
                )
            elif TORCH_VERSION >= (2, 0):
                torch.onnx.export(
                    exportable_model,
                    (data,),
                    f=_out_file,
                    opset_version=onnx_opset,
                    verbose=False,
                    input_names=dynamic_axes.input_names,
                    output_names=dynamic_axes.output_names,
                    dynamic_axes=dynamic_axes.dynamic_axes,
                    do_constant_folding=True,
                    export_params=True,
                )
            else:
                raise ValueError(f"Unsupported Torch version: {TORCH_VERSION}. Install torch 2.x")
            # if exp_program is not None:
            #    exp_program.optimize()
            #    exp_program.save(_out_file)
            logger.info(f"✅ Exported {format} model to {_out_file}")

    elif format == "torchscript":
        with torch.no_grad():
            logger.info("🚀 Exporting TorchScript model..")
            exp_program = torch.jit.trace(exportable_model, data)
            if exp_program is not None:
                _out_file = os.path.join(out_dir, ArtifactName.PT)
                torch.jit.save(exp_program, _out_file)
                logger.info(f"✅ Exported {format} model to {_out_file} ")
            else:
                raise ValueError(f"Failed to export {format} model")

    # Fixme: this may override the model_info with the one from the exportable model
    self.model_info.dump_json(os.path.join(out_dir, ArtifactName.INFO))
    return InferModel(model_dir=out_dir, runtime_type=runtime_type)
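
An export sketch, assuming model is a loaded FocoosModel and that RuntimeType is imported from the focoos package (the exact import path is not shown on this page):

infer_model = model.export(
    runtime_type=RuntimeType.TORCHSCRIPT_32,
    out_dir="./exported",  # when None, defaults to MODELS_DIR/<ref or name>
    device="cpu",          # export falls back to CPU if CUDA is unavailable
    overwrite=True,
)
# infer_model is an InferModel bound to the serialized artifact.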

test(args, data_test) #

Test the model on the provided test dataset.

This method evaluates the model performance on a test dataset, supporting both single-GPU and multi-GPU testing.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| args | TrainerArgs | Test configuration arguments. | required |
| data_test | MapDataset | Test dataset for model evaluation. | required |

Raises:

AssertionError: If there is a task mismatch between model and dataset.

AssertionError: If num_gpus is 0 (GPU testing is required).

Source code in focoos/models/focoos_model.py
def test(self, args: TrainerArgs, data_test: MapDataset):
    """Test the model on the provided test dataset.

    This method evaluates the model performance on a test dataset,
    supporting both single-GPU and multi-GPU testing.

    Args:
        args: Test configuration arguments.
        data_test: Test dataset for model evaluation.

    Raises:
        AssertionError: If task mismatch between model and dataset.
        AssertionError: If num_gpus is 0 (GPU testing is required).
    """
    from focoos.trainer.trainer import run_test

    self.model_info.val_dataset = data_test.dataset.metadata.name
    self.model_info.val_metrics = None
    self.model_info.classes = data_test.dataset.metadata.classes
    self.model_info.config["num_classes"] = data_test.dataset.metadata.num_classes
    assert self.model_info.task == data_test.dataset.metadata.task, "Task mismatch between model and dataset."

    assert args.num_gpus, "Testing without GPUs is not supported. num_gpus must be greater than 0"
    if args.num_gpus > 1:
        launch(
            run_test,
            args.num_gpus,
            dist_url="auto",
            args=(args, data_test, self.model, self.processor, self.model_info),
        )
        logger.info("Testing done, resuming main process.")
        # In DDP the main process is not updated, so restore the best model and config from disk
        final_folder = os.path.join(args.output_dir, args.run_name)
        metadata_path = os.path.join(final_folder, ArtifactName.INFO)
        self.model_info = ModelInfo.from_json(metadata_path)
    else:
        run_test(args, data_test, self.model, self.processor, self.model_info)
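
A testing sketch; args (TrainerArgs) and data_test (MapDataset) are assumed to be prepared elsewhere:

model.test(args, data_test)
# After a multi-GPU run, model_info is reloaded from the run's output
# folder, since the main process is not updated under DDP.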

train(args, data_train, data_val, hub=None) #

Train the model on the provided datasets.

This method handles both single-GPU and multi-GPU distributed training. It sets up the model for training, optionally syncs with Focoos Hub, and manages the training process.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| args | TrainerArgs | Training configuration and hyperparameters. | required |
| data_train | MapDataset | Training dataset containing images and annotations. | required |
| data_val | MapDataset | Validation dataset for model evaluation. | required |
| hub | Optional[FocoosHUB] | Optional Focoos Hub instance for model syncing. | None |

Raises:

AssertionError: If there is a task mismatch between model and dataset.

AssertionError: If the number of classes differs between model and dataset.

AssertionError: If num_gpus is 0 (GPU training is required).

FileNotFoundError: If training artifacts are not found after completion.

Source code in focoos/models/focoos_model.py
def train(self, args: TrainerArgs, data_train: MapDataset, data_val: MapDataset, hub: Optional[FocoosHUB] = None):
    """Train the model on the provided datasets.

    This method handles both single-GPU and multi-GPU distributed training.
    It sets up the model for training, optionally syncs with Focoos Hub,
    and manages the training process.

    Args:
        args: Training configuration and hyperparameters.
        data_train: Training dataset containing images and annotations.
        data_val: Validation dataset for model evaluation.
        hub: Optional Focoos Hub instance for model syncing.

    Raises:
        AssertionError: If task mismatch between model and dataset.
        AssertionError: If number of classes mismatch between model and dataset.
        AssertionError: If num_gpus is 0 (GPU training is required).
        FileNotFoundError: If training artifacts are not found after completion.
    """
    from focoos.trainer.trainer import run_train

    self._setup_model_for_training(args, data_train, data_val)
    assert self.model_info.task == data_train.dataset.metadata.task, "Task mismatch between model and dataset."
    assert self.model_info.config["num_classes"] == data_train.dataset.metadata.num_classes, (
        "Number of classes mismatch between model and dataset."
    )
    remote_model = None
    if args.sync_to_hub:
        hub = hub or FocoosHUB()
        remote_model = hub.new_model(self.model_info)

        self.model_info.ref = remote_model.ref
        logger.info(f"Model {self.model_info.name} created in hub with ref {self.model_info.ref}")

    assert args.num_gpus, "Training without GPUs is not supported. num_gpus must be greater than 0"
    if args.num_gpus > 1:
        launch(
            run_train,
            args.num_gpus,
            dist_url="auto",
            args=(args, data_train, data_val, self.model, self.processor, self.model_info, remote_model),
        )

        logger.info("Training done, resuming main process.")
        # In DDP the main process is not updated, so restore the best model and config from disk
        final_folder = os.path.join(args.output_dir, args.run_name)
        model_path = os.path.join(final_folder, ArtifactName.WEIGHTS)
        metadata_path = os.path.join(final_folder, ArtifactName.INFO)

        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Training did not end correctly, model file not found at {model_path}")
        if not os.path.exists(metadata_path):
            raise FileNotFoundError(f"Training did not end correctly, metadata file not found at {metadata_path}")
        self.model_info = ModelInfo.from_json(metadata_path)

        logger.info(f"Reloading weights from {self.model_info.weights_uri}")
        self._reload_model()
    else:
        run_train(args, data_train, data_val, self.model, self.processor, self.model_info, remote_model)
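
A training sketch; args (TrainerArgs), data_train and data_val (MapDataset) are assumed to be prepared elsewhere. Setting sync_to_hub on the args mirrors the run to the Focoos Hub:

model.train(args, data_train, data_val)

# Optionally pass an explicit hub client:
# model.train(args, data_train, data_val, hub=FocoosHUB())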