Using DeepFense as a Library

This tutorial shows how to use DeepFense as an installed Python library. Instead of running train.py, you can build experiments programmatically in your own scripts or notebooks.


Installation

pip install -e .   # from the repo root

Quick Example

import deepfense
from deepfense.utils.registry import build_detector, build_frontend, build_backend, build_loss

# Build a complete detector
model = build_detector("ModularDetector", {
    "frontend": {"type": "wav2vec2", "args": {"source": "huggingface", "ckpt_path": "facebook/wav2vec2-base", "freeze": True}},
    "backend": {"type": "MLP", "args": {"input_dim": 768, "projection": [256, 64]}},
    "loss": [{"type": "CrossEntropy", "embedding_dim": 64, "n_classes": 2}]
})

# Run inference
import torch

audio = torch.randn(1, 64000)  # Fake audio, shape [1, 64000]

# Put the model in evaluation mode and disable gradient tracking, as the
# other inference snippets in this tutorial do: train-only behaviour
# (e.g. dropout, if the model has any) is turned off and no autograd graph
# is built for a forward pass that will never be back-propagated.
model.eval()
with torch.no_grad():
    output = model(audio)
print(f"Score: {output['scores']}")

1. Building Models

Build Individual Components

from deepfense.utils.registry import build_frontend, build_backend, build_loss

# Frontend
frontend = build_frontend("wavlm", {
    "source": "huggingface",
    "ckpt_path": "microsoft/wavlm-base",
    "freeze": True
})

# Backend  
backend = build_backend("AASIST", {
    "input_dim": 768,
    "filts": [70, [1, 32], [32, 32], [32, 64], [64, 64]],
    "gat_dims": [64, 32]
})

# Loss
loss = build_loss("OCSoftmax", {
    "embedding_dim": 32,
    "w_posi": 0.9,
    "w_nega": 0.2,
    "alpha": 20.0
})

Build Complete Detector

from deepfense.utils.registry import build_detector

config = {
    "frontend": {
        "type": "wavlm",
        "args": {
            "source": "huggingface", 
            "ckpt_path": "microsoft/wavlm-base",
            "freeze": True
        }
    },
    "backend": {
        "type": "AASIST",
        "args": {"input_dim": 768}
    },
    "loss": [{
        "type": "OCSoftmax",
        "embedding_dim": 32,
        "w_posi": 0.9,
        "w_nega": 0.2,
        "alpha": 20.0
    }]
}

model = build_detector("ModularDetector", config)
model.to("cuda")

2. Loading Data

Using StandardDataset

from deepfense.data.detection_dataset import StandardDataset
from torch.utils.data import DataLoader

dataset_config = {
    "parquet_files": ["/path/to/train.parquet"],
    "root_dir": "/path/to/audio/root",
    "label_map": {"bonafide": 1, "spoof": 0},
    "base_transform": [
        {"type": "pad", "max_len": 64600, "pad_type": "repeat"}
    ]
}

dataset = StandardDataset(dataset_config)
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)

# Iterate
for batch in loader:
    audio = batch["x"]        # [B, T]
    labels = batch["label"]   # [B]
    ids = batch["ID"]         # List of IDs
    break

Using build_dataloader

from deepfense.data.data_utils import build_dataloader

config = {
    "dataset_type": "StandardDataset",
    "parquet_files": ["/path/to/data.parquet"],
    "root_dir": "/path/to/audio",
    "label_map": {"bonafide": 1, "spoof": 0},
    "batch_size": 32,
    "shuffle": True,
    "num_workers": 4,
    "base_transform": [
        {"type": "pad", "max_len": 64600}
    ]
}

loader = build_dataloader(config)

3. Training Loop

Basic Training

import torch
import torch.optim as optim
from deepfense.utils.registry import build_detector
from deepfense.data.data_utils import build_dataloader

# Build model
model = build_detector("ModularDetector", model_config)
model.to("cuda")

# Build data
train_loader = build_dataloader(train_config)
val_loader = build_dataloader(val_config)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Import the metric once, before training starts, so a missing or renamed
# module fails immediately instead of after the first full epoch.
from deepfense.training.evaluations.metrics import compute_eer

# Training loop
for epoch in range(50):
    model.train()
    for batch in train_loader:
        audio = batch["x"].to("cuda")
        labels = batch["label"].to("cuda")

        optimizer.zero_grad()

        # Forward
        outputs = model(audio)
        loss = model.compute_loss(outputs, labels)

        # Backward
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    all_scores, all_labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            audio = batch["x"].to("cuda")
            outputs = model(audio)
            # Scores are moved to the CPU so they can be concatenated with
            # the labels, which are never moved off the loader's device.
            all_scores.append(outputs["scores"].cpu())
            all_labels.append(batch["label"])

    # Distinct names keep the per-batch `labels` tensor from being rebound
    # to a NumPy array of validation labels.
    val_scores = torch.cat(all_scores).numpy()
    val_labels = torch.cat(all_labels).numpy()

    # Compute EER
    eer = compute_eer(val_scores, val_labels)
    print(f"Epoch {epoch+1}: EER = {eer:.2%}")

Using StandardTrainer

from deepfense.training.standard_trainer import StandardTrainer
from deepfense.utils.registry import build_detector
from deepfense.data.data_utils import build_dataloader

# Build components
model = build_detector("ModularDetector", model_config)
train_loader = build_dataloader(train_config)
val_loader = build_dataloader(val_config)

# Trainer config
training_config = {
    "epochs": 50,
    "device": "cuda",
    "output_dir": "./outputs/my_experiment",
    "optimizer": {"type": "adam", "lr": 1e-4},
    "scheduler": {"type": "cosine_annealing", "T_max": 50},
    "monitor_metric": "EER",
    "monitor_mode": "min",
    "metrics": {"EER": {}, "F1_SCORE": {}}
}

# Create trainer
trainer = StandardTrainer(
    model=model,
    data_loaders={"train": train_loader, "val": val_loader},
    config=training_config
)

# Train
trainer.train()

4. Inference

Single File Inference

import torch
import torchaudio
from deepfense.utils.registry import build_detector

# Load model
model = build_detector("ModularDetector", model_config)
# map_location="cpu" makes the checkpoint loadable regardless of the device
# it was saved from (e.g. a GPU checkpoint on a CPU-only machine); the model
# is moved to its target device explicitly below.
checkpoint = torch.load("best_model.pth", map_location="cpu")
model.load_state_dict(checkpoint["model_state"])
model.eval()
model.to("cuda")

# Load audio
def load_audio(path, target_sr=16000, max_len=64600):
    """Read an audio file and return a fixed-length mono tensor with a batch axis.

    The file is read with ``torchaudio.load``, resampled to ``target_sr`` when
    its sample rate differs, averaged over dim 0, and then either right-padded
    (``torch.nn.functional.pad`` with its default mode) or truncated so that it
    holds exactly ``max_len`` samples.

    Args:
        path: Location of the audio file, handed to ``torchaudio.load``.
        target_sr: Sample rate the waveform is converted to.
        max_len: Number of samples kept in the returned waveform.

    Returns:
        The waveform with a leading batch dimension added by ``unsqueeze(0)``;
        ``[1, max_len]`` assuming ``torchaudio.load`` yields ``[channels, samples]``.
    """
    waveform, native_sr = torchaudio.load(path)
    if native_sr != target_sr:
        resampler = torchaudio.transforms.Resample(native_sr, target_sr)
        waveform = resampler(waveform)

    mono = waveform.mean(dim=0)  # Mono
    missing = max_len - len(mono)
    if missing > 0:
        # Too short: extend on the right only.
        mono = torch.nn.functional.pad(mono, (0, missing))
    else:
        # Long enough: keep the first max_len samples.
        mono = mono[:max_len]
    return mono.unsqueeze(0)  # Add batch dim

# Inference
# load_audio adds the batch dimension, so the model sees a batch of one file.
audio = load_audio("/path/to/test.flac").to("cuda")
with torch.no_grad():
    output = model(audio)
    # .item() only works on a single-element tensor; presumably "scores"
    # holds one value per input file — confirm for your detector.
    score = output["scores"].item()

print(f"Score: {score:.4f}")
# NOTE(review): the 0 threshold assumes scores are signed, with higher
# meaning bonafide — confirm against the loss used and calibrate on
# validation data if needed.
print(f"Prediction: {'bonafide' if score > 0 else 'spoof'}")

Batch Inference

import pandas as pd
from tqdm import tqdm

# Load test data
test_df = pd.read_parquet("test.parquet")
results = []

# Gradient tracking is switched off once for the whole pass; every file is
# scored individually and its record appended in table order.
with torch.no_grad():
    for _, row in tqdm(test_df.iterrows(), total=len(test_df)):
        score = model(load_audio(row["path"]).to("cuda"))["scores"].item()
        results.append({
            "ID": row["ID"],
            "score": score,
            "prediction": "bonafide" if score > 0 else "spoof"
        })

# Persist one row per scored file.
results_df = pd.DataFrame(results)
results_df.to_csv("predictions.csv", index=False)

5. Custom Experiments

Comparing Frontends

from deepfense.utils.registry import build_detector

frontends = [
    ("wav2vec2", {"source": "huggingface", "ckpt_path": "facebook/wav2vec2-base"}),
    ("wavlm", {"source": "huggingface", "ckpt_path": "microsoft/wavlm-base"}),
    ("hubert", {"source": "huggingface", "ckpt_path": "facebook/hubert-base-ls960"}),
]

# Maps each frontend name to the metric obtained with it.
results = {}
for name, frontend_args in frontends:
    # Only the frontend entry changes between runs; the backend and loss
    # settings are the same for every frontend being compared.
    config = {
        "frontend": {"type": name, "args": {**frontend_args, "freeze": True}},
        "backend": {"type": "MLP", "args": {"input_dim": 768, "projection": [256, 64]}},
        "loss": [{"type": "CrossEntropy", "embedding_dim": 64, "n_classes": 2}]
    }

    model = build_detector("ModularDetector", config)
    # ... train and evaluate ...
    # NOTE: `eer` is not defined in this snippet — the elided train/evaluate
    # step above must assign it, otherwise the next line raises NameError.
    results[name] = eer

print(results)
# Hyperparameter grid search
import copy
import itertools

from deepfense.data.data_utils import build_dataloader
from deepfense.training.standard_trainer import StandardTrainer
from deepfense.utils.registry import build_detector

learning_rates = [1e-3, 1e-4, 1e-5]
batch_sizes = [16, 32, 64]

for lr, bs in itertools.product(learning_rates, batch_sizes):
    print(f"Training with lr={lr}, batch_size={bs}")

    # Work on per-run copies so the shared train_config / training_config
    # keep their original values after the search.
    run_train_config = copy.deepcopy(train_config)
    run_train_config["batch_size"] = bs

    run_training_config = copy.deepcopy(training_config)
    run_training_config["optimizer"]["lr"] = lr
    # Give every run its own output directory; presumably this is where the
    # trainer writes its artifacts, so runs would otherwise overwrite each other.
    base_output_dir = training_config.get("output_dir", "./outputs")
    run_training_config["output_dir"] = f"{base_output_dir}/lr{lr}_bs{bs}"

    # Build a fresh model for every combination. Reusing one instance would
    # make each run continue from the previous run's weights, so the
    # results would not be comparable.
    model = build_detector("ModularDetector", model_config)

    loader = build_dataloader(run_train_config)
    trainer = StandardTrainer(
        model=model,
        data_loaders={"train": loader, "val": val_loader},
        config=run_training_config
    )
    trainer.train()

6. Available Registries

from deepfense.utils.registry import (
    FRONTEND_REGISTRY,
    BACKEND_REGISTRY,
    LOSS_REGISTRY,
    DATASET_REGISTRY,
    TRANSFORM_REGISTRY,
    OPTIMIZER_REGISTRY,
    METRIC_REGISTRY
)

# List all available components
print("Frontends:", list(FRONTEND_REGISTRY.keys()))
print("Backends:", list(BACKEND_REGISTRY.keys()))
print("Losses:", list(LOSS_REGISTRY.keys()))

7. Extending from Library

You can register new components even when using DeepFense as a library:

from deepfense.utils.registry import register_backend
import torch.nn as nn

@register_backend("MyCustomBackend")
class MyCustomBackend(nn.Module):
    """Example backend: average the input over dim 1, then apply one linear layer.

    Registered under the name "MyCustomBackend" so it can be built with
    ``build_backend``.
    """

    def __init__(self, config):
        """Create the linear layer from ``config["input_dim"]`` and ``config["output_dim"]``."""
        super().__init__()
        in_features = config["input_dim"]
        out_features = config["output_dim"]
        self.fc = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return ``self.fc`` applied to the mean of ``x`` over dim 1.

        Presumably ``x`` is ``[batch, time, input_dim]`` — confirm against the
        frontend output.
        """
        pooled = x.mean(dim=1)
        return self.fc(pooled)

# Now use it
from deepfense.utils.registry import build_backend

backend = build_backend("MyCustomBackend", {"input_dim": 768, "output_dim": 64})

API Quick Reference

| Function | Description |
| --- | --- |
| build_detector(name, config) | Build complete model |
| build_frontend(name, config) | Build frontend only |
| build_backend(name, config) | Build backend only |
| build_loss(name, config) | Build loss module |
| build_dataloader(config) | Build DataLoader |
| build_transforms_pipeline(config) | Build transforms |

Complete Minimal Example

import torch
from deepfense.utils.registry import build_detector
from deepfense.data.data_utils import build_dataloader
from deepfense.training.evaluations.metrics import compute_eer

# 1. Build Model
model = build_detector("ModularDetector", {
    "frontend": {"type": "wav2vec2", "args": {"source": "huggingface", "ckpt_path": "facebook/wav2vec2-base", "freeze": True}},
    "backend": {"type": "MLP", "args": {"input_dim": 768, "projection": [128, 32]}},
    "loss": [{"type": "CrossEntropy", "embedding_dim": 32, "n_classes": 2}]
}).to("cuda")

# 2. Build Data
train_loader = build_dataloader({
    "dataset_type": "StandardDataset",
    "parquet_files": ["train.parquet"],
    "label_map": {"bonafide": 1, "spoof": 0},
    "batch_size": 32,
    "base_transform": [{"type": "pad", "max_len": 64600}]
})

# 3. Train
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(10):
    model.train()
    for batch in train_loader:
        x, y = batch["x"].to("cuda"), batch["label"].to("cuda")
        outputs = model(x)
        loss = model.compute_loss(outputs, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1} done")

# 4. Save
torch.save({"model_state": model.state_dict()}, "model.pth")