Custom Models¶
This guide shows how to create and integrate custom models with ICOS-FL.
Basic LSTM Model Structure¶
ICOS-FL uses an LSTM model defined in icos_fl/models/lstm.py. The default model has this structure:
class LSTMModel(nn.Module):
    """LSTM stack followed by a linear head that predicts from the last step.

    Args:
        hidden_layer_size: Number of features in the LSTM hidden state.
        time_step: Passed to ``nn.LSTM`` as its input size, i.e. the number
            of features expected at every sequence position.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per sample.
    """

    def __init__(
        self,
        hidden_layer_size: int,
        time_step: int,
        num_layers: int,
        output_size: int = 1,
    ) -> None:
        super().__init__()

        # Keep the hyper-parameters around for later inspection.
        self.hidden_layer_size = hidden_layer_size
        self.time_step = time_step
        self.num_layers = num_layers

        # Recurrent encoder; batch_first=True means inputs are laid out as
        # (batch, sequence, feature).
        self.lstm = nn.LSTM(
            input_size=time_step,
            hidden_size=hidden_layer_size,
            num_layers=num_layers,
            batch_first=True,
        )

        # Projection from the hidden state to the prediction.
        self.linear = nn.Linear(
            in_features=hidden_layer_size,
            out_features=output_size,
        )

    def forward(self, input_seq: torch.Tensor) -> torch.Tensor:
        """Predict from the LSTM output at the final sequence position.

        Args:
            input_seq: Batched input laid out as (batch, sequence, feature).

        Returns:
            The linear head applied to the last time step of the LSTM output.
        """
        sequence_output = self.lstm(input_seq)[0]
        last_step = sequence_output[:, -1, :]
        return self.linear(last_step)
Creating a Custom Model¶
To create a custom model:
Subclass PyTorch’s nn.Module
Implement the required interface (the __init__ and forward methods)
Make sure your model accepts the expected input shape
Example: Adding Dropout¶
Here’s an example of extending the base LSTM model with dropout for regularization:
import torch
import torch.nn as nn

from icos_fl.models.lstm import LSTMModel
class LSTMWithDropout(LSTMModel):
    """Base LSTM model extended with dropout for regularization.

    Dropout is applied in two places: between stacked LSTM layers (only when
    there is more than one layer) and on the last-step features right before
    the linear head.

    Args:
        hidden_layer_size: Number of features in the LSTM hidden state.
        time_step: Passed to ``nn.LSTM`` as its input size.
        num_layers: Number of stacked LSTM layers.
        dropout_rate: Dropout probability used in both places.
        output_size: Number of values predicted per sample.
    """

    def __init__(
        self,
        hidden_layer_size: int,
        time_step: int,
        num_layers: int,
        dropout_rate: float = 0.2,
        output_size: int = 1,
    ) -> None:
        super().__init__(
            hidden_layer_size=hidden_layer_size,
            time_step=time_step,
            num_layers=num_layers,
            output_size=output_size,
        )
        # nn.LSTM applies dropout after every layer except the last one, so a
        # non-zero rate on a single-layer LSTM has no effect and makes PyTorch
        # emit a warning. Only pass the rate when there are layers to separate.
        inter_layer_dropout = dropout_rate if num_layers > 1 else 0.0

        # Replace the existing LSTM with one that has dropout
        self.lstm = nn.LSTM(
            time_step,
            hidden_layer_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        # Add dropout before the linear layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, input_seq: torch.Tensor) -> torch.Tensor:
        """Predict from the dropout-regularized last-step LSTM output.

        Args:
            input_seq: Batched input laid out as (batch, sequence, feature).

        Returns:
            The linear head applied to the last time step after dropout.
        """
        lstm_out, _ = self.lstm(input_seq)
        lstm_out = self.dropout(lstm_out[:, -1, :])
        predictions = self.linear(lstm_out)
        return predictions
Integrating the Custom Model¶
To use your custom model in ICOS-FL, modify the client code to use your model instead of the default one.
Import your custom model:
from path.to.your.model import LSTMWithDropout
Modify the model creation in the client:
# In client_fn function in client.py
model = LSTMWithDropout(
    hidden_layer_size=hidden_layer_size,
    time_step=time_step,
    num_layers=num_layers,
    dropout_rate=0.2,
).to(device)
Ensure your model is serializable using Flower’s model serialization:
# These functions must work with your custom model
from icos_fl.models.lstm import get_weights, set_weights

weights = get_weights(model)  # Extract weights as numpy arrays
set_weights(model, weights)  # Set weights from numpy arrays
Example: Bidirectional LSTM¶
For more advanced sequence modeling, here’s a bidirectional LSTM implementation:
class BidirectionalLSTM(nn.Module):
    """Bidirectional LSTM followed by a linear prediction head.

    Args:
        hidden_layer_size: Number of features in the hidden state of each
            direction.
        time_step: Passed to ``nn.LSTM`` as its input size, i.e. the number
            of features expected at every sequence position.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per sample.
    """

    def __init__(
        self,
        hidden_layer_size: int,
        time_step: int,
        num_layers: int,
        output_size: int = 1,
    ) -> None:
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.time_step = time_step
        self.num_layers = num_layers
        # Bidirectional LSTM
        self.lstm = nn.LSTM(
            time_step,
            hidden_layer_size,
            num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # The feature size is doubled because both directions are concatenated
        self.linear = nn.Linear(hidden_layer_size * 2, output_size)

    def forward(self, input_seq: torch.Tensor) -> torch.Tensor:
        """Predict from the final state of each direction.

        Args:
            input_seq: Batched input laid out as (batch, sequence, feature).

        Returns:
            The linear head applied to the concatenated final forward and
            final reverse hidden states of the last layer.
        """
        lstm_out, _ = self.lstm(input_seq)
        hidden = self.hidden_layer_size
        # The forward direction has consumed the whole sequence at the last
        # position, but the reverse direction has only consumed it at the
        # first position. Taking lstm_out[:, -1, :] for both would pair the
        # final forward state with a reverse state that saw a single step.
        forward_final = lstm_out[:, -1, :hidden]
        reverse_final = lstm_out[:, 0, hidden:]
        features = torch.cat((forward_final, reverse_final), dim=1)
        predictions = self.linear(features)
        return predictions
Weight Handling for Custom Models¶
When implementing custom models, ensure the get_weights and set_weights functions work with your model structure:
def get_weights_for_custom_model(model: nn.Module) -> List[np.ndarray]:
    """Return every ``state_dict`` entry of ``model`` as a NumPy array.

    Args:
        model: Module whose parameters and buffers should be exported.

    Returns:
        One array per ``state_dict`` entry, in ``state_dict`` order.
    """
    state = model.state_dict()
    # Tensors are moved to the CPU first so they can be viewed as NumPy arrays.
    return [tensor.cpu().numpy() for tensor in state.values()]
def set_weights_for_custom_model(model: nn.Module, weights: List[np.ndarray]) -> None:
"""Set model weights for custom model."""
params_dict = zip(model.state_dict().keys(), weights, strict=False)
state_dict = {k: torch.tensor(v) for k, v in params_dict}
model.load_state_dict(state_dict, strict=True)