Source code for bonni.model.utils

from enum import Enum
from typing import Callable
import jax
import jax.numpy as jnp
from flax import linen as nn


class SkipConnectionType(Enum):
    """Kinds of skip connection that ``get_skip_connection`` can build.

    Attributes:
        linear: Always use a learned linear projection.
        identity: Pass the input through unchanged when the input and output
            channel counts match; otherwise ``get_skip_connection`` falls back
            to a linear projection.
    """

    linear = "linear"
    identity = "identity"


def get_skip_connection(
    in_channels: int,
    out_channels: int,
    skip_type: SkipConnectionType,
):
    """Build the callable used as a skip connection.

    Args:
        in_channels: Channel count of the tensor entering the skip path.
        out_channels: Channel count the skip path has to produce.
        skip_type: Which kind of skip connection is requested.

    Returns:
        A pass-through function when ``skip_type`` is ``identity`` and the
        channel counts agree; otherwise a ``_LinearSkipConnection`` module.

    Raises:
        ValueError: If ``skip_type`` is neither ``linear`` nor ``identity``.
    """
    wants_identity = skip_type == SkipConnectionType.identity

    # A true identity is only possible when no change of width is required.
    if wants_identity and in_channels == out_channels:
        return lambda x: x

    # Either a projection was requested explicitly, or an identity was
    # requested but the channel counts differ, so a projection is needed.
    if wants_identity or skip_type == SkipConnectionType.linear:
        return _LinearSkipConnection(in_channels, out_channels)

    raise ValueError(f"Unsupported skip connection type: {skip_type}")


class _LinearSkipConnection(nn.Module):
    """Bias-free dense projection used as a skip connection.

    Wraps a single ``nn.Dense`` layer with ``out_features`` outputs, no bias
    term and a LeCun-normal kernel initializer.
    """

    # Declared for the constructor signature; not read anywhere in this class.
    in_features: int
    # Number of output features produced by the projection.
    out_features: int

    def setup(self):
        """Create the dense projection submodule."""
        kernel_init = nn.initializers.lecun_normal()
        self.linear = nn.Dense(
            features=self.out_features,
            use_bias=False,
            kernel_init=kernel_init,
        )

    def __call__(self, x):
        """Apply the projection to ``x`` and return the result."""
        projected = self.linear(x)
        return projected



[docs]
class ActivationType(Enum):
    """
    Names of the activation functions that can be selected for a layer.

    Each member is resolved to a concrete callable by ``get_activation_fn``.

    Attributes:
        identity: No non-linearity; the input is returned unchanged
            (f(x) = x).
        gelu: Gaussian Error Linear Unit, resolved to ``jax.nn.gelu``.
        relu: Rectified Linear Unit (f(x) = max(0, x)), resolved to
            ``jax.nn.relu``.
        leaky_relu: Leaky ReLU, resolved to ``jax.nn.leaky_relu`` with its
            default settings; negative inputs keep a small non-zero slope.
        sigmoid: Logistic sigmoid, resolved to ``jax.nn.sigmoid``; outputs lie
            between 0 and 1.
        tanh: Hyperbolic tangent, resolved to ``jax.nn.tanh``; outputs lie
            between -1 and 1.
    """

    identity = "identity"
    gelu = "gelu"
    relu = "relu"
    leaky_relu = "leaky_relu"
    sigmoid = "sigmoid"
    tanh = "tanh"



def get_activation_fn(
    activation_type: ActivationType,
) -> Callable[[jax.Array], jax.Array]:
    """Resolve an ``ActivationType`` member to the callable that implements it.

    Args:
        activation_type: The activation to look up.

    Returns:
        A function mapping an array to an array: a pass-through for
        ``identity``, otherwise the matching ``jax.nn`` function.

    Raises:
        ValueError: If ``activation_type`` matches none of the known members.
    """
    # Ordered (member, implementation) pairs, scanned with ``==`` so the
    # lookup makes no assumption about the argument being hashable.
    candidates = (
        (ActivationType.identity, lambda x: x),
        (ActivationType.gelu, jax.nn.gelu),
        (ActivationType.relu, jax.nn.relu),
        (ActivationType.leaky_relu, jax.nn.leaky_relu),
        (ActivationType.sigmoid, jax.nn.sigmoid),
        (ActivationType.tanh, jax.nn.tanh),
    )
    for member, fn in candidates:
        if activation_type == member:
            return fn
    raise ValueError(f"Invalid activation type: {activation_type}")



[docs]
class InitType(Enum):
    """
    Names of the parameter initialization strategies that can be selected.

    Each member is resolved to an initializer callable by ``get_init_fn``.

    Attributes:
        zeros: Every parameter starts at 0.
        ones: Every parameter starts at 1.
        uniform: Parameters are drawn from ``jax.random.uniform`` and
            multiplied by a fixed scale of 0.01 in ``get_init_fn``.
        normal: Parameters are drawn from ``jax.random.normal`` and
            multiplied by a fixed scale of 0.1 in ``get_init_fn``.
    """

    zeros = "zeros"
    ones = "ones"
    uniform = "uniform"
    normal = "normal"



def get_init_fn(
    init_type: InitType,
):
    """Resolve an ``InitType`` member to an initializer callable.

    Args:
        init_type: The initialization strategy to look up.

    Returns:
        ``nn.initializers.zeros`` or ``nn.initializers.ones`` for the constant
        strategies. For ``uniform`` and ``normal``, a ``jax.tree_util.Partial``
        wrapping a function ``init(key, shape, dtype, out_sharding)`` that
        draws random values and multiplies them by a fixed scale (0.01 for
        uniform, 0.1 for normal).

    Raises:
        ValueError: If ``init_type`` matches none of the known members.
    """

    def _scaled(sampler, scale):
        # Shared builder for both random strategies: draw with ``sampler``,
        # then multiply by ``scale`` cast to the requested dtype.
        def init(key, shape, dtype=jnp.float64, out_sharding=None) -> jax.Array:
            draws = sampler(key, shape, dtype=dtype, out_sharding=out_sharding)
            return draws * jnp.array(scale, dtype)

        return jax.tree_util.Partial(init)

    if init_type == InitType.zeros:
        return nn.initializers.zeros
    if init_type == InitType.ones:
        return nn.initializers.ones
    if init_type == InitType.uniform:
        return _scaled(jax.random.uniform, 0.01)
    if init_type == InitType.normal:
        return _scaled(jax.random.normal, 0.1)
    raise ValueError(f"Invalid init type: {init_type}")