Skip to content

caveat.models.utils

ScheduledOptim(optimizer, lr_mul, d_model, n_warmup_steps) #

Bases: _LRScheduler

A simple wrapper class for learning rate scheduling

Source code in caveat/models/utils.py
def __init__(self, optimizer, lr_mul, d_model, n_warmup_steps):
    """Record the wrapped optimizer and the schedule settings on the instance.

    Args:
        optimizer: Object stored as `self.optimizer`.
        lr_mul: Value stored as `self.lr_mul` (presumably a learning-rate
            multiplier; confirm against `_update_learning_rate`).
        d_model: Value stored as `self.d_model`.
        n_warmup_steps: Value stored as `self.n_warmup_steps`.
    """
    # NOTE(review): the base-class initialiser is not invoked here; confirm
    # that this is intentional.
    # The step counter always starts from zero.
    self.n_steps = 0
    self.n_warmup_steps = n_warmup_steps
    self.d_model = d_model
    self.lr_mul = lr_mul
    self.optimizer = optimizer

d_model = d_model instance-attribute #

lr_mul = lr_mul instance-attribute #

n_steps = 0 instance-attribute #

n_warmup_steps = n_warmup_steps instance-attribute #

optimizer = optimizer instance-attribute #

step() #

Step with the inner optimizer

Source code in caveat/models/utils.py
def step(self):
    """Advance the schedule by delegating to `_update_learning_rate`.

    NOTE(review): this method only triggers the learning-rate update; it does
    not itself call the wrapped optimizer's `step` — confirm that callers
    step the optimizer separately.
    """
    self._update_learning_rate()

build_hidden_layers(config) #

Build hidden layer sizes from config.

PARAMETER DESCRIPTION
config

Configuration dictionary containing hidden layer parameters.

TYPE: dict

RAISES DESCRIPTION
ValueError

If both hidden_layers and hidden_n/hidden_size are specified.

ValueError

If hidden_layers is not a list.

ValueError

If hidden_layers contains non-integer values.

ValueError

If neither hidden_layers nor hidden_n/hidden_size are specified.

RETURNS DESCRIPTION
list

List of hidden layer sizes.

TYPE: list

Source code in caveat/models/utils.py
def build_hidden_layers(config: dict) -> list:
    """
    Build hidden layer sizes from config.

    The sizes are given either explicitly through ``hidden_layers`` or
    implicitly through ``hidden_n`` and ``hidden_size`` together, which yields
    ``hidden_n`` layers of ``hidden_size`` each.

    Args:
        config (dict): Configuration dictionary containing hidden layer parameters.

    Raises:
        ValueError: If both hidden_layers and hidden_n/hidden_size are specified.
        ValueError: If hidden_layers is not a list.
        ValueError: If hidden_layers contains non-integer values.
        ValueError: If neither hidden_layers nor both hidden_n and hidden_size
            are specified.

    Returns:
        list: List of hidden layer sizes. When ``hidden_layers`` is given, the
            very same list object from the config is returned.
    """
    hidden_layers = config.get("hidden_layers")
    hidden_n = config.get("hidden_n")
    hidden_size = config.get("hidden_size")

    if hidden_layers is not None:
        # The explicit list and the (n, size) shorthand are mutually exclusive.
        if hidden_n is not None or hidden_size is not None:
            raise ValueError(
                "Cannot specify hidden_layers and hidden_n or hidden_size"
            )
        if not isinstance(hidden_layers, list):
            raise ValueError("hidden_layers must be a list")
        if not all(isinstance(layer, int) for layer in hidden_layers):
            raise ValueError("hidden_layers must be a list of integers")
        return hidden_layers

    if hidden_n is not None and hidden_size is not None:
        return [int(hidden_size)] * int(hidden_n)

    # Reached when nothing is given, or when only one of hidden_n/hidden_size is.
    raise ValueError("Must specify hidden_layers or hidden_n and hidden_size")

calc_output_padding_1d(length, target, kernel_size, stride, padding, patience=20) #

Calculate the output padding required for a 1D transposed convolution to achieve a target length. This function iterates over possible padding values and output padding values to find a combination that results in the desired target length after a 1D transposed convolution. Args: length (int): The length of the input. target (int): The desired length of the output. kernel_size (int): The size of the convolution kernel. stride (int): The stride of the convolution. padding (int): The initial padding value. patience (int, optional): The maximum number of iterations to try for padding and output padding. Default is 20. Returns: tuple: A tuple containing the padding and output padding values that achieve the target length. Raises: ValueError: If no combination of padding and output padding can achieve the target length within the given patience.

Source code in caveat/models/utils.py
def calc_output_padding_1d(
    length: int,
    target: int,
    kernel_size: int,
    stride: int,
    padding: int,
    patience: int = 20,
) -> tuple[int, int]:
    """
    Find padding and output padding for a 1D transposed convolution that hits a target length.

    Candidate paddings ``padding, padding + 1, ..., padding + patience - 1`` are
    tried in order; for each one, output paddings ``0 .. patience - 1`` are tried
    in order. The first pair for which ``transconv_size_1d`` equals ``target`` is
    returned. A message is printed when the chosen padding differs from the
    requested one.

    Args:
        length (int): The length of the input.
        target (int): The desired length of the output.
        kernel_size (int): The size of the convolution kernel.
        stride (int): The stride of the convolution.
        padding (int): The initial padding value.
        patience (int, optional): The number of padding values and of output
            padding values to try. Default is 20.

    Returns:
        tuple[int, int]: The ``(padding, output_padding)`` pair that achieves the
            target length.

    Raises:
        ValueError: If no combination of padding and output padding can achieve
            the target length within the given patience.
    """
    # NOTE(review): the returned output padding is not checked against stride or
    # dilation; confirm the result is acceptable to the layer that consumes it.
    for pad in range(padding, padding + patience):
        for output_padding in range(patience):
            size = transconv_size_1d(
                length, kernel_size, stride, pad, output_padding
            )
            if size != target:
                continue
            if pad != padding:
                print(
                    f"Changed padding from {padding} to {pad} for target {target}."
                )
            return pad, output_padding
    raise ValueError(
        f"""Could not find input and output padding combination for target {target},
        length {length}, kernel_size {kernel_size}, stride {stride}, padding {padding}.
    """
    )

calc_output_padding_2d(size) #

Calculate output padding for a transposed convolution such that output dims will match dimensions of inputs to a convolution of given size. For each dimension, padding is set to 1 if even size, otherwise 0.

PARAMETER DESCRIPTION
size

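Input size: either a 3-tuple whose first element is ignored and whose last two elements are (h, w), or a single int used for both h and w.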

TYPE: Union[tuple[int, int, int], int]

RETURNS DESCRIPTION
array

tuple of two ints: required output padding for (h, w)

Source code in caveat/models/utils.py
def calc_output_padding_2d(
    size: Union[tuple[int, int, int], int]
) -> tuple[int, int]:
    """Calculate output padding for a transposed convolution such that output dims will
    match dimensions of inputs to a convolution of given size.
    For each dimension, padding is set to 1 if even size, otherwise 0.

    Args:
        size (Union[tuple[int, int, int], int]): Input size. Either a 3-tuple whose
            first element is ignored and whose last two elements are (h, w), or a
            single int used for both h and w.

    Returns:
        tuple[int, int]: Required output padding for (h, w).
    """
    if isinstance(size, int):
        # Square input: the leading element is a placeholder that is discarded below.
        size = (0, size, size)
    _, h, w = size
    return (int(h % 2 == 0), int(w % 2 == 0))

conv1d_size(length, kernel_size, stride, padding=0) #

Calculate output dimensions for 1d convolution.

PARAMETER DESCRIPTION
length

Input size.

TYPE: int

kernel_size

Kernel_size.

TYPE: int

stride

Stride.

TYPE: int

padding

Input padding.

TYPE: int DEFAULT: 0

Returns: int: Output size.

Source code in caveat/models/utils.py
def conv1d_size(
    length: int, kernel_size: int, stride: int, padding: int = 0
) -> int:
    """Calculate output dimensions for 1d convolution.

    Computes ``floor((length + 2 * padding - (kernel_size - 1) - 1) / stride) + 1``
    using floor division, so integer inputs are handled exactly and the result
    is a true floor even when the numerator is negative.

    Args:
        length (int): Input size.
        kernel_size (int): Kernel size.
        stride (int): Stride.
        padding (int): Input padding. Defaults to 0.

    Returns:
        int: Output size.
    """
    span = length + 2 * padding - (kernel_size - 1) - 1
    # Floor division avoids the float round-trip of `/` followed by int().
    return int(span // stride) + 1

conv2d_size(size, kernel_size=3, stride=2, padding=1, dilation=1) #

Calculate output dimensions for 2d convolution.

PARAMETER DESCRIPTION
size

Input size, may be integer if symmetric.

TYPE: Union[tuple[int, int], int]

kernel_size

Kernel_size. Defaults to 3.

TYPE: Union[tuple[int, int], int] DEFAULT: 3

stride

Stride. Defaults to 2.

TYPE: Union[tuple[int, int], int] DEFAULT: 2

padding

Input padding. Defaults to 1.

TYPE: Union[tuple[int, int], int] DEFAULT: 1

dilation

Dilation. Defaults to 1.

TYPE: Union[tuple[int, int], int] DEFAULT: 1

RETURNS DESCRIPTION
array

np.array: Output size.

Source code in caveat/models/utils.py
def conv2d_size(
    size: Union[tuple[int, int], int],
    kernel_size: Union[tuple[int, int], int] = 3,
    stride: Union[tuple[int, int], int] = 2,
    padding: Union[tuple[int, int], int] = 1,
    dilation: Union[tuple[int, int], int] = 1,
) -> np.ndarray:
    """Calculate output dimensions for 2d convolution.

    Per dimension this evaluates
    ``(size + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1``.

    Args:
        size (Union[tuple[int, int], int]): Input size, may be integer if symmetric.
        kernel_size (Union[tuple[int, int], int], optional): Kernel size. Defaults to 3.
        stride (Union[tuple[int, int], int], optional): Stride. Defaults to 2.
        padding (Union[tuple[int, int], int], optional): Input padding. Defaults to 1.
        dilation (Union[tuple[int, int], int], optional): Dilation. Defaults to 1.

    Returns:
        np.ndarray: Output size.
    """

    def as_pair(value):
        # A bare int applies to both dimensions; anything else is used as given.
        return (value, value) if isinstance(value, int) else value

    size, kernel_size, stride, padding, dilation = (
        np.array(as_pair(value))
        for value in (size, kernel_size, stride, padding, dilation)
    )
    return (size + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1

duration_mask(mask) #

Source code in caveat/models/utils.py
def duration_mask(mask: Tensor) -> Tensor:
    """Return a copy of `mask` with two entries per row set to zero.

    The first column of every row is zeroed, and so is the column selected by
    the argmax of the running count of non-zero entries of the original `mask`
    (presumably the last non-zero position of each row; this relies on how
    argmax resolves ties, so confirm). The input tensor is not modified.

    Args:
        mask (Tensor): Input mask; the indexing used here assumes a 2-D tensor.

    Returns:
        Tensor: The adjusted copy.
    """
    result = mask.clone()
    result[:, 0] = 0.0

    # The running count stops growing after the last non-zero entry of a row.
    running_count = (mask != 0).cumsum(-1)
    last_cols = running_count.argmax(1)
    rows = torch.arange(result.shape[0])
    result[rows, last_cols] = 0.0
    return result

hot_argmax(batch, axis=-1) #

Encoded given axis as one-hot based on argmax for that axis.

PARAMETER DESCRIPTION
batch

Input tensor.

TYPE: tensor

axis

Axis index to encode. Defaults to -1.

TYPE: int DEFAULT: -1

RETURNS DESCRIPTION
tensor

One hot encoded tensor.

TYPE: tensor

Source code in caveat/models/utils.py
def hot_argmax(batch: tensor, axis: int = -1) -> tensor:
    """One-hot encode the given axis according to its argmax.

    Args:
        batch (tensor): Input tensor.
        axis (int, optional): Axis index to encode. Defaults to -1.

    Returns:
        tensor: One hot encoded tensor, with the same axis layout as the input.
    """
    # Work with the axis of interest in last position.
    moved = batch.swapaxes(axis, -1)
    n_classes = moved.shape[-1]
    winners = moved.argmax(axis=-1)

    # Rows of the identity matrix are the one-hot vectors; look them up by index.
    identity = torch.eye(n_classes).to(current_device())
    encoded = identity[winners]

    # Restore the original axis order.
    return encoded.swapaxes(axis, -1)

mask_after_eos(acts, sos=0, eos=1) #

Mask all values after the first occurrence of the end of sequence (eos) token. This is useful for ensuring that any predictions or calculations do not consider values that occur after the end of a sequence, which is often required in sequence models.

PARAMETER DESCRIPTION
acts

Input tensor of shape [N, steps].

TYPE: Tensor

eos

End of sequence token index. Defaults to 1.

TYPE: int DEFAULT: 1

RETURNS DESCRIPTION
Tensor

Boolean mask that is True at the first eos token and at every later position; rows without an eos token are entirely False.

TYPE: Tensor

Source code in caveat/models/utils.py
def mask_after_eos(acts: Tensor, sos: int = 0, eos: int = 1) -> Tensor:
    """Build a boolean mask covering the first end of sequence (eos) token and
    everything after it.

    This is useful for ensuring that any predictions or calculations do not
    consider values that occur after the end of a sequence, which is often
    required in sequence models.

    Args:
        acts (Tensor): Input tensor of shape [N, steps].
        sos (int): Start of sequence token index. Currently unused; kept for
            interface compatibility. Defaults to 0.
        eos (int): End of sequence token index. Defaults to 1.

    Returns:
        Tensor: Boolean tensor of shape [N, steps] that is True at the position
            of the first eos token and at every later position. Rows without
            any eos token are entirely False.
    """
    _, n_steps = acts.shape

    is_eos = acts == eos
    has_eos = is_eos.any(dim=-1)
    # argmax over the 0/1 indicator locates the first eos; for rows without
    # an eos the value is meaningless and is overridden below.
    first_eos = is_eos.int().argmax(dim=-1, keepdim=True)

    # [steps] compared against [N, 1] broadcasts to a fresh [N, steps] tensor.
    positions = torch.arange(n_steps, device=acts.device)
    mask = positions >= first_eos
    mask[~has_eos] = False  # if no eos, no mask
    return mask

normalise_log_durations(batch, sos=0, eos=1) #

Normalise the durations in the log_probs tensor to sum to 1, excluding start of sequence (sos) and end of sequence (eos) tokens. SOS and EOS locations are determined by the argmax of the activity logits. This function is useful for ensuring that the durations of activities in a sequence sum to 1, which is often required for models that predict durations as part of their output, but it cannot deal with every edge case. Args: batch (Tensor): Log probabilities tensor of shape [N, steps, encodings + 1]. sos (int): Start of sequence token index. Defaults to 0. eos (int): End of sequence token index. Defaults to 1. Returns: Tensor: Normalised log probabilities tensor with durations summing to 1.

Source code in caveat/models/utils.py
def normalise_log_durations(
    batch: Tensor, sos: int = 0, eos: int = 1
) -> Tensor:
    """Rescale the log-duration channel so that durations sum to 1 per sequence.

    The last channel of `batch` is treated as log durations and the remaining
    channels as activity scores. Steps whose argmax activity equals `sos`, and
    steps flagged by `mask_after_eos`, are left out of the per-sequence total.
    Every duration in the sequence is then divided by that total and converted
    back to log space. A total of zero is replaced by 1 to avoid division by zero.
    This does not attempt to handle every edge case.

    Args:
        batch (Tensor): Log probabilities tensor of shape [N, steps, encodings + 1].
        sos (int): Start of sequence token index. Defaults to 0.
        eos (int): End of sequence token index. Defaults to 1.

    Returns:
        Tensor: Tensor of the same shape with the activity channels unchanged
            and the log-duration channel normalised.
    """
    _, _, width = batch.shape
    acts, log_durations = torch.split(batch, [width - 1, 1], dim=-1)
    tokens = acts.argmax(dim=-1)
    durations = log_durations.squeeze(-1).exp()

    # Steps that must not contribute to the normalising total.
    excluded = (tokens == sos) | mask_after_eos(tokens, eos=eos)
    keep = torch.ones_like(durations).masked_fill(excluded, 0.0)

    totals = (durations * keep).sum(dim=-1, keepdim=True)
    # avoid division by zero
    totals = torch.where(totals == 0, torch.ones_like(totals), totals)

    normalised = torch.log(durations / totals)
    # [N, steps, encodings + 1]
    return torch.cat((acts, normalised.unsqueeze(-1)), dim=-1)

transconv_size_1d(length, kernel_size, stride, padding, output_padding, dilation=1) #

Source code in caveat/models/utils.py
def transconv_size_1d(
    length, kernel_size, stride, padding, output_padding, dilation=1
):
    """Calculate output length for a 1d transposed convolution.

    Evaluates ``(length - 1) * stride - 2 * padding
    + dilation * (kernel_size - 1) + output_padding + 1``.

    Args:
        length: Input length.
        kernel_size: Kernel size.
        stride: Stride.
        padding: Input padding.
        output_padding: Output padding.
        dilation: Dilation. Defaults to 1.

    Returns:
        Output length.
    """
    stretched = (length - 1) * stride
    kernel_span = dilation * (kernel_size - 1)
    return stretched - 2 * padding + kernel_span + output_padding + 1

transconv_size_2d(size, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) #

Calculate output dimension for 2d transpose convolution.

PARAMETER DESCRIPTION
size

Input size, may be integer if symmetric.

TYPE: Union[tuple[int, int], int]

kernel_size

Kernel size. Defaults to 3.

TYPE: Union[tuple[int, int], int] DEFAULT: 3

stride

Stride. Defaults to 2.

TYPE: Union[tuple[int, int], int] DEFAULT: 2

padding

Input padding. Defaults to 1.

TYPE: Union[tuple[int, int], int] DEFAULT: 1

dilation

Dilation. Defaults to 1.

TYPE: Union[tuple[int, int], int] DEFAULT: 1

output_padding

Output padding. Defaults to 1.

TYPE: Union[tuple[int, int], int] DEFAULT: 1

RETURNS DESCRIPTION
array

np.array: Output size.

Source code in caveat/models/utils.py
def transconv_size_2d(
    size: Union[tuple[int, int], int],
    kernel_size: Union[tuple[int, int], int] = 3,
    stride: Union[tuple[int, int], int] = 2,
    padding: Union[tuple[int, int], int] = 1,
    dilation: Union[tuple[int, int], int] = 1,
    output_padding: Union[tuple[int, int], int] = 1,
) -> np.ndarray:
    """Calculate output dimension for 2d transpose convolution.

    Per dimension this evaluates
    ``(size - 1) * stride - 2 * padding + dilation * (kernel_size - 1)
    + output_padding + 1``.

    Args:
        size (Union[tuple[int, int], int]): Input size, may be integer if symmetric.
        kernel_size (Union[tuple[int, int], int], optional): Kernel size. Defaults to 3.
        stride (Union[tuple[int, int], int], optional): Stride. Defaults to 2.
        padding (Union[tuple[int, int], int], optional): Input padding. Defaults to 1.
        dilation (Union[tuple[int, int], int], optional): Dilation. Defaults to 1.
        output_padding (Union[tuple[int, int], int], optional): Output padding. Defaults to 1.

    Returns:
        np.ndarray: Output size.
    """

    def as_pair(value):
        # A bare int applies to both dimensions; anything else is used as given.
        return (value, value) if isinstance(value, int) else value

    size, kernel_size, stride, padding, dilation, output_padding = (
        np.array(as_pair(value))
        for value in (
            size,
            kernel_size,
            stride,
            padding,
            dilation,
            output_padding,
        )
    )
    return (
        (size - 1) * stride
        - 2 * padding
        + dilation * (kernel_size - 1)
        + output_padding
        + 1
    )