Skip to content

caveat.models.utils

ScheduledOptim(optimizer, lr_mul, d_model, n_warmup_steps) #

Bases: _LRScheduler

A simple wrapper class for learning rate scheduling

Source code in caveat/models/utils.py
def __init__(self, optimizer, lr_mul, d_model, n_warmup_steps):
    """Store the wrapped optimizer and the scheduling parameters.

    Args:
        optimizer: Object kept on ``self.optimizer``.
        lr_mul: Value kept on ``self.lr_mul`` (presumably a learning-rate
            multiplier; it is only stored here -- confirm against its use).
        d_model: Value kept on ``self.d_model``.
        n_warmup_steps: Value kept on ``self.n_warmup_steps``.
    """
    # The step counter always starts at zero for a fresh scheduler.
    self.n_steps = 0
    self.optimizer, self.lr_mul = optimizer, lr_mul
    self.d_model, self.n_warmup_steps = d_model, n_warmup_steps

d_model = d_model instance-attribute #

lr_mul = lr_mul instance-attribute #

n_steps = 0 instance-attribute #

n_warmup_steps = n_warmup_steps instance-attribute #

optimizer = optimizer instance-attribute #

step() #

Step with the inner optimizer

Source code in caveat/models/utils.py
def step(self):
    """Run one scheduler step by calling ``self._update_learning_rate()``.

    NOTE(review): ``_update_learning_rate`` is defined outside this excerpt;
    whether it also steps the wrapped optimizer cannot be confirmed from here.
    """
    self._update_learning_rate()

build_hidden_layers(config) #

Build hidden layer sizes from config.

PARAMETER DESCRIPTION
config

Configuration dictionary containing hidden layer parameters.

TYPE: dict

RAISES DESCRIPTION
ValueError

If both hidden_layers and hidden_n/hidden_size are specified.

ValueError

If hidden_layers is not a list.

ValueError

If hidden_layers contains non-integer values.

ValueError

If neither hidden_layers nor hidden_n/hidden_size are specified.

RETURNS DESCRIPTION
list

List of hidden layer sizes.

TYPE: list

Source code in caveat/models/utils.py
def build_hidden_layers(config: dict) -> list:
    """
    Build hidden layer sizes from config.

    The sizes are either given explicitly through ``hidden_layers`` or described
    as a uniform stack through ``hidden_n`` (number of layers) together with
    ``hidden_size`` (size of every layer). The two styles are mutually exclusive.

    Args:
        config (dict): Configuration dictionary containing hidden layer parameters.

    Raises:
        ValueError: If both hidden_layers and hidden_n/hidden_size are specified.
        ValueError: If hidden_layers is not a list.
        ValueError: If hidden_layers contains non-integer values.
        ValueError: If neither hidden_layers nor hidden_n/hidden_size are specified.

    Returns:
        list: List of hidden layer sizes.
    """
    hidden_layers = config.get("hidden_layers")
    hidden_n = config.get("hidden_n")
    hidden_size = config.get("hidden_size")

    if hidden_layers is None:
        # Uniform stack: both the layer count and the layer size are required.
        if hidden_n is None or hidden_size is None:
            raise ValueError(
                "Must specify hidden_layers or hidden_n and hidden_size"
            )
        return [int(hidden_size)] * int(hidden_n)

    # Explicit list: reject ambiguous configs before validating the list itself.
    if hidden_n is not None or hidden_size is not None:
        raise ValueError(
            "Cannot specify hidden_layers and hidden_n or hidden_size"
        )
    if not isinstance(hidden_layers, list):
        raise ValueError("hidden_layers must be a list")
    if not all(isinstance(layer, int) for layer in hidden_layers):
        raise ValueError("hidden_layers must be a list of integers")
    return hidden_layers

calc_output_padding_1d(length, target, kernel_size, stride, padding, patience=20) #

Calculate the padding and output padding required for a 1D transposed convolution to achieve a target length.

This function iterates over candidate padding values (starting from the given padding) and candidate output padding values to find a combination that results in the desired target length after a 1D transposed convolution.

Args:
- length (int): The length of the input.
- target (int): The desired length of the output.
- kernel_size (int): The size of the convolution kernel.
- stride (int): The stride of the convolution.
- padding (int): The initial padding value.
- patience (int, optional): The maximum number of candidates to try for padding and for output padding. Default is 20.

Returns:
- tuple: A tuple containing the padding and output padding values that achieve the target length.

Raises:
- ValueError: If no combination of padding and output padding can achieve the target length within the given patience.

Source code in caveat/models/utils.py
def calc_output_padding_1d(
    length: int,
    target: int,
    kernel_size: int,
    stride: int,
    padding: int,
    patience: int = 20,
) -> tuple[int, int]:
    """
    Calculate the padding and output padding required for a 1D transposed
    convolution to achieve a target length.

    Candidate paddings ``padding, padding + 1, ..., padding + patience - 1`` are
    tried in order; for each, output paddings ``0 .. patience - 1`` are tried.
    The first combination whose transposed-convolution output length equals
    ``target`` is returned. A message is printed when the returned padding
    differs from the requested one.

    Args:
        length (int): The length of the input.
        target (int): The desired length of the output.
        kernel_size (int): The size of the convolution kernel.
        stride (int): The stride of the convolution.
        padding (int): The initial padding value.
        patience (int, optional): The maximum number of candidates to try for
            padding and for output padding. Default is 20.

    Returns:
        tuple[int, int]: ``(padding, output_padding)`` achieving the target length.

    Raises:
        ValueError: If no combination of padding and output padding can achieve
            the target length within the given patience.
    """
    for pad in range(padding, padding + patience):
        for output_padding in range(patience):
            size = transconv_size_1d(
                length, kernel_size, stride, pad, output_padding
            )
            if size != target:
                continue
            if pad != padding:
                # Let the user know the requested padding had to be adjusted.
                print(
                    f"Changed padding from {padding} to {pad} for target {target}."
                )
            return pad, output_padding
    raise ValueError(
        f"""Could not find input and output padding combination for target {target},
        length {length}, kernel_size {kernel_size}, stride {stride}, padding {padding}.
    """
    )

calc_output_padding_2d(size) #

Calculate output padding for a transposed convolution such that output dims will match dimensions of inputs to a convolution of given size. For each dimension, padding is set to 1 if even size, otherwise 0.

PARAMETER DESCRIPTION
size

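Input size as a 3-tuple whose last two entries are (h, w); the first entry is ignored. A single int is treated as h = w.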

TYPE: Union[tuple[int, int, int], int]

RETURNS DESCRIPTION
tuple

Required output padding for (h, w); each entry is 1 if that dimension is even, otherwise 0.

Source code in caveat/models/utils.py
def calc_output_padding_2d(
    size: Union[tuple[int, int, int], tuple[int, int], int]
) -> tuple[int, int]:
    """Calculate output padding for a transposed convolution such that output dims will
    match dimensions of inputs to a convolution of given size.
    For each dimension, padding is set to 1 if even size, otherwise 0.

    Args:
        size (Union[tuple[int, int, int], tuple[int, int], int]): input size, either
            ``(h, w)``, a 3-tuple ``(_, h, w)`` whose first entry is ignored, or a
            single int used for both h and w.

    Returns:
        tuple[int, int]: required output padding as ``(pad_h, pad_w)``.
    """
    if isinstance(size, int):
        h = w = size
    else:
        # Only the trailing two entries are spatial; anything before is ignored.
        *_, h, w = size
    return (int(h % 2 == 0), int(w % 2 == 0))

conv1d_size(length, kernel_size, stride, padding=0) #

Calculate output dimensions for 1d convolution.

PARAMETER DESCRIPTION
length

Input size.

TYPE: int

kernel_size

Kernel size.

TYPE: int

stride

Stride.

TYPE: int

padding

Input padding.

TYPE: int DEFAULT: 0

Returns: int: Output size.

Source code in caveat/models/utils.py
def conv1d_size(
    length: int, kernel_size: int, stride: int, padding: int = 0
) -> int:
    """Calculate output dimensions for 1d convolution.

    Computes ``floor((length + 2 * padding - (kernel_size - 1) - 1) / stride) + 1``
    (dilation of 1).

    Args:
        length (int): Input size.
        kernel_size (int): Kernel size.
        stride (int): Stride.
        padding (int): Input padding.
    Returns:
        int: Output size.
    """
    # Floor division (not int() truncation) so that a kernel larger than the
    # padded input yields a size <= 0 instead of being rounded up towards 1.
    numerator = length - (kernel_size - 1) + (2 * padding) - 1
    return int(numerator // stride) + 1

conv2d_size(size, kernel_size=3, stride=2, padding=1, dilation=1) #

Calculate output dimensions for 2d convolution.

PARAMETER DESCRIPTION
size

Input size, may be an integer if symmetric.

TYPE: Union[tuple[int, int], int]

kernel_size

Kernel_size. Defaults to 3.

TYPE: Union[tuple[int, int], int] DEFAULT: 3

stride

Stride. Defaults to 2.

TYPE: Union[tuple[int, int], int] DEFAULT: 2

padding

Input padding. Defaults to 1.

TYPE: Union[tuple[int, int], int] DEFAULT: 1

dilation

Dilation. Defaults to 1.

TYPE: Union[tuple[int, int], int] DEFAULT: 1

RETURNS DESCRIPTION
array

np.array: Output size.

Source code in caveat/models/utils.py
def conv2d_size(
    size: Union[tuple[int, int], int],
    kernel_size: Union[tuple[int, int], int] = 3,
    stride: Union[tuple[int, int], int] = 2,
    padding: Union[tuple[int, int], int] = 1,
    dilation: Union[tuple[int, int], int] = 1,
) -> np.array:
    """Compute the output size of a 2d convolution.

    Every argument may be a pair (one value per dimension) or a single int,
    which is applied to both dimensions.

    Args:
        size (Union[tuple[int, int], int]): Input size, may be an integer if symmetric.
        kernel_size (Union[tuple[int, int], int], optional): Kernel size. Defaults to 3.
        stride (Union[tuple[int, int], int], optional): Stride. Defaults to 2.
        padding (Union[tuple[int, int], int], optional): Input padding. Defaults to 1.
        dilation (Union[tuple[int, int], int], optional): Dilation. Defaults to 1.

    Returns:
        np.array: Output size.
    """
    # Expand plain ints to pairs and turn everything into arrays in one pass.
    in_size, kernel, step, pad, dil = (
        np.array((arg, arg) if isinstance(arg, int) else arg)
        for arg in (size, kernel_size, stride, padding, dilation)
    )
    numerator = in_size + 2 * pad - dil * (kernel - 1) - 1
    return numerator // step + 1

duration_mask(mask) #

Source code in caveat/models/utils.py
def duration_mask(mask: Tensor) -> Tensor:
    """Return a copy of ``mask`` with two entries per row set to zero.

    The zeroed entries are column 0 and, for each row, the column returned by
    ``argmax`` over the running count of non-zero entries (the last non-zero
    entry of the row when ``argmax`` reports the first maximal index). The
    input tensor is not modified.

    Args:
        mask (Tensor): Input mask; the indexing used here presumes a 2-D
            tensor with rows on the first axis -- TODO confirm against callers.

    Returns:
        Tensor: Modified clone of ``mask``.
    """
    masked = mask.clone()
    # Computed from the untouched input, so the order of the writes is irrelevant.
    last_nonzero = (mask != 0).cumsum(-1).argmax(1)
    rows = torch.arange(masked.shape[0])
    masked[:, 0] = 0.0
    masked[rows, last_nonzero] = 0.0
    return masked

hot_argmax(batch, axis=-1) #

Encode the given axis as one-hot, based on the argmax along that axis.

PARAMETER DESCRIPTION
batch

Input tensor.

TYPE: tensor

axis

Axis index to encode. Defaults to -1.

TYPE: int DEFAULT: -1

RETURNS DESCRIPTION
tensor

One hot encoded tensor.

TYPE: tensor

Source code in caveat/models/utils.py
def hot_argmax(batch: Tensor, axis: int = -1) -> Tensor:
    """Encode the given axis as one-hot, based on the argmax along that axis.

    The result has the same shape as ``batch`` and lives on the same device as
    ``batch``; its values are taken from a ``torch.eye`` identity matrix.

    Args:
        batch (Tensor): Input tensor.
        axis (int, optional): Axis index to encode. Defaults to -1.

    Returns:
        Tensor: One hot encoded tensor.
    """
    # Move the axis of interest last so the argmax and lookup are uniform.
    swapped = batch.swapaxes(axis, -1)
    winners = swapped.argmax(dim=-1)
    # Build the lookup table on the input's device so the index and the table
    # always agree, independent of any process-wide device setting.
    eye = torch.eye(swapped.shape[-1], device=batch.device)
    return eye[winners].swapaxes(axis, -1)

transconv_size_1d(length, kernel_size, stride, padding, output_padding, dilation=1) #

Source code in caveat/models/utils.py
def transconv_size_1d(
    length, kernel_size, stride, padding, output_padding, dilation=1
):
    """Compute the output length of a 1d transposed convolution.

    Evaluates ``(length - 1) * stride - 2 * padding
    + dilation * (kernel_size - 1) + output_padding + 1``.
    """
    strided_span = (length - 1) * stride
    kernel_span = dilation * (kernel_size - 1)
    return strided_span - 2 * padding + kernel_span + output_padding + 1

transconv_size_2d(size, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) #

Calculate output dimension for 2d transpose convolution.

PARAMETER DESCRIPTION
size

Input size, may be an integer if symmetric.

TYPE: Union[tuple[int, int], int]

kernel_size

Kernel size. Defaults to 3.

TYPE: Union[tuple[int, int], int] DEFAULT: 3

stride

Stride. Defaults to 2.

TYPE: Union[tuple[int, int], int] DEFAULT: 2

padding

Input padding. Defaults to 1.

TYPE: Union[tuple[int, int], int] DEFAULT: 1

dilation

Dilation. Defaults to 1.

TYPE: Union[tuple[int, int], int] DEFAULT: 1

output_padding

Output padding. Defaults to 1.

TYPE: Union[tuple[int, int], int] DEFAULT: 1

RETURNS DESCRIPTION
array

np.array: Output size.

Source code in caveat/models/utils.py
def transconv_size_2d(
    size: Union[tuple[int, int], int],
    kernel_size: Union[tuple[int, int], int] = 3,
    stride: Union[tuple[int, int], int] = 2,
    padding: Union[tuple[int, int], int] = 1,
    dilation: Union[tuple[int, int], int] = 1,
    output_padding: Union[tuple[int, int], int] = 1,
) -> np.array:
    """Compute the output size of a 2d transposed convolution.

    Every argument may be a pair (one value per dimension) or a single int,
    which is applied to both dimensions.

    Args:
        size (Union[tuple[int, int], int]): Input size, may be an integer if symmetric.
        kernel_size (Union[tuple[int, int], int], optional): Kernel size. Defaults to 3.
        stride (Union[tuple[int, int], int], optional): Stride. Defaults to 2.
        padding (Union[tuple[int, int], int], optional): Input padding. Defaults to 1.
        dilation (Union[tuple[int, int], int], optional): Dilation. Defaults to 1.
        output_padding (Union[tuple[int, int], int], optional): Output padding. Defaults to 1.

    Returns:
        np.array: Output size.
    """
    # Expand plain ints to pairs and turn everything into arrays in one pass.
    in_size, kernel, step, pad, dil, out_pad = (
        np.array((arg, arg) if isinstance(arg, int) else arg)
        for arg in (size, kernel_size, stride, padding, dilation, output_padding)
    )
    strided_span = (in_size - 1) * step
    kernel_span = dil * (kernel - 1)
    return strided_span - 2 * pad + kernel_span + out_pad + 1