Source code for s3prl.nn.common

"""
Common probing models

Authors:
  * Leo 2022
"""

from typing import List

import torch.nn as nn

import s3prl.nn.pooling as pooling

__all__ = [
    "FrameLevel",
    "UtteranceLevel",
]


class FrameLevel(nn.Module):
    """
    The common frame-to-frame probing model

    Args:
        input_size (int): input size
        output_size (int): output size
        hidden_sizes (List[int]): a list of hidden layers' hidden sizes.
            Defaults to [256] to project all the different input sizes to
            the same dimension. Set an empty list to use the vanilla
            single-layer linear model
        activation_type (str): the activation class name in :obj:`torch.nn`.
            Set None to disable the activation so the model is purely linear.
            Default: None
        activation_conf (dict): the arguments for initializing the activation
            class. Default: empty dict
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        hidden_sizes: List[int] = None,
        activation_type: str = None,
        activation_conf: dict = None,
    ):
        super().__init__()
        self._indim = input_size
        self._outdim = output_size

        # Only replace None with the default; an explicit empty list keeps
        # the model a single linear projection, as documented above.
        if hidden_sizes is None:
            hidden_sizes = [256]

        latest_size = input_size
        hidden_layers = []
        for size in hidden_sizes:
            hidden_layers.append(nn.Linear(latest_size, size))
            if activation_type is not None:
                hidden_layers.append(
                    getattr(nn, activation_type)(**(activation_conf or {}))
                )
            latest_size = size

        self.hidden_layers = nn.Sequential(*hidden_layers)
        self.final_proj = nn.Linear(latest_size, output_size)

    @property
    def input_size(self) -> int:
        return self._indim

    @property
    def output_size(self) -> int:
        return self._outdim
    def forward(self, x, x_len):
        """
        Args:
            x (torch.FloatTensor): (batch_size, seq_len, input_size)
            x_len (torch.LongTensor): (batch_size, )

        Returns:
            tuple

            1. ys (torch.FloatTensor): (batch_size, seq_len, output_size)
            2. ys_len (torch.LongTensor): (batch_size, )
        """
        ys = self.hidden_layers(x)
        ys = self.final_proj(ys)
        return ys, x_len
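
A minimal usage sketch (illustrative, not part of the module): the sizes and
the "ReLU" activation below are arbitrary choices, and random tensors stand
in for real upstream features:

    import torch

    from s3prl.nn.common import FrameLevel

    model = FrameLevel(input_size=768, output_size=40, activation_type="ReLU")
    x = torch.randn(4, 100, 768)                 # (batch_size, seq_len, input_size)
    x_len = torch.LongTensor([100, 90, 80, 70])  # valid length of each utterance
    ys, ys_len = model(x, x_len)
    assert ys.shape == (4, 100, 40)              # one prediction per frame
    assert torch.equal(ys_len, x_len)            # lengths pass through unchanged
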
class UtteranceLevel(nn.Module):
    """
    The common utterance-level probing model

    Args:
        input_size (int): input size
        output_size (int): output size
        hidden_sizes (List[int]): a list of hidden layers' hidden sizes.
            Defaults to [256] to project all the different input sizes to
            the same dimension. Set an empty list to use the vanilla
            single-layer linear model
        activation_type (str): the activation class name in :obj:`torch.nn`.
            Set None to disable the activation so the model is purely linear.
            Default: None
        activation_conf (dict): the arguments for initializing the activation
            class. Default: empty dict
        pooling_type (str): the pooling class name in :obj:`s3prl.nn.pooling`.
            Default: MeanPooling
        pooling_conf (dict): the arguments for initializing the pooling class.
            Default: empty dict
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        hidden_sizes: List[int] = None,
        activation_type: str = None,
        activation_conf: dict = None,
        pooling_type: str = "MeanPooling",
        pooling_conf: dict = None,
    ):
        super().__init__()
        self._indim = input_size
        self._outdim = output_size

        # Only replace None with the default; an explicit empty list keeps
        # the hidden part empty so pooling acts on the raw input features.
        if hidden_sizes is None:
            hidden_sizes = [256]

        latest_size = input_size
        hidden_layers = []
        for size in hidden_sizes:
            hidden_layers.append(nn.Linear(latest_size, size))
            if activation_type is not None:
                hidden_layers.append(
                    getattr(nn, activation_type)(**(activation_conf or {}))
                )
            latest_size = size

        self.hidden_layers = nn.Sequential(*hidden_layers)

        self.pooling = getattr(pooling, pooling_type)(
            latest_size, **(pooling_conf or {})
        )
        latest_size = self.pooling.output_size
        self.final_proj = nn.Linear(latest_size, output_size)

    @property
    def input_size(self) -> int:
        return self._indim

    @property
    def output_size(self) -> int:
        return self._outdim
    def forward(self, x, x_len):
        """
        Args:
            x (torch.FloatTensor): (batch_size, seq_len, input_size)
            x_len (torch.LongTensor): (batch_size, )

        Returns:
            torch.FloatTensor

            (batch_size, output_size)
        """
        x = self.hidden_layers(x)
        x_pooled = self.pooling(x, x_len)
        y = self.final_proj(x_pooled)
        return y
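
A matching usage sketch for UtteranceLevel (illustrative, not part of the
module), using the default MeanPooling, which is assumed here to average
only the valid frames indicated by x_len:

    import torch

    from s3prl.nn.common import UtteranceLevel

    model = UtteranceLevel(input_size=768, output_size=10)  # default: MeanPooling
    x = torch.randn(4, 100, 768)                 # (batch_size, seq_len, input_size)
    x_len = torch.LongTensor([100, 90, 80, 70])  # valid length of each utterance
    y = model(x, x_len)
    assert y.shape == (4, 10)                    # one prediction per utterance
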