Source code for patsemb.postprocess.Smoother


import numpy as np
import numba as nb

from patsemb.postprocess.Postprocessor import Postprocessor


[docs] class Smoother(Postprocessor): """ Apply temporal smoothing on the embedding matrix. Temporal smoothing will transform each column in the embedding matrix by taking the weighted average of that column and the neighboring columns. This ensures that the consecutive time steps have more similar values. Specifically, the transformation of column i in embedding matrix E with normalized weights W equals: E[:, i] = ... + W[1] * E[:, i-1] + W[0] * E[:, i] + W[1] * E[:, i+1] + ... Parameters ---------- nb_iterations: int, default=1 The number of times smoothing will be applied. This value must be at least 1. weights: np.array of shape (size_neighborhood,) The weights used for aggregating the columns. The first weight corresponds to the weight of the current column, the other weights correspond to the weights of neighboring columns. The weights are interpreted to be relative and will be normalized during smoothing. At least two weights must be provided. None of the weights can be negative. """ def __init__(self, nb_iterations: int = 1, weights: np.array = None): self.nb_iterations: int = nb_iterations self.weights: np.array = np.array([2, 1]) if weights is None else weights # Check input if self.nb_iterations < 1: raise Exception("The 'nb_iterations' parameter in Smoother should be 1 or larger to perform " "at least one iteration!") if self.weights.shape[0] < 2: raise Exception("The 'weights' parameter in Smoother should have at least 2 weights in order to " "take the neighbouring time steps into account!") if self.weights.min() < 0: raise Exception("All weights in 'Smoother' should be larger than 0!")
[docs] def fit(self, X: np.ndarray, y=None) -> 'Smoother': """ Fit this Smoother. For smoothing there is no fitting necessary. Parameters ---------- X: np.ndarray of shape (n_patterns, n_samples) The embedding matrix to use for fitting this postprocessor. y: Ignored Is passed for fitting the discretizer, but will typically not be used and is only present here for API consistency by convention. Returns ------- self: Smoother Returns the instance itself """ return self
[docs] def transform(self, X: np.ndarray) -> np.ndarray: """ Smooth the given embedding matrix. Parameters ---------- X: np.ndarray of shape (n_patterns, n_samples) The embedding matrix on which the smoothing should be applied. Returns ------- smoothed_embedding_matrix: np.ndarray of shape (n_patterns, n_samples) The smoothed embedding matrix. """ return smoothing(X, self.nb_iterations, self.weights)
@nb.njit(fastmath=True) def smoothing(embedding_matrix: np.ndarray, nb_iterations: int, weights: np.array) -> np.ndarray: """ Method to effectively smooth the embedding matrix. """ # col[i] = ... + weight[1] * col_prev[i-1] + weights[0] * col_prev[0] + weights[1] * col_prev[i+1] + ... # Set the weights correctly weights = weights / (np.sum(weights) + np.sum(weights[1:])) # [..., weight[1], weights[0], weights[1], ..] weights_expanded = np.empty(shape=2 * weights.shape[0] - 1) weights_expanded[:weights.shape[0]] = weights[::-1] weights_expanded[weights.shape[0]:] = weights[1:] # Repeat the same process for the given number of iterations for _ in range(nb_iterations): # Initialize the next iteration next_iteration = np.zeros_like(embedding_matrix) # The first few and last few columns for i in range(weights.shape[0] - 1): adjusted_weights = weights_expanded[weights.shape[0] - i - 1:] / weights_expanded[weights.shape[0] - i - 1:].sum() for j in range(adjusted_weights.shape[0]): next_iteration[:, i] += embedding_matrix[:, j] * adjusted_weights[j] next_iteration[:, -i - 1] += embedding_matrix[:, -j - 1] * adjusted_weights[-j] # Center columns for i in range(weights.shape[0] - 1, embedding_matrix.shape[1] - weights.shape[0] + 1): for j in range(-weights.shape[0] + 1, weights.shape[0]): next_iteration[:, i] += embedding_matrix[:, i + j] * weights[abs(j)] # Update the matrix embedding_matrix = next_iteration.copy() return embedding_matrix