Source code for rythm_forge.core.feature_functions

import numpy as np
from ..exceptions.exceptions import RythmForgeValueError
from .._lib import rythm_forge_core_cpp as core_backend
from .fft_functions import stft


def resample(y: np.ndarray, sr: int, new_sr=8000) -> tuple[np.ndarray, int]:
    """
    Change the sampling rate of a time series from ``sr`` to ``new_sr``.

    The conversion itself is delegated to the compiled backend
    (``core_backend.resample``).

    :param y: np.ndarray
        Input audio samples, 1D or 2D; in the 2D case each row is a
        separate channel.
    :param sr: int
        Sampling rate at which ``y`` was acquired.
    :param new_sr: int
        Target sampling rate.
    :return: tuple[np.ndarray, int]
        ``(y_hat, new_sr)`` - the resampled signal and the sampling rate
        reported back by the backend.
    """
    backend_result = core_backend.resample(y, sr, new_sr)
    # Unpack explicitly so the backend must deliver exactly two values.
    y_hat, out_sr = backend_result
    return y_hat, out_sr
def mel_filter_bank(sr: int, n_fft: int, n_mel: int) -> np.ndarray:
    """
    Build a Mel filter bank.

    The result is a linear transformation matrix that projects FFT bins
    onto Mel-frequency bins. The matrix is produced by the compiled backend
    and transposed before it is handed back.

    :param sr: int > 0 [scalar]
        Sampling rate of the incoming signal.
    :param n_fft: int > 0 [scalar]
        Number of FFT components.
    :param n_mel: int > 0 [scalar]
        Number of Mel bands to generate.
    :return: np.ndarray
        Mel transform matrix, documented as having shape
        ``(n_mel, 1 + n_fft/2)``.
    """
    backend_filter = core_backend.mel_filter_bank(sr, n_fft, n_mel)
    transposed = backend_filter.T
    return transposed
def hz_to_mel(array: np.ndarray):
    """
    Convert frequencies expressed in Hz to the Mel scale.

    The conversion is performed by the compiled backend
    (``core_backend.hz_to_mel``).

    :param array: np.ndarray
        Values in Hz to be converted to Mels.
    :return:
        Whatever the backend produces for ``array`` (the converted values).
    """
    mels = core_backend.hz_to_mel(array)
    return mels
def mel_to_hz(array: np.ndarray):
    """
    Convert values expressed on the Mel scale back to Hz.

    The conversion is performed by the compiled backend
    (``core_backend.mel_to_hz``).

    :param array: np.ndarray
        Values in Mels to be converted to Hz.
    :return:
        Whatever the backend produces for ``array`` (the converted values).
    """
    hertz = core_backend.mel_to_hz(array)
    return hertz
def magnitude(complex_matrix: np.ndarray):
    """
    Return the element-wise magnitude of a (typically complex) matrix.

    Equivalent to ``np.abs(complex_matrix)``.

    :param complex_matrix: np.ndarray
        Array with complex values, most often the output of an STFT.
    :return: np.ndarray
        Array of the same shape holding the absolute value of each element.
    """
    # np.abs is an alias of np.absolute, so this is the very same ufunc.
    return np.absolute(complex_matrix)
# Analysis settings used by onset_strength. tempo_estimation needs the same
# values to turn envelope frame indices back into seconds, so they are kept
# in one place.
_ONSET_SR = 8000  # sampling rate the signal is resampled to before analysis
_ONSET_N_FFT = 2048  # FFT size handed to stft / mel_filter_bank
_ONSET_HOP = 512  # third stft argument; presumably the hop length - confirm
_ONSET_N_MELS = 40  # number of Mel bands
_ONSET_LAG = 3  # frame distance used for the spectral difference


def find_peaks(y: np.ndarray):
    """
    Return the indices of the peak samples of ``y``.

    A peak is a sample whose neighbours (before and after) are both smaller.
    The search itself is done by the compiled backend.

    :param y: np.ndarray
        Series whose elements are compared, e.g. an onset strength envelope.
    :return: np.ndarray
        Indices of the elements that are peaks in the series.
    :raises ValueError: if ``y.ndim`` is not 2.
    """
    # NOTE(review): the guard only lets 2-D arrays through, while the message
    # (and the original documentation) talk about a 1-D vector. The backend
    # contract is not visible from here - confirm which one is intended
    # before touching either the check or the message.
    if y.ndim != 2:
        raise ValueError(f"Given vector y must be 1D, but {y.ndim} was given")
    return core_backend.find_peaks(y)


def onset_strength(y: np.ndarray, sr: int):
    """
    Compute the onset strength envelope of an audio signal.

    The envelope measures sudden increases in energy across frequency bands,
    which typically correspond to note onsets or other transient events.

    Steps: resample to 8000 Hz, take the STFT magnitude, project it onto 40
    Mel bands, convert to dB, take the positive part of the difference
    between frames that are 3 apart, left-pad the time axis and sum over the
    first axis.

    :param y: np.ndarray
        The input audio signal (documented as a 2D array).
    :param sr: int
        The sample rate of the input audio signal.
    :return: np.ndarray
        The onset strength envelope; one value per STFT frame along the last
        axis.
    """
    y_resampled, sr = resample(y, sr, _ONSET_SR)
    spectrum = magnitude(stft(y_resampled, _ONSET_N_FFT, _ONSET_HOP))

    # Project the spectrum onto the Mel bands. The filter gets a leading
    # singleton axis for the dot product, which is dropped again with [0].
    mel_filter = mel_filter_bank(sr, _ONSET_N_FFT, _ONSET_N_MELS)
    mel_spec = np.abs(np.dot(mel_filter[np.newaxis, :, :], spectrum)[0])
    mel_db = power_to_dB(mel_spec)

    # Half-wave rectified difference between frames _ONSET_LAG apart: only
    # increases in energy count as onsets.
    onset_env = mel_db[..., _ONSET_LAG:] - mel_db[..., :-_ONSET_LAG]
    onset_env = np.maximum(0.0, onset_env)

    # Left-pad the time axis to compensate for the lag and for half an FFT
    # window expressed in frames.
    pad_width = _ONSET_LAG + _ONSET_N_FFT // (2 * _ONSET_HOP)
    padding = [(0, 0)] * onset_env.ndim
    padding[-1] = (int(pad_width), 0)
    onset_env = np.pad(onset_env, padding, mode="constant")

    return np.sum(onset_env, axis=0)


def tempo_estimation(y: np.ndarray, sr: int):
    """
    Estimate the tempo of the signal in beats per minute.

    The tempo is derived from the mean distance between consecutive peaks of
    the onset strength envelope.

    NOTE(review): this function was marked as not working. One cause is
    fixed here: peak positions are indices of envelope frames, and the
    envelope is computed on a signal resampled to 8000 Hz with a 512-sample
    hop, so one frame lasts 512 / 8000 s. The previous code divided the
    frame distance by the caller's ``sr`` instead. The result has not been
    validated against the compiled backend.

    :param y: np.ndarray
        The input audio signal.
    :param sr: int
        The sample rate of the input audio signal.
    :return:
        Estimated tempo in bpm (``60 / mean peak interval in seconds``).
    """
    envelope = onset_strength(y, sr)
    peaks = find_peaks(envelope)
    seconds_per_frame = _ONSET_HOP / _ONSET_SR
    peak_intervals = np.diff(peaks) * seconds_per_frame
    return 60 / np.mean(peak_intervals)


def beat_estimation(y: np.ndarray, sr: int):
    """
    Estimate beat locations of the signal.

    NOTE(review): marked as not working, and still unresolved.
    ``int(beat_interval / 8000)`` divides a duration in seconds by a
    sampling rate and truncates, which gives 0 for any realistic tempo, so
    every returned location is 0. Converting seconds to samples would need a
    multiplication by the rate. Peaks are also searched in the raw signal
    ``y`` rather than in the onset envelope. The intended algorithm cannot
    be recovered from this code, so the logic is left as it was.

    :param y: np.ndarray
        The input audio signal.
    :param sr: int
        The sample rate of the input audio signal.
    :return: list
        One value per detected peak: the peak index multiplied by the
        computed beat interval.
    """
    tempo = tempo_estimation(y, sr)
    beat_interval = 60 / tempo
    beat_interval_samples = int(beat_interval / 8000)
    peaks = find_peaks(y)
    beat_location = [p * beat_interval_samples for p in peaks]
    return beat_location
def amplitude_to_dB(A, ref=1.0, amin=1e-10, top_db=80.0):
    """
    Convert an amplitude spectrogram to decibel (dB) units.

    Computes ``20 * log10(max(amin, A) / max(amin, ref))`` and then limits
    the dynamic range so that no value lies more than ``top_db`` below the
    maximum of the result.

    :param A: np.ndarray
        Input amplitude spectrogram (anything ``np.asarray`` accepts).
    :param ref: float or callable
        Reference value. If scalar, amplitude is scaled relative to `ref`.
        If callable, the reference value is computed as `ref(A)`.
    :param amin: float
        Minimum threshold for `A` and `ref`; keeps the logarithm finite.
    :param top_db: float
        Threshold the output at `top_db` below the peak.
    :return: np.ndarray
        The dB-scaled spectrogram.
    """
    A = np.asarray(A)
    ref_value = ref(A) if callable(ref) else ref

    # Factor 20 because A is an amplitude (power_to_dB uses 10 for power).
    log_spec = 20.0 * np.log10(np.maximum(amin, A) / np.maximum(amin, ref_value))
    log_spec = np.maximum(log_spec, log_spec.max() - top_db)
    # The result was previously computed but never returned, so every call
    # yielded None.
    return log_spec
def melspectrogram(
    stft_matrix: np.ndarray, n_fft=2048, sr=44100, n_mels=128
) -> np.ndarray:
    """
    Convert an STFT matrix to a mel spectrogram.

    The frequency axis of the STFT is mapped onto the mel scale, a
    perceptually motivated scale of pitches, by applying the filter bank
    returned by ``mel_filter_bank(sr, n_fft, n_mels)``.

    :param stft_matrix: np.ndarray
        STFT matrix of shape (n_freqs, n_times). Only 2-dimensional input is
        accepted.
    :param n_fft: int, optional, default=2048
        Number of FFT components used to build the mel filter bank.
    :param sr: int, optional, default=44100
        Sample rate of the audio signal, used to build the mel filter bank.
    :param n_mels: int, optional, default=128
        Number of mel bands to generate.
    :return: np.ndarray
        Mel spectrogram of shape (n_mels, n_times): the frequency bins are
        replaced by mel bands.
    :raises RythmForgeValueError: if ``stft_matrix`` is not 2-dimensional.
    """
    if stft_matrix.ndim == 2:
        filter_bank = mel_filter_bank(sr, n_fft, n_mels)
        # Contract the frequency axis of the STFT with that of the filter.
        return np.einsum(
            "...ft,mf->...mt", stft_matrix, filter_bank, optimize=True
        )

    raise RythmForgeValueError(
        "Wrong STFT matrix dim number! STFT should have ndim=2"
    )
def power_to_dB(S, ref=1.0, amin=1e-10, top_db=80.0):
    """
    Convert a power spectrogram to decibel (dB) units.

    Computes ``10 * log10(max(amin, S) / max(amin, ref))`` and then limits
    the dynamic range so that no value lies more than ``top_db`` below the
    maximum of the result.

    :param S: np.ndarray
        Input power spectrogram (anything ``np.asarray`` accepts).
    :param ref: float or callable
        Reference value. If scalar, power is scaled relative to `ref`.
        If callable, the reference value is computed as `ref(S)`.
    :param amin: float
        Minimum threshold for `S` and `ref`; keeps the logarithm finite.
    :param top_db: float
        Threshold the output at `top_db` below the peak.
    :return: np.ndarray
        The dB-scaled spectrogram.
    """
    S = np.asarray(S)
    ref_value = ref(S) if callable(ref) else ref

    floored_power = np.maximum(amin, S)
    floored_ref = np.maximum(amin, ref_value)
    decibels = 10.0 * np.log10(floored_power / floored_ref)

    # Nothing may fall more than top_db below the loudest value.
    lower_bound = decibels.max() - top_db
    return np.maximum(decibels, lower_bound)