/*
* Klang – a node+text-based synthesizer library
*
* This file is part of the *wellen* library (https://github.com/dennisppaul/wellen).
* Copyright (c) 2022 Dennis P Paul.
*
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU General License as published by
* the Free Software Foundation, version 3.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General License for more details.
*
* You should have received a copy of the GNU General License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _KLANG_NODE_PITCHDETECTION_H_
#define _KLANG_NODE_PITCHDETECTION_H_
#include "KlangNode.hpp"
namespace klang {
// @TODO(implement node and atom fully)
// @TODO(add mechanism to partially update buffer in order to collect more samples and thus get better frequency range i.e minimum buffer size of 1024)
/**
* detectes the pitch of a signal.
* <p>
* it uses the YIN algorithm described in the paper <a
* href="http://recherche.ircam.fr/equipes/pcm/cheveign/ps/2002_JASA_YIN_proof.pdf">Alain de Cheveigné + Hideki
* Kawahara:YIN, a fundamental frequency estimator for speech and music</a>.
* <p>
* Fair Use Disclaimer: this implementation was taken, with minor modifications, from a project called <a
* href="https://github.com/JorenSix/TarsosDSP">TarsosDSP</a> by Joren Six released under the GPL-3.0 license. the
* project seems to be really well written. i therefore think it is fair to use the existing source code and modify it.
*/
template<uint16_t BUFFER_SIZE> // The size of a buffer. E.g. 1024.
class NodePitchDetection
// : Node
{
public:
NodePitchDetection() : fSampleRate(KLANG_AUDIO_RATE), fThreshold(0.2) {}
/**
* Create a new pitch detector for a stream with the defined sample rate. Processes the audio in blocks of the
* defined size.
*
* @param audioSampleRate The sample rate of the audio stream. E.g. 44.1 kHz.
* @param yinThreshold The parameter that defines which peaks are kept as possible pitch candidates. See the YIN
* paper for more details.
*/
NodePitchDetection(const float audioSampleRate, const double yinThreshold) : fSampleRate(audioSampleRate),
fThreshold(yinThreshold) {
}
/**
* The main flow of the YIN algorithm. Returns a pitch value in Hz or -1 if no pitch is detected.
*
* @return a pitch value in Hz or -1 if no pitch is detected.
*/
const float *process(const float *audioBuffer) {
// step 2
difference(audioBuffer);
// step 3
cumulativeMeanNormalizedDifference();
// step 4
const int tauEstimate = absoluteThreshold();
// step 5
float pitchInHertz;
if (tauEstimate != -1) {
const float betterTau = parabolicInterpolation(tauEstimate);
// step 6
// TODO Implement optimization for the AUBIO_YIN algorithm.
// 0.77% => 0.5% error rate,
// using the data of the YIN paper
// bestLocalEstimate()
// conversion to Hz
pitchInHertz = fSampleRate / betterTau;
} else {
// no pitch found
pitchInHertz = -1;
}
fPitch = pitchInHertz;
return audioBuffer;
}
[[nodiscard]] float get_pitch() const {
return fPitch;
}
[[nodiscard]] float get_probability() const {
return fProbability;
}
[[nodiscard]] bool is_pitched() const {
return fPitched;
}
private:
/*
* An implementation of the AUBIO_YIN pitch tracking algorithm. See <a href=
* "http://recherche.ircam.fr/equipes/pcm/cheveign/ps/2002_JASA_YIN_proof.pdf" >the YIN paper.</a> Implementation
* based
* on <a href="http://aubio.org">aubio</a>
*
* @author Joren Six
* @author Paul Brossier
*/
/**
* The default YIN threshold value. Should be around 0.10~0.15. See YIN paper for more information.
*/
constexpr static double DEFAULT_THRESHOLD = 0.20;
/**
* The actual YIN threshold.
*/
const double fThreshold;
/**
* The audio sample rate. Most audio has a sample rate of 44.1kHz.
*/
const float fSampleRate;
/**
* The buffer that stores the calculated values. It is exactly half the size of the input buffer.
*/
const static uint16_t fYINBuffer_length = BUFFER_SIZE / 2;
float fYINBuffer[fYINBuffer_length]{};
float fPitch{};
float fProbability{};
bool fPitched{};
/**
* Implements the difference function as described in step 2 of the YIN paper.
*/
void difference(const float *audioBuffer) {
int index, tau;
float delta;
for (tau = 0; tau < fYINBuffer_length; tau++) {
fYINBuffer[tau] = 0;
}
for (tau = 1; tau < fYINBuffer_length; tau++) {
for (index = 0; index < fYINBuffer_length; index++) {
delta = audioBuffer[index] - audioBuffer[index + tau];
fYINBuffer[tau] += delta * delta;
}
}
}
/**
* The cumulative mean normalized difference function as described in step 3 of the YIN paper. <br>
* <code>
* yinBuffer[0] == yinBuffer[1] = 1
* </code>
*/
void cumulativeMeanNormalizedDifference() {
int tau;
fYINBuffer[0] = 1;
float runningSum = 0;
for (tau = 1; tau < fYINBuffer_length; tau++) {
runningSum += fYINBuffer[tau];
fYINBuffer[tau] *= (float) tau / runningSum;
}
}
/**
* Implements step 4 of the AUBIO_YIN paper.
*/
int absoluteThreshold() {
// Uses another loop construct
// than the AUBIO implementation
int tau;
// first two positions in yinBuffer are always 1
// So start at the third (index 2)
for (tau = 2; tau < fYINBuffer_length; tau++) {
if (fYINBuffer[tau] < fThreshold) {
while (tau + 1 < fYINBuffer_length && fYINBuffer[tau + 1] < fYINBuffer[tau]) {
tau++;
}
// found tau, exit loop and return
// store the probability
// From the YIN paper: The threshold determines the list of
// candidates admitted to the set, and can be interpreted as the
// proportion of aperiodic power tolerated
// within a periodic signal.
//
// Since we want the periodicity and and not aperiodicity:
// periodicity = 1 - aperiodicity
fProbability = 1 - fYINBuffer[tau];
break;
}
}
// if no pitch found, tau => -1
if (tau == fYINBuffer_length || fYINBuffer[tau] >= fThreshold) {
tau = -1;
fProbability = 0;
fPitched = false;
} else {
fPitched = true;
}
return tau;
}
/**
* Implements step 5 of the AUBIO_YIN paper. It refines the estimated tau value using parabolic interpolation. This
* is needed to detect higher frequencies more precisely. See
* <a href="http://fizyka.umk.pl/nrbook/c10-2.pdf">(dead link)</a> and for more
* background <a
* href="http://sfb649.wiwi.hu-berlin.de/fedc_homepage/xplore/tutorials/xegbohtmlnode62.html">Minimization of a
* Function: One-dimensional Case </a>
*
* @param tauEstimate The estimated tau value.
* @return A better, more precise tau value.
*/
float parabolicInterpolation(const int tauEstimate) {
float betterTau;
int x0;
int x2;
if (tauEstimate < 1) {
x0 = tauEstimate;
} else {
x0 = tauEstimate - 1;
}
if (tauEstimate + 1 < fYINBuffer_length) {
x2 = tauEstimate + 1;
} else {
x2 = tauEstimate;
}
if (x0 == tauEstimate) {
if (fYINBuffer[tauEstimate] <= fYINBuffer[x2]) {
betterTau = (float) tauEstimate;
} else {
betterTau = (float) x2;
}
} else if (x2 == tauEstimate) {
if (fYINBuffer[tauEstimate] <= fYINBuffer[x0]) {
betterTau = (float) tauEstimate;
} else {
betterTau = (float) x0;
}
} else {
float s0, s1, s2;
s0 = fYINBuffer[x0];
s1 = fYINBuffer[tauEstimate];
s2 = fYINBuffer[x2];
// fixed AUBIO implementation, thanks to Karl Helgason:
// (2.0f * s1 - s2 - s0) was incorrectly multiplied with -1
betterTau = (float) tauEstimate + (s2 - s0) / (2 * (2 * s1 - s2 - s0));
}
return betterTau;
}
};
} // namespace klang
#endif //_KLANG_NODE_PITCHDETECTION_H_