MythTV master
freesurround_decoder.cpp
Go to the documentation of this file.
1/*
2Copyright (C) 2007 Christian Kothe
3
4This program is free software; you can redistribute it and/or
5modify it under the terms of the GNU General Public License
6as published by the Free Software Foundation; either version 2
7of the License, or (at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17*/
19
20#include <algorithm>
21#include <array>
22#include <cmath>
23#include <complex>
24#include <cstdlib>
25#include <cstring>
26#include <numbers>
27#include <vector>
28extern "C" {
29#include "libavutil/mem.h"
30#include "libavutil/tx.h"
31}
32
33using cfloat = std::complex<float>;
34using InputBufs = std::array<float*,2>;
35using OutputBufs = std::array<float*,6>;
36
37static const float PI = std::numbers::pi;
38static const float epsilon = 0.000001;
39static const float center_level = 0.5F*sqrtf(0.5F);
40
41template <class T>
42T sqr(T x) { return x*x; }
43
44// private implementation of the surround decoder
46public:
47 // create an instance of the decoder
48 // blocksize is fixed over the lifetime of this object for performance reasons
49 explicit Impl(unsigned blocksize=8192)
50 : m_n(blocksize),
51 m_halfN(blocksize/2),
52 // create lavc fft buffers
53 m_dftL((AVComplexFloat*)av_malloc(sizeof(AVComplexFloat) * m_n * 2)),
54 m_dftR((AVComplexFloat*)av_malloc(sizeof(AVComplexFloat) * m_n * 2)),
55 m_src ((AVComplexFloat*)av_malloc(sizeof(AVComplexFloat) * m_n * 2))
56 {
57 // If av_tx_init() fails (returns < 0), the contexts will be nullptr and will crash later,
58 // but av_malloc() is not checked to succeed either.
59 av_tx_init(&m_fftContext , &m_fft , AV_TX_FLOAT_FFT, 0, m_n, &kScale, AV_TX_INPLACE);
60 av_tx_init(&m_ifftContext, &m_ifft, AV_TX_FLOAT_FFT, 1, m_n, &kScale, AV_TX_INPLACE);
61 // resize our own buffers
62 m_frontR.resize(m_n);
63 m_frontL.resize(m_n);
64 m_avg.resize(m_n);
65 m_surR.resize(m_n);
66 m_surL.resize(m_n);
67 m_trueavg.resize(m_n);
68 m_xFs.resize(m_n);
69 m_yFs.resize(m_n);
70 m_inbuf[0].resize(m_n);
71 m_inbuf[1].resize(m_n);
72 for (unsigned c=0;c<6;c++) {
73 m_outbuf[c].resize(m_n);
74 m_filter[c].resize(m_n);
75 }
76 sample_rate(48000);
77 // generate the window function (square root of hann, b/c it is applied before and after the transform)
78 m_wnd.resize(m_n);
79 for (unsigned k=0;k<m_n;k++)
80 m_wnd[k] = std::sqrt(0.5F*(1-std::cos(2*PI*k/m_n))/m_n);
81 m_currentBuf = 0;
82 m_inbufs.fill(nullptr);
83 m_outbufs.fill(nullptr);
84 // set the default coefficients
85 surround_coefficients(std::numbers::sqrt2 / std::numbers::sqrt3,
86 std::numbers::inv_sqrt3);
87 phase_mode(0);
88 separation(1,1);
89 steering_mode(true);
90 }
91
92 // destructor
94 av_tx_uninit(&m_fftContext);
95 av_tx_uninit(&m_ifftContext);
96 av_free(m_src);
97 av_free(m_dftR);
98 av_free(m_dftL);
99 }
100
102 {
105 return m_inbufs.data();
106 }
107
109 {
116 return m_outbufs.data();
117 }
118
119 // decode a chunk of stereo sound, has to contain exactly blocksize samples
120 // center_width [0..1] distributes the center information towards the front left/right channels, 1=full distribution, 0=no distribution
121 // dimension [0..1] moves the soundfield backwards, 0=front, 1=side
122 // adaption_rate [0..1] determines how fast the steering gets adapted, 1=instantaneous, 0.1 = very slow adaption
123 void decode(float center_width, float dimension, float adaption_rate) {
124 // process first part
125 int index = m_currentBuf*m_halfN;
126 InputBufs in_second {&m_inbuf[0][index],&m_inbuf[1][index]};
127 m_currentBuf ^= 1;
128 index = m_currentBuf*m_halfN;
129 InputBufs in_first {&m_inbuf[0][index],&m_inbuf[1][index]};
130 add_output(in_first,in_second,center_width,dimension,adaption_rate,true);
131 // shift last half of input buffer to the beginning
132 }
133
134 // flush the internal buffers
135 void flush() {
136 for (unsigned k=0;k<m_n;k++) {
137 for (auto & c : m_outbuf)
138 c[k] = 0;
139 m_inbuf[0][k] = 0;
140 m_inbuf[1][k] = 0;
141 }
142 }
143
144 // set lfe filter params
145 void sample_rate(unsigned int srate) {
146 // lfe filter is just straight through band limited
147 unsigned int cutoff = (30*m_n)/srate;
148 for (unsigned f=0;f<=m_halfN;f++) {
149 if (f<cutoff)
150 m_filter[5][f] = 0.5*sqrt(0.5);
151 else
152 m_filter[5][f] = 0.0;
153 }
154 }
155
156 // set the assumed surround mixing coefficients
157 void surround_coefficients(float a, float b) {
158 // calc the simple coefficients
159 m_surroundHigh = a;
160 m_surroundLow = b;
161 m_surroundBalance = (a-b)/(a+b);
162 m_surroundLevel = 1/(a+b);
163 // calc the linear coefficients
164 cfloat i(0,1);
165 cfloat u((a+b)*i);
166 cfloat v((b-a)*i);
167 cfloat n(0.25,0);
168 cfloat o(1,0);
169 m_a = (v-o)*n; m_b = (o-u)*n; m_c = (-o-v)*n; m_d = (o+u)*n;
170 m_e = (o+v)*n; m_f = (o+u)*n; m_g = (o-v)*n; m_h = (o-u)*n;
171 }
172
173 // set the phase shifting mode
174 void phase_mode(unsigned mode) {
175 const std::array<std::array<float,2>,4> modes {{ {0,0}, {0,PI}, {PI,0}, {-PI/2,PI/2} }};
176 m_phaseOffsetL = modes[mode][0];
177 m_phaseOffsetR = modes[mode][1];
178 }
179
180 // what steering mode should be chosen
181 void steering_mode(bool mode) { m_linearSteering = mode; }
182
183 // set front & rear separation controls
184 void separation(float front, float rear) {
185 m_frontSeparation = front;
186 m_rearSeparation = rear;
187 }
188
189private:
190 // polar <-> cartesian coodinates conversion
191 static cfloat polar(float a, float p) { return {a*std::cos(p), a*std::sin(p)}; }
192 static float amplitude(AVComplexFloat z) { return std::hypot(z.re, z.im); }
193 static float phase(AVComplexFloat z) { return std::atan2(z.im, z.re); }
194
196 static float clamp_unit_mag(float x) { return std::clamp(x, -1.0F, 1.0F); }
197
198 // handle the output buffering for overlapped calls of block_decode
199 void add_output(InputBufs input1, InputBufs input2, float center_width, float dimension, float adaption_rate, bool /*result*/=false) {
200 // add the windowed data to the last 1/2 of the output buffer
201 OutputBufs out {m_outbuf[0].data(),m_outbuf[1].data(),m_outbuf[2].data(),m_outbuf[3].data(),m_outbuf[4].data(),m_outbuf[5].data()};
202 block_decode(input1,input2,out,center_width,dimension,adaption_rate);
203 }
204
205 // CORE FUNCTION: decode a block of data
206 void block_decode(InputBufs input1, InputBufs input2, OutputBufs output, float center_width, float dimension, float adaption_rate) {
207 // 1. scale the input by the window function; this serves a dual purpose:
208 // - first it improves the FFT resolution b/c boundary discontinuities (and their frequencies) get removed
209 // - second it allows for smooth blending of varying filters between the blocks
210
211 // the window also includes 1.0/sqrt(n) normalization
212 // concatenate copies of input1 and input2 for some undetermined reason
213 // input1 is in the rising half of the window
214 // input2 is in the falling half of the window
215 for (unsigned k = 0; k < m_halfN; k++)
216 {
217 m_dftL[k] = (AVComplexFloat){ .re = input1[0][k] * m_wnd[k], .im = 0 };
218 m_dftR[k] = (AVComplexFloat){ .re = input1[1][k] * m_wnd[k], .im = 0 };
219
220 m_dftL[k + m_halfN] = (AVComplexFloat){ .re = input2[0][k] * m_wnd[k + m_halfN], .im = 0 };
221 m_dftR[k + m_halfN] = (AVComplexFloat){ .re = input2[1][k] * m_wnd[k + m_halfN], .im = 0 };
222 }
223
224 // ... and tranform it into the frequency domain
225 m_fft(m_fftContext, m_dftL, m_dftL, sizeof(AVComplexFloat));
226 m_fft(m_fftContext, m_dftR, m_dftR, sizeof(AVComplexFloat));
227
228 // 2. compare amplitude and phase of each DFT bin and produce the X/Y coordinates in the sound field
229 // but dont do DC or N/2 component
230 for (unsigned f=0;f<m_halfN;f++) {
231 // get left/right amplitudes/phases
232 float ampL = amplitude(m_dftL[f]);
233 float ampR = amplitude(m_dftR[f]);
234 float phaseL = phase(m_dftL[f]);
235 float phaseR = phase(m_dftR[f]);
236// if (ampL+ampR < epsilon)
237// continue;
238
239 // calculate the amplitude/phase difference
240 float ampDiff = clamp_unit_mag((ampL+ampR < epsilon) ? 0 : (ampR-ampL) / (ampR+ampL));
241 float phaseDiff = phaseL - phaseR;
242 if (phaseDiff < -PI) phaseDiff += 2*PI;
243 if (phaseDiff > PI) phaseDiff -= 2*PI;
244 phaseDiff = std::abs(phaseDiff);
245
246 if (m_linearSteering) {
247 // --- this is the fancy new linear mode ---
248
249 // get sound field x/y position
250 m_yFs[f] = get_yfs(ampDiff,phaseDiff);
251 m_xFs[f] = get_xfs(ampDiff,m_yFs[f]);
252
253 // add dimension control
254 m_yFs[f] = clamp_unit_mag(m_yFs[f] - dimension);
255
256 // add crossfeed control
257 m_xFs[f] = clamp_unit_mag(m_xFs[f] * (m_frontSeparation*(1+m_yFs[f])/2 + m_rearSeparation*(1-m_yFs[f])/2));
258
259 // 3. generate frequency filters for each output channel
260 float left = (1-m_xFs[f])/2;
261 float right = (1+m_xFs[f])/2;
262 float front = (1+m_yFs[f])/2;
263 float back = (1-m_yFs[f])/2;
264 std::array<float, 5> volume
265 {
266 front * (left * center_width + std::max(0.0F, -m_xFs[f]) * (1.0F - center_width) ), // left
267 front * center_level * ( (1.0F - std::abs(m_xFs[f])) * (1.0F - center_width) ), // center
268 front * (right * center_width + std::max(0.0F, m_xFs[f]) * (1.0F - center_width) ), // right
269 back * m_surroundLevel * left, // left surround
270 back * m_surroundLevel * right // right surround
271 };
272
273 // adapt the prior filter
274 for (unsigned c=0;c<5;c++)
275 m_filter[c][f] = ((1-adaption_rate)*m_filter[c][f]) + (adaption_rate*volume[c]);
276
277 } else {
278 // --- this is the old & simple steering mode ---
279
280 // determine sound field x-position
281 m_xFs[f] = ampDiff;
282
283 // determine preliminary sound field y-position from phase difference
284 m_yFs[f] = 1 - ((phaseDiff/PI)*2);
285
286 if (std::abs(m_xFs[f]) > m_surroundBalance) {
287 // blend linearly between the surrounds and the fronts if the balance exceeds the surround encoding balance
288 // this is necessary because the sound field is trapezoidal and will be stretched behind the listener
289 float frontness = (std::abs(m_xFs[f]) - m_surroundBalance)/(1-m_surroundBalance);
290 m_yFs[f] = ((1-frontness) * m_yFs[f]) + (frontness * 1);
291 }
292
293 // add dimension control
294 m_yFs[f] = clamp_unit_mag(m_yFs[f] - dimension);
295
296 // add crossfeed control
297 m_xFs[f] = clamp_unit_mag(m_xFs[f] * (m_frontSeparation*(1+m_yFs[f])/2 + m_rearSeparation*(1-m_yFs[f])/2));
298
299 // 3. generate frequency filters for each output channel, according to the signal position
300 // the sum of all channel volumes must be 1.0
301 float left = (1-m_xFs[f])/2;
302 float right = (1+m_xFs[f])/2;
303 float front = (1+m_yFs[f])/2;
304 float back = (1-m_yFs[f])/2;
305 std::array<float, 5> volume
306 {
307 front * (left * center_width + std::max(0.0F, -m_xFs[f]) * (1.0F-center_width)), // left
308 front * center_level * ( (1.0F - std::abs(m_xFs[f])) * (1.0F - center_width) ), // center
309 front * (right * center_width + std::max(0.0F, m_xFs[f]) * (1.0F-center_width)), // right
310 back * m_surroundLevel * std::clamp( (1.0F - (m_xFs[f] / m_surroundBalance) ) / 2.0F, 0.0F, 1.0F),// left surround
311 back * m_surroundLevel * std::clamp( (1.0F + (m_xFs[f] / m_surroundBalance) ) / 2.0F, 0.0F, 1.0F) // right surround
312 };
313
314 // adapt the prior filter
315 for (unsigned c=0;c<5;c++)
316 m_filter[c][f] = ((1-adaption_rate)*m_filter[c][f]) + (adaption_rate*volume[c]);
317 }
318
319 // ... and build the signal which we want to position
320 m_frontL[f] = polar(ampL+ampR,phaseL);
321 m_frontR[f] = polar(ampL+ampR,phaseR);
322 m_avg[f] = m_frontL[f] + m_frontR[f];
323 m_surL[f] = polar(ampL+ampR,phaseL+m_phaseOffsetL);
324 m_surR[f] = polar(ampL+ampR,phaseR+m_phaseOffsetR);
325 m_trueavg[f] = cfloat(m_dftL[f].re + m_dftR[f].re, m_dftL[f].im + m_dftR[f].im);
326 }
327
328 // 4. distribute the unfiltered reference signals over the channels
329 apply_filter((m_frontL).data(), m_filter[0].data(),&output[0][0]); // front left
330 apply_filter((m_avg).data(), m_filter[1].data(),&output[1][0]); // front center
331 apply_filter((m_frontR).data(), m_filter[2].data(),&output[2][0]); // front right
332 apply_filter((m_surL).data(), m_filter[3].data(),&output[3][0]); // surround left
333 apply_filter((m_surR).data(), m_filter[4].data(),&output[4][0]); // surround right
334 apply_filter((m_trueavg).data(),m_filter[5].data(),&output[5][0]); // lfe
335 }
336
337#define FASTER_CALC
338 // map from amplitude difference and phase difference to yfs
339 static double get_yfs(double ampDiff, double phaseDiff) {
340 double x = 1-(((1-sqr(ampDiff))*phaseDiff)/M_PI*2);
341#ifdef FASTER_CALC
342 double tanX = tan(x);
343 return 0.16468622925824683 + (0.5009268347818189*x) - (0.06462757726992101*x*x)
344 + (0.09170680403453149*x*x*x) + (0.2617754892323973*tanX) - (0.04180413533856156*sqr(tanX));
345#else
346 return 0.16468622925824683 + (0.5009268347818189*x) - (0.06462757726992101*x*x)
347 + (0.09170680403453149*x*x*x) + (0.2617754892323973*tan(x)) - (0.04180413533856156*sqr(tan(x)));
348#endif
349 }
350
351 // map from amplitude difference and yfs to xfs
352 static double get_xfs(double ampDiff, double yfs) {
353 double x=ampDiff;
354 double y=yfs;
355#ifdef FASTER_CALC
356 double tanX = tan(x);
357 double tanY = tan(y);
358 double asinX = asin(x);
359 double sinX = sin(x);
360 double sinY = sin(y);
361 double x3 = x*x*x;
362 double y2 = y*y;
363 double y3 = y*y2;
364 return (2.464833559224702*x) - (423.52131153259404*x*y) +
365 (67.8557858606918*x3*y) + (788.2429425544392*x*y2) -
366 (79.97650354902909*x3*y2) - (513.8966153850349*x*y3) +
367 (35.68117670186306*x3*y3) + (13867.406173420834*y*asinX) -
368 (2075.8237075786396*y2*asinX) - (908.2722068360281*y3*asinX) -
369 (12934.654772878019*asinX*sinY) - (13216.736529661162*y*tanX) +
370 (1288.6463247741938*y2*tanX) + (1384.372969378453*y3*tanX) +
371 (12699.231471126128*sinY*tanX) + (95.37131275594336*sinX*tanY) -
372 (91.21223198407546*tanX*tanY);
373#else
374 return (2.464833559224702*x) - (423.52131153259404*x*y) +
375 (67.8557858606918*x*x*x*y) + (788.2429425544392*x*y*y) -
376 (79.97650354902909*x*x*x*y*y) - (513.8966153850349*x*y*y*y) +
377 (35.68117670186306*x*x*x*y*y*y) + (13867.406173420834*y*asin(x)) -
378 (2075.8237075786396*y*y*asin(x)) - (908.2722068360281*y*y*y*asin(x)) -
379 (12934.654772878019*asin(x)*sin(y)) - (13216.736529661162*y*tan(x)) +
380 (1288.6463247741938*y*y*tan(x)) + (1384.372969378453*y*y*y*tan(x)) +
381 (12699.231471126128*sin(y)*tan(x)) + (95.37131275594336*sin(x)*tan(y)) -
382 (91.21223198407546*tan(x)*tan(y));
383#endif
384 }
385
395 void apply_filter(cfloat *signal, const float *flt, float *target) {
396 // filter the signal
397 for (unsigned f = 0; f <= m_halfN; f++)
398 {
399 m_src[f].re = signal[f].real() * flt[f];
400 m_src[f].im = signal[f].imag() * flt[f];
401 }
402 // enforce odd symmetry
403 for (unsigned f = 1; f < m_halfN; f++)
404 {
405 m_src[m_n - f].re = m_src[f].re;
406 m_src[m_n - f].im = -m_src[f].im; // complex conjugate
407 }
408 m_ifft(m_ifftContext, m_src, m_src, sizeof(AVComplexFloat));
409
410 // add the result to target, windowed
411 for (unsigned int k = 0; k < m_halfN; k++)
412 {
413 // 1st part is overlap add
414 target[(m_currentBuf * m_halfN) + k] += m_src[k].re * m_wnd[k];
415 // 2nd part is set as has no history
416 target[((m_currentBuf ^ 1) * m_halfN) + k] = m_src[m_halfN + k].re * m_wnd[m_halfN + k];
417 }
418 }
419
420 size_t m_n; // the block size
421 size_t m_halfN; // half block size precalculated
422 static constexpr float kScale {1.0F};
423 AVTXContext *m_fftContext {nullptr};
424 av_tx_fn m_fft {nullptr};
425 AVTXContext *m_ifftContext {nullptr};
426 av_tx_fn m_ifft {nullptr};
427 // FFTs are computed in-place in these buffers on copies of the input
428 AVComplexFloat *m_dftL {nullptr};
429 AVComplexFloat *m_dftR {nullptr};
430 AVComplexFloat *m_src {nullptr};
431 // buffers
432 std::vector<cfloat> m_frontL,m_frontR,m_avg,m_surL,m_surR; // the signal (phase-corrected) in the frequency domain
433 std::vector<cfloat> m_trueavg; // for lfe generation
434 std::vector<float> m_xFs,m_yFs; // the feature space positions for each frequency bin
435 std::vector<float> m_wnd; // the window function, precalculated
436 std::array<std::vector<float>,6> m_filter; // a frequency filter for each output channel
437 std::array<std::vector<float>,2> m_inbuf; // the sliding input buffers
438 std::array<std::vector<float>,6> m_outbuf; // the sliding output buffers
439 // coefficients
440 float m_surroundHigh {0.0F}; // high surround mixing coefficient (e.g. 0.8165/0.5774)
441 float m_surroundLow {0.0F}; // low surround mixing coefficient (e.g. 0.8165/0.5774)
442 float m_surroundBalance {0.0F}; // the xfs balance that follows from the coeffs
443 float m_surroundLevel {0.0F}; // gain for the surround channels (follows from the coeffs
444 float m_phaseOffsetL {0.0F}; // phase shifts to be applied to the rear channels
445 float m_phaseOffsetR {0.0F}; // phase shifts to be applied to the rear channels
446 float m_frontSeparation {0.0F}; // front stereo separation
447 float m_rearSeparation {0.0F}; // rear stereo separation
448 bool m_linearSteering {false}; // whether the steering should be linear or not
449 cfloat m_a,m_b,m_c,m_d,m_e,m_f,m_g,m_h; // coefficients for the linear steering
450 int m_currentBuf; // specifies which buffer is 2nd half of input sliding buffer
451 InputBufs m_inbufs {}; // for passing back to driver
452 OutputBufs m_outbufs {}; // for passing back to driver
453};
454
455
456// implementation of the shell class
457
458fsurround_decoder::fsurround_decoder(unsigned blocksize): m_impl(new Impl(blocksize)) { }
459
461
462void fsurround_decoder::decode(float center_width, float dimension, float adaption_rate) {
463 m_impl->decode(center_width,dimension,adaption_rate);
464}
465
467
469
470void fsurround_decoder::phase_mode(unsigned mode) { m_impl->phase_mode(mode); }
471
473
474void fsurround_decoder::separation(float front, float rear) { m_impl->separation(front,rear); }
475
477{
478 return m_impl->getInputBuffers();
479}
480
482{
483 return m_impl->getOutputBuffers();
484}
485
486void fsurround_decoder::sample_rate(unsigned int samplerate)
487{
488 m_impl->sample_rate(samplerate);
489}
std::vector< cfloat > m_surL
Impl(unsigned blocksize=8192)
void surround_coefficients(float a, float b)
std::vector< cfloat > m_frontR
static float amplitude(AVComplexFloat z)
static float clamp_unit_mag(float x)
Clamp the input to the interval [-1, 1], i.e. clamp the magnitude to the unit interval [0,...
void sample_rate(unsigned int srate)
std::array< std::vector< float >, 6 > m_filter
std::vector< cfloat > m_trueavg
void phase_mode(unsigned mode)
void block_decode(InputBufs input1, InputBufs input2, OutputBufs output, float center_width, float dimension, float adaption_rate)
static double get_yfs(double ampDiff, double phaseDiff)
void add_output(InputBufs input1, InputBufs input2, float center_width, float dimension, float adaption_rate, bool=false)
std::array< std::vector< float >, 2 > m_inbuf
std::array< std::vector< float >, 6 > m_outbuf
static float phase(AVComplexFloat z)
static constexpr float kScale
void decode(float center_width, float dimension, float adaption_rate)
static double get_xfs(double ampDiff, double yfs)
void apply_filter(cfloat *signal, const float *flt, float *target)
Filter the complex source signal in the frequency domain and add it to the time domain target signal.
void separation(float front, float rear)
std::vector< cfloat > m_surR
std::vector< cfloat > m_avg
std::vector< cfloat > m_frontL
static cfloat polar(float a, float p)
AVComplexFloat * m_src
Used only in apply_filter.
void steering_mode(bool mode)
fsurround_decoder(unsigned blocksize=8192)
void decode(float center_width=1, float dimension=0, float adaption_rate=1)
void separation(float front, float rear)
void surround_coefficients(float a, float b)
void sample_rate(unsigned int samplerate)
void phase_mode(unsigned mode)
static const float center_level
T sqr(T x)
static const float PI
std::array< float *, 6 > OutputBufs
std::array< float *, 2 > InputBufs
std::complex< float > cfloat
static const float epsilon
static std::vector< uint32_t > back
Definition: goom_core.cpp:27
static constexpr double M_PI
Definition: goom_tools.h:10
static int x3
Definition: mythsocket.cpp:56
static eu8 clamp(eu8 value, eu8 low, eu8 high)
Definition: pxsup2dast.c:201
#define output