MythTV master
freesurround_decoder.cpp
Go to the documentation of this file.
1/*
2Copyright (C) 2007 Christian Kothe
3
4This program is free software; you can redistribute it and/or
5modify it under the terms of the GNU General Public License
6as published by the Free Software Foundation; either version 2
7of the License, or (at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17*/
19
20#include <algorithm>
21#include <array>
22#include <cmath>
23#include <complex>
24#include <cstdlib>
25#include <cstring>
26#include <vector>
27extern "C" {
28#include "libavutil/mem.h"
29#include "libavutil/tx.h"
30}
31
32using cfloat = std::complex<float>;
33using InputBufs = std::array<float*,2>;
34using OutputBufs = std::array<float*,6>;
35
36static const float PI = 3.141592654;
37static const float epsilon = 0.000001;
38static const float center_level = 0.5F*sqrtf(0.5F);
39
40template <class T>
41T sqr(T x) { return x*x; }
42
43// private implementation of the surround decoder
45public:
46 // create an instance of the decoder
47 // blocksize is fixed over the lifetime of this object for performance reasons
48 explicit Impl(unsigned blocksize=8192)
49 : m_n(blocksize),
50 m_halfN(blocksize/2),
51 // create lavc fft buffers
52 m_dftL((AVComplexFloat*)av_malloc(sizeof(AVComplexFloat) * m_n * 2)),
53 m_dftR((AVComplexFloat*)av_malloc(sizeof(AVComplexFloat) * m_n * 2)),
54 m_src ((AVComplexFloat*)av_malloc(sizeof(AVComplexFloat) * m_n * 2))
55 {
56 // If av_tx_init() fails (returns < 0), the contexts will be nullptr and will crash later,
57 // but av_malloc() is not checked to succeed either.
58 av_tx_init(&m_fftContext , &m_fft , AV_TX_FLOAT_FFT, 0, m_n, &kScale, AV_TX_INPLACE);
59 av_tx_init(&m_ifftContext, &m_ifft, AV_TX_FLOAT_FFT, 1, m_n, &kScale, AV_TX_INPLACE);
60 // resize our own buffers
61 m_frontR.resize(m_n);
62 m_frontL.resize(m_n);
63 m_avg.resize(m_n);
64 m_surR.resize(m_n);
65 m_surL.resize(m_n);
66 m_trueavg.resize(m_n);
67 m_xFs.resize(m_n);
68 m_yFs.resize(m_n);
69 m_inbuf[0].resize(m_n);
70 m_inbuf[1].resize(m_n);
71 for (unsigned c=0;c<6;c++) {
72 m_outbuf[c].resize(m_n);
73 m_filter[c].resize(m_n);
74 }
75 sample_rate(48000);
76 // generate the window function (square root of hann, b/c it is applied before and after the transform)
77 m_wnd.resize(m_n);
78 for (unsigned k=0;k<m_n;k++)
79 m_wnd[k] = std::sqrt(0.5F*(1-std::cos(2*PI*k/m_n))/m_n);
80 m_currentBuf = 0;
81 m_inbufs.fill(nullptr);
82 m_outbufs.fill(nullptr);
83 // set the default coefficients
84 surround_coefficients(0.8165,0.5774);
85 phase_mode(0);
86 separation(1,1);
87 steering_mode(true);
88 }
89
90 // destructor
92 av_tx_uninit(&m_fftContext);
93 av_tx_uninit(&m_ifftContext);
94 av_free(m_src);
95 av_free(m_dftR);
96 av_free(m_dftL);
97 }
98
99 float ** getInputBuffers()
100 {
103 return m_inbufs.data();
104 }
105
107 {
114 return m_outbufs.data();
115 }
116
117 // decode a chunk of stereo sound, has to contain exactly blocksize samples
118 // center_width [0..1] distributes the center information towards the front left/right channels, 1=full distribution, 0=no distribution
119 // dimension [0..1] moves the soundfield backwards, 0=front, 1=side
120 // adaption_rate [0..1] determines how fast the steering gets adapted, 1=instantaneous, 0.1 = very slow adaption
121 void decode(float center_width, float dimension, float adaption_rate) {
122 // process first part
123 int index = m_currentBuf*m_halfN;
124 InputBufs in_second {&m_inbuf[0][index],&m_inbuf[1][index]};
125 m_currentBuf ^= 1;
126 index = m_currentBuf*m_halfN;
127 InputBufs in_first {&m_inbuf[0][index],&m_inbuf[1][index]};
128 add_output(in_first,in_second,center_width,dimension,adaption_rate,true);
129 // shift last half of input buffer to the beginning
130 }
131
132 // flush the internal buffers
133 void flush() {
134 for (unsigned k=0;k<m_n;k++) {
135 for (auto & c : m_outbuf)
136 c[k] = 0;
137 m_inbuf[0][k] = 0;
138 m_inbuf[1][k] = 0;
139 }
140 }
141
142 // set lfe filter params
143 void sample_rate(unsigned int srate) {
144 // lfe filter is just straight through band limited
145 unsigned int cutoff = (30*m_n)/srate;
146 for (unsigned f=0;f<=m_halfN;f++) {
147 if (f<cutoff)
148 m_filter[5][f] = 0.5*sqrt(0.5);
149 else
150 m_filter[5][f] = 0.0;
151 }
152 }
153
154 // set the assumed surround mixing coefficients
155 void surround_coefficients(float a, float b) {
156 // calc the simple coefficients
157 m_surroundHigh = a;
158 m_surroundLow = b;
159 m_surroundBalance = (a-b)/(a+b);
160 m_surroundLevel = 1/(a+b);
161 // calc the linear coefficients
162 cfloat i(0,1);
163 cfloat u((a+b)*i);
164 cfloat v((b-a)*i);
165 cfloat n(0.25,0);
166 cfloat o(1,0);
167 m_a = (v-o)*n; m_b = (o-u)*n; m_c = (-o-v)*n; m_d = (o+u)*n;
168 m_e = (o+v)*n; m_f = (o+u)*n; m_g = (o-v)*n; m_h = (o-u)*n;
169 }
170
171 // set the phase shifting mode
172 void phase_mode(unsigned mode) {
173 const std::array<std::array<float,2>,4> modes {{ {0,0}, {0,PI}, {PI,0}, {-PI/2,PI/2} }};
174 m_phaseOffsetL = modes[mode][0];
175 m_phaseOffsetR = modes[mode][1];
176 }
177
178 // what steering mode should be chosen
179 void steering_mode(bool mode) { m_linearSteering = mode; }
180
181 // set front & rear separation controls
182 void separation(float front, float rear) {
183 m_frontSeparation = front;
184 m_rearSeparation = rear;
185 }
186
187private:
188 // polar <-> cartesian coodinates conversion
189 static cfloat polar(float a, float p) { return {a*std::cos(p), a*std::sin(p)}; }
190 static float amplitude(AVComplexFloat z) { return std::hypot(z.re, z.im); }
191 static float phase(AVComplexFloat z) { return std::atan2(z.im, z.re); }
192
194 static float clamp_unit_mag(float x) { return std::clamp(x, -1.0F, 1.0F); }
195
196 // handle the output buffering for overlapped calls of block_decode
197 void add_output(InputBufs input1, InputBufs input2, float center_width, float dimension, float adaption_rate, bool /*result*/=false) {
198 // add the windowed data to the last 1/2 of the output buffer
199 OutputBufs out {m_outbuf[0].data(),m_outbuf[1].data(),m_outbuf[2].data(),m_outbuf[3].data(),m_outbuf[4].data(),m_outbuf[5].data()};
200 block_decode(input1,input2,out,center_width,dimension,adaption_rate);
201 }
202
203 // CORE FUNCTION: decode a block of data
204 void block_decode(InputBufs input1, InputBufs input2, OutputBufs output, float center_width, float dimension, float adaption_rate) {
205 // 1. scale the input by the window function; this serves a dual purpose:
206 // - first it improves the FFT resolution b/c boundary discontinuities (and their frequencies) get removed
207 // - second it allows for smooth blending of varying filters between the blocks
208
209 // the window also includes 1.0/sqrt(n) normalization
210 // concatenate copies of input1 and input2 for some undetermined reason
211 // input1 is in the rising half of the window
212 // input2 is in the falling half of the window
213 for (unsigned k = 0; k < m_halfN; k++)
214 {
215 m_dftL[k] = (AVComplexFloat){ .re = input1[0][k] * m_wnd[k], .im = 0 };
216 m_dftR[k] = (AVComplexFloat){ .re = input1[1][k] * m_wnd[k], .im = 0 };
217
218 m_dftL[k + m_halfN] = (AVComplexFloat){ .re = input2[0][k] * m_wnd[k + m_halfN], .im = 0 };
219 m_dftR[k + m_halfN] = (AVComplexFloat){ .re = input2[1][k] * m_wnd[k + m_halfN], .im = 0 };
220 }
221
222 // ... and tranform it into the frequency domain
223 m_fft(m_fftContext, m_dftL, m_dftL, sizeof(AVComplexFloat));
224 m_fft(m_fftContext, m_dftR, m_dftR, sizeof(AVComplexFloat));
225
226 // 2. compare amplitude and phase of each DFT bin and produce the X/Y coordinates in the sound field
227 // but dont do DC or N/2 component
228 for (unsigned f=0;f<m_halfN;f++) {
229 // get left/right amplitudes/phases
230 float ampL = amplitude(m_dftL[f]);
231 float ampR = amplitude(m_dftR[f]);
232 float phaseL = phase(m_dftL[f]);
233 float phaseR = phase(m_dftR[f]);
234// if (ampL+ampR < epsilon)
235// continue;
236
237 // calculate the amplitude/phase difference
238 float ampDiff = clamp_unit_mag((ampL+ampR < epsilon) ? 0 : (ampR-ampL) / (ampR+ampL));
239 float phaseDiff = phaseL - phaseR;
240 if (phaseDiff < -PI) phaseDiff += 2*PI;
241 if (phaseDiff > PI) phaseDiff -= 2*PI;
242 phaseDiff = std::abs(phaseDiff);
243
244 if (m_linearSteering) {
245 // --- this is the fancy new linear mode ---
246
247 // get sound field x/y position
248 m_yFs[f] = get_yfs(ampDiff,phaseDiff);
249 m_xFs[f] = get_xfs(ampDiff,m_yFs[f]);
250
251 // add dimension control
252 m_yFs[f] = clamp_unit_mag(m_yFs[f] - dimension);
253
254 // add crossfeed control
255 m_xFs[f] = clamp_unit_mag(m_xFs[f] * (m_frontSeparation*(1+m_yFs[f])/2 + m_rearSeparation*(1-m_yFs[f])/2));
256
257 // 3. generate frequency filters for each output channel
258 float left = (1-m_xFs[f])/2;
259 float right = (1+m_xFs[f])/2;
260 float front = (1+m_yFs[f])/2;
261 float back = (1-m_yFs[f])/2;
262 std::array<float, 5> volume
263 {
264 front * (left * center_width + std::max(0.0F, -m_xFs[f]) * (1.0F - center_width) ), // left
265 front * center_level * ( (1.0F - std::abs(m_xFs[f])) * (1.0F - center_width) ), // center
266 front * (right * center_width + std::max(0.0F, m_xFs[f]) * (1.0F - center_width) ), // right
267 back * m_surroundLevel * left, // left surround
268 back * m_surroundLevel * right // right surround
269 };
270
271 // adapt the prior filter
272 for (unsigned c=0;c<5;c++)
273 m_filter[c][f] = ((1-adaption_rate)*m_filter[c][f]) + (adaption_rate*volume[c]);
274
275 } else {
276 // --- this is the old & simple steering mode ---
277
278 // determine sound field x-position
279 m_xFs[f] = ampDiff;
280
281 // determine preliminary sound field y-position from phase difference
282 m_yFs[f] = 1 - ((phaseDiff/PI)*2);
283
284 if (std::abs(m_xFs[f]) > m_surroundBalance) {
285 // blend linearly between the surrounds and the fronts if the balance exceeds the surround encoding balance
286 // this is necessary because the sound field is trapezoidal and will be stretched behind the listener
287 float frontness = (std::abs(m_xFs[f]) - m_surroundBalance)/(1-m_surroundBalance);
288 m_yFs[f] = ((1-frontness) * m_yFs[f]) + (frontness * 1);
289 }
290
291 // add dimension control
292 m_yFs[f] = clamp_unit_mag(m_yFs[f] - dimension);
293
294 // add crossfeed control
295 m_xFs[f] = clamp_unit_mag(m_xFs[f] * (m_frontSeparation*(1+m_yFs[f])/2 + m_rearSeparation*(1-m_yFs[f])/2));
296
297 // 3. generate frequency filters for each output channel, according to the signal position
298 // the sum of all channel volumes must be 1.0
299 float left = (1-m_xFs[f])/2;
300 float right = (1+m_xFs[f])/2;
301 float front = (1+m_yFs[f])/2;
302 float back = (1-m_yFs[f])/2;
303 std::array<float, 5> volume
304 {
305 front * (left * center_width + std::max(0.0F, -m_xFs[f]) * (1.0F-center_width)), // left
306 front * center_level * ( (1.0F - std::abs(m_xFs[f])) * (1.0F - center_width) ), // center
307 front * (right * center_width + std::max(0.0F, m_xFs[f]) * (1.0F-center_width)), // right
308 back * m_surroundLevel * std::clamp( (1.0F - (m_xFs[f] / m_surroundBalance) ) / 2.0F, 0.0F, 1.0F),// left surround
309 back * m_surroundLevel * std::clamp( (1.0F + (m_xFs[f] / m_surroundBalance) ) / 2.0F, 0.0F, 1.0F) // right surround
310 };
311
312 // adapt the prior filter
313 for (unsigned c=0;c<5;c++)
314 m_filter[c][f] = ((1-adaption_rate)*m_filter[c][f]) + (adaption_rate*volume[c]);
315 }
316
317 // ... and build the signal which we want to position
318 m_frontL[f] = polar(ampL+ampR,phaseL);
319 m_frontR[f] = polar(ampL+ampR,phaseR);
320 m_avg[f] = m_frontL[f] + m_frontR[f];
321 m_surL[f] = polar(ampL+ampR,phaseL+m_phaseOffsetL);
322 m_surR[f] = polar(ampL+ampR,phaseR+m_phaseOffsetR);
323 m_trueavg[f] = cfloat(m_dftL[f].re + m_dftR[f].re, m_dftL[f].im + m_dftR[f].im);
324 }
325
326 // 4. distribute the unfiltered reference signals over the channels
327 apply_filter((m_frontL).data(), m_filter[0].data(),&output[0][0]); // front left
328 apply_filter((m_avg).data(), m_filter[1].data(),&output[1][0]); // front center
329 apply_filter((m_frontR).data(), m_filter[2].data(),&output[2][0]); // front right
330 apply_filter((m_surL).data(), m_filter[3].data(),&output[3][0]); // surround left
331 apply_filter((m_surR).data(), m_filter[4].data(),&output[4][0]); // surround right
332 apply_filter((m_trueavg).data(),m_filter[5].data(),&output[5][0]); // lfe
333 }
334
335#define FASTER_CALC
336 // map from amplitude difference and phase difference to yfs
337 static double get_yfs(double ampDiff, double phaseDiff) {
338 double x = 1-(((1-sqr(ampDiff))*phaseDiff)/M_PI*2);
339#ifdef FASTER_CALC
340 double tanX = tan(x);
341 return 0.16468622925824683 + (0.5009268347818189*x) - (0.06462757726992101*x*x)
342 + (0.09170680403453149*x*x*x) + (0.2617754892323973*tanX) - (0.04180413533856156*sqr(tanX));
343#else
344 return 0.16468622925824683 + (0.5009268347818189*x) - (0.06462757726992101*x*x)
345 + (0.09170680403453149*x*x*x) + (0.2617754892323973*tan(x)) - (0.04180413533856156*sqr(tan(x)));
346#endif
347 }
348
349 // map from amplitude difference and yfs to xfs
350 static double get_xfs(double ampDiff, double yfs) {
351 double x=ampDiff;
352 double y=yfs;
353#ifdef FASTER_CALC
354 double tanX = tan(x);
355 double tanY = tan(y);
356 double asinX = asin(x);
357 double sinX = sin(x);
358 double sinY = sin(y);
359 double x3 = x*x*x;
360 double y2 = y*y;
361 double y3 = y*y2;
362 return (2.464833559224702*x) - (423.52131153259404*x*y) +
363 (67.8557858606918*x3*y) + (788.2429425544392*x*y2) -
364 (79.97650354902909*x3*y2) - (513.8966153850349*x*y3) +
365 (35.68117670186306*x3*y3) + (13867.406173420834*y*asinX) -
366 (2075.8237075786396*y2*asinX) - (908.2722068360281*y3*asinX) -
367 (12934.654772878019*asinX*sinY) - (13216.736529661162*y*tanX) +
368 (1288.6463247741938*y2*tanX) + (1384.372969378453*y3*tanX) +
369 (12699.231471126128*sinY*tanX) + (95.37131275594336*sinX*tanY) -
370 (91.21223198407546*tanX*tanY);
371#else
372 return (2.464833559224702*x) - (423.52131153259404*x*y) +
373 (67.8557858606918*x*x*x*y) + (788.2429425544392*x*y*y) -
374 (79.97650354902909*x*x*x*y*y) - (513.8966153850349*x*y*y*y) +
375 (35.68117670186306*x*x*x*y*y*y) + (13867.406173420834*y*asin(x)) -
376 (2075.8237075786396*y*y*asin(x)) - (908.2722068360281*y*y*y*asin(x)) -
377 (12934.654772878019*asin(x)*sin(y)) - (13216.736529661162*y*tan(x)) +
378 (1288.6463247741938*y*y*tan(x)) + (1384.372969378453*y*y*y*tan(x)) +
379 (12699.231471126128*sin(y)*tan(x)) + (95.37131275594336*sin(x)*tan(y)) -
380 (91.21223198407546*tan(x)*tan(y));
381#endif
382 }
383
393 void apply_filter(cfloat *signal, const float *flt, float *target) {
394 // filter the signal
395 for (unsigned f = 0; f <= m_halfN; f++)
396 {
397 m_src[f].re = signal[f].real() * flt[f];
398 m_src[f].im = signal[f].imag() * flt[f];
399 }
400 // enforce odd symmetry
401 for (unsigned f = 1; f < m_halfN; f++)
402 {
403 m_src[m_n - f].re = m_src[f].re;
404 m_src[m_n - f].im = -m_src[f].im; // complex conjugate
405 }
406 m_ifft(m_ifftContext, m_src, m_src, sizeof(AVComplexFloat));
407
408 // add the result to target, windowed
409 for (unsigned int k = 0; k < m_halfN; k++)
410 {
411 // 1st part is overlap add
412 target[(m_currentBuf * m_halfN) + k] += m_src[k].re * m_wnd[k];
413 // 2nd part is set as has no history
414 target[((m_currentBuf ^ 1) * m_halfN) + k] = m_src[m_halfN + k].re * m_wnd[m_halfN + k];
415 }
416 }
417
418 size_t m_n; // the block size
419 size_t m_halfN; // half block size precalculated
420 static constexpr float kScale {1.0F};
421 AVTXContext *m_fftContext {nullptr};
422 av_tx_fn m_fft {nullptr};
423 AVTXContext *m_ifftContext {nullptr};
424 av_tx_fn m_ifft {nullptr};
425 // FFTs are computed in-place in these buffers on copies of the input
426 AVComplexFloat *m_dftL {nullptr};
427 AVComplexFloat *m_dftR {nullptr};
428 AVComplexFloat *m_src {nullptr};
429 // buffers
430 std::vector<cfloat> m_frontL,m_frontR,m_avg,m_surL,m_surR; // the signal (phase-corrected) in the frequency domain
431 std::vector<cfloat> m_trueavg; // for lfe generation
432 std::vector<float> m_xFs,m_yFs; // the feature space positions for each frequency bin
433 std::vector<float> m_wnd; // the window function, precalculated
434 std::array<std::vector<float>,6> m_filter; // a frequency filter for each output channel
435 std::array<std::vector<float>,2> m_inbuf; // the sliding input buffers
436 std::array<std::vector<float>,6> m_outbuf; // the sliding output buffers
437 // coefficients
438 float m_surroundHigh {0.0F}; // high surround mixing coefficient (e.g. 0.8165/0.5774)
439 float m_surroundLow {0.0F}; // low surround mixing coefficient (e.g. 0.8165/0.5774)
440 float m_surroundBalance {0.0F}; // the xfs balance that follows from the coeffs
441 float m_surroundLevel {0.0F}; // gain for the surround channels (follows from the coeffs
442 float m_phaseOffsetL {0.0F}; // phase shifts to be applied to the rear channels
443 float m_phaseOffsetR {0.0F}; // phase shifts to be applied to the rear channels
444 float m_frontSeparation {0.0F}; // front stereo separation
445 float m_rearSeparation {0.0F}; // rear stereo separation
446 bool m_linearSteering {false}; // whether the steering should be linear or not
447 cfloat m_a,m_b,m_c,m_d,m_e,m_f,m_g,m_h; // coefficients for the linear steering
448 int m_currentBuf; // specifies which buffer is 2nd half of input sliding buffer
449 InputBufs m_inbufs {}; // for passing back to driver
450 OutputBufs m_outbufs {}; // for passing back to driver
451};
452
453
454// implementation of the shell class
455
456fsurround_decoder::fsurround_decoder(unsigned blocksize): m_impl(new Impl(blocksize)) { }
457
459
460void fsurround_decoder::decode(float center_width, float dimension, float adaption_rate) {
461 m_impl->decode(center_width,dimension,adaption_rate);
462}
463
465
467
468void fsurround_decoder::phase_mode(unsigned mode) { m_impl->phase_mode(mode); }
469
471
472void fsurround_decoder::separation(float front, float rear) { m_impl->separation(front,rear); }
473
475{
476 return m_impl->getInputBuffers();
477}
478
480{
481 return m_impl->getOutputBuffers();
482}
483
484void fsurround_decoder::sample_rate(unsigned int samplerate)
485{
486 m_impl->sample_rate(samplerate);
487}
std::vector< cfloat > m_surL
Impl(unsigned blocksize=8192)
void surround_coefficients(float a, float b)
std::vector< cfloat > m_frontR
static float amplitude(AVComplexFloat z)
static float clamp_unit_mag(float x)
Clamp the input to the interval [-1, 1], i.e. clamp the magnitude to the unit interval [0,...
void sample_rate(unsigned int srate)
std::array< std::vector< float >, 6 > m_filter
std::vector< cfloat > m_trueavg
void phase_mode(unsigned mode)
void block_decode(InputBufs input1, InputBufs input2, OutputBufs output, float center_width, float dimension, float adaption_rate)
static double get_yfs(double ampDiff, double phaseDiff)
void add_output(InputBufs input1, InputBufs input2, float center_width, float dimension, float adaption_rate, bool=false)
std::array< std::vector< float >, 2 > m_inbuf
std::array< std::vector< float >, 6 > m_outbuf
static float phase(AVComplexFloat z)
static constexpr float kScale
void decode(float center_width, float dimension, float adaption_rate)
static double get_xfs(double ampDiff, double yfs)
void apply_filter(cfloat *signal, const float *flt, float *target)
Filter the complex source signal in the frequency domain and add it to the time domain target signal.
void separation(float front, float rear)
std::vector< cfloat > m_surR
std::vector< cfloat > m_avg
std::vector< cfloat > m_frontL
static cfloat polar(float a, float p)
AVComplexFloat * m_src
Used only in apply_filter.
void steering_mode(bool mode)
fsurround_decoder(unsigned blocksize=8192)
void decode(float center_width=1, float dimension=0, float adaption_rate=1)
void separation(float front, float rear)
void surround_coefficients(float a, float b)
void sample_rate(unsigned int samplerate)
void phase_mode(unsigned mode)
static const float center_level
T sqr(T x)
static const float PI
std::array< float *, 6 > OutputBufs
std::array< float *, 2 > InputBufs
std::complex< float > cfloat
static const float epsilon
static guint32 * back
Definition: goom_core.cpp:25
static constexpr double M_PI
Definition: goom_tools.h:9
static int x3
Definition: mythsocket.cpp:52
static eu8 clamp(eu8 value, eu8 low, eu8 high)
Definition: pxsup2dast.c:206
#define output