alc/effects/reverb.cpp

   1 /**
   2  * Ambisonic reverb engine for the OpenAL cross platform audio library
   3  * Copyright (C) 2008-2017 by Chris Robinson and Christopher Fitzgerald.
   4  * This library is free software; you can redistribute it and/or
   5  *  modify it under the terms of the GNU Library General Public
   6  *  License as published by the Free Software Foundation; either
   7  *  version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  *  Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public
  15  *  License along with this library; if not, write to the
  16  *  Free Software Foundation, Inc.,
  17  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  * Or go to http://www.gnu.org/copyleft/lgpl.html
  19  */
  20
  21 #include "config.h"
  22
  23 #include <algorithm>
  24 #include <array>
  25 #include <cstdio>
  26 #include <functional>
  27 #include <iterator>
  28 #include <numeric>
  29 #include <stdint.h>
  30
  31 #include "alc/effects/base.h"
  32 #include "almalloc.h"
  33 #include "alnumbers.h"
  34 #include "alnumeric.h"
  35 #include "alspan.h"
  36 #include "core/ambidefs.h"
  37 #include "core/bufferline.h"
  38 #include "core/context.h"
  39 #include "core/devformat.h"
  40 #include "core/device.h"
  41 #include "core/effectslot.h"
  42 #include "core/filters/biquad.h"
  43 #include "core/filters/splitter.h"
  44 #include "core/mixer.h"
  45 #include "core/mixer/defs.h"
  46 #include "intrusive_ptr.h"
  47 #include "opthelpers.h"
  48 #include "vecmat.h"
  49 #include "vector.h"
  50
/* This is a user config option for modifying the overall output of the reverb
 * effect. It is defined ahead of the anonymous namespace below, so unlike the
 * rest of this file's helpers it has external linkage. The default is 1.0.
 */
float ReverbBoost = 1.0f;
  55
  56 namespace {
  57
/* File-local shorthand for unsigned int. */
using uint = unsigned int;

/* The largest modulation time accounted for when sizing the late delay line
 * (see allocLines), and the default modulation time.
 */
constexpr float MaxModulationTime{4.0f};
constexpr float DefaultModulationTime{0.25f};

/* Fixed-point layout for the modulator index: MOD_FRACBITS bits, with
 * MOD_FRACONE being one whole unit of the normalized range and MOD_FRACMASK
 * masking the bits below it.
 */
#define MOD_FRACBITS 24
#define MOD_FRACONE  (1<<MOD_FRACBITS)
#define MOD_FRACMASK (MOD_FRACONE-1)
  66
  67
  68 struct CubicFilter {
  69     static constexpr size_t sTableBits{8};
  70     static constexpr size_t sTableSteps{1 << sTableBits};
  71     static constexpr size_t sTableMask{sTableSteps - 1};
  72
  73     float mFilter[sTableSteps*2 + 1]{};
  74
  75     constexpr CubicFilter()
  76     {
  77         /* This creates a lookup table for a cubic spline filter, with 256
  78          * steps between samples. Only half the coefficients are needed, since
  79          * Coeff2 is just Coeff1 in reverse and Coeff3 is just Coeff0 in
  80          * reverse.
  81          */
  82         for(size_t i{0};i < sTableSteps;++i)
  83         {
  84             const double mu{static_cast<double>(i) / double{sTableSteps}};
  85             const double mu2{mu*mu}, mu3{mu2*mu};
  86             const double a0{-0.5*mu3 +      mu2 + -0.5*mu};
  87             const double a1{ 1.5*mu3 + -2.5*mu2           + 1.0f};
  88             mFilter[i] = static_cast<float>(a1);
  89             mFilter[sTableSteps+i] = static_cast<float>(a0);
  90         }
  91     }
  92
  93     constexpr float getCoeff0(size_t i) const noexcept { return mFilter[sTableSteps+i]; }
  94     constexpr float getCoeff1(size_t i) const noexcept { return mFilter[i]; }
  95     constexpr float getCoeff2(size_t i) const noexcept { return mFilter[sTableSteps-i]; }
  96     constexpr float getCoeff3(size_t i) const noexcept { return mFilter[sTableSteps*2-i]; }
  97 };
  98 constexpr CubicFilter gCubicTable;
  99
 100
 101 using namespace std::placeholders;
 102
/* Max samples per process iteration. Used to limit the size needed for
 * temporary buffers. Must be a multiple of 4 for SIMD alignment.
 */
constexpr size_t MAX_UPDATE_SAMPLES{256};

/* The number of spatialized lines or channels to process. Four channels allows
 * for a 3D A-Format response. NOTE: This can't be changed without taking care
 * of the conversion matrices, and a few places where the length arrays are
 * assumed to have 4 elements.
 */
constexpr size_t NUM_LINES{4u};


/* This coefficient is used to define the maximum frequency range controlled by
 * the modulation depth. The current value of 0.05 will allow it to swing from
 * 0.95x to 1.05x. This value must be below 1. At 1 it will cause the sampler
 * to stall on the downswing, and above 1 it will cause it to sample backwards.
 * The value 0.05 seems to be nearest to Creative hardware behavior.
 */
constexpr float MODULATION_DEPTH_COEFF{0.05f};
 123
 124
/* The B-Format to A-Format conversion matrix. The arrangement of rows is
 * deliberately chosen to align the resulting lines to their spatial opposites
 * (0:above front left <-> 3:above back right, 1:below front right <-> 2:below
 * back left). It's not quite opposite, since the A-Format results in a
 * tetrahedron, but it's close enough. Should the model be extended to 8-lines
 * in the future, true opposites can be used.
 */
alignas(16) constexpr float B2A[NUM_LINES][NUM_LINES]{
    { 0.5f,  0.5f,  0.5f,  0.5f },
    { 0.5f, -0.5f, -0.5f,  0.5f },
    { 0.5f,  0.5f, -0.5f, -0.5f },
    { 0.5f, -0.5f,  0.5f, -0.5f }
};

/* Converts A-Format to B-Format for early reflections. Each row holds the
 * gains applied to the four A-Format lines to form one B-Format channel (see
 * the row mixing in ReverbState::MixOutAmbiUp).
 */
alignas(16) constexpr std::array<std::array<float,NUM_LINES>,NUM_LINES> EarlyA2B{{
    {{ 0.5f,  0.5f,  0.5f,  0.5f }},
    {{ 0.5f, -0.5f,  0.5f, -0.5f }},
    {{ 0.5f, -0.5f, -0.5f,  0.5f }},
    {{ 0.5f,  0.5f, -0.5f, -0.5f }}
}};

/* Converts A-Format to B-Format for late reverb. InvSqrt2 is 1/sqrt(2),
 * computed in double precision before being narrowed to float.
 */
constexpr auto InvSqrt2 = static_cast<float>(1.0/al::numbers::sqrt2);
alignas(16) constexpr std::array<std::array<float,NUM_LINES>,NUM_LINES> LateA2B{{
    {{ 0.5f,  0.5f,  0.5f,  0.5f }},
    {{ InvSqrt2, -InvSqrt2,  0.0f,  0.0f }},
    {{ 0.0f,  0.0f,  InvSqrt2, -InvSqrt2 }},
    {{ 0.5f,  0.5f, -0.5f, -0.5f }}
}};
 155
/* The all-pass and delay lines have a variable length dependent on the
 * effect's density parameter, which helps alter the perceived environment
 * size. The size-to-density conversion is a cubed scale:
 *
 * density = min(1.0, pow(size, 3.0) / DENSITY_SCALE);
 *
 * The line lengths scale linearly with room size, so the inverse density
 * conversion is needed, taking the cube root of the re-scaled density to
 * calculate the line length multiplier:
 *
 *     length_mult = max(5.0, cbrt(density*DENSITY_SCALE));
 *
 * The density scale below will result in a max line multiplier of 50, for an
 * effective size range of 5m to 50m. The multiplier formula is implemented by
 * CalcDelayLengthMult.
 */
constexpr float DENSITY_SCALE{125000.0f};
 172
/* All delay line lengths are specified in seconds.
 *
 * To approximate early reflections, we break them up into primary (those
 * arriving from the same direction as the source) and secondary (those
 * arriving from the opposite direction).
 *
 * The early taps decorrelate the 4-channel signal to approximate an average
 * room response for the primary reflections after the initial early delay.
 *
 * Given an average room dimension (d_a) and the speed of sound (c) we can
 * calculate the average reflection delay (r_a) regardless of listener and
 * source positions as:
 *
 *     r_a = d_a / c
 *     c   = 343.3
 *
 * This can be extended to finding the average difference (r_d) between the
 * maximum (r_1) and minimum (r_0) reflection delays:
 *
 *     r_0 = 2 / 3 r_a
 *         = r_a - r_d / 2
 *         = r_d
 *     r_1 = 4 / 3 r_a
 *         = r_a + r_d / 2
 *         = 2 r_d
 *     r_d = 2 / 3 r_a
 *         = r_1 - r_0
 *
 * As can be determined by integrating the 1D model with a source (s) and
 * listener (l) positioned across the dimension of length (d_a):
 *
 *     r_d = int_(l=0)^d_a (int_(s=0)^d_a |2 d_a - 2 (l + s)| ds) dl / c
 *
 * The initial taps (T_(i=0)^N) are then specified by taking a power series
 * that ranges between r_0 and half of r_1 less r_0:
 *
 *     R_i = 2^(i / (2 N - 1)) r_d
 *         = r_0 + (2^(i / (2 N - 1)) - 1) r_d
 *         = r_0 + T_i
 *     T_i = R_i - r_0
 *         = (2^(i / (2 N - 1)) - 1) r_d
 *
 * Assuming an average of 1m, we get the following taps:
 */
constexpr std::array<float,NUM_LINES> EARLY_TAP_LENGTHS{{
    0.0000000e+0f, 2.0213520e-4f, 4.2531060e-4f, 6.7171600e-4f
}};
 220
/* The early all-pass filter lengths are based on the early tap lengths:
 *
 *     A_i = R_i / a
 *
 * Where a is the approximate maximum all-pass cycle limit (20).
 *
 * Like the other length tables, these are in seconds for the 1m average room;
 * allocLines scales the last (largest) entry by the maximum length multiplier
 * to size the early all-pass delay line.
 */
constexpr std::array<float,NUM_LINES> EARLY_ALLPASS_LENGTHS{{
    9.7096800e-5f, 1.0720356e-4f, 1.1836234e-4f, 1.3068260e-4f
}};
 230
/* The early delay lines are used to transform the primary reflections into
 * the secondary reflections.  The A-format is arranged in such a way that
 * the channels/lines are spatially opposite:
 *
 *     C_i is opposite C_(N-i-1)
 *
 * The delays of the two opposing reflections (R_i and O_i) from a source
 * anywhere along a particular dimension always sum to twice its full delay:
 *
 *     2 r_a = R_i + O_i
 *
 * With that in mind we can determine the delay between the two reflections
 * and thus specify our early line lengths (L_(i=0)^N) using:
 *
 *     O_i = 2 r_a - R_(N-i-1)
 *     L_i = O_i - R_(N-i-1)
 *         = 2 (r_a - R_(N-i-1))
 *         = 2 (r_a - T_(N-i-1) - r_0)
 *         = 2 r_a (1 - (2 / 3) 2^((N - i - 1) / (2 N - 1)))
 *
 * Using an average dimension of 1m, we get (in seconds, shortest first):
 */
constexpr std::array<float,NUM_LINES> EARLY_LINE_LENGTHS{{
    5.9850400e-4f, 1.0913150e-3f, 1.5376658e-3f, 1.9419362e-3f
}};
 256
/* The late all-pass filter lengths are based on the late line lengths:
 *
 *     A_i = (5 / 3) L_i / r_1
 *
 * The values are in seconds for the 1m average room. allocLines scales the
 * last (largest) entry by the maximum length multiplier to size the late
 * all-pass delay line.
 */
constexpr std::array<float,NUM_LINES> LATE_ALLPASS_LENGTHS{{
    1.6182800e-4f, 2.0389060e-4f, 2.8159360e-4f, 3.2365600e-4f
}};
 264
/* The late lines are used to approximate the decaying cycle of recursive
 * late reflections.
 *
 * Splitting the lines in half, we start with the shortest reflection paths
 * (L_(i=0)^(N/2)):
 *
 *     L_i = 2^(i / (N - 1)) r_d
 *
 * Then for the opposite (longest) reflection paths (L_(i=N/2)^N):
 *
 *     L_i = 2 r_a - L_(i-N/2)
 *         = 2 r_a - 2^((i - N / 2) / (N - 1)) r_d
 *
 * For our 1m average room, we get:
 *
 * NOTE(review): with c = 343.3 the last two values below work out to
 * L_2 = 2 r_a - L_1 and L_3 = 2 r_a - L_0, i.e. L_i = 2 r_a - L_(N-1-i), which
 * keeps the table in ascending order but differs from the index mapping
 * written above. Confirm which form is intended before regenerating it.
 */
constexpr std::array<float,NUM_LINES> LATE_LINE_LENGTHS{{
    1.9419362e-3f, 2.4466860e-3f, 3.3791220e-3f, 3.8838720e-3f
}};
 283
 284
/* One line's worth of temporary samples for a single process iteration. */
using ReverbUpdateLine = std::array<float,MAX_UPDATE_SAMPLES>;
 286
 287 struct DelayLineI {
 288     /* The delay lines use interleaved samples, with the lengths being powers
 289      * of 2 to allow the use of bit-masking instead of a modulus for wrapping.
 290      */
 291     size_t Mask{0u};
 292     union {
 293         uintptr_t LineOffset{0u};
 294         std::array<float,NUM_LINES> *Line;
 295     };
 296
 297     /* Given the allocated sample buffer, this function updates each delay line
 298      * offset.
 299      */
 300     void realizeLineOffset(std::array<float,NUM_LINES> *sampleBuffer) noexcept
 301     { Line = sampleBuffer + LineOffset; }
 302
 303     /* Calculate the length of a delay line and store its mask and offset. */
 304     uint calcLineLength(const float length, const uintptr_t offset, const float frequency,
 305         const uint extra)
 306     {
 307         /* All line lengths are powers of 2, calculated from their lengths in
 308          * seconds, rounded up.
 309          */
 310         uint samples{float2uint(std::ceil(length*frequency))};
 311         samples = NextPowerOf2(samples + extra);
 312
 313         /* All lines share a single sample buffer. */
 314         Mask = samples - 1;
 315         LineOffset = offset;
 316
 317         /* Return the sample count for accumulation. */
 318         return samples;
 319     }
 320
 321     void write(size_t offset, const size_t c, const float *RESTRICT in, const size_t count) const noexcept
 322     {
 323         ASSUME(count > 0);
 324         for(size_t i{0u};i < count;)
 325         {
 326             offset &= Mask;
 327             size_t td{minz(Mask+1 - offset, count - i)};
 328             do {
 329                 Line[offset++][c] = in[i++];
 330             } while(--td);
 331         }
 332     }
 333 };
 334
/* State for a vector all-pass filter (the early and late stages each own
 * one): its delay line, a coefficient, and one offset per line. The process
 * method is defined out of line.
 */
struct VecAllpass {
    DelayLineI Delay;
    float Coeff{0.0f};
    size_t Offset[NUM_LINES]{};

    void process(const al::span<ReverbUpdateLine,NUM_LINES> samples, size_t offset,
        const float xCoeff, const float yCoeff, const size_t todo);
};
 343
 344 struct T60Filter {
 345     /* Two filters are used to adjust the signal. One to control the low
 346      * frequencies, and one to control the high frequencies.
 347      */
 348     float MidGain{0.0f};
 349     BiquadFilter HFFilter, LFFilter;
 350
 351     void calcCoeffs(const float length, const float lfDecayTime, const float mfDecayTime,
 352         const float hfDecayTime, const float lf0norm, const float hf0norm);
 353
 354     /* Applies the two T60 damping filter sections. */
 355     void process(const al::span<float> samples)
 356     { DualBiquad{HFFilter, LFFilter}.process(samples, samples.data()); }
 357
 358     void clear() noexcept { HFFilter.clear(); LFFilter.clear(); }
 359 };
 360
/* State for the early reflections stage of a reverb pipeline. */
struct EarlyReflections {
    /* A Gerzon vector all-pass filter is used to simulate initial diffusion.
     * The spread from this filter also helps smooth out the reverb tail.
     */
    VecAllpass VecAp;

    /* An echo line is used to complete the second half of the early
     * reflections.
     */
    DelayLineI Delay;
    /* One offset and one coefficient per line for the echo line above. */
    size_t Offset[NUM_LINES]{};
    float Coeff[NUM_LINES]{};

    /* The gain for each output channel based on 3D panning. */
    float CurrentGains[NUM_LINES][MaxAmbiChannels]{};
    float TargetGains[NUM_LINES][MaxAmbiChannels]{};

    void updateLines(const float density_mult, const float diffusion, const float decayTime,
        const float frequency);
};
 381
 382
 383 struct Modulation {
 384     /* The vibrato time is tracked with an index over a (MOD_FRACONE)
 385      * normalized range.
 386      */
 387     uint Index, Step;
 388
 389     /* The depth of frequency change, in samples. */
 390     float Depth;
 391
 392     float ModDelays[MAX_UPDATE_SAMPLES];
 393
 394     void updateModulator(float modTime, float modDepth, float frequency);
 395
 396     void calcDelays(size_t todo);
 397 };
 398
 399 struct LateReverb {
 400     /* A recursive delay line is used fill in the reverb tail. */
 401     DelayLineI Delay;
 402     size_t     Offset[NUM_LINES]{};
 403
 404     /* Attenuation to compensate for the modal density and decay rate of the
 405      * late lines.
 406      */
 407     float DensityGain{0.0f};
 408
 409     /* T60 decay filters are used to simulate absorption. */
 410     T60Filter T60[NUM_LINES];
 411
 412     Modulation Mod;
 413
 414     /* A Gerzon vector all-pass filter is used to simulate diffusion. */
 415     VecAllpass VecAp;
 416
 417     /* The gain for each output channel based on 3D panning. */
 418     float CurrentGains[NUM_LINES][MaxAmbiChannels]{};
 419     float TargetGains[NUM_LINES][MaxAmbiChannels]{};
 420
 421     void updateLines(const float density_mult, const float diffusion, const float lfDecayTime,
 422         const float mfDecayTime, const float hfDecayTime, const float lf0norm,
 423         const float hf0norm, const float frequency);
 424
 425     void clear() noexcept
 426     {
 427         for(auto &filter : T60)
 428             filter.clear();
 429     }
 430 };
 431
 432 struct ReverbPipeline {
 433     /* Master effect filters */
 434     struct {
 435         BiquadFilter Lp;
 436         BiquadFilter Hp;
 437     } mFilter[NUM_LINES];
 438
 439     /* Core delay line (early reflections and late reverb tap from this). */
 440     DelayLineI mEarlyDelayIn;
 441     DelayLineI mLateDelayIn;
 442
 443     /* Tap points for early reflection delay. */
 444     size_t mEarlyDelayTap[NUM_LINES][2]{};
 445     float mEarlyDelayCoeff[NUM_LINES]{};
 446
 447     /* Tap points for late reverb feed and delay. */
 448     size_t mLateDelayTap[NUM_LINES][2]{};
 449
 450     /* Coefficients for the all-pass and line scattering matrices. */
 451     float mMixX{0.0f};
 452     float mMixY{0.0f};
 453
 454     EarlyReflections mEarly;
 455
 456     LateReverb mLate;
 457
 458     std::array<std::array<BandSplitter,NUM_LINES>,2> mAmbiSplitter;
 459
 460     size_t mFadeSampleCount{1};
 461
 462     void updateDelayLine(const float earlyDelay, const float lateDelay, const float density_mult,
 463         const float decayTime, const float frequency);
 464     void update3DPanning(const float *ReflectionsPan, const float *LateReverbPan,
 465         const float earlyGain, const float lateGain, const bool doUpmix, const MixParams *mainMix);
 466
 467     void processEarly(size_t offset, const size_t samplesToDo,
 468         const al::span<ReverbUpdateLine,NUM_LINES> tempSamples,
 469         const al::span<FloatBufferLine,NUM_LINES> outSamples);
 470     void processLate(size_t offset, const size_t samplesToDo,
 471         const al::span<ReverbUpdateLine,NUM_LINES> tempSamples,
 472         const al::span<FloatBufferLine,NUM_LINES> outSamples);
 473
 474     void clear() noexcept
 475     {
 476         for(auto &filter : mFilter)
 477         {
 478             filter.Lp.clear();
 479             filter.Hp.clear();
 480         }
 481         mLate.clear();
 482         for(auto &filters : mAmbiSplitter)
 483         {
 484             for(auto &filter : filters)
 485                 filter.clear();
 486         }
 487     }
 488 };
 489
 490 struct ReverbState final : public EffectState {
 491     /* All delay lines are allocated as a single buffer to reduce memory
 492      * fragmentation and management code.
 493      */
 494     al::vector<std::array<float,NUM_LINES>,16> mSampleBuffer;
 495
 496     struct {
 497         /* Calculated parameters which indicate if cross-fading is needed after
 498          * an update.
 499          */
 500         float Density{1.0f};
 501         float Diffusion{1.0f};
 502         float DecayTime{1.49f};
 503         float HFDecayTime{0.83f * 1.49f};
 504         float LFDecayTime{1.0f * 1.49f};
 505         float ModulationTime{0.25f};
 506         float ModulationDepth{0.0f};
 507         float HFReference{5000.0f};
 508         float LFReference{250.0f};
 509     } mParams;
 510
 511     enum PipelineState : uint8_t {
 512         DeviceClear,
 513         StartFade,
 514         Fading,
 515         Cleanup,
 516         Normal,
 517     };
 518     PipelineState mPipelineState{DeviceClear};
 519     uint8_t mCurrentPipeline{0};
 520
 521     ReverbPipeline mPipelines[2];
 522
 523     /* The current write offset for all delay lines. */
 524     size_t mOffset{};
 525
 526     /* Temporary storage used when processing. */
 527     union {
 528         alignas(16) FloatBufferLine mTempLine{};
 529         alignas(16) std::array<ReverbUpdateLine,NUM_LINES> mTempSamples;
 530     };
 531     alignas(16) std::array<FloatBufferLine,NUM_LINES> mEarlySamples{};
 532     alignas(16) std::array<FloatBufferLine,NUM_LINES> mLateSamples{};
 533
 534     std::array<float,MaxAmbiOrder+1> mOrderScales{};
 535
 536     bool mUpmixOutput{false};
 537
 538
 539     void MixOutPlain(ReverbPipeline &pipeline, const al::span<FloatBufferLine> samplesOut,
 540         const size_t todo)
 541     {
 542         ASSUME(todo > 0);
 543
 544         /* When not upsampling, the panning gains convert to B-Format and pan
 545          * at the same time.
 546          */
 547         for(size_t c{0u};c < NUM_LINES;c++)
 548         {
 549             const al::span<float> tmpspan{mEarlySamples[c].data(), todo};
 550             MixSamples(tmpspan, samplesOut, pipeline.mEarly.CurrentGains[c],
 551                 pipeline.mEarly.TargetGains[c], todo, 0);
 552         }
 553         for(size_t c{0u};c < NUM_LINES;c++)
 554         {
 555             const al::span<float> tmpspan{mLateSamples[c].data(), todo};
 556             MixSamples(tmpspan, samplesOut, pipeline.mLate.CurrentGains[c],
 557                 pipeline.mLate.TargetGains[c], todo, 0);
 558         }
 559     }
 560
 561     void MixOutAmbiUp(ReverbPipeline &pipeline, const al::span<FloatBufferLine> samplesOut,
 562         const size_t todo)
 563     {
 564         ASSUME(todo > 0);
 565
 566         auto DoMixRow = [](const al::span<float> OutBuffer, const al::span<const float,4> Gains,
 567             const float *InSamples, const size_t InStride)
 568         {
 569             std::fill(OutBuffer.begin(), OutBuffer.end(), 0.0f);
 570             for(const float gain : Gains)
 571             {
 572                 const float *RESTRICT input{al::assume_aligned<16>(InSamples)};
 573                 InSamples += InStride;
 574
 575                 if(!(std::fabs(gain) > GainSilenceThreshold))
 576                     continue;
 577
 578                 auto mix_sample = [gain](const float sample, const float in) noexcept -> float
 579                 { return sample + in*gain; };
 580                 std::transform(OutBuffer.begin(), OutBuffer.end(), input, OutBuffer.begin(),
 581                     mix_sample);
 582             }
 583         };
 584
 585         /* When upsampling, the B-Format conversion needs to be done separately
 586          * so the proper HF scaling can be applied to each B-Format channel.
 587          * The panning gains then pan and upsample the B-Format channels.
 588          */
 589         const al::span<float> tmpspan{al::assume_aligned<16>(mTempLine.data()), todo};
 590         for(size_t c{0u};c < NUM_LINES;c++)
 591         {
 592             DoMixRow(tmpspan, EarlyA2B[c], mEarlySamples[0].data(), mEarlySamples[0].size());
 593
 594             /* Apply scaling to the B-Format's HF response to "upsample" it to
 595              * higher-order output.
 596              */
 597             const float hfscale{(c==0) ? mOrderScales[0] : mOrderScales[1]};
 598             pipeline.mAmbiSplitter[0][c].processHfScale(tmpspan, hfscale);
 599
 600             MixSamples(tmpspan, samplesOut, pipeline.mEarly.CurrentGains[c],
 601                 pipeline.mEarly.TargetGains[c], todo, 0);
 602         }
 603         for(size_t c{0u};c < NUM_LINES;c++)
 604         {
 605             DoMixRow(tmpspan, LateA2B[c], mLateSamples[0].data(), mLateSamples[0].size());
 606
 607             const float hfscale{(c==0) ? mOrderScales[0] : mOrderScales[1]};
 608             pipeline.mAmbiSplitter[1][c].processHfScale(tmpspan, hfscale);
 609
 610             MixSamples(tmpspan, samplesOut, pipeline.mLate.CurrentGains[c],
 611                 pipeline.mLate.TargetGains[c], todo, 0);
 612         }
 613     }
 614
 615     void mixOut(ReverbPipeline &pipeline, const al::span<FloatBufferLine> samplesOut, const size_t todo)
 616     {
 617         if(mUpmixOutput)
 618             MixOutAmbiUp(pipeline, samplesOut, todo);
 619         else
 620             MixOutPlain(pipeline, samplesOut, todo);
 621     }
 622
 623     void allocLines(const float frequency);
 624
 625     void deviceUpdate(const DeviceBase *device, const BufferStorage *buffer) override;
 626     void update(const ContextBase *context, const EffectSlot *slot, const EffectProps *props,
 627         const EffectTarget target) override;
 628     void process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn,
 629         const al::span<FloatBufferLine> samplesOut) override;
 630
 631     DEF_NEWDEL(ReverbState)
 632 };
 633
 634 /**************************************
 635  *  Device Update                     *
 636  **************************************/
 637
 638 inline float CalcDelayLengthMult(float density)
 639 { return maxf(5.0f, std::cbrt(density*DENSITY_SCALE)); }
 640
 641 /* Calculates the delay line metrics and allocates the shared sample buffer
 642  * for all lines given the sample rate (frequency).
 643  */
 644 void ReverbState::allocLines(const float frequency)
 645 {
 646     /* All delay line lengths are calculated to accomodate the full range of
 647      * lengths given their respective paramters.
 648      */
 649     size_t totalSamples{0u};
 650
 651     /* Multiplier for the maximum density value, i.e. density=1, which is
 652      * actually the least density...
 653      */
 654     const float multiplier{CalcDelayLengthMult(1.0f)};
 655
 656     /* The modulator's line length is calculated from the maximum modulation
 657      * time and depth coefficient, and halfed for the low-to-high frequency
 658      * swing.
 659      */
 660     constexpr float max_mod_delay{MaxModulationTime*MODULATION_DEPTH_COEFF / 2.0f};
 661
 662     for(auto &pipeline : mPipelines)
 663     {
 664         /* The main delay length includes the maximum early reflection delay,
 665          * the largest early tap width, the maximum late reverb delay, and the
 666          * largest late tap width.  Finally, it must also be extended by the
 667          * update size (BufferLineSize) for block processing.
 668          */
 669         float length{ReverbMaxReflectionsDelay + EARLY_TAP_LENGTHS.back()*multiplier};
 670         totalSamples += pipeline.mEarlyDelayIn.calcLineLength(length, totalSamples, frequency,
 671             BufferLineSize);
 672
 673         constexpr float LateLineDiffAvg{(LATE_LINE_LENGTHS.back()-LATE_LINE_LENGTHS.front()) /
 674             float{NUM_LINES}};
 675         length = ReverbMaxLateReverbDelay + LateLineDiffAvg*multiplier;
 676         totalSamples += pipeline.mLateDelayIn.calcLineLength(length, totalSamples, frequency,
 677             BufferLineSize);
 678
 679         /* The early vector all-pass line. */
 680         length = EARLY_ALLPASS_LENGTHS.back() * multiplier;
 681         totalSamples += pipeline.mEarly.VecAp.Delay.calcLineLength(length, totalSamples, frequency,
 682             0);
 683
 684         /* The early reflection line. */
 685         length = EARLY_LINE_LENGTHS.back() * multiplier;
 686         totalSamples += pipeline.mEarly.Delay.calcLineLength(length, totalSamples, frequency,
 687             MAX_UPDATE_SAMPLES);
 688
 689         /* The late vector all-pass line. */
 690         length = LATE_ALLPASS_LENGTHS.back() * multiplier;
 691         totalSamples += pipeline.mLate.VecAp.Delay.calcLineLength(length, totalSamples, frequency,
 692             0);
 693
 694         /* The late delay lines are calculated from the largest maximum density
 695          * line length, and the maximum modulation delay. Four additional
 696          * samples are needed for resampling the modulator delay.
 697          */
 698         length = LATE_LINE_LENGTHS.back()*multiplier + max_mod_delay;
 699         totalSamples += pipeline.mLate.Delay.calcLineLength(length, totalSamples, frequency, 4);
 700     }
 701
 702     if(totalSamples != mSampleBuffer.size())
 703         decltype(mSampleBuffer)(totalSamples).swap(mSampleBuffer);
 704
 705     /* Clear the sample buffer. */
 706     std::fill(mSampleBuffer.begin(), mSampleBuffer.end(), decltype(mSampleBuffer)::value_type{});
 707
 708     /* Update all delays to reflect the new sample buffer. */
 709     for(auto &pipeline : mPipelines)
 710     {
 711         pipeline.mEarlyDelayIn.realizeLineOffset(mSampleBuffer.data());
 712         pipeline.mLateDelayIn.realizeLineOffset(mSampleBuffer.data());
 713         pipeline.mEarly.VecAp.Delay.realizeLineOffset(mSampleBuffer.data());
 714         pipeline.mEarly.Delay.realizeLineOffset(mSampleBuffer.data());
 715         pipeline.mLate.VecAp.Delay.realizeLineOffset(mSampleBuffer.data());
 716         pipeline.mLate.Delay.realizeLineOffset(mSampleBuffer.data());
 717     }
 718 }
 719
 720 void ReverbState::deviceUpdate(const DeviceBase *device, const BufferStorage*)
 721 {
 722     const auto frequency = static_cast<float>(device->Frequency);
 723
 724     /* Allocate the delay lines. */
 725     allocLines(frequency);
 726
 727     for(auto &pipeline : mPipelines)
 728     {
 729         /* Clear filters and gain coefficients since the delay lines were all just
 730         * cleared (if not reallocated).
 731         */
 732         for(auto &filter : pipeline.mFilter)
 733         {
 734             filter.Lp.clear();
 735             filter.Hp.clear();
 736         }
 737
 738         std::fill(std::begin(pipeline.mEarlyDelayCoeff),std::end(pipeline.mEarlyDelayCoeff), 0.0f);
 739         std::fill(std::begin(pipeline.mEarlyDelayCoeff),std::end(pipeline.mEarlyDelayCoeff), 0.0f);
 740
 741         pipeline.mLate.DensityGain = 0.0f;
 742         for(auto &t60 : pipeline.mLate.T60)
 743         {
 744             t60.MidGain = 0.0f;
 745             t60.HFFilter.clear();
 746             t60.LFFilter.clear();
 747         }
 748
 749         pipeline.mLate.Mod.Index = 0;
 750         pipeline.mLate.Mod.Step = 1;
 751         pipeline.mLate.Mod.Depth = 0.0f;
 752
 753         for(auto &gains : pipeline.mEarly.CurrentGains)
 754             std::fill(std::begin(gains), std::end(gains), 0.0f);
 755         for(auto &gains : pipeline.mEarly.TargetGains)
 756             std::fill(std::begin(gains), std::end(gains), 0.0f);
 757         for(auto &gains : pipeline.mLate.CurrentGains)
 758             std::fill(std::begin(gains), std::end(gains), 0.0f);
 759         for(auto &gains : pipeline.mLate.TargetGains)
 760             std::fill(std::begin(gains), std::end(gains), 0.0f);
 761     }
 762     mPipelineState = DeviceClear;
 763
 764     /* Reset offset base. */
 765     mOffset = 0;
 766
 767     if(device->mAmbiOrder > 1)
 768     {
 769         mUpmixOutput = true;
 770         mOrderScales = AmbiScale::GetHFOrderScales(1, device->mAmbiOrder, device->m2DMixing);
 771     }
 772     else
 773     {
 774         mUpmixOutput = false;
 775         mOrderScales.fill(1.0f);
 776     }
 777     mPipelines[0].mAmbiSplitter[0][0].init(device->mXOverFreq / frequency);
 778     for(auto &pipeline : mPipelines)
 779     {
 780         std::fill(pipeline.mAmbiSplitter[0].begin(), pipeline.mAmbiSplitter[0].end(),
 781             pipeline.mAmbiSplitter[0][0]);
 782         std::fill(pipeline.mAmbiSplitter[1].begin(), pipeline.mAmbiSplitter[1].end(),
 783             pipeline.mAmbiSplitter[0][0]);
 784     }
 785 }
 786
 787 /**************************************
 788  *  Effect Update                     *
 789  **************************************/
 790
 791 /* Calculate a decay coefficient given the length of each cycle and the time
 792  * until the decay reaches -60 dB.
 793  */
 794 inline float CalcDecayCoeff(const float length, const float decayTime)
 795 { return std::pow(ReverbDecayGain, length/decayTime); }
 796
 797 /* Calculate a decay length from a coefficient and the time until the decay
 798  * reaches -60 dB.
 799  */
 800 inline float CalcDecayLength(const float coeff, const float decayTime)
 801 {
 802     constexpr float log10_decaygain{-3.0f/*std::log10(ReverbDecayGain)*/};
 803     return std::log10(coeff) * decayTime / log10_decaygain;
 804 }
 805
 806 /* Calculate an attenuation to be applied to the input of any echo models to
 807  * compensate for modal density and decay time.
 808  */
 809 inline float CalcDensityGain(const float a)
 810 {
 811     /* The energy of a signal can be obtained by finding the area under the
 812      * squared signal.  This takes the form of Sum(x_n^2), where x is the
 813      * amplitude for the sample n.
 814      *
 815      * Decaying feedback matches exponential decay of the form Sum(a^n),
 816      * where a is the attenuation coefficient, and n is the sample.  The area
 817      * under this decay curve can be calculated as:  1 / (1 - a).
 818      *
 819      * Modifying the above equation to find the area under the squared curve
 820      * (for energy) yields:  1 / (1 - a^2).  Input attenuation can then be
 821      * calculated by inverting the square root of this approximation,
 822      * yielding:  1 / sqrt(1 / (1 - a^2)), simplified to: sqrt(1 - a^2).
 823      */
 824     return std::sqrt(1.0f - a*a);
 825 }
 826
 827 /* Calculate the scattering matrix coefficients given a diffusion factor. */
 828 inline void CalcMatrixCoeffs(const float diffusion, float *x, float *y)
 829 {
 830     /* The matrix is of order 4, so n is sqrt(4 - 1). */
 831     constexpr float n{al::numbers::sqrt3_v<float>};
 832     const float t{diffusion * std::atan(n)};
 833
 834     /* Calculate the first mixing matrix coefficient. */
 835     *x = std::cos(t);
 836     /* Calculate the second mixing matrix coefficient. */
 837     *y = std::sin(t) / n;
 838 }
 839
 840 /* Calculate the limited HF ratio for use with the late reverb low-pass
 841  * filters.
 842  */
 843 float CalcLimitedHfRatio(const float hfRatio, const float airAbsorptionGainHF,
 844     const float decayTime)
 845 {
 846     /* Find the attenuation due to air absorption in dB (converting delay
 847      * time to meters using the speed of sound).  Then reversing the decay
 848      * equation, solve for HF ratio.  The delay length is cancelled out of
 849      * the equation, so it can be calculated once for all lines.
 850      */
 851     float limitRatio{1.0f / SpeedOfSoundMetersPerSec /
 852         CalcDecayLength(airAbsorptionGainHF, decayTime)};
 853
 854     /* Using the limit calculated above, apply the upper bound to the HF ratio. */
 855     return minf(limitRatio, hfRatio);
 856 }
 857
 858
 859 /* Calculates the 3-band T60 damping coefficients for a particular delay line
 860  * of specified length, using a combination of two shelf filter sections given
 861  * decay times for each band split at two reference frequencies.
 862  */
 863 void T60Filter::calcCoeffs(const float length, const float lfDecayTime,
 864     const float mfDecayTime, const float hfDecayTime, const float lf0norm,
 865     const float hf0norm)
 866 {
 867     const float mfGain{CalcDecayCoeff(length, mfDecayTime)};
 868     const float lfGain{CalcDecayCoeff(length, lfDecayTime) / mfGain};
 869     const float hfGain{CalcDecayCoeff(length, hfDecayTime) / mfGain};
 870
 871     MidGain = mfGain;
 872     LFFilter.setParamsFromSlope(BiquadType::LowShelf, lf0norm, lfGain, 1.0f);
 873     HFFilter.setParamsFromSlope(BiquadType::HighShelf, hf0norm, hfGain, 1.0f);
 874 }
 875
 876 /* Update the early reflection line lengths and gain coefficients. */
 877 void EarlyReflections::updateLines(const float density_mult, const float diffusion,
 878     const float decayTime, const float frequency)
 879 {
 880     /* Calculate the all-pass feed-back/forward coefficient. */
 881     VecAp.Coeff = diffusion*diffusion * InvSqrt2;
 882
 883     for(size_t i{0u};i < NUM_LINES;i++)
 884     {
 885         /* Calculate the delay length of each all-pass line. */
 886         float length{EARLY_ALLPASS_LENGTHS[i] * density_mult};
 887         VecAp.Offset[i] = float2uint(length * frequency);
 888
 889         /* Calculate the delay length of each delay line. */
 890         length = EARLY_LINE_LENGTHS[i] * density_mult;
 891         Offset[i] = float2uint(length * frequency);
 892
 893         /* Calculate the gain (coefficient) for each line. */
 894         Coeff[i] = CalcDecayCoeff(length, decayTime);
 895     }
 896 }
 897
 898 /* Update the EAX modulation step and depth. Keep in mind that this kind of
 899  * vibrato is additive and not multiplicative as one may expect. The downswing
 900  * will sound stronger than the upswing.
 901  */
 902 void Modulation::updateModulator(float modTime, float modDepth, float frequency)
 903 {
 904     /* Modulation is calculated in two parts.
 905      *
 906      * The modulation time effects the sinus rate, altering the speed of
 907      * frequency changes. An index is incremented for each sample with an
 908      * appropriate step size to generate an LFO, which will vary the feedback
 909      * delay over time.
 910      */
 911     Step = maxu(fastf2u(MOD_FRACONE / (frequency * modTime)), 1);
 912
 913     /* The modulation depth effects the amount of frequency change over the
 914      * range of the sinus. It needs to be scaled by the modulation time so that
 915      * a given depth produces a consistent change in frequency over all ranges
 916      * of time. Since the depth is applied to a sinus value, it needs to be
 917      * halved once for the sinus range and again for the sinus swing in time
 918      * (half of it is spent decreasing the frequency, half is spent increasing
 919      * it).
 920      */
 921     if(modTime >= DefaultModulationTime)
 922     {
 923         /* To cancel the effects of a long period modulation on the late
 924          * reverberation, the amount of pitch should be varied (decreased)
 925          * according to the modulation time. The natural form is varying
 926          * inversely, in fact resulting in an invariant.
 927          */
 928         Depth = MODULATION_DEPTH_COEFF / 4.0f * DefaultModulationTime * modDepth * frequency;
 929     }
 930     else
 931         Depth = MODULATION_DEPTH_COEFF / 4.0f * modTime * modDepth * frequency;
 932 }
 933
 934 /* Update the late reverb line lengths and T60 coefficients. */
 935 void LateReverb::updateLines(const float density_mult, const float diffusion,
 936     const float lfDecayTime, const float mfDecayTime, const float hfDecayTime,
 937     const float lf0norm, const float hf0norm, const float frequency)
 938 {
 939     /* Scaling factor to convert the normalized reference frequencies from
 940      * representing 0...freq to 0...max_reference.
 941      */
 942     constexpr float MaxHFReference{20000.0f};
 943     const float norm_weight_factor{frequency / MaxHFReference};
 944
 945     const float late_allpass_avg{
 946         std::accumulate(LATE_ALLPASS_LENGTHS.begin(), LATE_ALLPASS_LENGTHS.end(), 0.0f) /
 947         float{NUM_LINES}};
 948
 949     /* To compensate for changes in modal density and decay time of the late
 950      * reverb signal, the input is attenuated based on the maximal energy of
 951      * the outgoing signal.  This approximation is used to keep the apparent
 952      * energy of the signal equal for all ranges of density and decay time.
 953      *
 954      * The average length of the delay lines is used to calculate the
 955      * attenuation coefficient.
 956      */
 957     float length{std::accumulate(LATE_LINE_LENGTHS.begin(), LATE_LINE_LENGTHS.end(), 0.0f) /
 958         float{NUM_LINES} + late_allpass_avg};
 959     length *= density_mult;
 960     /* The density gain calculation uses an average decay time weighted by
 961      * approximate bandwidth. This attempts to compensate for losses of energy
 962      * that reduce decay time due to scattering into highly attenuated bands.
 963      */
 964     const float decayTimeWeighted{
 965         lf0norm*norm_weight_factor*lfDecayTime +
 966         (hf0norm - lf0norm)*norm_weight_factor*mfDecayTime +
 967         (1.0f - hf0norm*norm_weight_factor)*hfDecayTime};
 968     DensityGain = CalcDensityGain(CalcDecayCoeff(length, decayTimeWeighted));
 969
 970     /* Calculate the all-pass feed-back/forward coefficient. */
 971     VecAp.Coeff = diffusion*diffusion * InvSqrt2;
 972
 973     for(size_t i{0u};i < NUM_LINES;i++)
 974     {
 975         /* Calculate the delay length of each all-pass line. */
 976         length = LATE_ALLPASS_LENGTHS[i] * density_mult;
 977         VecAp.Offset[i] = float2uint(length * frequency);
 978
 979         /* Calculate the delay length of each feedback delay line. A cubic
 980          * resampler is used for modulation on the feedback delay, which
 981          * includes one sample of delay. Reduce by one to compensate.
 982          */
 983         length = LATE_LINE_LENGTHS[i] * density_mult;
 984         Offset[i] = maxu(float2uint(length*frequency + 0.5f), 1u) - 1u;
 985
 986         /* Approximate the absorption that the vector all-pass would exhibit
 987          * given the current diffusion so we don't have to process a full T60
 988          * filter for each of its four lines. Also include the average
 989          * modulation delay (depth is half the max delay in samples).
 990          */
 991         length += lerpf(LATE_ALLPASS_LENGTHS[i], late_allpass_avg, diffusion)*density_mult +
 992             Mod.Depth/frequency;
 993
 994         /* Calculate the T60 damping coefficients for each line. */
 995         T60[i].calcCoeffs(length, lfDecayTime, mfDecayTime, hfDecayTime, lf0norm, hf0norm);
 996     }
 997 }
 998
 999
1000 /* Update the offsets for the main effect delay line. */
1001 void ReverbPipeline::updateDelayLine(const float earlyDelay, const float lateDelay,
1002     const float density_mult, const float decayTime, const float frequency)
1003 {
1004     /* Early reflection taps are decorrelated by means of an average room
1005      * reflection approximation described above the definition of the taps.
1006      * This approximation is linear and so the above density multiplier can
1007      * be applied to adjust the width of the taps.  A single-band decay
1008      * coefficient is applied to simulate initial attenuation and absorption.
1009      *
1010      * Late reverb taps are based on the late line lengths to allow a zero-
1011      * delay path and offsets that would continue the propagation naturally
1012      * into the late lines.
1013      */
1014     for(size_t i{0u};i < NUM_LINES;i++)
1015     {
1016         float length{EARLY_TAP_LENGTHS[i]*density_mult};
1017         mEarlyDelayTap[i][1] = float2uint((earlyDelay+length) * frequency);
1018         mEarlyDelayCoeff[i] = CalcDecayCoeff(length, decayTime);
1019
1020         length = (LATE_LINE_LENGTHS[i] - LATE_LINE_LENGTHS.front())/float{NUM_LINES}*density_mult +
1021             lateDelay;
1022         mLateDelayTap[i][1] = float2uint(length * frequency);
1023     }
1024 }
1025
1026 /* Creates a transform matrix given a reverb vector. The vector pans the reverb
1027  * reflections toward the given direction, using its magnitude (up to 1) as a
1028  * focal strength. This function results in a B-Format transformation matrix
1029  * that spatially focuses the signal in the desired direction.
1030  */
1031 std::array<std::array<float,4>,4> GetTransformFromVector(const float *vec)
1032 {
1033     /* Normalize the panning vector according to the N3D scale, which has an
1034      * extra sqrt(3) term on the directional components. Converting from OpenAL
1035      * to B-Format also requires negating X (ACN 1) and Z (ACN 3). Note however
1036      * that the reverb panning vectors use left-handed coordinates, unlike the
1037      * rest of OpenAL which use right-handed. This is fixed by negating Z,
1038      * which cancels out with the B-Format Z negation.
1039      */
1040     float norm[3];
1041     float mag{std::sqrt(vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2])};
1042     if(mag > 1.0f)
1043     {
1044         norm[0] = vec[0] / mag * -al::numbers::sqrt3_v<float>;
1045         norm[1] = vec[1] / mag * al::numbers::sqrt3_v<float>;
1046         norm[2] = vec[2] / mag * al::numbers::sqrt3_v<float>;
1047         mag = 1.0f;
1048     }
1049     else
1050     {
1051         /* If the magnitude is less than or equal to 1, just apply the sqrt(3)
1052          * term. There's no need to renormalize the magnitude since it would
1053          * just be reapplied in the matrix.
1054          */
1055         norm[0] = vec[0] * -al::numbers::sqrt3_v<float>;
1056         norm[1] = vec[1] * al::numbers::sqrt3_v<float>;
1057         norm[2] = vec[2] * al::numbers::sqrt3_v<float>;
1058     }
1059
1060     return std::array<std::array<float,4>,4>{{
1061         {{1.0f,   0.0f,    0.0f,   0.0f}},
1062         {{norm[0], 1.0f-mag, 0.0f, 0.0f}},
1063         {{norm[1], 0.0f, 1.0f-mag, 0.0f}},
1064         {{norm[2], 0.0f, 0.0f, 1.0f-mag}}
1065     }};
1066 }
1067
1068 /* Update the early and late 3D panning gains. */
1069 void ReverbPipeline::update3DPanning(const float *ReflectionsPan, const float *LateReverbPan,
1070     const float earlyGain, const float lateGain, const bool doUpmix, const MixParams *mainMix)
1071 {
1072     /* Create matrices that transform a B-Format signal according to the
1073      * panning vectors.
1074      */
1075     const std::array<std::array<float,4>,4> earlymat{GetTransformFromVector(ReflectionsPan)};
1076     const std::array<std::array<float,4>,4> latemat{GetTransformFromVector(LateReverbPan)};
1077
1078     if(doUpmix)
1079     {
1080         /* When upsampling, combine the early and late transforms with the
1081          * first-order upsample matrix. This results in panning gains that
1082          * apply the panning transform to first-order B-Format, which is then
1083          * upsampled.
1084          */
1085         auto mult_matrix = [](const al::span<const std::array<float,4>,4> mtx1)
1086         {
1087             auto&& mtx2 = AmbiScale::FirstOrderUp;
1088             std::array<std::array<float,MaxAmbiChannels>,NUM_LINES> res{};
1089
1090             for(size_t i{0};i < mtx1[0].size();++i)
1091             {
1092                 float *RESTRICT dst{res[i].data()};
1093                 for(size_t k{0};k < mtx1.size();++k)
1094                 {
1095                     const float *RESTRICT src{mtx2[k].data()};
1096                     const float a{mtx1[k][i]};
1097                     for(size_t j{0};j < mtx2[0].size();++j)
1098                         dst[j] += a * src[j];
1099                 }
1100             }
1101
1102             return res;
1103         };
1104         auto earlycoeffs = mult_matrix(earlymat);
1105         auto latecoeffs = mult_matrix(latemat);
1106
1107         for(size_t i{0u};i < NUM_LINES;i++)
1108             ComputePanGains(mainMix, earlycoeffs[i].data(), earlyGain, mEarly.TargetGains[i]);
1109         for(size_t i{0u};i < NUM_LINES;i++)
1110             ComputePanGains(mainMix, latecoeffs[i].data(), lateGain, mLate.TargetGains[i]);
1111     }
1112     else
1113     {
1114         /* When not upsampling, combine the early and late A-to-B-Format
1115          * conversions with their respective transform. This results panning
1116          * gains that convert A-Format to B-Format, which is then panned.
1117          */
1118         auto mult_matrix = [](const al::span<const std::array<float,NUM_LINES>,4> mtx1,
1119             const al::span<const std::array<float,4>,4> mtx2)
1120         {
1121             std::array<std::array<float,MaxAmbiChannels>,NUM_LINES> res{};
1122
1123             for(size_t i{0};i < mtx1[0].size();++i)
1124             {
1125                 float *RESTRICT dst{res[i].data()};
1126                 for(size_t k{0};k < mtx1.size();++k)
1127                 {
1128                     const float a{mtx1[k][i]};
1129                     for(size_t j{0};j < mtx2.size();++j)
1130                         dst[j] += a * mtx2[j][k];
1131                 }
1132             }
1133
1134             return res;
1135         };
1136         auto earlycoeffs = mult_matrix(EarlyA2B, earlymat);
1137         auto latecoeffs = mult_matrix(LateA2B, latemat);
1138
1139         for(size_t i{0u};i < NUM_LINES;i++)
1140             ComputePanGains(mainMix, earlycoeffs[i].data(), earlyGain, mEarly.TargetGains[i]);
1141         for(size_t i{0u};i < NUM_LINES;i++)
1142             ComputePanGains(mainMix, latecoeffs[i].data(), lateGain, mLate.TargetGains[i]);
1143     }
1144 }
1145
1146 void ReverbState::update(const ContextBase *Context, const EffectSlot *Slot,
1147     const EffectProps *props, const EffectTarget target)
1148 {
1149     const DeviceBase *Device{Context->mDevice};
1150     const auto frequency = static_cast<float>(Device->Frequency);
1151
1152     /* If the HF limit parameter is flagged, calculate an appropriate limit
1153      * based on the air absorption parameter.
1154      */
1155     float hfRatio{props->Reverb.DecayHFRatio};
1156     if(props->Reverb.DecayHFLimit && props->Reverb.AirAbsorptionGainHF < 1.0f)
1157         hfRatio = CalcLimitedHfRatio(hfRatio, props->Reverb.AirAbsorptionGainHF,
1158             props->Reverb.DecayTime);
1159
1160     /* Calculate the LF/HF decay times. */
1161     constexpr float MinDecayTime{0.1f}, MaxDecayTime{20.0f};
1162     const float lfDecayTime{clampf(props->Reverb.DecayTime*props->Reverb.DecayLFRatio,
1163         MinDecayTime, MaxDecayTime)};
1164     const float hfDecayTime{clampf(props->Reverb.DecayTime*hfRatio, MinDecayTime, MaxDecayTime)};
1165
1166     /* Determine if a full update is required. */
1167     const bool fullUpdate{mPipelineState == DeviceClear ||
1168         /* Density is essentially a master control for the feedback delays, so
1169          * changes the offsets of many delay lines.
1170          */
1171         mParams.Density != props->Reverb.Density ||
1172         /* Diffusion and decay times influences the decay rate (gain) of the
1173          * late reverb T60 filter.
1174          */
1175         mParams.Diffusion != props->Reverb.Diffusion ||
1176         mParams.DecayTime != props->Reverb.DecayTime ||
1177         mParams.HFDecayTime != hfDecayTime ||
1178         mParams.LFDecayTime != lfDecayTime ||
1179         /* Modulation time and depth both require fading the modulation delay. */
1180         mParams.ModulationTime != props->Reverb.ModulationTime ||
1181         mParams.ModulationDepth != props->Reverb.ModulationDepth ||
1182         /* HF/LF References control the weighting used to calculate the density
1183          * gain.
1184          */
1185         mParams.HFReference != props->Reverb.HFReference ||
1186         mParams.LFReference != props->Reverb.LFReference};
1187     if(fullUpdate)
1188     {
1189         mParams.Density = props->Reverb.Density;
1190         mParams.Diffusion = props->Reverb.Diffusion;
1191         mParams.DecayTime = props->Reverb.DecayTime;
1192         mParams.HFDecayTime = hfDecayTime;
1193         mParams.LFDecayTime = lfDecayTime;
1194         mParams.ModulationTime = props->Reverb.ModulationTime;
1195         mParams.ModulationDepth = props->Reverb.ModulationDepth;
1196         mParams.HFReference = props->Reverb.HFReference;
1197         mParams.LFReference = props->Reverb.LFReference;
1198
1199         mPipelineState = (mPipelineState != DeviceClear) ? StartFade : Normal;
1200         mCurrentPipeline ^= 1;
1201     }
1202     auto &pipeline = mPipelines[mCurrentPipeline];
1203
1204     /* Update early and late 3D panning. */
1205     mOutTarget = target.Main->Buffer;
1206     const float gain{props->Reverb.Gain * Slot->Gain * ReverbBoost};
1207     pipeline.update3DPanning(props->Reverb.ReflectionsPan, props->Reverb.LateReverbPan,
1208         props->Reverb.ReflectionsGain*gain, props->Reverb.LateReverbGain*gain, mUpmixOutput,
1209         target.Main);
1210
1211     /* Calculate the master filters */
1212     float hf0norm{minf(props->Reverb.HFReference/frequency, 0.49f)};
1213     pipeline.mFilter[0].Lp.setParamsFromSlope(BiquadType::HighShelf, hf0norm, props->Reverb.GainHF, 1.0f);
1214     float lf0norm{minf(props->Reverb.LFReference/frequency, 0.49f)};
1215     pipeline.mFilter[0].Hp.setParamsFromSlope(BiquadType::LowShelf, lf0norm, props->Reverb.GainLF, 1.0f);
1216     for(size_t i{1u};i < NUM_LINES;i++)
1217     {
1218         pipeline.mFilter[i].Lp.copyParamsFrom(pipeline.mFilter[0].Lp);
1219         pipeline.mFilter[i].Hp.copyParamsFrom(pipeline.mFilter[0].Hp);
1220     }
1221
1222     /* The density-based room size (delay length) multiplier. */
1223     const float density_mult{CalcDelayLengthMult(props->Reverb.Density)};
1224
1225     /* Update the main effect delay and associated taps. */
1226     pipeline.updateDelayLine(props->Reverb.ReflectionsDelay, props->Reverb.LateReverbDelay,
1227         density_mult, props->Reverb.DecayTime, frequency);
1228
1229     if(fullUpdate)
1230     {
1231         /* Update the early lines. */
1232         pipeline.mEarly.updateLines(density_mult, props->Reverb.Diffusion, props->Reverb.DecayTime,
1233             frequency);
1234
1235         /* Get the mixing matrix coefficients. */
1236         CalcMatrixCoeffs(props->Reverb.Diffusion, &pipeline.mMixX, &pipeline.mMixY);
1237
1238         /* Update the modulator rate and depth. */
1239         pipeline.mLate.Mod.updateModulator(props->Reverb.ModulationTime,
1240             props->Reverb.ModulationDepth, frequency);
1241
1242         /* Update the late lines. */
1243         pipeline.mLate.updateLines(density_mult, props->Reverb.Diffusion, lfDecayTime,
1244             props->Reverb.DecayTime, hfDecayTime, lf0norm, hf0norm, frequency);
1245     }
1246
1247     const float decaySamples{(props->Reverb.ReflectionsDelay + props->Reverb.LateReverbDelay
1248         + props->Reverb.DecayTime) * frequency};
1249     pipeline.mFadeSampleCount = static_cast<size_t>(minf(decaySamples, 1'000'000.0f));
1250 }
1251
1252
1253 /**************************************
1254  *  Effect Processing                 *
1255  **************************************/
1256
1257 /* Applies a scattering matrix to the 4-line (vector) input.  This is used
1258  * for both the below vector all-pass model and to perform modal feed-back
1259  * delay network (FDN) mixing.
1260  *
1261  * The matrix is derived from a skew-symmetric matrix to form a 4D rotation
1262  * matrix with a single unitary rotational parameter:
1263  *
1264  *     [  d,  a,  b,  c ]          1 = a^2 + b^2 + c^2 + d^2
1265  *     [ -a,  d,  c, -b ]
1266  *     [ -b, -c,  d,  a ]
1267  *     [ -c,  b, -a,  d ]
1268  *
1269  * The rotation is constructed from the effect's diffusion parameter,
1270  * yielding:
1271  *
1272  *     1 = x^2 + 3 y^2
1273  *
1274  * Where a, b, and c are the coefficient y with differing signs, and d is the
1275  * coefficient x.  The final matrix is thus:
1276  *
1277  *     [  x,  y, -y,  y ]          n = sqrt(matrix_order - 1)
1278  *     [ -y,  x,  y,  y ]          t = diffusion_parameter * atan(n)
1279  *     [  y, -y,  x,  y ]          x = cos(t)
1280  *     [ -y, -y, -y,  x ]          y = sin(t) / n
1281  *
1282  * Any square orthogonal matrix with an order that is a power of two will
1283  * work (where ^T is transpose, ^-1 is inverse):
1284  *
1285  *     M^T = M^-1
1286  *
1287  * Using that knowledge, finding an appropriate matrix can be accomplished
1288  * naively by searching all combinations of:
1289  *
1290  *     M = D + S - S^T
1291  *
1292  * Where D is a diagonal matrix (of x), and S is a triangular matrix (of y)
1293  * whose combination of signs are being iterated.
1294  */
1295 inline auto VectorPartialScatter(const std::array<float,NUM_LINES> &RESTRICT in,
1296     const float xCoeff, const float yCoeff) -> std::array<float,NUM_LINES>
1297 {
1298     return std::array<float,NUM_LINES>{{
1299         xCoeff*in[0] + yCoeff*(          in[1] + -in[2] + in[3]),
1300         xCoeff*in[1] + yCoeff*(-in[0]          +  in[2] + in[3]),
1301         xCoeff*in[2] + yCoeff*( in[0] + -in[1]          + in[3]),
1302         xCoeff*in[3] + yCoeff*(-in[0] + -in[1] + -in[2]        )
1303     }};
1304 }
1305
1306 /* Utilizes the above, but reverses the input channels. */
1307 void VectorScatterRevDelayIn(const DelayLineI delay, size_t offset, const float xCoeff,
1308     const float yCoeff, const al::span<const ReverbUpdateLine,NUM_LINES> in, const size_t count)
1309 {
1310     ASSUME(count > 0);
1311
1312     for(size_t i{0u};i < count;)
1313     {
1314         offset &= delay.Mask;
1315         size_t td{minz(delay.Mask+1 - offset, count-i)};
1316         do {
1317             std::array<float,NUM_LINES> f;
1318             for(size_t j{0u};j < NUM_LINES;j++)
1319                 f[NUM_LINES-1-j] = in[j][i];
1320             ++i;
1321
1322             delay.Line[offset++] = VectorPartialScatter(f, xCoeff, yCoeff);
1323         } while(--td);
1324     }
1325 }
1326
1327 /* This applies a Gerzon multiple-in/multiple-out (MIMO) vector all-pass
1328  * filter to the 4-line input.
1329  *
1330  * It works by vectorizing a regular all-pass filter and replacing the delay
1331  * element with a scattering matrix (like the one above) and a diagonal
1332  * matrix of delay elements.
1333  *
1334  * Two static specializations are used for transitional (cross-faded) delay
1335  * line processing and non-transitional processing.
1336  */
1337 void VecAllpass::process(const al::span<ReverbUpdateLine,NUM_LINES> samples, size_t offset,
1338     const float xCoeff, const float yCoeff, const size_t todo)
1339 {
1340     const DelayLineI delay{Delay};
1341     const float feedCoeff{Coeff};
1342
1343     ASSUME(todo > 0);
1344
1345     size_t vap_offset[NUM_LINES];
1346     for(size_t j{0u};j < NUM_LINES;j++)
1347         vap_offset[j] = offset - Offset[j];
1348     for(size_t i{0u};i < todo;)
1349     {
1350         for(size_t j{0u};j < NUM_LINES;j++)
1351             vap_offset[j] &= delay.Mask;
1352         offset &= delay.Mask;
1353
1354         size_t maxoff{offset};
1355         for(size_t j{0u};j < NUM_LINES;j++)
1356             maxoff = maxz(maxoff, vap_offset[j]);
1357         size_t td{minz(delay.Mask+1 - maxoff, todo - i)};
1358
1359         do {
1360             std::array<float,NUM_LINES> f;
1361             for(size_t j{0u};j < NUM_LINES;j++)
1362             {
1363                 const float input{samples[j][i]};
1364                 const float out{delay.Line[vap_offset[j]++][j] - feedCoeff*input};
1365                 f[j] = input + feedCoeff*out;
1366
1367                 samples[j][i] = out;
1368             }
1369             ++i;
1370
1371             delay.Line[offset++] = VectorPartialScatter(f, xCoeff, yCoeff);
1372         } while(--td);
1373     }
1374 }
1375
1376 /* This generates early reflections.
1377  *
1378  * This is done by obtaining the primary reflections (those arriving from the
1379  * same direction as the source) from the main delay line.  These are
1380  * attenuated and all-pass filtered (based on the diffusion parameter).
1381  *
1382  * The early lines are then fed in reverse (according to the approximately
1383  * opposite spatial location of the A-Format lines) to create the secondary
1384  * reflections (those arriving from the opposite direction as the source).
1385  *
1386  * The early response is then completed by combining the primary reflections
1387  * with the delayed and attenuated output from the early lines.
1388  *
1389  * Finally, the early response is reversed, scattered (based on diffusion),
1390  * and fed into the late reverb section of the main delay line.
1391  */
1392 void ReverbPipeline::processEarly(size_t offset, const size_t samplesToDo,
1393     const al::span<ReverbUpdateLine, NUM_LINES> tempSamples,
1394     const al::span<FloatBufferLine, NUM_LINES> outSamples)
1395 {
1396     const DelayLineI early_delay{mEarly.Delay};
1397     const DelayLineI in_delay{mEarlyDelayIn};
1398     const float mixX{mMixX};
1399     const float mixY{mMixY};
1400
1401     ASSUME(samplesToDo > 0);
1402
1403     for(size_t base{0};base < samplesToDo;)
1404     {
1405         const size_t todo{minz(samplesToDo-base, MAX_UPDATE_SAMPLES)};
1406
1407         /* First, load decorrelated samples from the main delay line as the
1408          * primary reflections.
1409          */
1410         const float fadeStep{1.0f / static_cast<float>(todo)};
1411         for(size_t j{0u};j < NUM_LINES;j++)
1412         {
1413             size_t early_delay_tap0{offset - mEarlyDelayTap[j][0]};
1414             size_t early_delay_tap1{offset - mEarlyDelayTap[j][1]};
1415             const float coeff{mEarlyDelayCoeff[j]};
1416             const float coeffStep{early_delay_tap0 != early_delay_tap1 ? coeff*fadeStep : 0.0f};
1417             float fadeCount{0.0f};
1418
1419             for(size_t i{0u};i < todo;)
1420             {
1421                 early_delay_tap0 &= in_delay.Mask;
1422                 early_delay_tap1 &= in_delay.Mask;
1423                 const size_t max_tap{maxz(early_delay_tap0, early_delay_tap1)};
1424                 size_t td{minz(in_delay.Mask+1 - max_tap, todo-i)};
1425                 do {
1426                     const float fade0{coeff - coeffStep*fadeCount};
1427                     const float fade1{coeffStep*fadeCount};
1428                     fadeCount += 1.0f;
1429                     tempSamples[j][i++] = in_delay.Line[early_delay_tap0++][j]*fade0 +
1430                         in_delay.Line[early_delay_tap1++][j]*fade1;
1431                 } while(--td);
1432             }
1433
1434             mEarlyDelayTap[j][0] = mEarlyDelayTap[j][1];
1435         }
1436
1437         /* Apply a vector all-pass, to help color the initial reflections based
1438          * on the diffusion strength.
1439          */
1440         mEarly.VecAp.process(tempSamples, offset, mixX, mixY, todo);
1441
1442         /* Apply a delay and bounce to generate secondary reflections, combine
1443          * with the primary reflections and write out the result for mixing.
1444          */
1445         for(size_t j{0u};j < NUM_LINES;j++)
1446             early_delay.write(offset, NUM_LINES-1-j, tempSamples[j].data(), todo);
1447         for(size_t j{0u};j < NUM_LINES;j++)
1448         {
1449             size_t feedb_tap{offset - mEarly.Offset[j]};
1450             const float feedb_coeff{mEarly.Coeff[j]};
1451             float *RESTRICT out{al::assume_aligned<16>(outSamples[j].data() + base)};
1452
1453             for(size_t i{0u};i < todo;)
1454             {
1455                 feedb_tap &= early_delay.Mask;
1456                 size_t td{minz(early_delay.Mask+1 - feedb_tap, todo - i)};
1457                 do {
1458                     tempSamples[j][i] += early_delay.Line[feedb_tap++][j]*feedb_coeff;
1459                     out[i] = tempSamples[j][i];
1460                     ++i;
1461                 } while(--td);
1462             }
1463         }
1464
1465         /* Finally, write the result to the late delay line input for the late
1466          * reverb stage to pick up at the appropriate time, applying a scatter
1467          * and bounce to improve the initial diffusion in the late reverb.
1468          */
1469         VectorScatterRevDelayIn(mLateDelayIn, offset, mixX, mixY, tempSamples, todo);
1470
1471         base += todo;
1472         offset += todo;
1473     }
1474 }
1475
1476 void Modulation::calcDelays(size_t todo)
1477 {
1478     constexpr float mod_scale{al::numbers::pi_v<float> * 2.0f / MOD_FRACONE};
1479     uint idx{Index};
1480     const uint step{Step};
1481     const float depth{Depth};
1482     for(size_t i{0};i < todo;++i)
1483     {
1484         idx += step;
1485         const float lfo{std::sin(static_cast<float>(idx&MOD_FRACMASK) * mod_scale)};
1486         ModDelays[i] = (lfo+1.0f) * depth;
1487     }
1488     Index = idx;
1489 }
1490
1491
1492 /* This generates the reverb tail using a modified feed-back delay network
1493  * (FDN).
1494  *
1495  * Results from the early reflections are mixed with the output from the
1496  * modulated late delay lines.
1497  *
1498  * The late response is then completed by T60 and all-pass filtering the mix.
1499  *
1500  * Finally, the lines are reversed (so they feed their opposite directions)
1501  * and scattered with the FDN matrix before re-feeding the delay lines.
1502  */
1503 void ReverbPipeline::processLate(size_t offset, const size_t samplesToDo,
1504     const al::span<ReverbUpdateLine, NUM_LINES> tempSamples,
1505     const al::span<FloatBufferLine, NUM_LINES> outSamples)
1506 {
1507     const DelayLineI late_delay{mLate.Delay};
1508     const DelayLineI in_delay{mLateDelayIn};
1509     const float mixX{mMixX};
1510     const float mixY{mMixY};
1511
1512     ASSUME(samplesToDo > 0);
1513
1514     for(size_t base{0};base < samplesToDo;)
1515     {
1516         const size_t todo{minz(samplesToDo-base, minz(mLate.Offset[0], MAX_UPDATE_SAMPLES))};
1517         ASSUME(todo > 0);
1518
1519         /* First, calculate the modulated delays for the late feedback. */
1520         mLate.Mod.calcDelays(todo);
1521
1522         /* Next, load decorrelated samples from the main and feedback delay
1523          * lines. Filter the signal to apply its frequency-dependent decay.
1524          */
1525         const float fadeStep{1.0f / static_cast<float>(todo)};
1526         for(size_t j{0u};j < NUM_LINES;j++)
1527         {
1528             size_t late_delay_tap0{offset - mLateDelayTap[j][0]};
1529             size_t late_delay_tap1{offset - mLateDelayTap[j][1]};
1530             size_t late_feedb_tap{offset - mLate.Offset[j]};
1531             const float midGain{mLate.T60[j].MidGain};
1532             const float densityGain{mLate.DensityGain * midGain};
1533             const float densityStep{late_delay_tap0 != late_delay_tap1 ?
1534                 densityGain*fadeStep : 0.0f};
1535             float fadeCount{0.0f};
1536
1537             for(size_t i{0u};i < todo;)
1538             {
1539                 late_delay_tap0 &= in_delay.Mask;
1540                 late_delay_tap1 &= in_delay.Mask;
1541                 size_t td{minz(todo-i, in_delay.Mask+1 - maxz(late_delay_tap0, late_delay_tap1))};
1542                 do {
1543                     /* Calculate the read offset and offset between it and the
1544                      * next sample.
1545                      */
1546                     const float fdelay{mLate.Mod.ModDelays[i]};
1547                     const size_t idelay{float2uint(fdelay * float{gCubicTable.sTableSteps})};
1548                     const size_t delay{late_feedb_tap - (idelay>>gCubicTable.sTableBits)};
1549                     const size_t delayoffset{idelay & gCubicTable.sTableMask};
1550                     ++late_feedb_tap;
1551
1552                     /* Get the samples around by the delayed offset. */
1553                     const float out0{late_delay.Line[(delay  ) & late_delay.Mask][j]};
1554                     const float out1{late_delay.Line[(delay-1) & late_delay.Mask][j]};
1555                     const float out2{late_delay.Line[(delay-2) & late_delay.Mask][j]};
1556                     const float out3{late_delay.Line[(delay-3) & late_delay.Mask][j]};
1557
1558                     /* The output is obtained by interpolating the four samples
1559                      * that were acquired above, and combined with the main
1560                      * delay tap.
1561                      */
1562                     const float out{out0*gCubicTable.getCoeff0(delayoffset)
1563                         + out1*gCubicTable.getCoeff1(delayoffset)
1564                         + out2*gCubicTable.getCoeff2(delayoffset)
1565                         + out3*gCubicTable.getCoeff3(delayoffset)};
1566                     const float fade0{densityGain - densityStep*fadeCount};
1567                     const float fade1{densityStep*fadeCount};
1568                     fadeCount += 1.0f;
1569                     tempSamples[j][i] = out*midGain +
1570                         in_delay.Line[late_delay_tap0++][j]*fade0 +
1571                         in_delay.Line[late_delay_tap1++][j]*fade1;
1572                     ++i;
1573                 } while(--td);
1574             }
1575             mLateDelayTap[j][0] = mLateDelayTap[j][1];
1576
1577             mLate.T60[j].process({tempSamples[j].data(), todo});
1578         }
1579
1580         /* Apply a vector all-pass to improve micro-surface diffusion, and
1581          * write out the results for mixing.
1582          */
1583         mLate.VecAp.process(tempSamples, offset, mixX, mixY, todo);
1584         for(size_t j{0u};j < NUM_LINES;j++)
1585             std::copy_n(tempSamples[j].begin(), todo, outSamples[j].begin()+base);
1586
1587         /* Finally, scatter and bounce the results to refeed the feedback buffer. */
1588         VectorScatterRevDelayIn(late_delay, offset, mixX, mixY, tempSamples, todo);
1589
1590         base += todo;
1591         offset += todo;
1592     }
1593 }
1594
1595 void ReverbState::process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut)
1596 {
1597     const size_t offset{mOffset};
1598
1599     ASSUME(samplesToDo > 0);
1600
1601     auto &oldpipeline = mPipelines[mCurrentPipeline^1];
1602     auto &pipeline = mPipelines[mCurrentPipeline];
1603
1604     if(mPipelineState >= Fading)
1605     {
1606         /* Convert B-Format to A-Format for processing. */
1607         const size_t numInput{minz(samplesIn.size(), NUM_LINES)};
1608         const al::span<float> tmpspan{al::assume_aligned<16>(mTempLine.data()), samplesToDo};
1609         for(size_t c{0u};c < NUM_LINES;c++)
1610         {
1611             std::fill(tmpspan.begin(), tmpspan.end(), 0.0f);
1612             for(size_t i{0};i < numInput;++i)
1613             {
1614                 const float gain{B2A[c][i]};
1615                 const float *RESTRICT input{al::assume_aligned<16>(samplesIn[i].data())};
1616
1617                 auto mix_sample = [gain](const float sample, const float in) noexcept -> float
1618                 { return sample + in*gain; };
1619                 std::transform(tmpspan.begin(), tmpspan.end(), input, tmpspan.begin(),
1620                     mix_sample);
1621             }
1622
1623             /* Band-pass the incoming samples and feed the initial delay line. */
1624             auto&& filter = DualBiquad{pipeline.mFilter[c].Lp, pipeline.mFilter[c].Hp};
1625             filter.process(tmpspan, tmpspan.data());
1626             pipeline.mEarlyDelayIn.write(offset, c, tmpspan.cbegin(), samplesToDo);
1627         }
1628         if(mPipelineState == Fading)
1629         {
1630             /* Give the old pipeline silence if it's still fading out. */
1631             for(size_t c{0u};c < NUM_LINES;c++)
1632             {
1633                 std::fill(tmpspan.begin(), tmpspan.end(), 0.0f);
1634
1635                 auto&& filter = DualBiquad{oldpipeline.mFilter[c].Lp, oldpipeline.mFilter[c].Hp};
1636                 filter.process(tmpspan, tmpspan.data());
1637                 oldpipeline.mEarlyDelayIn.write(offset, c, tmpspan.cbegin(), samplesToDo);
1638             }
1639         }
1640     }
1641     else
1642     {
1643         /* At the start of a fade, fade in input for the current pipeline, and
1644          * fade out input for the old pipeline.
1645          */
1646         const size_t numInput{minz(samplesIn.size(), NUM_LINES)};
1647         const al::span<float> tmpspan{al::assume_aligned<16>(mTempLine.data()), samplesToDo};
1648         const float fadeStep{1.0f / static_cast<float>(samplesToDo)};
1649
1650         for(size_t c{0u};c < NUM_LINES;c++)
1651         {
1652             std::fill(tmpspan.begin(), tmpspan.end(), 0.0f);
1653             for(size_t i{0};i < numInput;++i)
1654             {
1655                 const float gain{B2A[c][i]};
1656                 const float *RESTRICT input{al::assume_aligned<16>(samplesIn[i].data())};
1657
1658                 auto mix_sample = [gain](const float sample, const float in) noexcept -> float
1659                 { return sample + in*gain; };
1660                 std::transform(tmpspan.begin(), tmpspan.end(), input, tmpspan.begin(),
1661                     mix_sample);
1662             }
1663             float stepCount{0.0f};
1664             for(float &sample : tmpspan)
1665             {
1666                 stepCount += 1.0f;
1667                 sample *= stepCount*fadeStep;
1668             }
1669
1670             auto&& filter = DualBiquad{pipeline.mFilter[c].Lp, pipeline.mFilter[c].Hp};
1671             filter.process(tmpspan, tmpspan.data());
1672             pipeline.mEarlyDelayIn.write(offset, c, tmpspan.cbegin(), samplesToDo);
1673         }
1674         for(size_t c{0u};c < NUM_LINES;c++)
1675         {
1676             std::fill(tmpspan.begin(), tmpspan.end(), 0.0f);
1677             for(size_t i{0};i < numInput;++i)
1678             {
1679                 const float gain{B2A[c][i]};
1680                 const float *RESTRICT input{al::assume_aligned<16>(samplesIn[i].data())};
1681
1682                 auto mix_sample = [gain](const float sample, const float in) noexcept -> float
1683                 { return sample + in*gain; };
1684                 std::transform(tmpspan.begin(), tmpspan.end(), input, tmpspan.begin(),
1685                     mix_sample);
1686             }
1687             float stepCount{0.0f};
1688             for(float &sample : tmpspan)
1689             {
1690                 stepCount += 1.0f;
1691                 sample *= 1.0f - stepCount*fadeStep;
1692             }
1693
1694             auto&& filter = DualBiquad{oldpipeline.mFilter[c].Lp, oldpipeline.mFilter[c].Hp};
1695             filter.process(tmpspan, tmpspan.data());
1696             oldpipeline.mEarlyDelayIn.write(offset, c, tmpspan.cbegin(), samplesToDo);
1697         }
1698         mPipelineState = Fading;
1699     }
1700
1701     /* Process reverb for these samples. and mix them to the output. */
1702     pipeline.processEarly(offset, samplesToDo, mTempSamples, mEarlySamples);
1703     pipeline.processLate(offset, samplesToDo, mTempSamples, mLateSamples);
1704     mixOut(pipeline, samplesOut, samplesToDo);
1705
1706     if(mPipelineState != Normal)
1707     {
1708         if(mPipelineState == Cleanup)
1709         {
1710             size_t numSamples{mSampleBuffer.size()/2};
1711             size_t pipelineOffset{numSamples * (mCurrentPipeline^1)};
1712             std::fill_n(mSampleBuffer.data()+pipelineOffset, numSamples,
1713                 decltype(mSampleBuffer)::value_type{});
1714
1715             oldpipeline.clear();
1716             mPipelineState = Normal;
1717         }
1718         else
1719         {
1720             /* If this is the final mix for this old pipeline, set the target
1721              * gains to 0 to ensure a complete fade out, and set the state to
1722              * Cleanup so the next invocation cleans up the delay buffers and
1723              * filters.
1724              */
1725             if(samplesToDo >= oldpipeline.mFadeSampleCount)
1726             {
1727                 for(auto &gains : oldpipeline.mEarly.TargetGains)
1728                     std::fill(std::begin(gains), std::end(gains), 0.0f);
1729                 for(auto &gains : oldpipeline.mLate.TargetGains)
1730                     std::fill(std::begin(gains), std::end(gains), 0.0f);
1731                 oldpipeline.mFadeSampleCount = 0;
1732                 mPipelineState = Cleanup;
1733             }
1734             else
1735                 oldpipeline.mFadeSampleCount -= samplesToDo;
1736
1737             /* Process the old reverb for these samples. */
1738             oldpipeline.processEarly(offset, samplesToDo, mTempSamples, mEarlySamples);
1739             oldpipeline.processLate(offset, samplesToDo, mTempSamples, mLateSamples);
1740             mixOut(oldpipeline, samplesOut, samplesToDo);
1741         }
1742     }
1743
1744     mOffset = offset + samplesToDo;
1745 }
1746
1747
1748 struct ReverbStateFactory final : public EffectStateFactory {
1749     al::intrusive_ptr<EffectState> create() override
1750     { return al::intrusive_ptr<EffectState>{new ReverbState{}}; }
1751 };
1752
1753 struct StdReverbStateFactory final : public EffectStateFactory {
1754     al::intrusive_ptr<EffectState> create() override
1755     { return al::intrusive_ptr<EffectState>{new ReverbState{}}; }
1756 };
1757
1758 } // namespace
1759
1760 EffectStateFactory *ReverbStateFactory_getFactory()
1761 {
1762     static ReverbStateFactory ReverbFactory{};
1763     return &ReverbFactory;
1764 }
1765
1766 EffectStateFactory *StdReverbStateFactory_getFactory()
1767 {
1768     static StdReverbStateFactory ReverbFactory{};
1769     return &ReverbFactory;
1770 }