core/voice.cpp

   1
   2 #include "config.h"
   3
   4 #include "voice.h"
   5
   6 #include <algorithm>
   7 #include <array>
   8 #include <atomic>
   9 #include <cassert>
  10 #include <climits>
  11 #include <cstdint>
  12 #include <iterator>
  13 #include <memory>
  14 #include <new>
  15 #include <stdlib.h>
  16 #include <utility>
  17 #include <vector>
  18
  19 #include "albyte.h"
  20 #include "alnumeric.h"
  21 #include "aloptional.h"
  22 #include "alspan.h"
  23 #include "alstring.h"
  24 #include "ambidefs.h"
  25 #include "async_event.h"
  26 #include "buffer_storage.h"
  27 #include "context.h"
  28 #include "cpu_caps.h"
  29 #include "devformat.h"
  30 #include "device.h"
  31 #include "filters/biquad.h"
  32 #include "filters/nfc.h"
  33 #include "filters/splitter.h"
  34 #include "fmt_traits.h"
  35 #include "logging.h"
  36 #include "mixer.h"
  37 #include "mixer/defs.h"
  38 #include "mixer/hrtfdefs.h"
  39 #include "opthelpers.h"
  40 #include "resampler_limits.h"
  41 #include "ringbuffer.h"
  42 #include "vector.h"
  43 #include "voice_change.h"
  44
  45 struct CTag;
  46 #ifdef HAVE_SSE
  47 struct SSETag;
  48 #endif
  49 #ifdef HAVE_NEON
  50 struct NEONTag;
  51 #endif
  52
  53
  54 static_assert(!(sizeof(DeviceBase::MixerBufferLine)&15),
  55     "DeviceBase::MixerBufferLine must be a multiple of 16 bytes");
  56 static_assert(!(MaxResamplerEdge&3), "MaxResamplerEdge is not a multiple of 4");
  57
  58 static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
  59 static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
  60     "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
  61
  62 Resampler ResamplerDefault{Resampler::Cubic};
  63
  64 namespace {
  65
  66 using uint = unsigned int;
  67 using namespace std::chrono;
  68
  69 using HrtfMixerFunc = void(*)(const float *InSamples, float2 *AccumSamples, const uint IrSize,
  70     const MixHrtfFilter *hrtfparams, const size_t BufferSize);
  71 using HrtfMixerBlendFunc = void(*)(const float *InSamples, float2 *AccumSamples,
  72     const uint IrSize, const HrtfFilter *oldparams, const MixHrtfFilter *newparams,
  73     const size_t BufferSize);
  74
  75 HrtfMixerFunc MixHrtfSamples{MixHrtf_<CTag>};
  76 HrtfMixerBlendFunc MixHrtfBlendSamples{MixHrtfBlend_<CTag>};
  77
  78 inline MixerOutFunc SelectMixer()
  79 {
  80 #ifdef HAVE_NEON
  81     if((CPUCapFlags&CPU_CAP_NEON))
  82         return Mix_<NEONTag>;
  83 #endif
  84 #ifdef HAVE_SSE
  85     if((CPUCapFlags&CPU_CAP_SSE))
  86         return Mix_<SSETag>;
  87 #endif
  88     return Mix_<CTag>;
  89 }
  90
  91 inline MixerOneFunc SelectMixerOne()
  92 {
  93 #ifdef HAVE_NEON
  94     if((CPUCapFlags&CPU_CAP_NEON))
  95         return Mix_<NEONTag>;
  96 #endif
  97 #ifdef HAVE_SSE
  98     if((CPUCapFlags&CPU_CAP_SSE))
  99         return Mix_<SSETag>;
 100 #endif
 101     return Mix_<CTag>;
 102 }
 103
 104 inline HrtfMixerFunc SelectHrtfMixer()
 105 {
 106 #ifdef HAVE_NEON
 107     if((CPUCapFlags&CPU_CAP_NEON))
 108         return MixHrtf_<NEONTag>;
 109 #endif
 110 #ifdef HAVE_SSE
 111     if((CPUCapFlags&CPU_CAP_SSE))
 112         return MixHrtf_<SSETag>;
 113 #endif
 114     return MixHrtf_<CTag>;
 115 }
 116
 117 inline HrtfMixerBlendFunc SelectHrtfBlendMixer()
 118 {
 119 #ifdef HAVE_NEON
 120     if((CPUCapFlags&CPU_CAP_NEON))
 121         return MixHrtfBlend_<NEONTag>;
 122 #endif
 123 #ifdef HAVE_SSE
 124     if((CPUCapFlags&CPU_CAP_SSE))
 125         return MixHrtfBlend_<SSETag>;
 126 #endif
 127     return MixHrtfBlend_<CTag>;
 128 }
 129
 130 } // namespace
 131
 132 void Voice::InitMixer(al::optional<std::string> resampler)
 133 {
 134     if(resampler)
 135     {
 136         struct ResamplerEntry {
 137             const char name[16];
 138             const Resampler resampler;
 139         };
 140         constexpr ResamplerEntry ResamplerList[]{
 141             { "none", Resampler::Point },
 142             { "point", Resampler::Point },
 143             { "linear", Resampler::Linear },
 144             { "cubic", Resampler::Cubic },
 145             { "bsinc12", Resampler::BSinc12 },
 146             { "fast_bsinc12", Resampler::FastBSinc12 },
 147             { "bsinc24", Resampler::BSinc24 },
 148             { "fast_bsinc24", Resampler::FastBSinc24 },
 149         };
 150
 151         const char *str{resampler->c_str()};
 152         if(al::strcasecmp(str, "bsinc") == 0)
 153         {
 154             WARN("Resampler option \"%s\" is deprecated, using bsinc12\n", str);
 155             str = "bsinc12";
 156         }
 157         else if(al::strcasecmp(str, "sinc4") == 0 || al::strcasecmp(str, "sinc8") == 0)
 158         {
 159             WARN("Resampler option \"%s\" is deprecated, using cubic\n", str);
 160             str = "cubic";
 161         }
 162
 163         auto iter = std::find_if(std::begin(ResamplerList), std::end(ResamplerList),
 164             [str](const ResamplerEntry &entry) -> bool
 165             { return al::strcasecmp(str, entry.name) == 0; });
 166         if(iter == std::end(ResamplerList))
 167             ERR("Invalid resampler: %s\n", str);
 168         else
 169             ResamplerDefault = iter->resampler;
 170     }
 171
 172     MixSamplesOut = SelectMixer();
 173     MixSamplesOne = SelectMixerOne();
 174     MixHrtfBlendSamples = SelectHrtfBlendMixer();
 175     MixHrtfSamples = SelectHrtfMixer();
 176 }
 177
 178
 179 namespace {
 180
 181 /* IMA ADPCM Stepsize table */
 182 constexpr int IMAStep_size[89] = {
 183        7,    8,    9,   10,   11,   12,   13,   14,   16,   17,   19,
 184       21,   23,   25,   28,   31,   34,   37,   41,   45,   50,   55,
 185       60,   66,   73,   80,   88,   97,  107,  118,  130,  143,  157,
 186      173,  190,  209,  230,  253,  279,  307,  337,  371,  408,  449,
 187      494,  544,  598,  658,  724,  796,  876,  963, 1060, 1166, 1282,
 188     1411, 1552, 1707, 1878, 2066, 2272, 2499, 2749, 3024, 3327, 3660,
 189     4026, 4428, 4871, 5358, 5894, 6484, 7132, 7845, 8630, 9493,10442,
 190    11487,12635,13899,15289,16818,18500,20350,22358,24633,27086,29794,
 191    32767
 192 };
 193
 194 /* IMA4 ADPCM Codeword decode table */
 195 constexpr int IMA4Codeword[16] = {
 196     1, 3, 5, 7, 9, 11, 13, 15,
 197    -1,-3,-5,-7,-9,-11,-13,-15,
 198 };
 199
 200 /* IMA4 ADPCM Step index adjust decode table */
 201 constexpr int IMA4Index_adjust[16] = {
 202    -1,-1,-1,-1, 2, 4, 6, 8,
 203    -1,-1,-1,-1, 2, 4, 6, 8
 204 };
 205
 206 /* MSADPCM Adaption table */
 207 constexpr int MSADPCMAdaption[16] = {
 208     230, 230, 230, 230, 307, 409, 512, 614,
 209     768, 614, 512, 409, 307, 230, 230, 230
 210 };
 211
 212 /* MSADPCM Adaption Coefficient tables */
 213 constexpr int MSADPCMAdaptionCoeff[7][2] = {
 214     { 256,    0 },
 215     { 512, -256 },
 216     {   0,    0 },
 217     { 192,   64 },
 218     { 240,    0 },
 219     { 460, -208 },
 220     { 392, -232 }
 221 };
 222
 223
 224 void SendSourceStoppedEvent(ContextBase *context, uint id)
 225 {
 226     RingBuffer *ring{context->mAsyncEvents.get()};
 227     auto evt_vec = ring->getWriteVector();
 228     if(evt_vec.first.len < 1) return;
 229
 230     AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
 231         AsyncEvent::SourceStateChange)};
 232     evt->u.srcstate.id = id;
 233     evt->u.srcstate.state = AsyncEvent::SrcState::Stop;
 234
 235     ring->writeAdvance(1);
 236 }
 237
 238
 239 const float *DoFilters(BiquadFilter &lpfilter, BiquadFilter &hpfilter, float *dst,
 240     const al::span<const float> src, int type)
 241 {
 242     switch(type)
 243     {
 244     case AF_None:
 245         lpfilter.clear();
 246         hpfilter.clear();
 247         break;
 248
 249     case AF_LowPass:
 250         lpfilter.process(src, dst);
 251         hpfilter.clear();
 252         return dst;
 253     case AF_HighPass:
 254         lpfilter.clear();
 255         hpfilter.process(src, dst);
 256         return dst;
 257
 258     case AF_BandPass:
 259         DualBiquad{lpfilter, hpfilter}.process(src, dst);
 260         return dst;
 261     }
 262     return src.data();
 263 }
 264
 265
 266 template<FmtType Type>
 267 inline void LoadSamples(float *RESTRICT dstSamples, const al::byte *src, const size_t srcChan,
 268     const size_t srcOffset, const size_t srcStep, const size_t /*samplesPerBlock*/,
 269     const size_t samplesToLoad) noexcept
 270 {
 271     constexpr size_t sampleSize{sizeof(typename al::FmtTypeTraits<Type>::Type)};
 272     auto s = src + (srcOffset*srcStep + srcChan)*sampleSize;
 273
 274     al::LoadSampleArray<Type>(dstSamples, s, srcStep, samplesToLoad);
 275 }
 276
 277 template<>
 278 inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src,
 279     const size_t srcChan, const size_t srcOffset, const size_t srcStep,
 280     const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
 281 {
 282     const size_t blockBytes{((samplesPerBlock-1)/2 + 4)*srcStep};
 283
 284     /* Skip to the ADPCM block containing the srcOffset sample. */
 285     src += srcOffset/samplesPerBlock*blockBytes;
 286     /* Calculate how many samples need to be skipped in the block. */
 287     size_t skip{srcOffset % samplesPerBlock};
 288
 289     /* NOTE: This could probably be optimized better. */
 290     size_t wrote{0};
 291     do {
 292         /* Each IMA4 block starts with a signed 16-bit sample, and a signed
 293          * 16-bit table index. The table index needs to be clamped.
 294          */
 295         int sample{src[srcChan*4] | (src[srcChan*4 + 1] << 8)};
 296         int index{src[srcChan*4 + 2] | (src[srcChan*4 + 3] << 8)};
 297
 298         sample = (sample^0x8000) - 32768;
 299         index = clampi((index^0x8000) - 32768, 0, al::size(IMAStep_size)-1);
 300
 301         if(skip == 0)
 302         {
 303             dstSamples[wrote++] = static_cast<float>(sample) / 32768.0f;
 304             if(wrote == samplesToLoad) return;
 305         }
 306         else
 307             --skip;
 308
 309         auto decode_sample = [&sample,&index](const uint nibble)
 310         {
 311             sample += IMA4Codeword[nibble] * IMAStep_size[index] / 8;
 312             sample = clampi(sample, -32768, 32767);
 313
 314             index += IMA4Index_adjust[nibble];
 315             index = clampi(index, 0, al::size(IMAStep_size)-1);
 316
 317             return sample;
 318         };
 319
 320         /* The rest of the block is arranged as a series of nibbles, contained
 321          * in 4 *bytes* per channel interleaved. So every 8 nibbles we need to
 322          * skip 4 bytes per channel to get the next nibbles for this channel.
 323          *
 324          * First, decode the samples that we need to skip in the block (will
 325          * always be less than the block size). They need to be decoded despite
 326          * being ignored for proper state on the remaining samples.
 327          */
 328         const al::byte *nibbleData{src + (srcStep+srcChan)*4};
 329         size_t nibbleOffset{0};
 330         const size_t startOffset{skip + 1};
 331         for(;skip;--skip)
 332         {
 333             const size_t byteShift{(nibbleOffset&1) * 4};
 334             const size_t wordOffset{(nibbleOffset>>1) & ~size_t{3}};
 335             const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)};
 336             ++nibbleOffset;
 337
 338             std::ignore = decode_sample((nibbleData[byteOffset]>>byteShift) & 15u);
 339         }
 340
 341         /* Second, decode the rest of the block and write to the output, until
 342          * the end of the block or the end of output.
 343          */
 344         const size_t todo{minz(samplesPerBlock-startOffset, samplesToLoad-wrote)};
 345         for(size_t i{0};i < todo;++i)
 346         {
 347             const size_t byteShift{(nibbleOffset&1) * 4};
 348             const size_t wordOffset{(nibbleOffset>>1) & ~size_t{3}};
 349             const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)};
 350             ++nibbleOffset;
 351
 352             const int result{decode_sample((nibbleData[byteOffset]>>byteShift) & 15u)};
 353             dstSamples[wrote++] = static_cast<float>(result) / 32768.0f;
 354         }
 355         if(wrote == samplesToLoad)
 356             return;
 357
 358         src += blockBytes;
 359     } while(true);
 360 }
 361
 362 template<>
 363 inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte *src,
 364     const size_t srcChan, const size_t srcOffset, const size_t srcStep,
 365     const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
 366 {
 367     const size_t blockBytes{((samplesPerBlock-2)/2 + 7)*srcStep};
 368
 369     src += srcOffset/samplesPerBlock*blockBytes;
 370     size_t skip{srcOffset % samplesPerBlock};
 371
 372     size_t wrote{0};
 373     do {
 374         /* Each MS ADPCM block starts with an 8-bit block predictor, used to
 375          * dictate how the two sample history values are mixed with the decoded
 376          * sample, and an initial signed 16-bit delta value which scales the
 377          * nibble sample value. This is followed by the two initial 16-bit
 378          * sample history values.
 379          */
 380         const al::byte *input{src};
 381         const uint8_t blockpred{std::min(input[srcChan], uint8_t{6})};
 382         input += srcStep;
 383         int delta{input[2*srcChan + 0] | (input[2*srcChan + 1] << 8)};
 384         input += srcStep*2;
 385
 386         int sampleHistory[2]{};
 387         sampleHistory[0] = input[2*srcChan + 0] | (input[2*srcChan + 1]<<8);
 388         input += srcStep*2;
 389         sampleHistory[1] = input[2*srcChan + 0] | (input[2*srcChan + 1]<<8);
 390         input += srcStep*2;
 391
 392         const auto coeffs = al::as_span(MSADPCMAdaptionCoeff[blockpred]);
 393         delta = (delta^0x8000) - 32768;
 394         sampleHistory[0] = (sampleHistory[0]^0x8000) - 32768;
 395         sampleHistory[1] = (sampleHistory[1]^0x8000) - 32768;
 396
 397         /* The second history sample is "older", so it's the first to be
 398          * written out.
 399          */
 400         if(skip == 0)
 401         {
 402             dstSamples[wrote++] = static_cast<float>(sampleHistory[1]) / 32768.0f;
 403             if(wrote == samplesToLoad) return;
 404             dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f;
 405             if(wrote == samplesToLoad) return;
 406         }
 407         else if(skip == 1)
 408         {
 409             --skip;
 410             dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f;
 411             if(wrote == samplesToLoad) return;
 412         }
 413         else
 414             skip -= 2;
 415
 416         auto decode_sample = [&sampleHistory,&delta,coeffs](const int nibble)
 417         {
 418             int pred{(sampleHistory[0]*coeffs[0] + sampleHistory[1]*coeffs[1]) / 256};
 419             pred += ((nibble^0x08) - 0x08) * delta;
 420             pred  = clampi(pred, -32768, 32767);
 421
 422             sampleHistory[1] = sampleHistory[0];
 423             sampleHistory[0] = pred;
 424
 425             delta = (MSADPCMAdaption[nibble] * delta) / 256;
 426             delta = maxi(16, delta);
 427
 428             return pred;
 429         };
 430
 431         /* The rest of the block is a series of nibbles, interleaved per-
 432          * channel. First, skip samples.
 433          */
 434         const size_t startOffset{skip + 2};
 435         size_t nibbleOffset{srcChan};
 436         for(;skip;--skip)
 437         {
 438             const size_t byteOffset{nibbleOffset>>1};
 439             const size_t byteShift{((nibbleOffset&1)^1) * 4};
 440             nibbleOffset += srcStep;
 441
 442             std::ignore = decode_sample((input[byteOffset]>>byteShift) & 15);
 443         }
 444
 445         /* Now decode the rest of the block, until the end of the block or the
 446          * dst buffer is filled.
 447          */
 448         const size_t todo{minz(samplesPerBlock-startOffset, samplesToLoad-wrote)};
 449         for(size_t j{0};j < todo;++j)
 450         {
 451             const size_t byteOffset{nibbleOffset>>1};
 452             const size_t byteShift{((nibbleOffset&1)^1) * 4};
 453             nibbleOffset += srcStep;
 454
 455             const int sample{decode_sample((input[byteOffset]>>byteShift) & 15)};
 456             dstSamples[wrote++] = static_cast<float>(sample) / 32768.0f;
 457         }
 458         if(wrote == samplesToLoad)
 459             return;
 460
 461         src += blockBytes;
 462     } while(true);
 463 }
 464
 465 void LoadSamples(float *dstSamples, const al::byte *src, const size_t srcChan,
 466     const size_t srcOffset, const FmtType srcType, const size_t srcStep,
 467     const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
 468 {
 469 #define HANDLE_FMT(T) case T:                                                 \
 470     LoadSamples<T>(dstSamples, src, srcChan, srcOffset, srcStep,              \
 471         samplesPerBlock, samplesToLoad);                                      \
 472     break
 473
 474     switch(srcType)
 475     {
 476     HANDLE_FMT(FmtUByte);
 477     HANDLE_FMT(FmtShort);
 478     HANDLE_FMT(FmtFloat);
 479     HANDLE_FMT(FmtDouble);
 480     HANDLE_FMT(FmtMulaw);
 481     HANDLE_FMT(FmtAlaw);
 482     HANDLE_FMT(FmtIMA4);
 483     HANDLE_FMT(FmtMSADPCM);
 484     }
 485 #undef HANDLE_FMT
 486 }
 487
 488 void LoadBufferStatic(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
 489     const size_t dataPosInt, const FmtType sampleType, const size_t srcChannel,
 490     const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad,
 491     float *voiceSamples)
 492 {
 493     if(!bufferLoopItem)
 494     {
 495         /* Load what's left to play from the buffer */
 496         if(buffer->mSampleLen > dataPosInt) LIKELY
 497         {
 498             const size_t buffer_remaining{buffer->mSampleLen - dataPosInt};
 499             const size_t remaining{minz(samplesToLoad-samplesLoaded, buffer_remaining)};
 500             LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
 501                 sampleType, srcStep, buffer->mBlockAlign, remaining);
 502             samplesLoaded += remaining;
 503         }
 504
 505         if(const size_t toFill{samplesToLoad - samplesLoaded})
 506         {
 507             auto srcsamples = voiceSamples + samplesLoaded;
 508             std::fill_n(srcsamples, toFill, *(srcsamples-1));
 509         }
 510     }
 511     else
 512     {
 513         const size_t loopStart{buffer->mLoopStart};
 514         const size_t loopEnd{buffer->mLoopEnd};
 515         ASSUME(loopEnd > loopStart);
 516
 517         const size_t intPos{(dataPosInt < loopEnd) ? dataPosInt
 518             : (((dataPosInt-loopStart)%(loopEnd-loopStart)) + loopStart)};
 519
 520         /* Load what's left of this loop iteration */
 521         const size_t remaining{minz(samplesToLoad-samplesLoaded, loopEnd-dataPosInt)};
 522         LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, intPos, sampleType,
 523             srcStep, buffer->mBlockAlign, remaining);
 524         samplesLoaded += remaining;
 525
 526         /* Load repeats of the loop to fill the buffer. */
 527         const size_t loopSize{loopEnd - loopStart};
 528         while(const size_t toFill{minz(samplesToLoad - samplesLoaded, loopSize)})
 529         {
 530             LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, loopStart,
 531                 sampleType, srcStep, buffer->mBlockAlign, toFill);
 532             samplesLoaded += toFill;
 533         }
 534     }
 535 }
 536
 537 void LoadBufferCallback(VoiceBufferItem *buffer, const size_t dataPosInt,
 538     const size_t numCallbackSamples, const FmtType sampleType, const size_t srcChannel,
 539     const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad, float *voiceSamples)
 540 {
 541     /* Load what's left to play from the buffer */
 542     if(numCallbackSamples > dataPosInt) LIKELY
 543     {
 544         const size_t remaining{minz(samplesToLoad-samplesLoaded, numCallbackSamples-dataPosInt)};
 545         LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
 546             sampleType, srcStep, buffer->mBlockAlign, remaining);
 547         samplesLoaded += remaining;
 548     }
 549
 550     if(const size_t toFill{samplesToLoad - samplesLoaded})
 551     {
 552         auto srcsamples = voiceSamples + samplesLoaded;
 553         std::fill_n(srcsamples, toFill, *(srcsamples-1));
 554     }
 555 }
 556
 557 void LoadBufferQueue(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
 558     size_t dataPosInt, const FmtType sampleType, const size_t srcChannel,
 559     const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad,
 560     float *voiceSamples)
 561 {
 562     /* Crawl the buffer queue to fill in the temp buffer */
 563     while(buffer && samplesLoaded != samplesToLoad)
 564     {
 565         if(dataPosInt >= buffer->mSampleLen)
 566         {
 567             dataPosInt -= buffer->mSampleLen;
 568             buffer = buffer->mNext.load(std::memory_order_acquire);
 569             if(!buffer) buffer = bufferLoopItem;
 570             continue;
 571         }
 572
 573         const size_t remaining{minz(samplesToLoad-samplesLoaded, buffer->mSampleLen-dataPosInt)};
 574         LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
 575             sampleType, srcStep, buffer->mBlockAlign, remaining);
 576
 577         samplesLoaded += remaining;
 578         if(samplesLoaded == samplesToLoad)
 579             break;
 580
 581         dataPosInt = 0;
 582         buffer = buffer->mNext.load(std::memory_order_acquire);
 583         if(!buffer) buffer = bufferLoopItem;
 584     }
 585     if(const size_t toFill{samplesToLoad - samplesLoaded})
 586     {
 587         auto srcsamples = voiceSamples + samplesLoaded;
 588         std::fill_n(srcsamples, toFill, *(srcsamples-1));
 589     }
 590 }
 591
 592
 593 void DoHrtfMix(const float *samples, const uint DstBufferSize, DirectParams &parms,
 594     const float TargetGain, const uint Counter, uint OutPos, const bool IsPlaying,
 595     DeviceBase *Device)
 596 {
 597     const uint IrSize{Device->mIrSize};
 598     auto &HrtfSamples = Device->HrtfSourceData;
 599     auto &AccumSamples = Device->HrtfAccumData;
 600
 601     /* Copy the HRTF history and new input samples into a temp buffer. */
 602     auto src_iter = std::copy(parms.Hrtf.History.begin(), parms.Hrtf.History.end(),
 603         std::begin(HrtfSamples));
 604     std::copy_n(samples, DstBufferSize, src_iter);
 605     /* Copy the last used samples back into the history buffer for later. */
 606     if(IsPlaying) LIKELY
 607         std::copy_n(std::begin(HrtfSamples) + DstBufferSize, parms.Hrtf.History.size(),
 608             parms.Hrtf.History.begin());
 609
 610     /* If fading and this is the first mixing pass, fade between the IRs. */
 611     uint fademix{0u};
 612     if(Counter && OutPos == 0)
 613     {
 614         fademix = minu(DstBufferSize, Counter);
 615
 616         float gain{TargetGain};
 617
 618         /* The new coefficients need to fade in completely since they're
 619          * replacing the old ones. To keep the gain fading consistent,
 620          * interpolate between the old and new target gains given how much of
 621          * the fade time this mix handles.
 622          */
 623         if(Counter > fademix)
 624         {
 625             const float a{static_cast<float>(fademix) / static_cast<float>(Counter)};
 626             gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a);
 627         }
 628
 629         MixHrtfFilter hrtfparams{
 630             parms.Hrtf.Target.Coeffs,
 631             parms.Hrtf.Target.Delay,
 632             0.0f, gain / static_cast<float>(fademix)};
 633         MixHrtfBlendSamples(HrtfSamples, AccumSamples+OutPos, IrSize, &parms.Hrtf.Old, &hrtfparams,
 634             fademix);
 635
 636         /* Update the old parameters with the result. */
 637         parms.Hrtf.Old = parms.Hrtf.Target;
 638         parms.Hrtf.Old.Gain = gain;
 639         OutPos += fademix;
 640     }
 641
 642     if(fademix < DstBufferSize)
 643     {
 644         const uint todo{DstBufferSize - fademix};
 645         float gain{TargetGain};
 646
 647         /* Interpolate the target gain if the gain fading lasts longer than
 648          * this mix.
 649          */
 650         if(Counter > DstBufferSize)
 651         {
 652             const float a{static_cast<float>(todo) / static_cast<float>(Counter-fademix)};
 653             gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a);
 654         }
 655
 656         MixHrtfFilter hrtfparams{
 657             parms.Hrtf.Target.Coeffs,
 658             parms.Hrtf.Target.Delay,
 659             parms.Hrtf.Old.Gain,
 660             (gain - parms.Hrtf.Old.Gain) / static_cast<float>(todo)};
 661         MixHrtfSamples(HrtfSamples+fademix, AccumSamples+OutPos, IrSize, &hrtfparams, todo);
 662
 663         /* Store the now-current gain for next time. */
 664         parms.Hrtf.Old.Gain = gain;
 665     }
 666 }
 667
 668 void DoNfcMix(const al::span<const float> samples, FloatBufferLine *OutBuffer, DirectParams &parms,
 669     const float *TargetGains, const uint Counter, const uint OutPos, DeviceBase *Device)
 670 {
 671     using FilterProc = void (NfcFilter::*)(const al::span<const float>, float*);
 672     static constexpr FilterProc NfcProcess[MaxAmbiOrder+1]{
 673         nullptr, &NfcFilter::process1, &NfcFilter::process2, &NfcFilter::process3};
 674
 675     float *CurrentGains{parms.Gains.Current.data()};
 676     MixSamples(samples, {OutBuffer, 1u}, CurrentGains, TargetGains, Counter, OutPos);
 677     ++OutBuffer;
 678     ++CurrentGains;
 679     ++TargetGains;
 680
 681     const al::span<float> nfcsamples{Device->NfcSampleData, samples.size()};
 682     size_t order{1};
 683     while(const size_t chancount{Device->NumChannelsPerOrder[order]})
 684     {
 685         (parms.NFCtrlFilter.*NfcProcess[order])(samples, nfcsamples.data());
 686         MixSamples(nfcsamples, {OutBuffer, chancount}, CurrentGains, TargetGains, Counter, OutPos);
 687         OutBuffer += chancount;
 688         CurrentGains += chancount;
 689         TargetGains += chancount;
 690         if(++order == MaxAmbiOrder+1)
 691             break;
 692     }
 693 }
 694
 695 } // namespace
 696
 697 void Voice::mix(const State vstate, ContextBase *Context, const nanoseconds deviceTime,
 698     const uint SamplesToDo)
 699 {
 700     static constexpr std::array<float,MAX_OUTPUT_CHANNELS> SilentTarget{};
 701
 702     ASSUME(SamplesToDo > 0);
 703
 704     DeviceBase *Device{Context->mDevice};
 705     const uint NumSends{Device->NumAuxSends};
 706
 707     /* Get voice info */
 708     int DataPosInt{mPosition.load(std::memory_order_relaxed)};
 709     uint DataPosFrac{mPositionFrac.load(std::memory_order_relaxed)};
 710     VoiceBufferItem *BufferListItem{mCurrentBuffer.load(std::memory_order_relaxed)};
 711     VoiceBufferItem *BufferLoopItem{mLoopBuffer.load(std::memory_order_relaxed)};
 712     const uint increment{mStep};
 713     if(increment < 1) UNLIKELY
 714     {
 715         /* If the voice is supposed to be stopping but can't be mixed, just
 716          * stop it before bailing.
 717          */
 718         if(vstate == Stopping)
 719             mPlayState.store(Stopped, std::memory_order_release);
 720         return;
 721     }
 722
 723     /* If the static voice's current position is beyond the buffer loop end
 724      * position, disable looping.
 725      */
 726     if(mFlags.test(VoiceIsStatic) && BufferLoopItem)
 727     {
 728         if(DataPosInt >= 0 && static_cast<uint>(DataPosInt) >= BufferListItem->mLoopEnd)
 729             BufferLoopItem = nullptr;
 730     }
 731
 732     uint OutPos{0u};
 733
 734     /* Check if we're doing a delayed start, and we start in this update. */
 735     if(mStartTime > deviceTime) UNLIKELY
 736     {
 737         /* If the voice is supposed to be stopping but hasn't actually started
 738          * yet, make sure its stopped.
 739          */
 740         if(vstate == Stopping)
 741         {
 742             mPlayState.store(Stopped, std::memory_order_release);
 743             return;
 744         }
 745
 746         /* If the start time is too far ahead, don't bother. */
 747         auto diff = mStartTime - deviceTime;
 748         if(diff >= seconds{1})
 749             return;
 750
 751         /* Get the number of samples ahead of the current time that output
 752          * should start at. Skip this update if it's beyond the output sample
 753          * count.
 754          *
 755          * Round the start position to a multiple of 4, which some mixers want.
 756          * This makes the start time accurate to 4 samples. This could be made
 757          * sample-accurate by forcing non-SIMD functions on the first run.
 758          */
 759         seconds::rep sampleOffset{duration_cast<seconds>(diff * Device->Frequency).count()};
 760         sampleOffset = (sampleOffset+2) & ~seconds::rep{3};
 761         if(sampleOffset >= SamplesToDo)
 762             return;
 763
 764         OutPos = static_cast<uint>(sampleOffset);
 765     }
 766
 767     /* Calculate the number of samples to mix, and the number of (resampled)
 768      * samples that need to be loaded (mixing samples and decoder padding).
 769      */
 770     const uint samplesToMix{SamplesToDo - OutPos};
 771     const uint samplesToLoad{samplesToMix + mDecoderPadding};
 772
 773     /* Get a span of pointers to hold the floating point, deinterlaced,
 774      * resampled buffer data to be mixed.
 775      */
 776     std::array<float*,DeviceBase::MixerChannelsMax> SamplePointers;
 777     const al::span<float*> MixingSamples{SamplePointers.data(), mChans.size()};
 778     auto get_bufferline = [](DeviceBase::MixerBufferLine &bufline) noexcept -> float*
 779     { return bufline.data(); };
 780     std::transform(Device->mSampleData.end() - mChans.size(), Device->mSampleData.end(),
 781         MixingSamples.begin(), get_bufferline);
 782
 783     /* If there's a matching sample step and no phase offset, use a simple copy
 784      * for resampling.
 785      */
 786     const ResamplerFunc Resample{(increment == MixerFracOne && DataPosFrac == 0)
 787         ? ResamplerFunc{[](const InterpState*, const float *RESTRICT src, uint, const uint,
 788             const al::span<float> dst) { std::copy_n(src, dst.size(), dst.begin()); }}
 789         : mResampler};
 790
 791     /* UHJ2 and SuperStereo only have 2 buffer channels, but 3 mixing channels
 792      * (3rd channel is generated from decoding).
 793      */
 794     const size_t realChannels{(mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 2u
 795         : MixingSamples.size()};
 796     for(size_t chan{0};chan < realChannels;++chan)
 797     {
 798         using ResBufType = decltype(DeviceBase::mResampleData);
 799         static constexpr uint srcSizeMax{static_cast<uint>(ResBufType{}.size()-MaxResamplerEdge)};
 800
 801         const auto prevSamples = al::as_span(mPrevSamples[chan]);
 802         const auto resampleBuffer = std::copy(prevSamples.cbegin(), prevSamples.cend(),
 803             Device->mResampleData.begin()) - MaxResamplerEdge;
 804         int intPos{DataPosInt};
 805         uint fracPos{DataPosFrac};
 806
 807         /* Load samples for this channel from the available buffer(s), with
 808          * resampling.
 809          */
 810         for(uint samplesLoaded{0};samplesLoaded < samplesToLoad;)
 811         {
 812             /* Calculate the number of dst samples that can be loaded this
 813              * iteration, given the available resampler buffer size, and the
 814              * number of src samples that are needed to load it.
 815              */
 816             auto calc_buffer_sizes = [fracPos,increment](uint dstBufferSize)
 817             {
 818                 /* If ext=true, calculate the last written dst pos from the dst
 819                  * count, convert to the last read src pos, then add one to get
 820                  * the src count.
 821                  *
 822                  * If ext=false, convert the dst count to src count directly.
 823                  *
 824                  * Without this, the src count could be short by one when
 825                  * increment < 1.0, or not have a full src at the end when
 826                  * increment > 1.0.
 827                  */
 828                 const bool ext{increment <= MixerFracOne};
 829                 uint64_t dataSize64{dstBufferSize - ext};
 830                 dataSize64 = (dataSize64*increment + fracPos) >> MixerFracBits;
 831                 /* Also include resampler padding. */
 832                 dataSize64 += ext + MaxResamplerEdge;
 833
 834                 if(dataSize64 <= srcSizeMax)
 835                     return std::make_pair(dstBufferSize, static_cast<uint>(dataSize64));
 836
 837                 /* If the source size got saturated, we can't fill the desired
 838                  * dst size. Figure out how many dst samples we can fill.
 839                  */
 840                 dataSize64 = srcSizeMax - MaxResamplerEdge;
 841                 dataSize64 = ((dataSize64<<MixerFracBits) - fracPos) / increment;
 842                 if(dataSize64 < dstBufferSize)
 843                 {
 844                     /* Some resamplers require the destination being 16-byte
 845                      * aligned, so limit to a multiple of 4 samples to maintain
 846                      * alignment if we need to do another iteration after this.
 847                      */
 848                     dstBufferSize = static_cast<uint>(dataSize64) & ~3u;
 849                 }
 850                 return std::make_pair(dstBufferSize, srcSizeMax);
 851             };
 852             const auto bufferSizes = calc_buffer_sizes(samplesToLoad - samplesLoaded);
 853             const auto dstBufferSize = bufferSizes.first;
 854             const auto srcBufferSize = bufferSizes.second;
 855
 856             /* Load the necessary samples from the given buffer(s). */
 857             if(!BufferListItem)
 858             {
 859                 const uint avail{minu(srcBufferSize, MaxResamplerEdge)};
 860                 const uint tofill{maxu(srcBufferSize, MaxResamplerEdge)};
 861
 862                 /* When loading from a voice that ended prematurely, only take
 863                  * the samples that get closest to 0 amplitude. This helps
 864                  * certain sounds fade out better.
 865                  */
 866                 auto abs_lt = [](const float lhs, const float rhs) noexcept -> bool
 867                 { return std::abs(lhs) < std::abs(rhs); };
 868                 auto srciter = std::min_element(resampleBuffer, resampleBuffer+avail, abs_lt);
 869
 870                 std::fill(srciter+1, resampleBuffer+tofill, *srciter);
 871             }
 872             else
 873             {
 874                 size_t srcSampleDelay{0};
 875                 if(intPos < 0) UNLIKELY
 876                 {
 877                     /* If the current position is negative, there's that many
 878                      * silent samples to load before using the buffer.
 879                      */
 880                     srcSampleDelay = static_cast<uint>(-intPos);
 881                     if(srcSampleDelay >= srcBufferSize)
 882                     {
 883                         /* If the number of silent source samples exceeds the
 884                          * number to load, the output will be silent.
 885                          */
 886                         std::fill_n(MixingSamples[chan]+samplesLoaded, dstBufferSize, 0.0f);
 887                         std::fill_n(resampleBuffer, srcBufferSize, 0.0f);
 888                         goto skip_resample;
 889                     }
 890
 891                     std::fill_n(resampleBuffer, srcSampleDelay, 0.0f);
 892                 }
 893                 const uint uintPos{static_cast<uint>(maxi(intPos, 0))};
 894
 895                 if(mFlags.test(VoiceIsStatic))
 896                     LoadBufferStatic(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan,
 897                         mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer));
 898                 else if(mFlags.test(VoiceIsCallback))
 899                 {
 900                     const uint callbackBase{mCallbackBlockBase * mSamplesPerBlock};
 901                     const size_t bufferOffset{uintPos - callbackBase};
 902                     const size_t needSamples{bufferOffset + srcBufferSize - srcSampleDelay};
 903                     const size_t needBlocks{(needSamples + mSamplesPerBlock-1) / mSamplesPerBlock};
 904                     if(!mFlags.test(VoiceCallbackStopped) && needBlocks > mNumCallbackBlocks)
 905                     {
 906                         const size_t byteOffset{mNumCallbackBlocks*mBytesPerBlock};
 907                         const size_t needBytes{(needBlocks-mNumCallbackBlocks)*mBytesPerBlock};
 908
 909                         const int gotBytes{BufferListItem->mCallback(BufferListItem->mUserData,
 910                             &BufferListItem->mSamples[byteOffset], static_cast<int>(needBytes))};
 911                         if(gotBytes < 0)
 912                             mFlags.set(VoiceCallbackStopped);
 913                         else if(static_cast<uint>(gotBytes) < needBytes)
 914                         {
 915                             mFlags.set(VoiceCallbackStopped);
 916                             mNumCallbackBlocks += static_cast<uint>(gotBytes) / mBytesPerBlock;
 917                         }
 918                         else
 919                             mNumCallbackBlocks = static_cast<uint>(needBlocks);
 920                     }
 921                     const size_t numSamples{uint{mNumCallbackBlocks} * mSamplesPerBlock};
 922                     LoadBufferCallback(BufferListItem, bufferOffset, numSamples, mFmtType, chan,
 923                         mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer));
 924                 }
 925                 else
 926                     LoadBufferQueue(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan,
 927                         mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer));
 928             }
 929
 930             Resample(&mResampleState, al::to_address(resampleBuffer), fracPos, increment,
 931                 {MixingSamples[chan]+samplesLoaded, dstBufferSize});
 932
 933             /* Store the last source samples used for next time. */
 934             if(vstate == Playing) LIKELY
 935             {
 936                 /* Only store samples for the end of the mix, excluding what
 937                  * gets loaded for decoder padding.
 938                  */
 939                 const uint loadEnd{samplesLoaded + dstBufferSize};
 940                 if(samplesToMix > samplesLoaded && samplesToMix <= loadEnd) LIKELY
 941                 {
 942                     const size_t dstOffset{samplesToMix - samplesLoaded};
 943                     const size_t srcOffset{(dstOffset*increment + fracPos) >> MixerFracBits};
 944                     std::copy_n(resampleBuffer-MaxResamplerEdge+srcOffset, prevSamples.size(),
 945                         prevSamples.begin());
 946                 }
 947             }
 948
 949         skip_resample:
 950             samplesLoaded += dstBufferSize;
 951             if(samplesLoaded < samplesToLoad)
 952             {
 953                 fracPos += dstBufferSize*increment;
 954                 const uint srcOffset{fracPos >> MixerFracBits};
 955                 fracPos &= MixerFracMask;
 956                 intPos += srcOffset;
 957
 958                 /* If more samples need to be loaded, copy the back of the
 959                  * resampleBuffer to the front to reuse it. prevSamples isn't
 960                  * reliable since it's only updated for the end of the mix.
 961                  */
 962                 std::copy(resampleBuffer-MaxResamplerEdge+srcOffset,
 963                     resampleBuffer+MaxResamplerEdge+srcOffset, resampleBuffer-MaxResamplerEdge);
 964             }
 965         }
 966     }
 967     for(auto &samples : MixingSamples.subspan(realChannels))
 968         std::fill_n(samples, samplesToLoad, 0.0f);
 969
 970     if(mDecoder)
 971         mDecoder->decode(MixingSamples, samplesToMix, (vstate==Playing));
 972
 973     if(mFlags.test(VoiceIsAmbisonic))
 974     {
 975         auto voiceSamples = MixingSamples.begin();
 976         for(auto &chandata : mChans)
 977         {
 978             chandata.mAmbiSplitter.processScale({*voiceSamples, samplesToMix},
 979                 chandata.mAmbiHFScale, chandata.mAmbiLFScale);
 980             ++voiceSamples;
 981         }
 982     }
 983
 984     const uint Counter{mFlags.test(VoiceIsFading) ? minu(samplesToMix, 64u) : 0u};
 985     if(!Counter)
 986     {
 987         /* No fading, just overwrite the old/current params. */
 988         for(auto &chandata : mChans)
 989         {
 990             {
 991                 DirectParams &parms = chandata.mDryParams;
 992                 if(!mFlags.test(VoiceHasHrtf))
 993                     parms.Gains.Current = parms.Gains.Target;
 994                 else
 995                     parms.Hrtf.Old = parms.Hrtf.Target;
 996             }
 997             for(uint send{0};send < NumSends;++send)
 998             {
 999                 if(mSend[send].Buffer.empty())
1000                     continue;
1001
1002                 SendParams &parms = chandata.mWetParams[send];
1003                 parms.Gains.Current = parms.Gains.Target;
1004             }
1005         }
1006     }
1007
1008     auto voiceSamples = MixingSamples.begin();
1009     for(auto &chandata : mChans)
1010     {
1011         /* Now filter and mix to the appropriate outputs. */
1012         const al::span<float,BufferLineSize> FilterBuf{Device->FilteredData};
1013         {
1014             DirectParams &parms = chandata.mDryParams;
1015             const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(),
1016                 {*voiceSamples, samplesToMix}, mDirect.FilterType)};
1017
1018             if(mFlags.test(VoiceHasHrtf))
1019             {
1020                 const float TargetGain{parms.Hrtf.Target.Gain * (vstate == Playing)};
1021                 DoHrtfMix(samples, samplesToMix, parms, TargetGain, Counter, OutPos,
1022                     (vstate == Playing), Device);
1023             }
1024             else
1025             {
1026                 const float *TargetGains{(vstate == Playing) ? parms.Gains.Target.data()
1027                     : SilentTarget.data()};
1028                 if(mFlags.test(VoiceHasNfc))
1029                     DoNfcMix({samples, samplesToMix}, mDirect.Buffer.data(), parms,
1030                         TargetGains, Counter, OutPos, Device);
1031                 else
1032                     MixSamples({samples, samplesToMix}, mDirect.Buffer,
1033                         parms.Gains.Current.data(), TargetGains, Counter, OutPos);
1034             }
1035         }
1036
1037         for(uint send{0};send < NumSends;++send)
1038         {
1039             if(mSend[send].Buffer.empty())
1040                 continue;
1041
1042             SendParams &parms = chandata.mWetParams[send];
1043             const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(),
1044                 {*voiceSamples, samplesToMix}, mSend[send].FilterType)};
1045
1046             const float *TargetGains{(vstate == Playing) ? parms.Gains.Target.data()
1047                 : SilentTarget.data()};
1048             MixSamples({samples, samplesToMix}, mSend[send].Buffer,
1049                 parms.Gains.Current.data(), TargetGains, Counter, OutPos);
1050         }
1051
1052         ++voiceSamples;
1053     }
1054
1055     mFlags.set(VoiceIsFading);
1056
1057     /* Don't update positions and buffers if we were stopping. */
1058     if(vstate == Stopping) UNLIKELY
1059     {
1060         mPlayState.store(Stopped, std::memory_order_release);
1061         return;
1062     }
1063
1064     /* Update voice positions and buffers as needed. */
1065     DataPosFrac += increment*samplesToMix;
1066     const uint SrcSamplesDone{DataPosFrac>>MixerFracBits};
1067     DataPosInt  += SrcSamplesDone;
1068     DataPosFrac &= MixerFracMask;
1069
1070     uint buffers_done{0u};
1071     if(BufferListItem && DataPosInt >= 0) LIKELY
1072     {
1073         if(mFlags.test(VoiceIsStatic))
1074         {
1075             if(BufferLoopItem)
1076             {
1077                 /* Handle looping static source */
1078                 const uint LoopStart{BufferListItem->mLoopStart};
1079                 const uint LoopEnd{BufferListItem->mLoopEnd};
1080                 uint DataPosUInt{static_cast<uint>(DataPosInt)};
1081                 if(DataPosUInt >= LoopEnd)
1082                 {
1083                     assert(LoopEnd > LoopStart);
1084                     DataPosUInt = ((DataPosUInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart;
1085                     DataPosInt = static_cast<int>(DataPosUInt);
1086                 }
1087             }
1088             else
1089             {
1090                 /* Handle non-looping static source */
1091                 if(static_cast<uint>(DataPosInt) >= BufferListItem->mSampleLen)
1092                     BufferListItem = nullptr;
1093             }
1094         }
1095         else if(mFlags.test(VoiceIsCallback))
1096         {
1097             /* Handle callback buffer source */
1098             const uint currentBlock{static_cast<uint>(DataPosInt) / mSamplesPerBlock};
1099             const uint blocksDone{currentBlock - mCallbackBlockBase};
1100             if(blocksDone < mNumCallbackBlocks)
1101             {
1102                 const size_t byteOffset{blocksDone*mBytesPerBlock};
1103                 const size_t byteEnd{mNumCallbackBlocks*mBytesPerBlock};
1104                 al::byte *data{BufferListItem->mSamples};
1105                 std::copy(data+byteOffset, data+byteEnd, data);
1106                 mNumCallbackBlocks -= blocksDone;
1107                 mCallbackBlockBase += blocksDone;
1108             }
1109             else
1110             {
1111                 BufferListItem = nullptr;
1112                 mNumCallbackBlocks = 0;
1113                 mCallbackBlockBase += blocksDone;
1114             }
1115         }
1116         else
1117         {
1118             /* Handle streaming source */
1119             do {
1120                 if(BufferListItem->mSampleLen > static_cast<uint>(DataPosInt))
1121                     break;
1122
1123                 DataPosInt -= BufferListItem->mSampleLen;
1124
1125                 ++buffers_done;
1126                 BufferListItem = BufferListItem->mNext.load(std::memory_order_relaxed);
1127                 if(!BufferListItem) BufferListItem = BufferLoopItem;
1128             } while(BufferListItem);
1129         }
1130     }
1131
1132     /* Capture the source ID in case it gets reset for stopping. */
1133     const uint SourceID{mSourceID.load(std::memory_order_relaxed)};
1134
1135     /* Update voice info */
1136     mPosition.store(DataPosInt, std::memory_order_relaxed);
1137     mPositionFrac.store(DataPosFrac, std::memory_order_relaxed);
1138     mCurrentBuffer.store(BufferListItem, std::memory_order_relaxed);
1139     if(!BufferListItem)
1140     {
1141         mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1142         mSourceID.store(0u, std::memory_order_relaxed);
1143     }
1144     std::atomic_thread_fence(std::memory_order_release);
1145
1146     /* Send any events now, after the position/buffer info was updated. */
1147     const auto enabledevt = Context->mEnabledEvts.load(std::memory_order_acquire);
1148     if(buffers_done > 0 && enabledevt.test(AsyncEvent::BufferCompleted))
1149     {
1150         RingBuffer *ring{Context->mAsyncEvents.get()};
1151         auto evt_vec = ring->getWriteVector();
1152         if(evt_vec.first.len > 0)
1153         {
1154             AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
1155                 AsyncEvent::BufferCompleted)};
1156             evt->u.bufcomp.id = SourceID;
1157             evt->u.bufcomp.count = buffers_done;
1158             ring->writeAdvance(1);
1159         }
1160     }
1161
1162     if(!BufferListItem)
1163     {
1164         /* If the voice just ended, set it to Stopping so the next render
1165          * ensures any residual noise fades to 0 amplitude.
1166          */
1167         mPlayState.store(Stopping, std::memory_order_release);
1168         if(enabledevt.test(AsyncEvent::SourceStateChange))
1169             SendSourceStoppedEvent(Context, SourceID);
1170     }
1171 }
1172
1173 void Voice::prepare(DeviceBase *device)
1174 {
1175     /* Even if storing really high order ambisonics, we only mix channels for
1176      * orders up to the device order. The rest are simply dropped.
1177      */
1178     uint num_channels{(mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 3 :
1179         ChannelsFromFmt(mFmtChannels, minu(mAmbiOrder, device->mAmbiOrder))};
1180     if(num_channels > device->mSampleData.size()) UNLIKELY
1181     {
1182         ERR("Unexpected channel count: %u (limit: %zu, %d:%d)\n", num_channels,
1183             device->mSampleData.size(), mFmtChannels, mAmbiOrder);
1184         num_channels = static_cast<uint>(device->mSampleData.size());
1185     }
1186     if(mChans.capacity() > 2 && num_channels < mChans.capacity())
1187     {
1188         decltype(mChans){}.swap(mChans);
1189         decltype(mPrevSamples){}.swap(mPrevSamples);
1190     }
1191     mChans.reserve(maxu(2, num_channels));
1192     mChans.resize(num_channels);
1193     mPrevSamples.reserve(maxu(2, num_channels));
1194     mPrevSamples.resize(num_channels);
1195
1196     mDecoder = nullptr;
1197     mDecoderPadding = 0;
1198     if(mFmtChannels == FmtSuperStereo)
1199     {
1200         switch(UhjDecodeQuality)
1201         {
1202         case UhjQualityType::IIR:
1203             mDecoder = std::make_unique<UhjStereoDecoderIIR>();
1204             mDecoderPadding = UhjStereoDecoderIIR::sInputPadding;
1205             break;
1206         case UhjQualityType::FIR256:
1207             mDecoder = std::make_unique<UhjStereoDecoder<UhjLength256>>();
1208             mDecoderPadding = UhjStereoDecoder<UhjLength256>::sInputPadding;
1209             break;
1210         case UhjQualityType::FIR512:
1211             mDecoder = std::make_unique<UhjStereoDecoder<UhjLength512>>();
1212             mDecoderPadding = UhjStereoDecoder<UhjLength512>::sInputPadding;
1213             break;
1214         }
1215     }
1216     else if(IsUHJ(mFmtChannels))
1217     {
1218         switch(UhjDecodeQuality)
1219         {
1220         case UhjQualityType::IIR:
1221             mDecoder = std::make_unique<UhjDecoderIIR>();
1222             mDecoderPadding = UhjDecoderIIR::sInputPadding;
1223             break;
1224         case UhjQualityType::FIR256:
1225             mDecoder = std::make_unique<UhjDecoder<UhjLength256>>();
1226             mDecoderPadding = UhjDecoder<UhjLength256>::sInputPadding;
1227             break;
1228         case UhjQualityType::FIR512:
1229             mDecoder = std::make_unique<UhjDecoder<UhjLength512>>();
1230             mDecoderPadding = UhjDecoder<UhjLength512>::sInputPadding;
1231             break;
1232         }
1233     }
1234
1235     /* Clear the stepping value explicitly so the mixer knows not to mix this
1236      * until the update gets applied.
1237      */
1238     mStep = 0;
1239
1240     /* Make sure the sample history is cleared. */
1241     std::fill(mPrevSamples.begin(), mPrevSamples.end(), HistoryLine{});
1242
1243     if(mFmtChannels == FmtUHJ2 && !device->mUhjEncoder)
1244     {
1245         /* 2-channel UHJ needs different shelf filters. However, we can't just
1246          * use different shelf filters after mixing it, given any old speaker
1247          * setup the user has. To make this work, we apply the expected shelf
1248          * filters for decoding UHJ2 to quad (only needs LF scaling), and act
1249          * as if those 4 quad channels are encoded right back into B-Format.
1250          *
1251          * This isn't perfect, but without an entirely separate and limited
1252          * UHJ2 path, it's better than nothing.
1253          *
1254          * Note this isn't needed with UHJ output (UHJ2->B-Format->UHJ2 is
1255          * identity, so don't mess with it).
1256          */
1257         const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)};
1258         for(auto &chandata : mChans)
1259         {
1260             chandata.mAmbiHFScale = 1.0f;
1261             chandata.mAmbiLFScale = 1.0f;
1262             chandata.mAmbiSplitter = splitter;
1263             chandata.mDryParams = DirectParams{};
1264             chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1265             std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
1266         }
1267         mChans[0].mAmbiLFScale = DecoderBase::sWLFScale;
1268         mChans[1].mAmbiLFScale = DecoderBase::sXYLFScale;
1269         mChans[2].mAmbiLFScale = DecoderBase::sXYLFScale;
1270         mFlags.set(VoiceIsAmbisonic);
1271     }
1272     /* Don't need to set the VoiceIsAmbisonic flag if the device is not higher
1273      * order than the voice. No HF scaling is necessary to mix it.
1274      */
1275     else if(mAmbiOrder && device->mAmbiOrder > mAmbiOrder)
1276     {
1277         const uint8_t *OrderFromChan{Is2DAmbisonic(mFmtChannels) ?
1278             AmbiIndex::OrderFrom2DChannel().data() : AmbiIndex::OrderFromChannel().data()};
1279         const auto scales = AmbiScale::GetHFOrderScales(mAmbiOrder, device->mAmbiOrder,
1280             device->m2DMixing);
1281
1282         const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)};
1283         for(auto &chandata : mChans)
1284         {
1285             chandata.mAmbiHFScale = scales[*(OrderFromChan++)];
1286             chandata.mAmbiLFScale = 1.0f;
1287             chandata.mAmbiSplitter = splitter;
1288             chandata.mDryParams = DirectParams{};
1289             chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1290             std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
1291         }
1292         mFlags.set(VoiceIsAmbisonic);
1293     }
1294     else
1295     {
1296         for(auto &chandata : mChans)
1297         {
1298             chandata.mDryParams = DirectParams{};
1299             chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1300             std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
1301         }
1302         mFlags.reset(VoiceIsAmbisonic);
1303     }
1304 }