20 #include "alnumeric.h"
21 #include "aloptional.h"
25 #include "async_event.h"
26 #include "buffer_storage.h"
29 #include "devformat.h"
31 #include "filters/biquad.h"
32 #include "filters/nfc.h"
33 #include "filters/splitter.h"
34 #include "fmt_traits.h"
37 #include "mixer/defs.h"
38 #include "mixer/hrtfdefs.h"
39 #include "opthelpers.h"
40 #include "resampler_limits.h"
41 #include "ringbuffer.h"
43 #include "voice_change.h"
// Compile-time checks on the mixer buffer and resampler constants. The message
// on each assertion states the requirement being enforced.
54 static_assert(!(sizeof(DeviceBase::MixerBufferLine)&15),
55 "DeviceBase::MixerBufferLine must be a multiple of 16 bytes");
56 static_assert(!(MaxResamplerEdge&3), "MaxResamplerEdge is not a multiple of 4");
// BufferLineSize-1 must be at least MaxPitch.
58 static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
// INT_MAX shifted right by MixerFracBits, then divided by MaxPitch, must exceed
// BufferLineSize.
59 static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
60 "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
// Default resampler selection. Starts as Cubic and is reassigned in
// Voice::InitMixer from the matching ResamplerList entry.
62 Resampler ResamplerDefault{Resampler::Cubic};
// Shorthand for unsigned int, and unqualified access to the std::chrono names.
66 using uint = unsigned int;
67 using namespace std::chrono;
// Pointer type for an HRTF mixing routine. It takes the input samples, a float2
// accumulation buffer, the IR size, one set of filter parameters and a sample
// count.
69 using HrtfMixerFunc = void(*)(const float *InSamples, float2 *AccumSamples, const uint IrSize,
70 const MixHrtfFilter *hrtfparams, const size_t BufferSize);
// Pointer type for the blending variant, which takes both the old and the new
// filter parameters.
71 using HrtfMixerBlendFunc = void(*)(const float *InSamples, float2 *AccumSamples,
72 const uint IrSize, const HrtfFilter *oldparams, const MixHrtfFilter *newparams,
73 const size_t BufferSize);
// Active HRTF mixer entry points. Both start as the CTag instantiations and are
// reassigned in Voice::InitMixer from SelectHrtfMixer and SelectHrtfBlendMixer.
75 HrtfMixerFunc MixHrtfSamples{MixHrtf_<CTag>};
76 HrtfMixerBlendFunc MixHrtfBlendSamples{MixHrtfBlend_<CTag>};
// Chooses a MixerOutFunc by testing CPUCapFlags for NEON and then for SSE.
// NOTE(review): the return statements are not visible in this excerpt.
// Presumably each branch returns the matching instantiation, with a plain
// fallback last, the way SelectHrtfMixer does. Confirm against the full file.
78 inline MixerOutFunc SelectMixer()
81 if((CPUCapFlags&CPU_CAP_NEON))
85 if((CPUCapFlags&CPU_CAP_SSE))
// Chooses a MixerOneFunc by testing CPUCapFlags for NEON and then for SSE.
// NOTE(review): the return statements are not visible in this excerpt.
// Presumably each branch returns the matching instantiation, with a plain
// fallback last, the way SelectHrtfMixer does. Confirm against the full file.
91 inline MixerOneFunc SelectMixerOne()
94 if((CPUCapFlags&CPU_CAP_NEON))
98 if((CPUCapFlags&CPU_CAP_SSE))
// Returns the HRTF mixing routine for the running CPU: the NEONTag
// instantiation when CPU_CAP_NEON is set in CPUCapFlags, otherwise the SSETag
// instantiation when CPU_CAP_SSE is set, otherwise the CTag fallback.
104 inline HrtfMixerFunc SelectHrtfMixer()
107 if((CPUCapFlags&CPU_CAP_NEON))
108 return MixHrtf_<NEONTag>;
111 if((CPUCapFlags&CPU_CAP_SSE))
112 return MixHrtf_<SSETag>;
// No matching capability flag: use the CTag version.
114 return MixHrtf_<CTag>;
// Returns the HRTF blend-mixing routine for the running CPU: the NEONTag
// instantiation when CPU_CAP_NEON is set in CPUCapFlags, otherwise the SSETag
// instantiation when CPU_CAP_SSE is set, otherwise the CTag fallback.
117 inline HrtfMixerBlendFunc SelectHrtfBlendMixer()
120 if((CPUCapFlags&CPU_CAP_NEON))
121 return MixHrtfBlend_<NEONTag>;
124 if((CPUCapFlags&CPU_CAP_SSE))
125 return MixHrtfBlend_<SSETag>;
// No matching capability flag: use the CTag version.
127 return MixHrtfBlend_<CTag>;
// Sets the default resampler from an optional option name and installs the
// mixer function pointers returned by the Select functions.
//
// resampler: optional resampler name. It is compared case-insensitively with
// the names in ResamplerList. The names bsinc, sinc4 and sinc8 produce a
// deprecation warning, and a name with no table match is reported with ERR.
// NOTE(review): some lines of this function (the check that the optional holds
// a value, braces, and the name member of ResamplerEntry) are not visible in
// this excerpt.
132 void Voice::InitMixer(al::optional<std::string> resampler)
// Pairs an option name with the Resampler value it selects.
136 struct ResamplerEntry {
138 const Resampler resampler;
140 constexpr ResamplerEntry ResamplerList[]{
// Both none and point select the Point resampler.
141 { "none", Resampler::Point },
142 { "point", Resampler::Point },
143 { "linear", Resampler::Linear },
144 { "cubic", Resampler::Cubic },
145 { "bsinc12", Resampler::BSinc12 },
146 { "fast_bsinc12", Resampler::FastBSinc12 },
147 { "bsinc24", Resampler::BSinc24 },
148 { "fast_bsinc24", Resampler::FastBSinc24 },
151 const char *str{resampler->c_str()};
// Deprecated names are reported here. NOTE(review): the lines that substitute
// the replacement name are not visible in this excerpt.
152 if(al::strcasecmp(str, "bsinc") == 0)
154 WARN("Resampler option \"%s\" is deprecated, using bsinc12\n", str);
157 else if(al::strcasecmp(str, "sinc4") == 0 || al::strcasecmp(str, "sinc8") == 0)
159 WARN("Resampler option \"%s\" is deprecated, using cubic\n", str);
// Case-insensitive search of the table for the requested name.
163 auto iter = std::find_if(std::begin(ResamplerList), std::end(ResamplerList),
164 [str](const ResamplerEntry &entry) -> bool
165 { return al::strcasecmp(str, entry.name) == 0; });
166 if(iter == std::end(ResamplerList))
167 ERR("Invalid resampler: %s\n", str);
169 ResamplerDefault = iter->resampler;
// Install the mixer entry points picked by the Select functions.
172 MixSamplesOut = SelectMixer();
173 MixSamplesOne = SelectMixerOne();
174 MixHrtfBlendSamples = SelectHrtfBlendMixer();
175 MixHrtfSamples = SelectHrtfMixer();
// Lookup tables used by the IMA4 and MSADPCM specializations of LoadSamples.
181 /* IMA ADPCM Stepsize table */
// Indexed by the decoder step index, which LoadSamples<FmtIMA4> clamps to the
// bounds of this table.
182 constexpr int IMAStep_size[89] = {
183 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19,
184 21, 23, 25, 28, 31, 34, 37, 41, 45, 50, 55,
185 60, 66, 73, 80, 88, 97, 107, 118, 130, 143, 157,
186 173, 190, 209, 230, 253, 279, 307, 337, 371, 408, 449,
187 494, 544, 598, 658, 724, 796, 876, 963, 1060, 1166, 1282,
188 1411, 1552, 1707, 1878, 2066, 2272, 2499, 2749, 3024, 3327, 3660,
189 4026, 4428, 4871, 5358, 5894, 6484, 7132, 7845, 8630, 9493,10442,
190 11487,12635,13899,15289,16818,18500,20350,22358,24633,27086,29794,
194 /* IMA4 ADPCM Codeword decode table */
// Indexed by the 4-bit nibble. The decoder multiplies the entry by the current
// step size and divides by 8 to get the change applied to the running sample.
195 constexpr int IMA4Codeword[16] = {
196 1, 3, 5, 7, 9, 11, 13, 15,
197 -1,-3,-5,-7,-9,-11,-13,-15,
200 /* IMA4 ADPCM Step index adjust decode table */
// Indexed by the 4-bit nibble. The entry is added to the step index after each
// decoded sample.
201 constexpr int IMA4Index_adjust[16] = {
202 -1,-1,-1,-1, 2, 4, 6, 8,
203 -1,-1,-1,-1, 2, 4, 6, 8
206 /* MSADPCM Adaption table */
// Indexed by the 4-bit nibble. The decoder multiplies the running delta by the
// entry and divides by 256.
207 constexpr int MSADPCMAdaption[16] = {
208 230, 230, 230, 230, 307, 409, 512, 614,
209 768, 614, 512, 409, 307, 230, 230, 230
212 /* MSADPCM Adaption Coefficient tables */
// One coefficient pair per block predictor value. The decoder limits the
// predictor to at most 6 and uses the pair to weight its two history samples.
213 constexpr int MSADPCMAdaptionCoeff[7][2] = {
// Queues a SourceStateChange event with state Stop for the given source id.
// context: provides the async event ring buffer through mAsyncEvents.
// id: stored in the srcstate.id field of the event.
// When the ring buffer reports no writable slot, nothing is queued.
224 void SendSourceStoppedEvent(ContextBase *context, uint id)
226 RingBuffer *ring{context->mAsyncEvents.get()};
227 auto evt_vec = ring->getWriteVector();
// No room in the first write segment: return without queuing.
228 if(evt_vec.first.len < 1) return;
// Construct the event in place at the start of the writable region.
230 AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
231 AsyncEvent::SourceStateChange)};
232 evt->u.srcstate.id = id;
233 evt->u.srcstate.state = AsyncEvent::SrcState::Stop;
// Advance the write position past the one event just written.
235 ring->writeAdvance(1);
// Runs the low-pass filter, the high-pass filter, or both over src, writing the
// result to dst.
// lpfilter, hpfilter: the biquad filters to apply, taken by non-const reference.
// dst: output buffer for the filtered samples.
// type: selects which of the three process calls below is used.
// NOTE(review): the switch on type and the return statements are not visible in
// this excerpt. Presumably dst is returned when a filter ran and the input
// pointer otherwise. Confirm against the full file.
239 const float *DoFilters(BiquadFilter &lpfilter, BiquadFilter &hpfilter, float *dst,
240 const al::span<const float> src, int type)
// Low-pass filter.
250 lpfilter.process(src, dst);
// High-pass filter.
255 hpfilter.process(src, dst);
// Both filters, applied through DualBiquad.
259 DualBiquad{lpfilter, hpfilter}.process(src, dst);
// Generic loader for formats with a fixed sample size. It computes where the
// requested channel starts in the interleaved source data and delegates the
// loading into dstSamples to al::LoadSampleArray.
// srcChan: channel index within a frame.
// srcOffset: index of the first frame to load.
// srcStep: number of samples per frame, used as the stride between frames.
// samplesToLoad: number of samples passed on to al::LoadSampleArray.
// The samplesPerBlock parameter is unnamed and unused here.
266 template<FmtType Type>
267 inline void LoadSamples(float *RESTRICT dstSamples, const al::byte *src, const size_t srcChan,
268 const size_t srcOffset, const size_t srcStep, const size_t /*samplesPerBlock*/,
269 const size_t samplesToLoad) noexcept
// Size in bytes of one stored sample of this format.
271 constexpr size_t sampleSize{sizeof(typename al::FmtTypeTraits<Type>::Type)};
// Address of the first sample for this channel at the requested frame.
272 auto s = src + (srcOffset*srcStep + srcChan)*sampleSize;
274 al::LoadSampleArray<Type>(dstSamples, s, srcStep, samplesToLoad);
// IMA4 ADPCM specialization. Decodes samples of channel srcChan, starting at
// sample index srcOffset, and writes them to dstSamples as floats divided by
// 32768.
// srcStep: number of interleaved channels in each block.
// samplesPerBlock: number of samples per channel in each ADPCM block.
// samplesToLoad: number of output samples wanted.
// NOTE(review): several lines of this function (braces, the declaration of
// wrote, loop headers, the nibbleOffset increments and the lambda return) are
// not visible in this excerpt. The comments describe only what is shown.
278 inline void LoadSamples<FmtIMA4>(float *RESTRICT dstSamples, const al::byte *src,
279 const size_t srcChan, const size_t srcOffset, const size_t srcStep,
280 const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
// Bytes per block: for each channel, 4 header bytes plus one nibble for every
// sample after the first.
282 const size_t blockBytes{((samplesPerBlock-1)/2 + 4)*srcStep};
284 /* Skip to the ADPCM block containing the srcOffset sample. */
285 src += srcOffset/samplesPerBlock*blockBytes;
286 /* Calculate how many samples need to be skipped in the block. */
287 size_t skip{srcOffset % samplesPerBlock};
289 /* NOTE: This could probably be optimized better. */
292 /* Each IMA4 block starts with a signed 16-bit sample, and a signed
293 * 16-bit table index. The table index needs to be clamped.
// Both header fields are assembled from two bytes, low byte first.
295 int sample{src[srcChan*4] | (src[srcChan*4 + 1] << 8)};
296 int index{src[srcChan*4 + 2] | (src[srcChan*4 + 3] << 8)};
// Sign-extend the 16-bit values. The index is also clamped to the bounds of
// IMAStep_size.
298 sample = (sample^0x8000) - 32768;
299 index = clampi((index^0x8000) - 32768, 0, al::size(IMAStep_size)-1);
// Output the header sample, stopping if that fills the request.
303 dstSamples[wrote++] = static_cast<float>(sample) / 32768.0f;
304 if(wrote == samplesToLoad) return;
// Decodes one nibble: adjusts the running sample by codeword times step size
// over 8, clamps it to the 16-bit range, then adjusts and clamps the step index.
309 auto decode_sample = [&sample,&index](const uint nibble)
311 sample += IMA4Codeword[nibble] * IMAStep_size[index] / 8;
312 sample = clampi(sample, -32768, 32767);
314 index += IMA4Index_adjust[nibble];
315 index = clampi(index, 0, al::size(IMAStep_size)-1);
320 /* The rest of the block is arranged as a series of nibbles, contained
321 * in 4 *bytes* per channel interleaved. So every 8 nibbles we need to
322 * skip 4 bytes per channel to get the next nibbles for this channel.
324 * First, decode the samples that we need to skip in the block (will
325 * always be less than the block size). They need to be decoded despite
326 * being ignored for proper state on the remaining samples.
// Nibble data for this channel begins after the 4-byte headers of all channels.
328 const al::byte *nibbleData{src + (srcStep+srcChan)*4};
329 size_t nibbleOffset{0};
// Samples consumed from the block so far: the skipped ones plus the header sample.
330 const size_t startOffset{skip + 1};
// Even nibble offsets select the low 4 bits of a byte, odd offsets the high 4.
// wordOffset rounds the byte index down to a multiple of 4 and is scaled by
// srcStep to step over the 4-byte groups of the other channels.
333 const size_t byteShift{(nibbleOffset&1) * 4};
334 const size_t wordOffset{(nibbleOffset>>1) & ~size_t{3}};
335 const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)};
// Decode for the state change only and discard the result.
338 std::ignore = decode_sample((nibbleData[byteOffset]>>byteShift) & 15u);
341 /* Second, decode the rest of the block and write to the output, until
342 * the end of the block or the end of output.
// Limited by what remains in the block and by what remains to be written.
344 const size_t todo{minz(samplesPerBlock-startOffset, samplesToLoad-wrote)};
345 for(size_t i{0};i < todo;++i)
347 const size_t byteShift{(nibbleOffset&1) * 4};
348 const size_t wordOffset{(nibbleOffset>>1) & ~size_t{3}};
349 const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)};
352 const int result{decode_sample((nibbleData[byteOffset]>>byteShift) & 15u)};
353 dstSamples[wrote++] = static_cast<float>(result) / 32768.0f;
355 if(wrote == samplesToLoad)
// MSADPCM specialization. Decodes samples of channel srcChan, starting at
// sample index srcOffset, and writes them to dstSamples as floats divided by
// 32768.
// srcStep: number of interleaved channels in each block.
// samplesPerBlock: number of samples per channel in each ADPCM block.
// samplesToLoad: number of output samples wanted.
// NOTE(review): several lines of this function (braces, the declaration of
// wrote, loop headers, the conditions around the history outputs and the lambda
// return) are not visible in this excerpt. The comments describe only what is
// shown.
363 inline void LoadSamples<FmtMSADPCM>(float *RESTRICT dstSamples, const al::byte *src,
364 const size_t srcChan, const size_t srcOffset, const size_t srcStep,
365 const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
// Bytes per block: for each channel, 7 header bytes plus one nibble for every
// sample after the first two.
367 const size_t blockBytes{((samplesPerBlock-2)/2 + 7)*srcStep};
// Move to the block holding srcOffset and compute the offset within that block.
369 src += srcOffset/samplesPerBlock*blockBytes;
370 size_t skip{srcOffset % samplesPerBlock};
374 /* Each MS ADPCM block starts with an 8-bit block predictor, used to
375 * dictate how the two sample history values are mixed with the decoded
376 * sample, and an initial signed 16-bit delta value which scales the
377 * nibble sample value. This is followed by the two initial 16-bit
378 * sample history values.
380 const al::byte *input{src};
// The predictor is limited to 6, the last row of MSADPCMAdaptionCoeff.
381 const uint8_t blockpred{std::min(input[srcChan], uint8_t{6})};
// NOTE(review): as shown, the next three reads use the same offsets from input.
// Presumably input is advanced between them on lines that are not visible in
// this excerpt. Confirm against the full file.
383 int delta{input[2*srcChan + 0] | (input[2*srcChan + 1] << 8)};
386 int sampleHistory[2]{};
387 sampleHistory[0] = input[2*srcChan + 0] | (input[2*srcChan + 1]<<8);
389 sampleHistory[1] = input[2*srcChan + 0] | (input[2*srcChan + 1]<<8);
// Pick the coefficient pair for this block and sign-extend the 16-bit fields.
392 const auto coeffs = al::as_span(MSADPCMAdaptionCoeff[blockpred]);
393 delta = (delta^0x8000) - 32768;
394 sampleHistory[0] = (sampleHistory[0]^0x8000) - 32768;
395 sampleHistory[1] = (sampleHistory[1]^0x8000) - 32768;
397 /* The second history sample is "older", so it's the first to be
// Output the history samples, stopping as soon as the request is filled.
// NOTE(review): the conditions that choose between writing both history samples
// and only the newer one are not visible in this excerpt.
402 dstSamples[wrote++] = static_cast<float>(sampleHistory[1]) / 32768.0f;
403 if(wrote == samplesToLoad) return;
404 dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f;
405 if(wrote == samplesToLoad) return;
410 dstSamples[wrote++] = static_cast<float>(sampleHistory[0]) / 32768.0f;
411 if(wrote == samplesToLoad) return;
// Decodes one nibble: predicts from the weighted history, adds the sign-extended
// nibble scaled by delta, clamps to the 16-bit range, shifts the history, then
// rescales delta with a lower bound of 16.
416 auto decode_sample = [&sampleHistory,&delta,coeffs](const int nibble)
418 int pred{(sampleHistory[0]*coeffs[0] + sampleHistory[1]*coeffs[1]) / 256};
419 pred += ((nibble^0x08) - 0x08) * delta;
420 pred = clampi(pred, -32768, 32767);
422 sampleHistory[1] = sampleHistory[0];
423 sampleHistory[0] = pred;
425 delta = (MSADPCMAdaption[nibble] * delta) / 256;
426 delta = maxi(16, delta);
431 /* The rest of the block is a series of nibbles, interleaved per-
432 * channel. First, skip samples.
// Samples consumed from the block so far: the skipped ones plus the two history
// samples.
434 const size_t startOffset{skip + 2};
// Nibbles are interleaved by channel, so start at this channel and step by
// srcStep.
435 size_t nibbleOffset{srcChan};
// Even nibble offsets select the high 4 bits of a byte, odd offsets the low 4.
438 const size_t byteOffset{nibbleOffset>>1};
439 const size_t byteShift{((nibbleOffset&1)^1) * 4};
440 nibbleOffset += srcStep;
// Decode for the state change only and discard the result.
442 std::ignore = decode_sample((input[byteOffset]>>byteShift) & 15);
445 /* Now decode the rest of the block, until the end of the block or the
446 * dst buffer is filled.
// Limited by what remains in the block and by what remains to be written.
448 const size_t todo{minz(samplesPerBlock-startOffset, samplesToLoad-wrote)};
449 for(size_t j{0};j < todo;++j)
451 const size_t byteOffset{nibbleOffset>>1};
452 const size_t byteShift{((nibbleOffset&1)^1) * 4};
453 nibbleOffset += srcStep;
455 const int sample{decode_sample((input[byteOffset]>>byteShift) & 15)};
456 dstSamples[wrote++] = static_cast<float>(sample) / 32768.0f;
458 if(wrote == samplesToLoad)
// Runtime dispatcher that forwards to the LoadSamples specialization for the
// given sample type.
// The HANDLE_FMT(T) macro defined below produces a case label for T that calls
// LoadSamples<T> with the same arguments this function received.
// NOTE(review): the switch statement, the end of the macro and some of the
// HANDLE_FMT cases are not visible in this excerpt.
465 void LoadSamples(float *dstSamples, const al::byte *src, const size_t srcChan,
466 const size_t srcOffset, const FmtType srcType, const size_t srcStep,
467 const size_t samplesPerBlock, const size_t samplesToLoad) noexcept
469 #define HANDLE_FMT(T) case T: \
470 LoadSamples<T>(dstSamples, src, srcChan, srcOffset, srcStep, \
471 samplesPerBlock, samplesToLoad); \
476 HANDLE_FMT(FmtUByte);
477 HANDLE_FMT(FmtShort);
478 HANDLE_FMT(FmtFloat);
479 HANDLE_FMT(FmtDouble);
480 HANDLE_FMT(FmtMulaw);
483 HANDLE_FMT(FmtMSADPCM);
// Loads samples from a single buffer into voiceSamples until samplesLoaded
// reaches samplesToLoad.
// buffer: the source of sample data, length and loop points.
// dataPosInt: sample position in the buffer to start reading from.
// sampleType, srcChannel, srcStep: passed through to LoadSamples.
// Two paths are shown. The first reads what is left of the buffer and pads the
// rest of the request with a repeated sample. The second wraps the position
// into the loop range and keeps reading loop iterations until the request is
// filled.
// NOTE(review): the end of the parameter list and the condition that selects
// between the two paths are not visible in this excerpt. Presumably the choice
// depends on bufferLoopItem. Confirm against the full file.
488 void LoadBufferStatic(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
489 const size_t dataPosInt, const FmtType sampleType, const size_t srcChannel,
490 const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad,
495 /* Load what's left to play from the buffer */
496 if(buffer->mSampleLen > dataPosInt) LIKELY
498 const size_t buffer_remaining{buffer->mSampleLen - dataPosInt};
499 const size_t remaining{minz(samplesToLoad-samplesLoaded, buffer_remaining)};
500 LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
501 sampleType, srcStep, buffer->mBlockAlign, remaining);
502 samplesLoaded += remaining;
// Pad any shortfall by repeating the sample just before the fill position.
505 if(const size_t toFill{samplesToLoad - samplesLoaded})
507 auto srcsamples = voiceSamples + samplesLoaded;
508 std::fill_n(srcsamples, toFill, *(srcsamples-1));
513 const size_t loopStart{buffer->mLoopStart};
514 const size_t loopEnd{buffer->mLoopEnd};
515 ASSUME(loopEnd > loopStart);
// Positions at or past loopEnd are wrapped back into the loop range.
517 const size_t intPos{(dataPosInt < loopEnd) ? dataPosInt
518 : (((dataPosInt-loopStart)%(loopEnd-loopStart)) + loopStart)};
520 /* Load what's left of this loop iteration */
// NOTE(review): the length uses dataPosInt while the read starts at intPos. The
// two are equal only when dataPosInt is below loopEnd; otherwise the unsigned
// subtraction would wrap. Confirm callers never pass a position at or past
// loopEnd on this path.
521 const size_t remaining{minz(samplesToLoad-samplesLoaded, loopEnd-dataPosInt)};
522 LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, intPos, sampleType,
523 srcStep, buffer->mBlockAlign, remaining);
524 samplesLoaded += remaining;
526 /* Load repeats of the loop to fill the buffer. */
527 const size_t loopSize{loopEnd - loopStart};
// The loop ends once toFill becomes zero, which happens when the request is full.
528 while(const size_t toFill{minz(samplesToLoad - samplesLoaded, loopSize)})
530 LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, loopStart,
531 sampleType, srcStep, buffer->mBlockAlign, toFill);
532 samplesLoaded += toFill;
// Loads samples for a callback-fed buffer into voiceSamples until samplesLoaded
// reaches samplesToLoad.
// buffer: holds the sample storage (mSamples) and block alignment.
// dataPosInt: sample position in the buffer storage to start reading from.
// numCallbackSamples: number of samples available in the buffer storage.
// sampleType, srcChannel, srcStep: passed through to LoadSamples.
// When fewer samples are available than requested, the rest of the request is
// padded with a repeated sample.
537 void LoadBufferCallback(VoiceBufferItem *buffer, const size_t dataPosInt,
538 const size_t numCallbackSamples, const FmtType sampleType, const size_t srcChannel,
539 const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad, float *voiceSamples)
541 /* Load what's left to play from the buffer */
542 if(numCallbackSamples > dataPosInt) LIKELY
544 const size_t remaining{minz(samplesToLoad-samplesLoaded, numCallbackSamples-dataPosInt)};
545 LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
546 sampleType, srcStep, buffer->mBlockAlign, remaining);
547 samplesLoaded += remaining;
// Pad any shortfall by repeating the sample just before the fill position.
550 if(const size_t toFill{samplesToLoad - samplesLoaded})
552 auto srcsamples = voiceSamples + samplesLoaded;
553 std::fill_n(srcsamples, toFill, *(srcsamples-1));
// Loads samples from a linked queue of buffers into voiceSamples until
// samplesLoaded reaches samplesToLoad or the queue runs out.
// buffer: first queue entry to read from. Later entries are reached via mNext.
// bufferLoopItem: entry to continue from when the end of the queue is reached.
// It may be null, in which case the walk ends there.
// dataPosInt: sample position relative to the start of the first entry.
// sampleType, srcChannel, srcStep: passed through to LoadSamples.
// When the queue ends before the request is filled, the rest of the request is
// padded with a repeated sample.
// NOTE(review): the end of the parameter list and some statements inside the
// loop are not visible in this excerpt.
557 void LoadBufferQueue(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
558 size_t dataPosInt, const FmtType sampleType, const size_t srcChannel,
559 const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad,
562 /* Crawl the buffer queue to fill in the temp buffer */
563 while(buffer && samplesLoaded != samplesToLoad)
// The position lies beyond this entry: subtract its length and move on.
565 if(dataPosInt >= buffer->mSampleLen)
567 dataPosInt -= buffer->mSampleLen;
568 buffer = buffer->mNext.load(std::memory_order_acquire);
569 if(!buffer) buffer = bufferLoopItem;
// Read as much as this entry can provide, up to what is still needed.
573 const size_t remaining{minz(samplesToLoad-samplesLoaded, buffer->mSampleLen-dataPosInt)};
574 LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
575 sampleType, srcStep, buffer->mBlockAlign, remaining);
577 samplesLoaded += remaining;
578 if(samplesLoaded == samplesToLoad)
// Advance to the next entry, falling back to the loop entry at the end of the
// queue.
582 buffer = buffer->mNext.load(std::memory_order_acquire);
583 if(!buffer) buffer = bufferLoopItem;
// Pad any shortfall by repeating the sample just before the fill position.
585 if(const size_t toFill{samplesToLoad - samplesLoaded})
587 auto srcsamples = voiceSamples + samplesLoaded;
588 std::fill_n(srcsamples, toFill, *(srcsamples-1));
// Mixes one channel of input through the HRTF filters into the device
// accumulation buffer (Device->HrtfAccumData). When a fade is pending at the
// start of the output, the first part is mixed with the blend routine using
// both the old and the target filter, and the remainder with the plain routine.
// samples: input, DstBufferSize samples long.
// parms: per-channel state holding the HRTF history and the old and target
// filters. Its history and old-filter fields are updated here.
// TargetGain: gain to move toward.
// Counter: fade length in samples as used by the gain interpolation below.
// OutPos: offset into the accumulation buffer.
// NOTE(review): the end of the parameter list, the declaration of fademix, the
// statement guarding the history copy and some closing lines are not visible in
// this excerpt.
593 void DoHrtfMix(const float *samples, const uint DstBufferSize, DirectParams &parms,
594 const float TargetGain, const uint Counter, uint OutPos, const bool IsPlaying,
597 const uint IrSize{Device->mIrSize};
598 auto &HrtfSamples = Device->HrtfSourceData;
599 auto &AccumSamples = Device->HrtfAccumData;
601 /* Copy the HRTF history and new input samples into a temp buffer. */
602 auto src_iter = std::copy(parms.Hrtf.History.begin(), parms.Hrtf.History.end(),
603 std::begin(HrtfSamples));
604 std::copy_n(samples, DstBufferSize, src_iter);
605 /* Copy the last used samples back into the history buffer for later. */
607 std::copy_n(std::begin(HrtfSamples) + DstBufferSize, parms.Hrtf.History.size(),
608 parms.Hrtf.History.begin());
610 /* If fading and this is the first mixing pass, fade between the IRs. */
612 if(Counter && OutPos == 0)
// The blended part covers the whole fade or the whole buffer, whichever is shorter.
614 fademix = minu(DstBufferSize, Counter);
616 float gain{TargetGain};
618 /* The new coefficients need to fade in completely since they're
619 * replacing the old ones. To keep the gain fading consistent,
620 * interpolate between the old and new target gains given how much of
621 * the fade time this mix handles.
623 if(Counter > fademix)
625 const float a{static_cast<float>(fademix) / static_cast<float>(Counter)};
626 gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a);
// The last two initializers are 0 and gain divided by fademix. Presumably these
// are the starting gain and the per-sample gain step. Confirm against the
// MixHrtfFilter definition.
629 MixHrtfFilter hrtfparams{
630 parms.Hrtf.Target.Coeffs,
631 parms.Hrtf.Target.Delay,
632 0.0f, gain / static_cast<float>(fademix)};
633 MixHrtfBlendSamples(HrtfSamples, AccumSamples+OutPos, IrSize, &parms.Hrtf.Old, &hrtfparams,
636 /* Update the old parameters with the result. */
637 parms.Hrtf.Old = parms.Hrtf.Target;
638 parms.Hrtf.Old.Gain = gain;
// Mix whatever is left of the buffer after the blended part.
642 if(fademix < DstBufferSize)
644 const uint todo{DstBufferSize - fademix};
645 float gain{TargetGain};
647 /* Interpolate the target gain if the gain fading lasts longer than
650 if(Counter > DstBufferSize)
652 const float a{static_cast<float>(todo) / static_cast<float>(Counter-fademix)};
653 gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a);
// The final initializer spreads the gain change evenly over the todo samples.
656 MixHrtfFilter hrtfparams{
657 parms.Hrtf.Target.Coeffs,
658 parms.Hrtf.Target.Delay,
660 (gain - parms.Hrtf.Old.Gain) / static_cast<float>(todo)};
661 MixHrtfSamples(HrtfSamples+fademix, AccumSamples+OutPos, IrSize, &hrtfparams, todo);
663 /* Store the now-current gain for next time. */
664 parms.Hrtf.Old.Gain = gain;
// Mixes samples to OutBuffer with the near-field control filter applied for
// each order.
// The first output channel is mixed straight from the input. The loop then
// handles one order per iteration: the input is run through the NfcFilter
// member function for that order into Device->NfcSampleData, the result is
// mixed to the chancount channels of that order, and the output and gain
// pointers advance by chancount.
// samples: input samples. parms: supplies the current gains and the NFC filter.
// TargetGains: one target gain per output channel.
// Counter, OutPos: passed through to MixSamples.
// NOTE(review): the declaration of order, the pointer advances after the first
// mix and the statement under the final if are not visible in this excerpt.
668 void DoNfcMix(const al::span<const float> samples, FloatBufferLine *OutBuffer, DirectParams &parms,
669 const float *TargetGains, const uint Counter, const uint OutPos, DeviceBase *Device)
// Table of filter member functions indexed by order. Entry 0 is null.
671 using FilterProc = void (NfcFilter::*)(const al::span<const float>, float*);
672 static constexpr FilterProc NfcProcess[MaxAmbiOrder+1]{
673 nullptr, &NfcFilter::process1, &NfcFilter::process2, &NfcFilter::process3};
// Mix the unfiltered input to the first output channel.
675 float *CurrentGains{parms.Gains.Current.data()};
676 MixSamples(samples, {OutBuffer, 1u}, CurrentGains, TargetGains, Counter, OutPos);
// Scratch span for filtered samples, the same length as the input.
681 const al::span<float> nfcsamples{Device->NfcSampleData, samples.size()};
// The loop ends when an order has zero channels.
683 while(const size_t chancount{Device->NumChannelsPerOrder[order]})
685 (parms.NFCtrlFilter.*NfcProcess[order])(samples, nfcsamples.data());
686 MixSamples(nfcsamples, {OutBuffer, chancount}, CurrentGains, TargetGains, Counter, OutPos);
687 OutBuffer += chancount;
688 CurrentGains += chancount;
689 TargetGains += chancount;
690 if(++order == MaxAmbiOrder+1)
697 void Voice::mix(const State vstate, ContextBase *Context, const nanoseconds deviceTime,
698 const uint SamplesToDo)
700 static constexpr std::array<float,MAX_OUTPUT_CHANNELS> SilentTarget{};
702 ASSUME(SamplesToDo > 0);
704 DeviceBase *Device{Context->mDevice};
705 const uint NumSends{Device->NumAuxSends};
708 int DataPosInt{mPosition.load(std::memory_order_relaxed)};
709 uint DataPosFrac{mPositionFrac.load(std::memory_order_relaxed)};
710 VoiceBufferItem *BufferListItem{mCurrentBuffer.load(std::memory_order_relaxed)};
711 VoiceBufferItem *BufferLoopItem{mLoopBuffer.load(std::memory_order_relaxed)};
712 const uint increment{mStep};
713 if(increment < 1) UNLIKELY
715 /* If the voice is supposed to be stopping but can't be mixed, just
716 * stop it before bailing.
718 if(vstate == Stopping)
719 mPlayState.store(Stopped, std::memory_order_release);
723 /* If the static voice's current position is beyond the buffer loop end
724 * position, disable looping.
726 if(mFlags.test(VoiceIsStatic) && BufferLoopItem)
728 if(DataPosInt >= 0 && static_cast<uint>(DataPosInt) >= BufferListItem->mLoopEnd)
729 BufferLoopItem = nullptr;
734 /* Check if we're doing a delayed start, and we start in this update. */
735 if(mStartTime > deviceTime) UNLIKELY
737 /* If the voice is supposed to be stopping but hasn't actually started
738 * yet, make sure its stopped.
740 if(vstate == Stopping)
742 mPlayState.store(Stopped, std::memory_order_release);
746 /* If the start time is too far ahead, don't bother. */
747 auto diff = mStartTime - deviceTime;
748 if(diff >= seconds{1})
751 /* Get the number of samples ahead of the current time that output
752 * should start at. Skip this update if it's beyond the output sample
755 * Round the start position to a multiple of 4, which some mixers want.
756 * This makes the start time accurate to 4 samples. This could be made
757 * sample-accurate by forcing non-SIMD functions on the first run.
759 seconds::rep sampleOffset{duration_cast<seconds>(diff * Device->Frequency).count()};
760 sampleOffset = (sampleOffset+2) & ~seconds::rep{3};
761 if(sampleOffset >= SamplesToDo)
764 OutPos = static_cast<uint>(sampleOffset);
767 /* Calculate the number of samples to mix, and the number of (resampled)
768 * samples that need to be loaded (mixing samples and decoder padding).
770 const uint samplesToMix{SamplesToDo - OutPos};
771 const uint samplesToLoad{samplesToMix + mDecoderPadding};
773 /* Get a span of pointers to hold the floating point, deinterlaced,
774 * resampled buffer data to be mixed.
776 std::array<float*,DeviceBase::MixerChannelsMax> SamplePointers;
777 const al::span<float*> MixingSamples{SamplePointers.data(), mChans.size()};
778 auto get_bufferline = [](DeviceBase::MixerBufferLine &bufline) noexcept -> float*
779 { return bufline.data(); };
780 std::transform(Device->mSampleData.end() - mChans.size(), Device->mSampleData.end(),
781 MixingSamples.begin(), get_bufferline);
783 /* If there's a matching sample step and no phase offset, use a simple copy
786 const ResamplerFunc Resample{(increment == MixerFracOne && DataPosFrac == 0)
787 ? ResamplerFunc{[](const InterpState*, const float *RESTRICT src, uint, const uint,
788 const al::span<float> dst) { std::copy_n(src, dst.size(), dst.begin()); }}
791 /* UHJ2 and SuperStereo only have 2 buffer channels, but 3 mixing channels
792 * (3rd channel is generated from decoding).
794 const size_t realChannels{(mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 2u
795 : MixingSamples.size()};
796 for(size_t chan{0};chan < realChannels;++chan)
798 using ResBufType = decltype(DeviceBase::mResampleData);
799 static constexpr uint srcSizeMax{static_cast<uint>(ResBufType{}.size()-MaxResamplerEdge)};
801 const auto prevSamples = al::as_span(mPrevSamples[chan]);
802 const auto resampleBuffer = std::copy(prevSamples.cbegin(), prevSamples.cend(),
803 Device->mResampleData.begin()) - MaxResamplerEdge;
804 int intPos{DataPosInt};
805 uint fracPos{DataPosFrac};
807 /* Load samples for this channel from the available buffer(s), with
810 for(uint samplesLoaded{0};samplesLoaded < samplesToLoad;)
812 /* Calculate the number of dst samples that can be loaded this
813 * iteration, given the available resampler buffer size, and the
814 * number of src samples that are needed to load it.
816 auto calc_buffer_sizes = [fracPos,increment](uint dstBufferSize)
818 /* If ext=true, calculate the last written dst pos from the dst
819 * count, convert to the last read src pos, then add one to get
822 * If ext=false, convert the dst count to src count directly.
824 * Without this, the src count could be short by one when
825 * increment < 1.0, or not have a full src at the end when
828 const bool ext{increment <= MixerFracOne};
829 uint64_t dataSize64{dstBufferSize - ext};
830 dataSize64 = (dataSize64*increment + fracPos) >> MixerFracBits;
831 /* Also include resampler padding. */
832 dataSize64 += ext + MaxResamplerEdge;
834 if(dataSize64 <= srcSizeMax)
835 return std::make_pair(dstBufferSize, static_cast<uint>(dataSize64));
837 /* If the source size got saturated, we can't fill the desired
838 * dst size. Figure out how many dst samples we can fill.
840 dataSize64 = srcSizeMax - MaxResamplerEdge;
841 dataSize64 = ((dataSize64<<MixerFracBits) - fracPos) / increment;
842 if(dataSize64 < dstBufferSize)
844 /* Some resamplers require the destination being 16-byte
845 * aligned, so limit to a multiple of 4 samples to maintain
846 * alignment if we need to do another iteration after this.
848 dstBufferSize = static_cast<uint>(dataSize64) & ~3u;
850 return std::make_pair(dstBufferSize, srcSizeMax);
852 const auto bufferSizes = calc_buffer_sizes(samplesToLoad - samplesLoaded);
853 const auto dstBufferSize = bufferSizes.first;
854 const auto srcBufferSize = bufferSizes.second;
856 /* Load the necessary samples from the given buffer(s). */
859 const uint avail{minu(srcBufferSize, MaxResamplerEdge)};
860 const uint tofill{maxu(srcBufferSize, MaxResamplerEdge)};
862 /* When loading from a voice that ended prematurely, only take
863 * the samples that get closest to 0 amplitude. This helps
864 * certain sounds fade out better.
866 auto abs_lt = [](const float lhs, const float rhs) noexcept -> bool
867 { return std::abs(lhs) < std::abs(rhs); };
868 auto srciter = std::min_element(resampleBuffer, resampleBuffer+avail, abs_lt);
870 std::fill(srciter+1, resampleBuffer+tofill, *srciter);
874 size_t srcSampleDelay{0};
875 if(intPos < 0) UNLIKELY
877 /* If the current position is negative, there's that many
878 * silent samples to load before using the buffer.
880 srcSampleDelay = static_cast<uint>(-intPos);
881 if(srcSampleDelay >= srcBufferSize)
883 /* If the number of silent source samples exceeds the
884 * number to load, the output will be silent.
886 std::fill_n(MixingSamples[chan]+samplesLoaded, dstBufferSize, 0.0f);
887 std::fill_n(resampleBuffer, srcBufferSize, 0.0f);
891 std::fill_n(resampleBuffer, srcSampleDelay, 0.0f);
893 const uint uintPos{static_cast<uint>(maxi(intPos, 0))};
895 if(mFlags.test(VoiceIsStatic))
896 LoadBufferStatic(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan,
897 mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer));
898 else if(mFlags.test(VoiceIsCallback))
900 const uint callbackBase{mCallbackBlockBase * mSamplesPerBlock};
901 const size_t bufferOffset{uintPos - callbackBase};
902 const size_t needSamples{bufferOffset + srcBufferSize - srcSampleDelay};
903 const size_t needBlocks{(needSamples + mSamplesPerBlock-1) / mSamplesPerBlock};
904 if(!mFlags.test(VoiceCallbackStopped) && needBlocks > mNumCallbackBlocks)
906 const size_t byteOffset{mNumCallbackBlocks*mBytesPerBlock};
907 const size_t needBytes{(needBlocks-mNumCallbackBlocks)*mBytesPerBlock};
909 const int gotBytes{BufferListItem->mCallback(BufferListItem->mUserData,
910 &BufferListItem->mSamples[byteOffset], static_cast<int>(needBytes))};
912 mFlags.set(VoiceCallbackStopped);
913 else if(static_cast<uint>(gotBytes) < needBytes)
915 mFlags.set(VoiceCallbackStopped);
916 mNumCallbackBlocks += static_cast<uint>(gotBytes) / mBytesPerBlock;
919 mNumCallbackBlocks = static_cast<uint>(needBlocks);
921 const size_t numSamples{uint{mNumCallbackBlocks} * mSamplesPerBlock};
922 LoadBufferCallback(BufferListItem, bufferOffset, numSamples, mFmtType, chan,
923 mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer));
926 LoadBufferQueue(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan,
927 mFrameStep, srcSampleDelay, srcBufferSize, al::to_address(resampleBuffer));
930 Resample(&mResampleState, al::to_address(resampleBuffer), fracPos, increment,
931 {MixingSamples[chan]+samplesLoaded, dstBufferSize});
933 /* Store the last source samples used for next time. */
934 if(vstate == Playing) LIKELY
936 /* Only store samples for the end of the mix, excluding what
937 * gets loaded for decoder padding.
939 const uint loadEnd{samplesLoaded + dstBufferSize};
940 if(samplesToMix > samplesLoaded && samplesToMix <= loadEnd) LIKELY
942 const size_t dstOffset{samplesToMix - samplesLoaded};
943 const size_t srcOffset{(dstOffset*increment + fracPos) >> MixerFracBits};
944 std::copy_n(resampleBuffer-MaxResamplerEdge+srcOffset, prevSamples.size(),
945 prevSamples.begin());
950 samplesLoaded += dstBufferSize;
951 if(samplesLoaded < samplesToLoad)
953 fracPos += dstBufferSize*increment;
954 const uint srcOffset{fracPos >> MixerFracBits};
955 fracPos &= MixerFracMask;
958 /* If more samples need to be loaded, copy the back of the
959 * resampleBuffer to the front to reuse it. prevSamples isn't
960 * reliable since it's only updated for the end of the mix.
962 std::copy(resampleBuffer-MaxResamplerEdge+srcOffset,
963 resampleBuffer+MaxResamplerEdge+srcOffset, resampleBuffer-MaxResamplerEdge);
967 for(auto &samples : MixingSamples.subspan(realChannels))
968 std::fill_n(samples, samplesToLoad, 0.0f);
971 mDecoder->decode(MixingSamples, samplesToMix, (vstate==Playing));
973 if(mFlags.test(VoiceIsAmbisonic))
975 auto voiceSamples = MixingSamples.begin();
976 for(auto &chandata : mChans)
978 chandata.mAmbiSplitter.processScale({*voiceSamples, samplesToMix},
979 chandata.mAmbiHFScale, chandata.mAmbiLFScale);
984 const uint Counter{mFlags.test(VoiceIsFading) ? minu(samplesToMix, 64u) : 0u};
987 /* No fading, just overwrite the old/current params. */
988 for(auto &chandata : mChans)
991 DirectParams &parms = chandata.mDryParams;
992 if(!mFlags.test(VoiceHasHrtf))
993 parms.Gains.Current = parms.Gains.Target;
995 parms.Hrtf.Old = parms.Hrtf.Target;
997 for(uint send{0};send < NumSends;++send)
999 if(mSend[send].Buffer.empty())
1002 SendParams &parms = chandata.mWetParams[send];
1003 parms.Gains.Current = parms.Gains.Target;
1008 auto voiceSamples = MixingSamples.begin();
1009 for(auto &chandata : mChans)
1011 /* Now filter and mix to the appropriate outputs. */
1012 const al::span<float,BufferLineSize> FilterBuf{Device->FilteredData};
1014 DirectParams &parms = chandata.mDryParams;
1015 const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(),
1016 {*voiceSamples, samplesToMix}, mDirect.FilterType)};
1018 if(mFlags.test(VoiceHasHrtf))
1020 const float TargetGain{parms.Hrtf.Target.Gain * (vstate == Playing)};
1021 DoHrtfMix(samples, samplesToMix, parms, TargetGain, Counter, OutPos,
1022 (vstate == Playing), Device);
1026 const float *TargetGains{(vstate == Playing) ? parms.Gains.Target.data()
1027 : SilentTarget.data()};
1028 if(mFlags.test(VoiceHasNfc))
1029 DoNfcMix({samples, samplesToMix}, mDirect.Buffer.data(), parms,
1030 TargetGains, Counter, OutPos, Device);
1032 MixSamples({samples, samplesToMix}, mDirect.Buffer,
1033 parms.Gains.Current.data(), TargetGains, Counter, OutPos);
1037 for(uint send{0};send < NumSends;++send)
1039 if(mSend[send].Buffer.empty())
1042 SendParams &parms = chandata.mWetParams[send];
1043 const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(),
1044 {*voiceSamples, samplesToMix}, mSend[send].FilterType)};
1046 const float *TargetGains{(vstate == Playing) ? parms.Gains.Target.data()
1047 : SilentTarget.data()};
1048 MixSamples({samples, samplesToMix}, mSend[send].Buffer,
1049 parms.Gains.Current.data(), TargetGains, Counter, OutPos);
1055 mFlags.set(VoiceIsFading);
1057 /* Don't update positions and buffers if we were stopping. */
1058 if(vstate == Stopping) UNLIKELY
1060 mPlayState.store(Stopped, std::memory_order_release);
1064 /* Update voice positions and buffers as needed. */
1065 DataPosFrac += increment*samplesToMix;
1066 const uint SrcSamplesDone{DataPosFrac>>MixerFracBits};
1067 DataPosInt += SrcSamplesDone;
1068 DataPosFrac &= MixerFracMask;
1070 uint buffers_done{0u};
1071 if(BufferListItem && DataPosInt >= 0) LIKELY
1073 if(mFlags.test(VoiceIsStatic))
1077 /* Handle looping static source */
1078 const uint LoopStart{BufferListItem->mLoopStart};
1079 const uint LoopEnd{BufferListItem->mLoopEnd};
1080 uint DataPosUInt{static_cast<uint>(DataPosInt)};
1081 if(DataPosUInt >= LoopEnd)
1083 assert(LoopEnd > LoopStart);
1084 DataPosUInt = ((DataPosUInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart;
1085 DataPosInt = static_cast<int>(DataPosUInt);
1090 /* Handle non-looping static source */
1091 if(static_cast<uint>(DataPosInt) >= BufferListItem->mSampleLen)
1092 BufferListItem = nullptr;
1095 else if(mFlags.test(VoiceIsCallback))
1097 /* Handle callback buffer source */
1098 const uint currentBlock{static_cast<uint>(DataPosInt) / mSamplesPerBlock};
1099 const uint blocksDone{currentBlock - mCallbackBlockBase};
1100 if(blocksDone < mNumCallbackBlocks)
1102 const size_t byteOffset{blocksDone*mBytesPerBlock};
1103 const size_t byteEnd{mNumCallbackBlocks*mBytesPerBlock};
1104 al::byte *data{BufferListItem->mSamples};
1105 std::copy(data+byteOffset, data+byteEnd, data);
1106 mNumCallbackBlocks -= blocksDone;
1107 mCallbackBlockBase += blocksDone;
1111 BufferListItem = nullptr;
1112 mNumCallbackBlocks = 0;
1113 mCallbackBlockBase += blocksDone;
1118 /* Handle streaming source */
1120 if(BufferListItem->mSampleLen > static_cast<uint>(DataPosInt))
1123 DataPosInt -= BufferListItem->mSampleLen;
1126 BufferListItem = BufferListItem->mNext.load(std::memory_order_relaxed);
1127 if(!BufferListItem) BufferListItem = BufferLoopItem;
1128 } while(BufferListItem);
1132 /* Capture the source ID in case it gets reset for stopping. */
1133 const uint SourceID{mSourceID.load(std::memory_order_relaxed)};
1135 /* Update voice info */
1136 mPosition.store(DataPosInt, std::memory_order_relaxed);
1137 mPositionFrac.store(DataPosFrac, std::memory_order_relaxed);
1138 mCurrentBuffer.store(BufferListItem, std::memory_order_relaxed);
1141 mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1142 mSourceID.store(0u, std::memory_order_relaxed);
1144 std::atomic_thread_fence(std::memory_order_release);
1146 /* Send any events now, after the position/buffer info was updated. */
1147 const auto enabledevt = Context->mEnabledEvts.load(std::memory_order_acquire);
1148 if(buffers_done > 0 && enabledevt.test(AsyncEvent::BufferCompleted))
1150 RingBuffer *ring{Context->mAsyncEvents.get()};
1151 auto evt_vec = ring->getWriteVector();
1152 if(evt_vec.first.len > 0)
1154 AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
1155 AsyncEvent::BufferCompleted)};
1156 evt->u.bufcomp.id = SourceID;
1157 evt->u.bufcomp.count = buffers_done;
1158 ring->writeAdvance(1);
1164 /* If the voice just ended, set it to Stopping so the next render
1165 * ensures any residual noise fades to 0 amplitude.
1167 mPlayState.store(Stopping, std::memory_order_release);
1168 if(enabledevt.test(AsyncEvent::SourceStateChange))
1169 SendSourceStoppedEvent(Context, SourceID);
// Prepares this voice's per-channel state for mixing on the given device:
// sizes mChans and mPrevSamples, creates a UHJ decoder when the channel
// format calls for one, clears the sample history, and resets the dry/send
// parameters of every channel.
//
// device: supplies the ambisonic order, the sample-data line count, the UHJ
//         encoder presence, the crossover frequency, the NFC filter and the
//         aux send count read below.
//
// NOTE(review): the embedded line numbers in this block have gaps (for example
// 1194 -> 1197 and 1236 -> 1240), so statements and delimiters on the missing
// lines are not described here.
1173 void Voice::prepare(DeviceBase *device)
1175 /* Even if storing really high order ambisonics, we only mix channels for
1176 * orders up to the device order. The rest are simply dropped.
// UHJ2 and SuperStereo always use 3 channels. Every other format asks
// ChannelsFromFmt, passing minu(voice order, device order).
1178 uint num_channels{(mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 3 :
1179 ChannelsFromFmt(mFmtChannels, minu(mAmbiOrder, device->mAmbiOrder))};
// A count above the device's sample-data size is reported through ERR and
// then clamped to that size.
1180 if(num_channels > device->mSampleData.size()) UNLIKELY
1182 ERR("Unexpected channel count: %u (limit: %zu, %d:%d)\n", num_channels,
1183 device->mSampleData.size(), mFmtChannels, mAmbiOrder);
1184 num_channels = static_cast<uint>(device->mSampleData.size());
// When capacity for more than 2 channels is held and fewer channels are now
// needed, both containers are swapped with empty temporaries (presumably to
// release the larger allocation before reserving again).
1186 if(mChans.capacity() > 2 && num_channels < mChans.capacity())
1188 decltype(mChans){}.swap(mChans);
1189 decltype(mPrevSamples){}.swap(mPrevSamples);
// Reserve room for at least 2 channels, then size both to num_channels.
1191 mChans.reserve(maxu(2, num_channels));
1192 mChans.resize(num_channels);
1193 mPrevSamples.reserve(maxu(2, num_channels));
1194 mPrevSamples.resize(num_channels);
// Padding starts at 0. Each decoder case below sets it to that decoder
// type's sInputPadding.
1197 mDecoderPadding = 0;
// SuperStereo: pick the UhjStereoDecoder variant named by UhjDecodeQuality.
1198 if(mFmtChannels == FmtSuperStereo)
1200 switch(UhjDecodeQuality)
1202 case UhjQualityType::IIR:
1203 mDecoder = std::make_unique<UhjStereoDecoderIIR>();
1204 mDecoderPadding = UhjStereoDecoderIIR::sInputPadding;
1206 case UhjQualityType::FIR256:
1207 mDecoder = std::make_unique<UhjStereoDecoder<UhjLength256>>();
1208 mDecoderPadding = UhjStereoDecoder<UhjLength256>::sInputPadding;
1210 case UhjQualityType::FIR512:
1211 mDecoder = std::make_unique<UhjStereoDecoder<UhjLength512>>();
1212 mDecoderPadding = UhjStereoDecoder<UhjLength512>::sInputPadding;
// Formats accepted by IsUHJ: pick the UhjDecoder variant the same way.
1216 else if(IsUHJ(mFmtChannels))
1218 switch(UhjDecodeQuality)
1220 case UhjQualityType::IIR:
1221 mDecoder = std::make_unique<UhjDecoderIIR>();
1222 mDecoderPadding = UhjDecoderIIR::sInputPadding;
1224 case UhjQualityType::FIR256:
1225 mDecoder = std::make_unique<UhjDecoder<UhjLength256>>();
1226 mDecoderPadding = UhjDecoder<UhjLength256>::sInputPadding;
1228 case UhjQualityType::FIR512:
1229 mDecoder = std::make_unique<UhjDecoder<UhjLength512>>();
1230 mDecoderPadding = UhjDecoder<UhjLength512>::sInputPadding;
1235 /* Clear the stepping value explicitly so the mixer knows not to mix this
1236 * until the update gets applied.
1240 /* Make sure the sample history is cleared. */
1241 std::fill(mPrevSamples.begin(), mPrevSamples.end(), HistoryLine{});
1243 if(mFmtChannels == FmtUHJ2 && !device->mUhjEncoder)
1245 /* 2-channel UHJ needs different shelf filters. However, we can't just
1246 * use different shelf filters after mixing it, given any old speaker
1247 * setup the user has. To make this work, we apply the expected shelf
1248 * filters for decoding UHJ2 to quad (only needs LF scaling), and act
1249 * as if those 4 quad channels are encoded right back into B-Format.
1251 * This isn't perfect, but without an entirely separate and limited
1252 * UHJ2 path, it's better than nothing.
1254 * Note this isn't needed with UHJ output (UHJ2->B-Format->UHJ2 is
1255 * identity, so don't mess with it).
// The splitter is built from the device crossover frequency divided by
// device->Frequency (presumably the sample rate, giving a normalized value).
1257 const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)};
// Every channel gets unity HF and LF scales, a copy of the splitter, and
// default dry parameters carrying the device NFC filter. Only the first
// device->NumAuxSends send slots are reset.
1258 for(auto &chandata : mChans)
1260 chandata.mAmbiHFScale = 1.0f;
1261 chandata.mAmbiLFScale = 1.0f;
1262 chandata.mAmbiSplitter = splitter;
1263 chandata.mDryParams = DirectParams{};
1264 chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1265 std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
// The first three channels then take the decoder's W and X/Y LF scales.
// NOTE(review): indices 0-2 assume mChans has at least 3 entries, but
// num_channels can be clamped to device->mSampleData.size() above. Confirm
// that size cannot be below 3 on this path.
1267 mChans[0].mAmbiLFScale = DecoderBase::sWLFScale;
1268 mChans[1].mAmbiLFScale = DecoderBase::sXYLFScale;
1269 mChans[2].mAmbiLFScale = DecoderBase::sXYLFScale;
1270 mFlags.set(VoiceIsAmbisonic);
1272 /* Don't need to set the VoiceIsAmbisonic flag if the device is not higher
1273 * order than the voice. No HF scaling is necessary to mix it.
1275 else if(mAmbiOrder && device->mAmbiOrder > mAmbiOrder)
// Use the 2D channel-to-order table when Is2DAmbisonic accepts the format,
// otherwise the general table.
1277 const uint8_t *OrderFromChan{Is2DAmbisonic(mFmtChannels) ?
1278 AmbiIndex::OrderFrom2DChannel().data() : AmbiIndex::OrderFromChannel().data()};
1279 const auto scales = AmbiScale::GetHFOrderScales(mAmbiOrder, device->mAmbiOrder,
1282 const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)};
// Each channel's HF scale is looked up by the order stored at OrderFromChan,
// which advances by one entry per channel. The LF scale stays at unity.
1283 for(auto &chandata : mChans)
1285 chandata.mAmbiHFScale = scales[*(OrderFromChan++)];
1286 chandata.mAmbiLFScale = 1.0f;
1287 chandata.mAmbiSplitter = splitter;
1288 chandata.mDryParams = DirectParams{};
1289 chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1290 std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
1292 mFlags.set(VoiceIsAmbisonic);
// Here only the dry/send parameters are reset for every channel, and the
// ambisonic flag is cleared rather than set.
1296 for(auto &chandata : mChans)
1298 chandata.mDryParams = DirectParams{};
1299 chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1300 std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
1302 mFlags.reset(VoiceIsAmbisonic);