17 KiB
IMPORTANT - GLSL ES 3.00 Critical Rules:
- Type strictness:
intandfloatcannot be mixed directly; array indices must be ofinttype - Reserved words:
sampleis a reserved word in GLSL ES 3.00; it cannot be used as a variable name - Constant arrays: Must explicitly specify size when declaring, e.g.,
const float ARR[4] = float[4](1.,2.,3.,4.); - Integer division: In GLSL ES 3.00,
1/2evaluates to 0 (integer division); must use1.0/2.0orfloat(1)/float(2)
Sound Synthesis (Procedural Audio)
Use Cases
- Generate procedural audio using
mainSound()in ShaderToy - Synthesize melodies, chords, rhythm patterns, and complete music
- Synthesize instrument timbres: piano, bass, acid synth, percussion
- Implement audio effects: delay, reverb, distortion, filters
- Pure mathematical audio generation without external samples
Core Principles
ShaderToy sound shader four-layer architecture:
- Oscillator layer:
sin(2π·f·t), layering harmonics or FM modulation to build timbre - Envelope layer:
exp(-rate·t)+smoothstepattack, simulating strike→decay - Sequencer layer: Macro definitions / array lookup / hash pseudo-random for arranging melodies
- Effects layer: Reverb, delay, distortion, filters, and other post-processing
Key formulas:
- MIDI → frequency:
f = 440.0 × 2^((n - 69) / 12) - Sine oscillator:
y = sin(2π × freq × time) - Exponential decay:
env = exp(-decay_rate × time) - FM modulation:
y = sin(2π × f_c × t + depth × sin(2π × f_m × t))
Implementation Steps
Step 1: mainSound Entry Framework
#define TAU 6.28318530718
#define BPM 120.0
#define SPB (60.0 / BPM)
vec2 mainSound(int samp, float time) {
vec2 audio = vec2(0.0);
// Layer each instrument/track
audio *= 0.5 * smoothstep(0.0, 0.5, time); // Master volume + pop prevention
return clamp(audio, -1.0, 1.0);
}
Step 2: MIDI Note to Frequency
float noteFreq(float note) {
return 440.0 * pow(2.0, (note - 69.0) / 12.0);
}
Step 3: Basic Oscillators
float osc_sin(float t) { return sin(TAU * t); }
float osc_saw(float t) { return fract(t) * 2.0 - 1.0; }
float osc_sqr(float t) { return step(fract(t), 0.5) * 2.0 - 1.0; }
float osc_tri(float t) { return abs(fract(t) - 0.5) * 4.0 - 1.0; }
Step 4: Additive Synthesis Instrument
// Layer harmonics to build timbre; higher harmonics decay faster
float instrument_additive(float freq, float t) {
float y = 0.0;
y += 0.50 * sin(TAU * 1.00 * freq * t) * exp(-0.0015 * 1.0 * freq * t);
y += 0.30 * sin(TAU * 2.01 * freq * t) * exp(-0.0015 * 2.0 * freq * t);
y += 0.20 * sin(TAU * 4.01 * freq * t) * exp(-0.0015 * 4.0 * freq * t);
y += 0.1 * y * y * y; // Nonlinear waveshaping
y *= 0.9 + 0.1 * cos(40.0 * t); // Tremolo
y *= smoothstep(0.0, 0.01, t); // Smooth attack
return y;
}
Step 5: FM Synthesis Instrument
// FM electric piano (stereo)
vec2 fm_epiano(float freq, float t) {
vec2 f0 = vec2(freq * 0.998, freq * 1.002); // Stereo micro-detuning
// "Glass" layer - high-frequency FM, metallic attack
vec2 glass = sin(TAU * (f0 + 3.0) * t
+ sin(TAU * 14.0 * f0 * t) * exp(-30.0 * t)
) * exp(-4.0 * t);
glass = sin(glass);
// "Body" layer - low-frequency FM, warm sustained tone
vec2 body = sin(TAU * f0 * t
+ sin(TAU * f0 * t) * exp(-0.5 * t) * pow(440.0 / f0.x, 0.5)
) * exp(-t);
return (glass + body) * smoothstep(0.0, 0.001, t) * 0.1;
}
// FM generic instrument (struct parameterized)
struct Instr {
float att, fo, vibe, vphas, phas, dtun;
};
float fm_instrument(float freq, float t, float beatTime, Instr ins) {
float f = freq - beatTime * ins.dtun;
float phase = f * t * TAU;
float vibrato = cos(beatTime * ins.vibe * 3.14159 / 8.0 + ins.vphas * 1.5708);
float fm = sin(phase + vibrato * sin(phase * ins.phas));
float env = exp(-beatTime * ins.fo) * (1.0 - exp(-beatTime * ins.att));
return fm * env * (1.0 - beatTime * 0.125);
}
Step 6: Percussion Synthesis
float hash(float p) {
p = fract(p * 0.1031); p *= p + 33.33; p *= p + p; return fract(p);
}
// 909 kick drum: frequency sweep + noise click
float kick(float t) {
float phase = TAU * (60.0 * t - 512.0 * 0.01 * exp(-t / 0.01));
float body = sin(phase) * smoothstep(0.3, 0.0, t) * 1.5;
float click = sin(TAU * 8000.0 * fract(t)) * hash(t * 2000.0)
* smoothstep(0.007, 0.0, t);
return body + click;
}
// Hi-hat: noise + exponential decay. decay: 5.0=open, 15.0=closed
float hihat(float t, float decay) {
float noise = hash(floor(t * 44100.0)) * 2.0 - 1.0;
return noise * exp(-decay * t) * smoothstep(0.0, 0.02, t);
}
// Clap/snare
float clap(float t) {
float noise = hash(floor(t * 44100.0)) * 2.0 - 1.0;
return noise * smoothstep(0.1, 0.0, t);
}
Step 7: Note Sequence Arrangement
// === Method A: D() macro accumulation (good for handwritten melodies) ===
#define D(duration, note) b += float(duration); if(t > b) { x = b; n = float(note); }
float melody_macro(float time) {
float t = time / 0.18;
float n = 0.0, b = 0.0, x = 0.0;
D(10,71) D(2,76) D(3,79) D(1,78) D(2,76) D(4,83) D(2,81) D(6,78)
float freq = noteFreq(n);
float noteTime = 0.18 * (t - x);
return instrument_additive(freq, noteTime);
}
// === Method B: Array lookup (good for complex arrangements) ===
// NOTE: Array indices must be int type in GLSL ES 3.00
const float NOTES[16] = float[16](
60., 62., 64., 65., 67., 69., 71., 72.,
60., 64., 67., 72., 65., 69., 64., 60.
);
float melody_array(float time, float bpm) {
float beat = time * bpm / 60.0;
int idx = int(mod(beat, 16.0)); // IMPORTANT: Must use int() conversion
float noteTime = fract(beat);
float freq = noteFreq(NOTES[idx]);
return instrument_additive(freq, noteTime * 60.0 / bpm);
}
// === Method C: Hash pseudo-random (good for algorithmic composition) ===
float nse(float x) { return fract(sin(x * 110.082) * 19871.8972); }
float scale_filter(float note) {
float n2 = mod(note, 12.0);
if (n2==1.||n2==3.||n2==6.||n2==8.||n2==10.) return -100.0;
return note;
}
float melody_random(float time, float bpm) {
float beat = time * bpm / 60.0;
float note = 48.0 + floor(nse(floor(beat)) * 24.0);
note = scale_filter(note);
return instrument_additive(noteFreq(note), fract(beat) * 60.0 / bpm);
}
Step 8: Chord Construction
vec2 chord(float time, float root, float isMinor) {
vec2 result = vec2(0.0);
float bass = root - 24.0;
result += fm_epiano(noteFreq(bass), time, 2.0);
result += fm_epiano(noteFreq(root), time - SPB * 0.5, 1.25);
result += fm_epiano(noteFreq(root + 4.0 - isMinor), time - SPB, 1.5); // Third
result += fm_epiano(noteFreq(root + 7.0), time - SPB * 0.5, 1.25); // Fifth
result += fm_epiano(noteFreq(root + 11.0 - isMinor), time - SPB, 1.5); // Seventh
result += fm_epiano(noteFreq(root + 14.0), time - SPB, 1.5); // Ninth
return result;
}
Step 9: Delay and Reverb
// Multi-tap echo
// NOTE: "sample" is a reserved word in GLSL ES 3.00; use "samp" instead
vec2 echo_reverb(float time) {
vec2 tot = vec2(0.0);
float hh = 1.0;
for (int i = 0; i < 6; i++) {
float h = float(i) / 5.0;
float samp = get_instrument_sample(time - 0.7 * h);
tot += samp * vec2(0.5 + 0.1 * h, 0.5 - 0.1 * h) * hh;
hh *= 0.5;
}
return tot;
}
// Ping-pong stereo delay
vec2 pingpong_delay(float time) {
vec2 mx = get_stereo_sample(time) * 0.5;
float ec = 0.4, fb = 0.6, dt = 0.222;
float et = dt;
mx += get_stereo_sample(time - et) * ec * vec2(1.0, 0.5); ec *= fb; et += dt;
mx += get_stereo_sample(time - et) * ec * vec2(0.5, 1.0); ec *= fb; et += dt;
mx += get_stereo_sample(time - et) * ec * vec2(1.0, 0.5); ec *= fb; et += dt;
mx += get_stereo_sample(time - et) * ec * vec2(0.5, 1.0); ec *= fb; et += dt;
return mx;
}
Step 10: Beat and Arrangement Structure
vec2 mainSound(int samp, float time) {
vec2 audio = vec2(0.0);
float beat = time * BPM / 60.0;
float bar = beat / 4.0;
// Kick (every beat) + hi-hat (every half beat) + melody
float kickTime = mod(time, SPB);
audio += vec2(kick(kickTime) * 0.5);
float hatTime = mod(time, SPB * 0.5);
audio += vec2(hihat(hatTime, 15.0) * 0.15);
audio += vec2(melody_array(time, BPM)) * 0.3;
// Arrangement: smoothstep controls intro/outro
audio *= smoothstep(0.0, 4.0, bar); // Fade in over first 4 bars
audio *= 0.35 * smoothstep(0.0, 0.5, time);
// IMPORTANT: Array indices must be int type
// float idx = mod(beat, 16.0); // WRONG: float cannot be used as index
int idx = int(mod(beat, 16.0)); // CORRECT: int(mod(...)) conversion
return clamp(audio, -1.0, 1.0);
}
Complete Code Template
Can be pasted directly into the ShaderToy Sound tab to run. Includes FM piano melody, kick drum rhythm, and ping-pong delay.
// === Sound Synthesis Complete Template ===
#define TAU 6.28318530718
#define BPM 130.0
#define SPB (60.0 / BPM)
#define NUM_HARMONICS 4
#define ECHO_TAPS 4
#define ECHO_DELAY 0.18
#define ECHO_DECAY 0.45
float noteFreq(float note) {
return 440.0 * pow(2.0, (note - 69.0) / 12.0);
}
float hash11(float p) {
p = fract(p * 0.1031); p *= p + 33.33; p *= p + p; return fract(p);
}
float osc_tri(float t) { return abs(fract(t) - 0.5) * 4.0 - 1.0; }
float instrument(float freq, float t) {
float y = 0.0;
for (int i = 1; i <= NUM_HARMONICS; i++) {
float h = float(i);
float amp = 0.6 / h;
float decay = 0.002 * h * freq;
y += amp * sin(TAU * h * 1.003 * freq * t) * exp(-decay * t);
}
y += 0.15 * y * y * y;
y *= 0.9 + 0.1 * cos(35.0 * t);
y *= smoothstep(0.0, 0.008, t);
return y;
}
vec2 epiano(float freq, float t) {
vec2 f0 = vec2(freq * 0.998, freq * 1.002);
vec2 glass = sin(TAU * (f0 + 3.0) * t
+ sin(TAU * 14.0 * f0 * t) * exp(-30.0 * t)
) * exp(-4.0 * t);
glass = sin(glass);
vec2 body = sin(TAU * f0 * t
+ sin(TAU * f0 * t) * exp(-0.5 * t) * pow(440.0 / max(f0.x, 1.0), 0.5)
) * exp(-t);
return (glass + body) * smoothstep(0.0, 0.001, t) * 0.12;
}
float kick(float t) {
float df = 512.0, dftime = 0.01, freq = 60.0;
float phase = TAU * (freq * t - df * dftime * exp(-t / dftime));
float body = sin(phase) * smoothstep(0.3, 0.0, t) * 1.5;
float click = sin(TAU * 8000.0 * fract(t)) * hash11(t * 2048.0)
* smoothstep(0.007, 0.0, t);
return body + click;
}
float hihat(float t) {
float noise = hash11(floor(t * 44100.0)) * 2.0 - 1.0;
return noise * exp(-15.0 * t) * smoothstep(0.0, 0.002, t);
}
const float MELODY[16] = float[16](
67., 67., 72., 71., 69., 67., 64., 64.,
65., 65., 69., 67., 67., 65., 64., 62.
);
const float BASS[4] = float[4](43., 48., 45., 41.);
vec2 mainSound(int samp, float time) {
time = mod(time, 32.0 * SPB * 4.0);
vec2 audio = vec2(0.0);
float beat = time / SPB;
float bar = beat / 4.0;
// Melody
{ int idx = int(mod(beat, 16.0));
float noteTime = fract(beat) * SPB;
audio += vec2(instrument(noteFreq(MELODY[idx]), noteTime) * 0.25); }
// Bass
{ int idx = int(mod(bar, 4.0));
float noteTime = fract(bar) * SPB * 4.0;
float freq = noteFreq(BASS[idx]);
audio += vec2(osc_tri(freq * noteTime) * exp(-1.5 * noteTime)
* smoothstep(0.0, 0.01, noteTime) * 0.3); }
// Kick (every beat) + sidechain compression
{ float kt = mod(time, SPB);
float k = kick(kt) * 0.4;
audio *= min(1.0, kt * 6.0 / SPB);
audio += vec2(k); }
// Hi-hat (every half beat, panned right)
{ float ht = mod(time, SPB * 0.5);
audio += vec2(0.4, 0.6) * hihat(ht) * 0.12; }
// Ping-pong delay (melody)
{ float ec = 0.3;
for (int i = 1; i <= ECHO_TAPS; i++) {
float dt = ECHO_DELAY * float(i);
int idx = int(mod((time - dt) / SPB, 16.0));
float nt = fract((time - dt) / SPB) * SPB;
float echoed = instrument(noteFreq(MELODY[idx]), nt) * 0.25 * ec;
if (i % 2 == 0) audio += vec2(0.3, 1.0) * echoed;
else audio += vec2(1.0, 0.3) * echoed;
ec *= ECHO_DECAY;
} }
audio *= 0.4 * smoothstep(0.0, 2.0, time);
return clamp(audio, -1.0, 1.0);
}
Common Variants
Variant 1: Subtractive Synthesis / TB-303 Acid Synth
Sawtooth wave through resonant low-pass filter, cutoff frequency modulated by envelope to produce the "wow" sound.
#define NSPC 128
float lpf_response(float h, float cutoff, float reso) {
cutoff -= 20.0;
float df = max(h - cutoff, 0.0);
float df2 = abs(h - cutoff);
return exp(-0.005 * df * df) * 0.5 + exp(df2 * df2 * -0.1) * reso;
}
vec2 acid_synth(float freq, float noteTime) {
vec2 v = vec2(0.0);
float cutoff = exp(noteTime * -1.5) * 50.0 + 10.0;
float sqr = step(0.5, fract(noteTime * 4.5));
for (int i = 0; i < NSPC; i++) {
float h = float(i + 1);
float inten = 1.0 / h;
inten = mix(inten, inten * mod(h, 2.0), sqr);
inten *= lpf_response(h, cutoff, 2.2);
v.x += inten * sin((TAU + 0.01) * noteTime * freq * h);
v.y += inten * sin(TAU * noteTime * freq * h);
}
float amp = smoothstep(0.05, 0.0, abs(noteTime - 0.31) - 0.26) * exp(noteTime * -1.0);
return clamp(v * amp * 2.0, -1.0, 1.0);
}
Variant 2: IIR Biquad Filter
Time-domain IIR filter based on the Audio EQ Cookbook, supporting 7 types including low-pass/high-pass/band-pass.
float waveSaw(float freq, int samp) {
return fract(freq * float(samp) / iSampleRate) * 2.0 - 1.0;
}
vec2 widerSaw(float freq, int samp) {
int offset = int(freq) * 64;
return vec2(waveSaw(freq, samp - offset), waveSaw(freq, samp + offset));
}
void biquadLPF(float freq, float Q, float sr,
out float b0, out float b1, out float b2,
out float a0, out float a1, out float a2) {
float omega = TAU * freq / sr;
float sn = sin(omega), cs = cos(omega);
float alpha = sn / (2.0 * Q);
b0 = (1.0 - cs) * 0.5; b1 = 1.0 - cs; b2 = (1.0 - cs) * 0.5;
a0 = 1.0 + alpha; a1 = -2.0 * cs; a2 = 1.0 - alpha;
}
Variant 3: Vocal / Formant Synthesis
Vocal tract model simulating human voice by synthesizing vowels through formant frequencies and bandwidths.
float tract(float x, float formantFreq, float bandwidth) {
return sin(TAU * formantFreq * x) * exp(-bandwidth * 3.14159 * x);
}
float vowel_aah(float t, float pitch) {
float x = mod(t, 1.0 / pitch);
float aud = tract(x, 710.0, 70.0) * 0.5 // F1
+ tract(x, 1000.0, 90.0) * 0.6 // F2
+ tract(x, 2450.0, 140.0) * 0.4; // F3
return aud;
}
float fricative(float t, float formantFreq) {
return (hash11(floor(formantFreq * t) * 20.0) - 0.5) * 3.0;
}
Variant 4: Algorithmic Composition
Hash pseudo-random melody + scale quantization, multi-layer rhythmic subdivision producing fractal music structures.
vec2 noteRing(float n) {
float r = 0.5 + 0.5 * fract(sin(mod(floor(n), 32.123) * 32.123) * 41.123);
n = mod(n, 8.0);
float note = n<1.?0. : n<2.?5. : n<3.?-2. : n<4.?4. : n<5.?7. : n<6.?4. : n<7.?2. : 0.;
return vec2(note, r);
}
vec2 generativeNote(float beat) {
float b2 = floor(beat * 0.25);
return noteRing(b2 * 0.0625) + noteRing(b2 * 0.25) + noteRing(b2);
}
Variant 5: Circle of Fifths Chord Progressions
Automatically generates harmony based on the circle of fifths, advancing +7 semitones every 4 beats, alternating major/minor chords.
vec2 mainSound(int samp, float time) {
float id = floor(time / SPB / 4.0);
float offset = id * 7.0;
float minor = mod(id, 4.0) >= 3.0 ? 1.0 : 0.0;
float t = mod(time, SPB * 4.0);
float root = 57.0 + mod(offset, 12.0);
vec2 result = chord(t, root, minor);
result += vec2(0.5, 0.2) * chord(t - SPB * 0.5, root, minor);
result += vec2(0.05, 0.1) * chord(t - SPB, root, minor);
return result;
}
Performance & Composition
Performance Tips:
- Harmonic count (
NUM_HARMONICS/NSPC) is the biggest bottleneck; start with 4-8, stop when sufficient - IIR filters require looping through sample history per output sample; prefer frequency-domain methods
- Each delay tap requires recomputing the full signal chain; 4 taps = 5x computation
fract(x)is faster thanmod(x, 1.0); hoist constants out of loops- Use Common Pass to share constants; avoid redundant computation between Sound and Image
Composition Tips:
- Audio visualization: Sound output is read via
iChannel0in the Image shader for spectrum display - Raymarching sync: Common Pass defines shared timeline; Sound/Image reference it synchronously
- Particle systems: Use kick triggers to drive particle emission; share BPM/SPB for beat position calculation
- Post-processing linkage: Sidechain compression coefficients drive bloom/chromatic aberration/dithering via Common Pass
- Text overlay:
message()in Image shader renders parameter display or interaction instructions
Further Reading
For complete step-by-step tutorials, mathematical derivations, and advanced usage, see reference