Gotta Go Faster, Part 9.

This last entry in this series of posts about using the Teensy instead of the Arduino contains the full source code for my amplifier. Remember, it is quite specific to my needs and taste, but I post it as a reference. I use a Teensy 4.0 together with an ADAU1701-based DSP connected over I2S. The output is rendered on an 8 x 32 RGB LED matrix of the kind commonly found on Amazon and the like.

The difference between using the Arduino with an analog input, FHT and integer math etc. compared to the Teensy, I2S, FFT, floating point math, WS2812Serial and FastLED is like night and day. The response now is amazingly quick with a great resolution and butter smooth output. I am extremely happy with the end result. Below is the full source code listing including the support classes library.

// Teensy
#include <Audio.h>
#include <Wire.h>
#include <SPI.h>
#include <SD.h>
#include <SerialFlash.h>
// PIN (pin numbers refer to the Teensy 4.0)
const uint8_t   LED_PIN       = 1;
const uint8_t   INPUT_PINS[]  = {2, 3, 4, 5};
const uint8_t   MODE_PIN      = 6;
// pin 8 used for I2S IN1 on Teensy 4.0
// pin 13 is onboard led pin on Teensy 4.0
const uint8_t   STATUS_PIN    = 13;
const uint8_t   OUTPUT_PINS[] = {16, 17, 18, 19};
// pin 20 used for I2S LRCLK on Teensy 4.0
// pin 21 used for I2S BCLK on Teensy 4.0
// MEM (start address and slot count handed to Persistent::setup in setup())
const uint8_t   BUFFER_LEN    = 20;
const uint8_t   BUFFER_ADR    = 0x10;
// BTN (one button per entry in INPUT_PINS)
const uint8_t   NUM_INPUT     = sizeof(INPUT_PINS) / sizeof(INPUT_PINS[0]);
const uint8_t   DEF_INPUT     = 0;
// LDR (readings are filtered, clamped to LDR_MIN..LDR_MAX and mapped to 0..MAX_DIM)
const uint8_t   LDR_ADC       = 0; // pin 14
const uint16_t  LDR_MIN       = 100;
const uint16_t  LDR_MAX       = 600;
const uint8_t   MAX_DIM       = 192;
const float     LDR_ALPHA     = 0.01;
// LED
const uint8_t   XRES          = 32;
const uint8_t   YRES          = 8;
const uint16_t  NUM_LEDS      = XRES * YRES;
const uint16_t  MAX_POWER     = 4 * 1000; // milliwatts
const uint8_t   BRIGHTNESS    = 128;
#include <WS2812Serial.h>
#define USE_WS2812SERIAL
#include <FastLED.h>
// FFT (each band is drawn on two columns in displaySpectrum, hence XRES / 2)
const uint8_t   NUM_BAND      = XRES / 2;
const float     BND_ALPHA     = 0.2;
// SND
const float     SILENCE       = 0.02;
const uint8_t   PEAK_HOLD     = 32;
const float     PEAK_DECAY    = 0.02;
const float     CHN_ALPHA     = 0.9;
// ETC
const uint16_t  POWERUP_MS    = 2000;
const uint16_t  GRADIENT_MS   = 1000;
const uint32_t  SILENCE_MS    = 60000;
const uint8_t   UPDATE_FPS    = 60;
const uint8_t   UPDATE_MS     = 1000 / UPDATE_FPS; // integer division: 16 ms
const uint8_t   FIRE_FPS      = 15;
const uint8_t   FIRE_MS       = 1000 / FIRE_FPS; // integer division: 66 ms
#include "SupportClasses.h"

// Tracks the latest level reading together with a held peak value.
// NOTE(review): this listing is truncated — the body of the `hold > 0`
// branch and all closing braces are missing; restore them before compiling.
struct SoundLevel {
  float value   = 0; // most recent reading passed to update()
  float peak    = 0; // raised to value when exceeded, lowered by PEAK_DECAY otherwise
  uint8_t hold  = 0; // set to PEAK_HOLD whenever a new peak is recorded
  // update with new reading
  void update(float reading) {
    value = reading;
    if (value > peak) {
      // new peak: remember it and restart the hold period
      peak = value;
      hold = PEAK_HOLD;
    } else if (hold > 0) {
      // NOTE(review): empty in this listing — presumably counts hold down; confirm
    } else if (peak > 0) {
      // hold period over: let the peak fall back
      peak -= PEAK_DECAY;

// runtime state shared between the processing, display and callback functions
bool                    altMode       = false;
bool                    noLight       = true;
bool                    noSound       = true;
uint8_t                 dimValue      = 0;
uint8_t                 gradientHue   = 0;

// channel levels, band levels and helper objects from SupportClasses.h
SoundLevel              leftCh;
SoundLevel              rightCh;
SoundLevel              band[NUM_BAND];
Persistent<uint8_t>     selectedInput;
Timer                   fireTimer;
Timer                   silenceTimer;
Trigger                 updateTrigger;
Trigger                 gradientTrigger;
Debounce                modeButton;
Debounce                inputButton[NUM_INPUT];
FireMatrix<XRES, YRES>  fire;
CRGB                    leds[NUM_LEDS];
// Teensy Audio
// i2sSlave output 0 feeds peakLeft and mixer input 0, output 1 feeds peakRight
// and mixer input 1; the mixer output feeds fft1024
AudioInputI2Sslave      i2sSlave;
AudioMixer4             mixer;
AudioAnalyzePeak        peakLeft;
AudioAnalyzePeak        peakRight;
AudioAnalyzeFFT1024     fft1024;
AudioConnection         patchCordPeakLeft(i2sSlave, 0, peakLeft, 0);
AudioConnection         patchCordMixerLeft(i2sSlave, 0, mixer, 0);
AudioConnection         patchCordPeakRight(i2sSlave, 1, peakRight, 0);
AudioConnection         patchCordMixerRight(i2sSlave, 1, mixer, 1);
AudioConnection         patchCordFft1024(mixer, fft1024);

// Map matrix coordinates (x, y) to an index into leds[].
// Rows alternate direction: odd rows count upwards with x, even rows count
// downwards, and row 0 occupies the last XRES entries of the strip.
// Precondition: x < XRES and y < YRES; other values produce an index outside
// leds[] (the arithmetic wraps), so callers must range-check first.
uint16_t pixel(uint8_t x, uint8_t y) {
  // one past the last strip index belonging to row y
  const uint16_t rowEnd = XRES * (YRES - y);
  if (y & 0x01) {
    return rowEnd - XRES + x;   // odd row: left to right
  }
  return rowEnd - 1 - x;        // even row: right to left
}

// update with light adjusted rgb pixel
void lightAdjPixel(uint8_t x, uint8_t y, CRGB color) {
  const uint16_t index = pixel(x, y);
  leds[index] = color;
  if (noLight) leds[index].fadeLightBy(128);

// Clamp reading to the closed range [0.0, 1.0].
// Values below 0 return 0, values above 1 return 1, anything else is
// returned unchanged.
float clamp(float reading) {
  if (reading < 0) return 0;
  if (reading > 1) return 1;
  return reading;
}

// Scale a reading to a number of display pixels.
// The reading is first clamped to [0, 1]; anything at or below SILENCE maps
// to 0, otherwise the result is reading * resolution rounded to the nearest
// integer (so the maximum is resolution itself).
uint8_t scale(float reading, uint8_t resolution) {
  reading = clamp(reading);
  if (reading > SILENCE) {
    return round(reading * resolution);
  }
  return 0;
}

// Convert a linear I2S reading in [0, 1] to dBFS (20 * log10 of the magnitude).
// A reading of 1.0 gives 0 dBFS, smaller readings give negative values and a
// reading of exactly 0 gives negative infinity; callers must handle that.
float dbfs(float reading) {
  // fabsf is used instead of abs so the magnitude is taken in floating point
  // whether or not the Arduino abs macro is in effect
  return 20 * log10(fabsf(reading));
}

// Simple exponential smoothing: returns the new average after one sample.
//   input   - the new sample
//   average - the previous smoothed value
//   alpha   - smoothing factor in [0, 1]; closer to 1 means less smoothing
//             (more weight on the new sample), closer to 0 means more
//             smoothing (slower response)
float ses(float input, float average, float alpha) {
  const float delta = input - average;
  return average + alpha * delta;
}

// Read the FFT level of one display band.
// Each row of BAND_LUT holds the first and last FFT bin of a band; the bands
// get wider towards the high end of the table.
// Returns 0 for an index outside the table.
// NOTE(review): the return statement was garbled in the published listing
// ("return[index][0], BAND_LUT[index][1]);"); it is reconstructed here as a
// bin-range read on fft1024 — confirm against the original sketch.
float readBand(byte index) {
  static const int BAND_LUT[NUM_BAND][2] = {
    {  1,   1}, {  2,   3}, {  4,   5}, {  6,   8},
    {  9,  12}, { 13,  18}, { 19,  25}, { 26,  35},
    { 36,  48}, { 49,  65}, { 66,  88}, { 89, 119},
    {120, 160}, {161, 214}, {215, 287}, {288, 383},
  };
  if (index >= NUM_BAND) return 0;
  return fft1024.read(BAND_LUT[index][0], BAND_LUT[index][1]);
}

// Read every FFT band, scale the readings relative to the loudest band and
// feed the smoothed results into band[].
// NOTE(review): the closing braces of the loops, the if and the function are
// missing from this listing.
void processBands() {
  if (fft1024.available()) {
    float reading[NUM_BAND];
    float maxReading = 0;
    for (byte i = 0; i < NUM_BAND; i++) {
      reading[i] = readBand(i);
      maxReading = max(maxReading, reading[i]);
    // get scale to max reading factor
    // (the factor is 0 when the loudest band is at or below SILENCE)
    float f = maxReading > SILENCE ? 1.0 / maxReading : 0;
    for (byte i = 0; i < NUM_BAND; i++) {
      // filter and scale reading to max reading
      band[i].update(ses(reading[i] * f, band[i].value, BND_ALPHA));

// Update leftCh and rightCh once both peak analyzers have data available.
// NOTE(review): this listing is damaged — the first argument of both ses()
// calls is missing (presumably the peakLeft/peakRight readings; confirm), as
// are the body of the silence check and all closing braces.
void processChannels() {
  if (peakLeft.available() && peakRight.available()) {
    leftCh.update(ses(, leftCh.value, CHN_ALPHA));
    rightCh.update(ses(, rightCh.value, CHN_ALPHA));
    // both channels are above the silence threshold
    if (leftCh.value > SILENCE && rightCh.value > SILENCE) {

// Sample the LDR, smooth the reading and derive dimValue and noLight from it.
// dimValue is MAX_DIM at or below LDR_MIN and 0 at or above LDR_MAX.
// NOTE(review): the closing brace is missing from this listing.
void processLdr() {
  // running average kept across calls; starts at 0
  static float avg = 0;
  int value = analogRead(LDR_ADC);
  avg = ses(value, avg, LDR_ALPHA);                       // filter ldr reading
  value = round(avg);
  value = constrain(value, LDR_MIN, LDR_MAX);             // clamp to ldr min-max
  value = map(value, LDR_MIN, LDR_MAX, 0, MAX_DIM);       // map value to min-max
  dimValue = MAX_DIM - value;                             // update based on ldr value
  noLight = (dimValue == MAX_DIM);                        // is it pitch black?

// Copy the fire matrix colours to the led buffer when fireTimer has expired.
// NOTE(review): closing braces are missing from this listing, and no call
// that advances the fire or restarts fireTimer is visible — confirm against
// the original sketch.
void displayFire() {
  if (fireTimer.expired()) {
    for (byte x = 0; x < XRES; x++) {
      for (byte y = 0; y < YRES; y++) {
        lightAdjPixel(x, y, fire.color(x, y));

// Draw one channel as a horizontal headroom meter on rows y .. y + 3, using
// every second column (odd x only).
// NOTE(review): closing braces are missing from this listing; miColor and
// loColor end up equal to hiColor in the visible code, which suggests
// statements were lost between the assignments — confirm.
void displayChannel(SoundLevel channel, byte y) {
  // column thresholds for the colour zones: green below LO, red above HI
  static const uint8_t HI = (XRES / 4) * 3;
  static const uint8_t LO = XRES / 2;
  // get dBFS as a positive number
  // NOTE(review): dbfs() of a zero reading is infinite; confirm how the
  // conversion to uint16_t behaves on the target
  uint16_t peak = round(abs(dbfs(channel.peak)));
  uint16_t level = round(abs(dbfs(channel.value)));
  // constrain to max resolution and convert to headroom
  peak = XRES - constrain(peak, 0, XRES);
  level = XRES - constrain(level, 0, XRES);
  CRGB hiColor;
  CRGB miColor;
  CRGB loColor;
  for (byte x = 1; x < XRES; x += 2) {
    hiColor = x < LO ? CRGB::Green : x > HI ? CRGB::Red : CRGB::Yellow;
    miColor = hiColor;
    loColor = miColor;
    // first row shows the peak only
    if (peak >= x) {
      lightAdjPixel(x, y, loColor);
    // remaining rows show the level, or the peak where the level does not reach
    if (level >= x) {
      lightAdjPixel(x, y + 1, hiColor);
      lightAdjPixel(x, y + 2, hiColor);
      lightAdjPixel(x, y + 3, miColor);
    } else if (peak >= x) {
      lightAdjPixel(x, y + 1, loColor);
      lightAdjPixel(x, y + 2, loColor);
      lightAdjPixel(x, y + 3, loColor);

// Draw both channel meters: left starting at row YRES / 2, right at row 0.
// NOTE(review): the closing brace is missing from this listing.
void displayHeadroom() {
  static const uint8_t UPPER = YRES / 2;
  static const uint8_t LOWER = 0;
  // left channel on upper half
  displayChannel(leftCh, UPPER);
  // right channel on lower half
  displayChannel(rightCh, LOWER);;

// Draw the spectrum: each band uses two adjacent columns, the first for the
// current level and the second for the held peak.
// NOTE(review): closing braces are missing from this listing.
void displaySpectrum() {
  // hue offset between neighbouring bands
  static const uint8_t HUE_STEP = 32 / NUM_BAND;
  uint8_t level;
  uint8_t peak;
  uint8_t hue;
  uint8_t x;
  uint8_t y;
  CHSV hiColor;
  CHSV loColor;
  for (byte i = 0; i < NUM_BAND; i++) {
    // first of the two columns belonging to band i
    x = i << 1;
    peak = scale(band[i].peak, YRES);
    level = scale(band[i].value, YRES);
    // uint8_t arithmetic, so the hue wraps around
    hue = gradientHue - (i * HUE_STEP);
    // same hue for both columns, the peak column at lower value (127 vs 255)
    hiColor = CHSV(hue, 255, 255);
    loColor = CHSV(hue, 255, 127);
    for (y = 0; y < YRES; y++) {
      if (level > y) {
        lightAdjPixel(x, y, hiColor);
      if (peak > y) {
        lightAdjPixel(x + 1, y, loColor);

// set selected output and save to eeprom
// Drives the selected OUTPUT_PINS entry HIGH and all others LOW; a selection
// outside 0 .. NUM_INPUT - 1 falls back to DEF_INPUT.
// NOTE(review): the body of the final if (presumably storing the selection
// in selectedInput; confirm) and all closing braces are missing from this
// listing.
void setOutput(uint8_t selection) {
  selection = selection < NUM_INPUT ? selection : DEF_INPUT;
  for (byte i = 0; i < NUM_INPUT; i++) {
    if (selection == i) {
      digitalWrite(OUTPUT_PINS[i], HIGH);
    } else {
      digitalWrite(OUTPUT_PINS[i], LOW);
  // only act when the selection differs from the stored one
  if (selection != selectedInput.get()) {

// input selection callback
// Registered with each inputButton in setup(); looks up which INPUT_PINS
// entry the event came from.
// NOTE(review): the action taken for the matching pin and all closing braces
// are missing from this listing.
void inputCallback(uint8_t pin, uint8_t mode) {
  // ignore events reporting LOW
  if (mode == LOW) return;
  for (byte i = 0; i < NUM_INPUT; i++) {
    if (pin == INPUT_PINS[i]) {

// mode selection callback
// Registered with modeButton in setup(); altMode follows the reported state
// (true while it is HIGH).
// NOTE(review): the closing brace is missing from this listing.
void modeCallback(uint8_t pin, uint8_t mode) {
  altMode = (mode == HIGH);

// gradient hue callback
// Registered with gradientTrigger in setup() at an interval of GRADIENT_MS.
// NOTE(review): the body and closing brace are missing from this listing.
void gradientCallback() {

// Display update callback, registered with updateTrigger in setup() at an
// interval of UPDATE_MS. Branches on silenceTimer and altMode.
// NOTE(review): the bodies of all three branches and the closing braces are
// missing from this listing.
void updateCallback() {
  if (silenceTimer.expired()) {
  } else {
    if (altMode) {
    } else {

// Arduino entry point: configures pins, buttons, persisted input selection,
// mixer gains, the led driver and the periodic triggers.
// NOTE(review): closing braces are missing from this listing and some steps
// announced by the comments below have no code after them.
void setup() {
  // turn on onboard led
  // NOTE(review): no pinMode(STATUS_PIN, OUTPUT) is visible before this write — confirm
  digitalWrite(STATUS_PIN, HIGH);
  // init output pins and input buttons
  modeButton.setup(MODE_PIN, modeCallback);
  for (byte i = 0; i < NUM_INPUT; i++) {
    pinMode(OUTPUT_PINS[i], OUTPUT);
    inputButton[i].setup(INPUT_PINS[i], inputCallback);
  // get persisted data
  selectedInput.setup(DEF_INPUT, BUFFER_ADR, BUFFER_LEN);
  // output selected input
  // init sound system
  // both channels enter the mixer at half gain
  mixer.gain(0, 0.5);
  mixer.gain(1, 0.5);
  // init leds
  //FastLED.addLeds<NEOPIXEL, LED_PIN>(leds, NUM_LEDS);
  FastLED.addLeds<WS2812SERIAL, LED_PIN, BRG>(leds, NUM_LEDS);
  // init timers and triggers
  updateTrigger.setup(UPDATE_MS, updateCallback);
  gradientTrigger.setup(GRADIENT_MS, gradientCallback);
  // turn off onboard led
  digitalWrite(STATUS_PIN, LOW);

// Arduino main loop.
// NOTE(review): the loop body and everything after it are missing from this
// listing; only the iteration over the NUM_INPUT buttons is visible.
void loop() {
  for (byte i = 0; i < NUM_INPUT; i++) {

#ifndef SupportClasses_h
#define SupportClasses_h
#include "Arduino.h"
#include "EEPROM.h"
#include <FastLED.h>

// simple class for wear level storage in EEPROM
// The value is stored in a ring of blocks; each block is one status byte
// followed by sizeof(T) data bytes. The block whose status byte is BLOCK_MARK
// holds the current value, and every put() moves the mark to the next block.
// NOTE(review): access specifiers, closing braces and some statements are
// missing from this listing; restore them before compiling.
template <class T> 
class Persistent {
    static const uint8_t BLOCK_MARK = 0xfe;
    static const uint8_t BLOCK_FREE = 0xff;
    uint8_t blockSize;
    uint8_t blockNum;
    int blockAddr;
    int startAddr;
    int endAddr;
    T value;
    // seek last block mark and return address if found else -1
    int seek() {
      uint8_t mark;
      int result = -1;
      for (int addr = startAddr; addr < endAddr; addr += blockSize) {
        mark = EEPROM[addr];
        if (mark == BLOCK_MARK) {
          result = addr; // block mark found
        } else if (mark != BLOCK_FREE) {
          // a status byte that is neither mark nor free
          result = -1; // unformatted
      return result;
    // mark all status bytes as free
    // NOTE(review): the loop body is missing from this listing
    void format() {
      for (int addr = startAddr; addr < endAddr; addr += blockSize) {
    // default data, start address and ring buffer slots
    void setup(T data, int address, uint8_t slots) {
      value = data;
      blockNum = slots;
      // one status byte plus the payload
      blockSize = sizeof(T) + 1;
      startAddr = address;
      endAddr = startAddr + blockNum * blockSize;
      // never run past the end of the EEPROM
      if (endAddr > EEPROM.length()) endAddr = EEPROM.length();
      blockAddr = seek();
      if (blockAddr < 0) {
        // unformatted, format ring buffer
        blockAddr = startAddr;
        // put default data into ring buffer
      } else {
        // get stored data from EEPROM as value
        EEPROM.get(blockAddr + 1, value);
    // default data with a ten slot ring buffer from beginning of EEPROM
    void setup(T data) {
      setup(data, 0, 10);
    // put data into wear level ring buffer
    void put(T data) {
      // setup() has not found or created a block yet
      if (blockAddr < 0) return;
      int nextAddr = blockAddr + blockSize;
      // wrap around to the first block
      if (nextAddr >= endAddr) nextAddr = startAddr;
      // put new data into ring buffer
      EEPROM.put(nextAddr + 1, data);
      // update with new mark
      EEPROM.update(nextAddr, BLOCK_MARK);
      // clear old mark after new update
      EEPROM.update(blockAddr, BLOCK_FREE);
      blockAddr = nextAddr;
      // store data as value
      value = data;
    // get data from wear level ring buffer
    T get() {
      // get stored value instead of reading from EEPROM
      return value;
    // return start address
    int begin() {
      return startAddr;
    // return end address
    int end() {
      return endAddr;
    // return length of ring buffer
    uint16_t length() {
      return blockNum * blockSize;

// NOTE(review): access specifiers and closing braces are missing from this
// listing.
class Timer {
    // a simple timer class to check if time set has expired
    unsigned long started  = 0; // millis() value captured by reset()
    unsigned long timeout  = 0; // duration in milliseconds
    // set the timeout in milliseconds
    void set(unsigned long ms) {
      timeout = ms;
    // restart the timer from now
    void reset() {
      started = millis();
    // true once at least timeout milliseconds have elapsed since reset()
    bool expired() {
      return elapsed() >= timeout;
    // milliseconds since reset()
    unsigned long elapsed() {
      return millis() - started;

// trigger callback function
typedef void (*trigger_cb)();

// NOTE(review): access specifiers and closing braces are missing from this
// listing; setup() stores only the callback in the visible code and leaves
// its ms argument unused, so a statement has probably been lost — confirm.
class Trigger {
    // a simple trigger class with callback on expiry
    trigger_cb callback = NULL;
    unsigned long started   = 0; // millis() value of the last reset or firing
    unsigned long interval  = 0; // milliseconds between callbacks
    // register the callback to invoke
    void setup(unsigned long ms, trigger_cb callback) {
      this->callback = callback;
    // set the interval in milliseconds
    void set(unsigned long ms) {
      interval = ms;
    // restart the interval from now
    void reset() {
      started = millis();
    // call regularly: invokes the callback once the interval has elapsed and
    // starts the next interval
    void update() {
      unsigned long now = millis();
      if (now - started >= interval) {
        if (callback) callback();
        started = now;

// debounce callback function, called with pin and state (HIGH = closed, LOW = open)
typedef void (*debounce_cb)(uint8_t, uint8_t);

// NOTE(review): access specifiers and closing braces are missing from this
// listing.
class Debounce {
    // debounces a normally open (NO) switch connected for internal pullup resistor
    static const uint8_t DEF_MS   = 35; // default debounce time in milliseconds
    debounce_cb callback          = NULL;
    uint8_t lastState             = LOW;
    unsigned long lastChangedTime = 0;
    uint8_t debounceTime;
    uint8_t pin;
    // configure pin, callback and debounce time; the pin is set to INPUT_PULLUP
    void setup(uint8_t pin, debounce_cb callback, uint8_t ms) {
      this->pin = pin;
      this->callback = callback;
      this->debounceTime = ms;
      pinMode(pin, INPUT_PULLUP);
    // same, with the default debounce time
    void setup(uint8_t pin, debounce_cb callback) {
      setup(pin, callback, DEF_MS);
    // same, without a callback
    void setup(uint8_t pin) {
      setup(pin, NULL, DEF_MS);
    // call regularly: samples the pin once the debounce time has passed since
    // the last change and reports a changed state through the callback
    void update() {
      unsigned long now = millis();
      if (now - lastChangedTime >= debounceTime) {
        // the reading is inverted before it is stored and reported
        uint8_t currentState = !digitalRead(pin);
        if (currentState == lastState) return;
        lastChangedTime = now;
        lastState = currentState;
        if (callback) callback(pin, currentState);
    // state is LOW if switch is open and HIGH if closed
    uint8_t state() {
      return lastState;

// NOTE(review): access specifiers, closing braces and the bodies of the
// branches after the "state == newState" check in update() are missing from
// this listing.
class EmaFilter {
    // an exponential moving average filter using only integer math
    // supports up to 24-bit inputs and takes an alpha from 0 to 255
    // an alpha of 0 means the filtered value changes very slowly
    // an alpha of 255 means the filter returns the value of the input
    static const uint32_t MAX_VAL = 0xFFFFFF; // 24-bit max
    bool noState = true;
    uint32_t state = 0;
    // feed a new value; the first value after construction or reset() is
    // taken as the state directly
    uint32_t update(uint32_t value, uint8_t alpha) {
      if (noState) {
        return set(value);
      } else {
        value = value > MAX_VAL ? MAX_VAL : value;
        // the weights (alpha + 1) and (255 - alpha) add up to 256
        uint32_t newState = (value * (alpha + 1) + (state * (255 - alpha))) / 256;
        // the state did not move although the input differs from it
        if (state == newState && value != newState) {
          if (value > newState) {
          } else if (value < newState) {
        state = newState;
      return get();
    // clear the state; the next update() takes its input as the state
    uint32_t reset() {
      state = 0;
      noState = true;
      return get();
    // force the state to value (limited to MAX_VAL)
    uint32_t set(uint32_t value) {
      state = value > MAX_VAL ? MAX_VAL : value;
      noState = false;
      return get();
    // current filtered value
    uint32_t get() {
      return state;

// Fire effect on a W x H matrix: heat points are stored as 4 bit values and
// a small set of flares adds local hot spots.
// NOTE(review): access specifiers and closing braces are missing from this
// listing.
template <int16_t W, int16_t H> 
class FireMatrix {
    // a hot spot at (x, y) with its current heat
    struct Flare {
      int16_t x;
      int16_t y;
      uint8_t heat;
    // width of the fire matrix
    // (at least 8, even when W is smaller)
    static const int16_t WIDTH  = W > 8 ? W : 8;
    // height of the fire matrix
    // (at least 8, even when H is smaller)
    static const int16_t HEIGHT = H > 8 ? H : 8;
    // max number of flares
    static const uint8_t FLARES = W / 4;
    // array of flares
    Flare flare[FLARES];
    // fire matrix 4 bit heat points
    // (two points per byte, hence WIDTH / 2 columns)
    uint8_t heatPoint[WIDTH / 2][HEIGHT];
    // number of flares
    uint8_t flares = 0;

    // store a 4 bit heat point
    // Two horizontally adjacent points share one byte of heatPoint: even x is
    // kept in the upper nibble, odd x in the lower nibble. The nibble must be
    // chosen by x (not y) because the byte is chosen by x / 2; otherwise
    // columns 2k and 2k + 1 overwrite each other.
    // Coordinates outside the matrix are ignored; only the low 4 bits of heat
    // are stored.
    void putHeat(int16_t x, int16_t y, uint8_t heat) {
      if (x < 0 || x >= WIDTH || y < 0 || y >= HEIGHT) return;
      uint8_t &cell = heatPoint[x >> 1][y];
      if (x & 1) {
        cell = (cell & 0xf0) | (heat & 0x0f);
      } else {
        cell = (cell & 0x0f) | ((heat & 0x0f) << 4);
      }
    }

    // retrieve a 4 bit heat point
    // Uses the same packing as putHeat(): even x in the upper nibble, odd x
    // in the lower nibble. Returns 0 for coordinates outside the matrix.
    uint8_t getHeat(int16_t x, int16_t y) {
      if (x < 0 || x >= WIDTH || y < 0 || y >= HEIGHT) return 0;
      const uint8_t cell = heatPoint[x >> 1][y];
      return (x & 1) ? (cell & 0x0f) : (cell >> 4);
    }

    // heat-up flare and update heat points
    // Heat falls off with distance from the flare centre; points are only
    // ever raised, never lowered.
    // NOTE(review): closing braces are missing from this listing.
    void heatFlare(uint8_t index) {
      Flare f = flare[index];
      // half the side length of the square region visited around the flare
      int16_t b = f.heat * 10 / cooling + 1;
      for (int16_t x = (f.x - b); x < (f.x + b); x++) {
        for (int16_t y = (f.y - b); y < (f.y + b); y++) {
          if (x >= 0 && x < WIDTH && y >= 0 && y < HEIGHT) {
            // heat lost at this distance: cooling / 10 per unit, rounded
            int16_t d = (cooling * sqrt16((f.x - x) * (f.x - x) + (f.y - y) * (f.y - y)) + 5) / 10;
            uint8_t n = f.heat > d ? f.heat - d : 0;
            if (n > getHeat(x, y)) { // can only get brighter
              putHeat(x, y, n);

    // cool down flare and delete it from flare array if out
    // NOTE(review): no statement that lowers f.heat or decrements flares is
    // visible in this listing, and the closing braces are missing — confirm
    // against the original source.
    void coolFlare(uint8_t index) {
      Flare f = flare[index];
      if (f.heat > 0) {
        flare[index] = f;
      } else {
        // flare is out
        // close the gap by shifting the following flares down one slot
        for (int16_t i = index + 1; i < flares; i++) {
          flare[i - 1] = flare[i];

    // try to ignite new flare if there is room in flare array
    // A flare is created with a chance of `sparking` percent per call.
    // NOTE(review): no increment of flares is visible after the new flare is
    // stored, and the closing braces are missing from this listing — confirm.
    void sparkFlare() {
      if (flares < FLARES && random8(100) < sparking) {
        Flare f;
        f.x = random16(0, WIDTH);
        f.y = random16(0, (HEIGHT / 8) + 1);
        f.heat = 10;
        flare[flares] = f;

    // default palette based on FastLED HeatColors
    // NOTE(review): the closing ");" of the initializer is missing from this
    // listing.
    CRGBPalette16 palette = CRGBPalette16(
                              0x000000, 0x330000, 0x660000, 0x990000, 0xCC0000, 0xFF0000, 0xFF3300, 0xFF6600,
                              0xFF9900, 0xFFCC00, 0xFFFF00, 0xFFFF33, 0xFFFF66, 0xFFFF99, 0xFFFFCC, 0xFFFFFF
    // flame cool-off rate, default 14
    uint8_t cooling = 14;
    // chance of sparking new flame in percent
    // NOTE(review): the original comment said "default 40" but the value is 30
    uint8_t sparking = 30;

    // call periodically to update fire matrix
    // NOTE(review): the flare loop body, the spark call announced by the
    // last comment and all closing braces are missing from this listing.
    void update() {
      int16_t x, y;
      // move all existing heat points up the display and cool off
      // (works from the top row down so each row is read before it is overwritten)
      for (y = HEIGHT - 1; y > 0; y--) {
        for (x = 0; x < WIDTH; x++) {
          uint8_t h = getHeat(x, y - 1);
          putHeat(x, y, h > 0 ? h - 1 : 0);
      // heat-up the bottom row
      for (x = 0; x < WIDTH; x++) {
        putHeat(x, 0, random8(5, 9));
      // glow and cool off flares
      for (x = 0; x < flares; x++) {
        // heat-up flare
        // cool-down flare
      // try spark a new flare

    // get color from fire matrix
    // Returns the palette colour for the heat stored at (x, y), or black for
    // coordinates outside the WIDTH x HEIGHT matrix. y is checked against
    // HEIGHT (the listing compared it with WIDTH).
    // NOTE(review): 24 * heat exceeds 255 for heat values above 10; the
    // visible code never stores more than 10, but confirm if that changes.
    CRGB color(int16_t x, int16_t y) {
      if (x < 0 || x >= WIDTH || y < 0 || y >= HEIGHT) {
        return CRGB::Black;
      }
      return ColorFromPalette(palette, 24 * getHeat(x, y));
    }
