diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..7dc8bb2
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,15 @@
+[submodule "story-editor/externals/SDL_mixer"]
+	path = story-editor/externals/SDL_mixer
+	url = https://github.com/libsdl-org/SDL_mixer
+[submodule "story-editor/externals/SDL"]
+	path = story-editor/externals/SDL
+	url = https://github.com/libsdl-org/SDL
+[submodule "story-editor/externals/SDL_image"]
+	path = story-editor/externals/SDL_image
+	url = https://github.com/libsdl-org/SDL_image
+[submodule "story-editor/externals/civetweb"]
+	path = story-editor/externals/civetweb
+	url = https://github.com/civetweb/civetweb
+[submodule "story-editor/externals/curl"]
+	path = story-editor/externals/curl
+	url = https://github.com/curl/curl
diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
new file mode 100644
index 0000000..e61fd4b
--- /dev/null
+++ b/.vscode/c_cpp_properties.json
@@ -0,0 +1,10 @@
+{
+"configurations": [
+    {
+        "name": "Linux",
+        "includePath": ["${workspaceFolder}/**", "~/.conan2/**"]
+    }
+],
+"version": 4,
+"enableConfigurationSquiggles": true
+}
\ No newline at end of file
diff --git a/core/chip32/chip32_assembler.cpp b/core/chip32/chip32_assembler.cpp
index d710148..f2826a0 100644
--- a/core/chip32/chip32_assembler.cpp
+++ b/core/chip32/chip32_assembler.cpp
@@ -459,7 +459,7 @@ bool Assembler::Parse(const std::string &data)
             else // RAM DATA, only one argument is used: the size of the array
             {
                 instr.addr = ram_addr;
-                instr.dataLen = static_cast<uint16_t>(strtol(lineParts[2].c_str(),  NULL, 0));
+                instr.dataLen = static_cast<uint16_t>(strtol(lineParts[2].c_str(),  NULL, 0)) * instr.dataTypeSize/8;
                 ram_addr += instr.dataLen;
                 m_labels[opcode] = instr;
                 m_instructions.push_back(instr);
diff --git a/core/story-manager/interfaces/i_story_manager.h b/core/story-manager/interfaces/i_story_manager.h
index d92cf15..eef9c37 100644
--- a/core/story-manager/interfaces/i_story_manager.h
+++ b/core/story-manager/interfaces/i_story_manager.h
@@ -10,6 +10,7 @@
 #include "resource.h"
 #include "connection.h"
 #include "base_node.h"
+#include "variable.h"
 
 template <typename T>
 struct Callback;
@@ -49,9 +50,15 @@ public:
     // Node interaction
     virtual void BuildNodes(bool compileonly) = 0;
     virtual void BuildCode(bool compileonly) = 0;
+    virtual void SetExternalSourceFile(const std::string &filename) = 0;
     virtual void LoadBinaryStory(const std::string &filename) = 0;
     virtual void ToggleBreakpoint(int line) = 0;
     virtual uint32_t GetRegister(int reg) = 0;
+
+    // Variables management
+    virtual void ScanVariable(const std::function<void(Variable& element)>& operation) = 0;
+    virtual void AddVariable() = 0;
+    virtual void DeleteVariable(int i) = 0;
  
     virtual void Play() = 0;
     virtual void Step() = 0;
diff --git a/core/story-manager/lib/variable.h b/core/story-manager/lib/variable.h
new file mode 100644
index 0000000..a286204
--- /dev/null
+++ b/core/story-manager/lib/variable.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <string>
+#include <cstdint>
+
+struct Variable {
+    static const uint32_t NameMaxSize = 50;
+    std::string name;      // Nom de la variable
+    std::string type;      // Type de la variable (par exemple int32_t, int64_t)
+    int64_t value;         // Valeur stockée en tant qu'entier en virgule fixe
+    std::string valueText;
+    int scalePower;    // Nombre de bits pour la partie fractionnaire
+
+    Variable(const std::string &n, const std::string &t, int64_t v, int s) {
+        name = n;
+        type = t;
+        value = v;
+        scalePower = s;
+    }
+};
diff --git a/core/story-manager/src/branch_node.cpp b/core/story-manager/src/branch_node.cpp
new file mode 100644
index 0000000..53092d0
--- /dev/null
+++ b/core/story-manager/src/branch_node.cpp
@@ -0,0 +1,53 @@
+#include "branch_node.h"
+#include "story_project.h"
+#include "connection.h"
+#include "sys_lib.h"
+#include "compiler.h"
+
+
+BranchNode::BranchNode(const std::string &type)
+    : BaseNode(type, "Branch Node")
+{
+
+}
+
+
+void BranchNode::Initialize()
+{
+
+}
+
+std::string BranchNode::GenerateConstants(IStoryPage &page, IStoryProject &project, int nb_out_conns)
+{
+    std::string s;
+
+   
+
+    return s;
+}
+
+std::string BranchNode::Build(IStoryPage &page, const StoryOptions &options, int nb_out_conns)
+{
+    std::stringstream ss;
+
+    std::list<std::shared_ptr<Connection>> conns;
+    page.GetNodeConnections(conns, GetId());
+    int i = 0;
+    std::list<std::shared_ptr<Connection>>::iterator c = conns.begin();
+
+    if (conns.size() == 2)
+    {
+        ss << R"(; ---------------------------- )"
+        << GetTitle()
+        << " Type: Branch"
+        << "\n";
+
+        ss << "eq  r0, r0, r1\n"
+           << "skipz r0\n"
+           << "jump " << BaseNode::GetEntryLabel((*c)->inNodeId);
+        ++c;
+        ss << "jump " << BaseNode::GetEntryLabel((*c)->inNodeId);
+    }
+    return ss.str();
+}
+
diff --git a/core/story-manager/src/branch_node.h b/core/story-manager/src/branch_node.h
new file mode 100644
index 0000000..95aabce
--- /dev/null
+++ b/core/story-manager/src/branch_node.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <string>
+#include "i_story_manager.h"
+#include "base_node.h"
+#include "i_script_node.h"
+#include "i_story_project.h"
+
+class BranchNode : public BaseNode
+{
+public:
+    BranchNode(const std::string &type);
+
+    virtual void Initialize() override;
+    virtual std::string Build(IStoryPage &page, const StoryOptions &options, int nb_out_conns) override;
+    virtual std::string GenerateConstants(IStoryPage &page, IStoryProject &project, int nb_out_conns) override;
+
+
+private:
+};
+
diff --git a/core/story-manager/src/compare_node.cpp b/core/story-manager/src/compare_node.cpp
new file mode 100644
index 0000000..f539181
--- /dev/null
+++ b/core/story-manager/src/compare_node.cpp
@@ -0,0 +1,79 @@
+#include "compare_node.h"
+#include "story_project.h"
+#include "connection.h"
+#include "sys_lib.h"
+#include "compiler.h"
+
+
+CompareNode::CompareNode(const std::string &type)
+    : BaseNode(type, "Branch Node")
+{
+
+}
+
+
+void CompareNode::Initialize()
+{
+
+}
+
+std::string CompareNode::GenerateConstants(IStoryPage &page, IStoryProject &project, int nb_out_conns)
+{
+    std::string s;
+
+   
+
+    return s;
+}
+
+std::string CompareNode::Build(IStoryPage &page, const StoryOptions &options, int nb_out_conns)
+{
+    std::stringstream ss;
+
+    std::list<std::shared_ptr<Connection>> conns;
+    page.GetNodeConnections(conns, GetId());
+    int i = 0;
+    std::list<std::shared_ptr<Connection>>::iterator c = conns.begin();
+
+/*
+
+; Déclaration des variables en RAM
+$var1    DV32    1    ; Première variable à comparer
+$var2    DV32    1    ; Deuxième variable à comparer
+
+; Code principal
+.compare_ge:
+    ; Charger les valeurs des variables dans les registres
+    lcons r1, $var1
+    load r1, @r1, 4   ; Charger 4 bytes (32 bits) de var1 dans r1
+    lcons r2, $var2
+    load r2, @r2, 4   ; Charger 4 bytes (32 bits) de var2 dans r2
+
+    ; Comparer r1 >= r2
+    gt r3, r1, r2     ; r3 = 1 si r1 > r2, sinon 0
+    eq r4, r1, r2     ; r4 = 1 si r1 == r2, sinon 0
+    or r0, r3, r4     ; r0 = 1 si r1 > r2 OU r1 == r2, sinon 0
+
+    ret
+
+
+
+ */
+
+
+    if (conns.size() == 2)
+    {
+        ss << R"(; ---------------------------- )"
+        << GetTitle()
+        << " Type: Branch"
+        << "\n";
+        
+        ss << "eq  r0, r0, r1\n"
+           << "skipz r0\n"
+           << "jump " << BaseNode::GetEntryLabel((*c)->inNodeId);
+        ++c;
+        ss << "jump " << BaseNode::GetEntryLabel((*c)->inNodeId);
+    }
+    return ss.str();
+}
+
diff --git a/core/story-manager/src/compare_node.h b/core/story-manager/src/compare_node.h
new file mode 100644
index 0000000..8704ed3
--- /dev/null
+++ b/core/story-manager/src/compare_node.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <string>
+#include "i_story_manager.h"
+#include "base_node.h"
+#include "i_script_node.h"
+#include "i_story_project.h"
+
+class CompareNode : public BaseNode
+{
+public:
+    CompareNode(const std::string &type);
+
+    virtual void Initialize() override;
+    virtual std::string Build(IStoryPage &page, const StoryOptions &options, int nb_out_conns) override;
+    virtual std::string GenerateConstants(IStoryPage &page, IStoryProject &project, int nb_out_conns) override;
+
+
+private:
+};
+
diff --git a/core/story-manager/src/story_project.cpp b/core/story-manager/src/story_project.cpp
index 301f3de..d829e47 100644
--- a/core/story-manager/src/story_project.cpp
+++ b/core/story-manager/src/story_project.cpp
@@ -9,6 +9,7 @@
 #include "json.hpp"
 #include "media_node.h"
 #include "function_node.h"
+#include "variable_node.h"
 #include "sys_lib.h"
 
 StoryProject::StoryProject(ILogger &log)
@@ -16,6 +17,7 @@ StoryProject::StoryProject(ILogger &log)
 {
     registerNode<MediaNode>("media-node");
     registerNode<FunctionNode>("function-node");
+    registerNode<VariableNode>("variable-node");
 }
 
 StoryProject::~StoryProject()
@@ -221,6 +223,24 @@ std::pair<std::list<std::shared_ptr<Connection>>::iterator, std::list<std::share
     return std::pair<std::list<std::shared_ptr<Connection>>::iterator, std::list<std::shared_ptr<Connection>>::iterator>();
 }
 
+void StoryProject::ScanVariable(const std::function<void(Variable& element)>& operation)
+{
+    for (auto &v : m_variables)
+    {
+        operation(v);
+    }
+}
+
+void StoryProject::AddVariable() 
+{
+    m_variables.push_back(Variable("var_" + std::to_string(m_variables.size()), "int32_t", 0, 8));
+}
+
+void StoryProject::DeleteVariable(int i)
+{
+    m_variables.erase(m_variables.begin() + i);
+}
+
 bool StoryProject::ModelFromJson(const nlohmann::json &model)
 {
     bool success = false;
diff --git a/core/story-manager/src/story_project.h b/core/story-manager/src/story_project.h
index 460b26d..d8790b7 100644
--- a/core/story-manager/src/story_project.h
+++ b/core/story-manager/src/story_project.h
@@ -15,7 +15,7 @@
 #include "chip32_assembler.h"
 #include "story_page.h"
 #include "story_options.h"
-
+#include "variable.h"
 
 struct StoryProject : public IStoryProject
 {
@@ -102,6 +102,11 @@ public:
     std::pair<std::list<std::shared_ptr<BaseNode>>::iterator, std::list<std::shared_ptr<BaseNode>>::iterator> Nodes(const std::string_view &page_uuid);
     std::pair<std::list<std::shared_ptr<Connection>>::iterator, std::list<std::shared_ptr<Connection>>::iterator> Links(const std::string_view &page_uuid);
 
+
+    void ScanVariable(const std::function<void(Variable& element)>& operation);
+    void AddVariable();
+    void DeleteVariable(int i);
+
     std::vector<std::string> GetNodeTypes() const { 
         std::vector<std::string> l;
         for(auto const& imap: m_registry) l.push_back(imap.first);
@@ -129,6 +134,8 @@ private:
 
     std::list<std::shared_ptr<StoryPage>> m_pages;
 
+    std::vector<Variable> m_variables;
+
     StoryOptions m_storyOptions;
 
     bool m_initialized{false};
diff --git a/core/story-manager/src/variable_node.cpp b/core/story-manager/src/variable_node.cpp
new file mode 100644
index 0000000..1a979a2
--- /dev/null
+++ b/core/story-manager/src/variable_node.cpp
@@ -0,0 +1,43 @@
+#include "variable_node.h"
+#include "story_project.h"
+#include "connection.h"
+#include "sys_lib.h"
+
+
+VariableNode::VariableNode(const std::string &type)
+    : BaseNode(type, "Function Node")
+{
+    nlohmann::json j{ {"function", ""} };
+    SetInternalData(j);
+}
+
+void VariableNode::StoreInternalData()
+{
+    nlohmann::json j;
+    // j["image"] = m_image;
+    // j["sound"] = m_sound;
+
+    SetInternalData(j);
+}
+
+void VariableNode::Initialize()
+{
+    nlohmann::json j = GetInternalData();
+    // m_image = j["image"].get<std::string>();
+    // m_sound = j["sound"].get<std::string>();
+}
+
+std::string VariableNode::Build(IStoryPage &page, const StoryOptions &options, int nb_out_conns)
+{
+    return std::string();
+}
+
+std::string VariableNode::GenerateConstants(IStoryPage &page, IStoryProject &project, int nb_out_conns)
+{
+    std::string s;
+
+
+
+    return s;
+}
+
diff --git a/core/story-manager/src/variable_node.h b/core/story-manager/src/variable_node.h
new file mode 100644
index 0000000..a9aa049
--- /dev/null
+++ b/core/story-manager/src/variable_node.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <string>
+#include "i_story_manager.h"
+#include "base_node.h"
+#include "i_script_node.h"
+#include "i_story_project.h"
+
+class VariableNode : public BaseNode
+{
+public:
+    VariableNode(const std::string &type);
+
+    virtual void Initialize() override;
+    virtual std::string Build(IStoryPage &page, const StoryOptions &options, int nb_out_conns) override;
+    virtual std::string GenerateConstants(IStoryPage &page, IStoryProject &project, int nb_out_conns) override;
+
+    void StoreInternalData();
+
+private:
+
+};
+
diff --git a/firmware/library/mini_qoi.c b/firmware/library/mini_qoi.c
index e3b255f..35f0443 100644
--- a/firmware/library/mini_qoi.c
+++ b/firmware/library/mini_qoi.c
@@ -21,6 +21,7 @@ Initializes an mQOI image descriptor object.
 void mqoi_desc_init(mqoi_desc_t *desc)
 {
     memset(desc, 0, sizeof(mqoi_desc_t));
+    desc->head = 3;
 }
 
 /*
@@ -74,7 +75,7 @@ Returns true when the mQOI image descriptor object is completely populated.
 */
 inline bool mqoi_desc_done(const mqoi_desc_t *desc)
 {
-    return desc->head >= sizeof(mqoi_desc_t) - 1;
+    return desc->head >= sizeof(mqoi_desc_t) - 4;
 }
 
 // ==== mqoi_dec_t ====
diff --git a/firmware/library/mini_qoi.h b/firmware/library/mini_qoi.h
index 22564c8..e1e19ff 100644
--- a/firmware/library/mini_qoi.h
+++ b/firmware/library/mini_qoi.h
@@ -75,7 +75,7 @@ extern "C"
 
     typedef struct
     {
-        uint8_t head;
+        uint32_t head; // fix: allow alignment of 32-bit addresses
 
         uint8_t magic[4];
         uint8_t width[4];  // big-endian width
diff --git a/firmware/library/qoi.h b/firmware/library/qoi.h
deleted file mode 100644
index f2800b0..0000000
--- a/firmware/library/qoi.h
+++ /dev/null
@@ -1,649 +0,0 @@
-/*
-
-Copyright (c) 2021, Dominic Szablewski - https://phoboslab.org
-SPDX-License-Identifier: MIT
-
-
-QOI - The "Quite OK Image" format for fast, lossless image compression
-
--- About
-
-QOI encodes and decodes images in a lossless format. Compared to stb_image and
-stb_image_write QOI offers 20x-50x faster encoding, 3x-4x faster decoding and
-20% better compression.
-
-
--- Synopsis
-
-// Define `QOI_IMPLEMENTATION` in *one* C/C++ file before including this
-// library to create the implementation.
-
-#define QOI_IMPLEMENTATION
-#include "qoi.h"
-
-// Encode and store an RGBA buffer to the file system. The qoi_desc describes
-// the input pixel data.
-qoi_write("image_new.qoi", rgba_pixels, &(qoi_desc){
-	.width = 1920,
-	.height = 1080,
-	.channels = 4,
-	.colorspace = QOI_SRGB
-});
-
-// Load and decode a QOI image from the file system into a 32bbp RGBA buffer.
-// The qoi_desc struct will be filled with the width, height, number of channels
-// and colorspace read from the file header.
-qoi_desc desc;
-void *rgba_pixels = qoi_read("image.qoi", &desc, 4);
-
-
-
--- Documentation
-
-This library provides the following functions;
-- qoi_read    -- read and decode a QOI file
-- qoi_decode  -- decode the raw bytes of a QOI image from memory
-- qoi_write   -- encode and write a QOI file
-- qoi_encode  -- encode an rgba buffer into a QOI image in memory
-
-See the function declaration below for the signature and more information.
-
-If you don't want/need the qoi_read and qoi_write functions, you can define
-QOI_NO_STDIO before including this library.
-
-This library uses malloc() and free(). To supply your own malloc implementation
-you can define QOI_MALLOC and QOI_FREE before including this library.
-
-This library uses memset() to zero-initialize the index. To supply your own
-implementation you can define QOI_ZEROARR before including this library.
-
-
--- Data Format
-
-A QOI file has a 14 byte header, followed by any number of data "chunks" and an
-8-byte end marker.
-
-struct qoi_header_t {
-	char     magic[4];   // magic bytes "qoif"
-	uint32_t width;      // image width in pixels (BE)
-	uint32_t height;     // image height in pixels (BE)
-	uint8_t  channels;   // 3 = RGB, 4 = RGBA
-	uint8_t  colorspace; // 0 = sRGB with linear alpha, 1 = all channels linear
-};
-
-Images are encoded row by row, left to right, top to bottom. The decoder and
-encoder start with {r: 0, g: 0, b: 0, a: 255} as the previous pixel value. An
-image is complete when all pixels specified by width * height have been covered.
-
-Pixels are encoded as
- - a run of the previous pixel
- - an index into an array of previously seen pixels
- - a difference to the previous pixel value in r,g,b
- - full r,g,b or r,g,b,a values
-
-The color channels are assumed to not be premultiplied with the alpha channel
-("un-premultiplied alpha").
-
-A running array[64] (zero-initialized) of previously seen pixel values is
-maintained by the encoder and decoder. Each pixel that is seen by the encoder
-and decoder is put into this array at the position formed by a hash function of
-the color value. In the encoder, if the pixel value at the index matches the
-current pixel, this index position is written to the stream as QOI_OP_INDEX.
-The hash function for the index is:
-
-	index_position = (r * 3 + g * 5 + b * 7 + a * 11) % 64
-
-Each chunk starts with a 2- or 8-bit tag, followed by a number of data bits. The
-bit length of chunks is divisible by 8 - i.e. all chunks are byte aligned. All
-values encoded in these data bits have the most significant bit on the left.
-
-The 8-bit tags have precedence over the 2-bit tags. A decoder must check for the
-presence of an 8-bit tag first.
-
-The byte stream's end is marked with 7 0x00 bytes followed a single 0x01 byte.
-
-
-The possible chunks are:
-
-
-.- QOI_OP_INDEX ----------.
-|         Byte[0]         |
-|  7  6  5  4  3  2  1  0 |
-|-------+-----------------|
-|  0  0 |     index       |
-`-------------------------`
-2-bit tag b00
-6-bit index into the color index array: 0..63
-
-A valid encoder must not issue 2 or more consecutive QOI_OP_INDEX chunks to the
-same index. QOI_OP_RUN should be used instead.
-
-
-.- QOI_OP_DIFF -----------.
-|         Byte[0]         |
-|  7  6  5  4  3  2  1  0 |
-|-------+-----+-----+-----|
-|  0  1 |  dr |  dg |  db |
-`-------------------------`
-2-bit tag b01
-2-bit   red channel difference from the previous pixel between -2..1
-2-bit green channel difference from the previous pixel between -2..1
-2-bit  blue channel difference from the previous pixel between -2..1
-
-The difference to the current channel values are using a wraparound operation,
-so "1 - 2" will result in 255, while "255 + 1" will result in 0.
-
-Values are stored as unsigned integers with a bias of 2. E.g. -2 is stored as
-0 (b00). 1 is stored as 3 (b11).
-
-The alpha value remains unchanged from the previous pixel.
-
-
-.- QOI_OP_LUMA -------------------------------------.
-|         Byte[0]         |         Byte[1]         |
-|  7  6  5  4  3  2  1  0 |  7  6  5  4  3  2  1  0 |
-|-------+-----------------+-------------+-----------|
-|  1  0 |  green diff     |   dr - dg   |  db - dg  |
-`---------------------------------------------------`
-2-bit tag b10
-6-bit green channel difference from the previous pixel -32..31
-4-bit   red channel difference minus green channel difference -8..7
-4-bit  blue channel difference minus green channel difference -8..7
-
-The green channel is used to indicate the general direction of change and is
-encoded in 6 bits. The red and blue channels (dr and db) base their diffs off
-of the green channel difference and are encoded in 4 bits. I.e.:
-	dr_dg = (cur_px.r - prev_px.r) - (cur_px.g - prev_px.g)
-	db_dg = (cur_px.b - prev_px.b) - (cur_px.g - prev_px.g)
-
-The difference to the current channel values are using a wraparound operation,
-so "10 - 13" will result in 253, while "250 + 7" will result in 1.
-
-Values are stored as unsigned integers with a bias of 32 for the green channel
-and a bias of 8 for the red and blue channel.
-
-The alpha value remains unchanged from the previous pixel.
-
-
-.- QOI_OP_RUN ------------.
-|         Byte[0]         |
-|  7  6  5  4  3  2  1  0 |
-|-------+-----------------|
-|  1  1 |       run       |
-`-------------------------`
-2-bit tag b11
-6-bit run-length repeating the previous pixel: 1..62
-
-The run-length is stored with a bias of -1. Note that the run-lengths 63 and 64
-(b111110 and b111111) are illegal as they are occupied by the QOI_OP_RGB and
-QOI_OP_RGBA tags.
-
-
-.- QOI_OP_RGB ------------------------------------------.
-|         Byte[0]         | Byte[1] | Byte[2] | Byte[3] |
-|  7  6  5  4  3  2  1  0 | 7 .. 0  | 7 .. 0  | 7 .. 0  |
-|-------------------------+---------+---------+---------|
-|  1  1  1  1  1  1  1  0 |   red   |  green  |  blue   |
-`-------------------------------------------------------`
-8-bit tag b11111110
-8-bit   red channel value
-8-bit green channel value
-8-bit  blue channel value
-
-The alpha value remains unchanged from the previous pixel.
-
-
-.- QOI_OP_RGBA ---------------------------------------------------.
-|         Byte[0]         | Byte[1] | Byte[2] | Byte[3] | Byte[4] |
-|  7  6  5  4  3  2  1  0 | 7 .. 0  | 7 .. 0  | 7 .. 0  | 7 .. 0  |
-|-------------------------+---------+---------+---------+---------|
-|  1  1  1  1  1  1  1  1 |   red   |  green  |  blue   |  alpha  |
-`-----------------------------------------------------------------`
-8-bit tag b11111111
-8-bit   red channel value
-8-bit green channel value
-8-bit  blue channel value
-8-bit alpha channel value
-
-*/
-
-
-/* -----------------------------------------------------------------------------
-Header - Public functions */
-
-#ifndef QOI_H
-#define QOI_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* A pointer to a qoi_desc struct has to be supplied to all of qoi's functions.
-It describes either the input format (for qoi_write and qoi_encode), or is
-filled with the description read from the file header (for qoi_read and
-qoi_decode).
-
-The colorspace in this qoi_desc is an enum where
-	0 = sRGB, i.e. gamma scaled RGB channels and a linear alpha channel
-	1 = all channels are linear
-You may use the constants QOI_SRGB or QOI_LINEAR. The colorspace is purely
-informative. It will be saved to the file header, but does not affect
-how chunks are en-/decoded. */
-
-#define QOI_SRGB   0
-#define QOI_LINEAR 1
-
-typedef struct {
-	unsigned int width;
-	unsigned int height;
-	unsigned char channels;
-	unsigned char colorspace;
-} qoi_desc;
-
-#ifndef QOI_NO_STDIO
-
-/* Encode raw RGB or RGBA pixels into a QOI image and write it to the file
-system. The qoi_desc struct must be filled with the image width, height,
-number of channels (3 = RGB, 4 = RGBA) and the colorspace.
-
-The function returns 0 on failure (invalid parameters, or fopen or malloc
-failed) or the number of bytes written on success. */
-
-int qoi_write(const char *filename, const void *data, const qoi_desc *desc);
-
-
-/* Read and decode a QOI image from the file system. If channels is 0, the
-number of channels from the file header is used. If channels is 3 or 4 the
-output format will be forced into this number of channels.
-
-The function either returns NULL on failure (invalid data, or malloc or fopen
-failed) or a pointer to the decoded pixels. On success, the qoi_desc struct
-will be filled with the description from the file header.
-
-The returned pixel data should be free()d after use. */
-
-void *qoi_read(const char *filename, qoi_desc *desc, int channels);
-
-#endif /* QOI_NO_STDIO */
-
-
-/* Encode raw RGB or RGBA pixels into a QOI image in memory.
-
-The function either returns NULL on failure (invalid parameters or malloc
-failed) or a pointer to the encoded data on success. On success the out_len
-is set to the size in bytes of the encoded data.
-
-The returned qoi data should be free()d after use. */
-
-void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len);
-
-
-/* Decode a QOI image from memory.
-
-The function either returns NULL on failure (invalid parameters or malloc
-failed) or a pointer to the decoded pixels. On success, the qoi_desc struct
-is filled with the description from the file header.
-
-The returned pixel data should be free()d after use. */
-
-void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels);
-
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* QOI_H */
-
-
-/* -----------------------------------------------------------------------------
-Implementation */
-
-#ifdef QOI_IMPLEMENTATION
-#include <stdlib.h>
-#include <string.h>
-
-#ifndef QOI_MALLOC
-	#define QOI_MALLOC(sz) malloc(sz)
-	#define QOI_FREE(p)    free(p)
-#endif
-#ifndef QOI_ZEROARR
-	#define QOI_ZEROARR(a) memset((a),0,sizeof(a))
-#endif
-
-#define QOI_OP_INDEX  0x00 /* 00xxxxxx */
-#define QOI_OP_DIFF   0x40 /* 01xxxxxx */
-#define QOI_OP_LUMA   0x80 /* 10xxxxxx */
-#define QOI_OP_RUN    0xc0 /* 11xxxxxx */
-#define QOI_OP_RGB    0xfe /* 11111110 */
-#define QOI_OP_RGBA   0xff /* 11111111 */
-
-#define QOI_MASK_2    0xc0 /* 11000000 */
-
-#define QOI_COLOR_HASH(C) (C.rgba.r*3 + C.rgba.g*5 + C.rgba.b*7 + C.rgba.a*11)
-#define QOI_MAGIC \
-	(((unsigned int)'q') << 24 | ((unsigned int)'o') << 16 | \
-	 ((unsigned int)'i') <<  8 | ((unsigned int)'f'))
-#define QOI_HEADER_SIZE 14
-
-/* 2GB is the max file size that this implementation can safely handle. We guard
-against anything larger than that, assuming the worst case with 5 bytes per
-pixel, rounded down to a nice clean value. 400 million pixels ought to be
-enough for anybody. */
-#define QOI_PIXELS_MAX ((unsigned int)400000000)
-
-typedef union {
-	struct { unsigned char r, g, b, a; } rgba;
-	unsigned int v;
-} qoi_rgba_t;
-
-static const unsigned char qoi_padding[8] = {0,0,0,0,0,0,0,1};
-
-static void qoi_write_32(unsigned char *bytes, int *p, unsigned int v) {
-	bytes[(*p)++] = (0xff000000 & v) >> 24;
-	bytes[(*p)++] = (0x00ff0000 & v) >> 16;
-	bytes[(*p)++] = (0x0000ff00 & v) >> 8;
-	bytes[(*p)++] = (0x000000ff & v);
-}
-
-static unsigned int qoi_read_32(const unsigned char *bytes, int *p) {
-	unsigned int a = bytes[(*p)++];
-	unsigned int b = bytes[(*p)++];
-	unsigned int c = bytes[(*p)++];
-	unsigned int d = bytes[(*p)++];
-	return a << 24 | b << 16 | c << 8 | d;
-}
-
-void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len) {
-	int i, max_size, p, run;
-	int px_len, px_end, px_pos, channels;
-	unsigned char *bytes;
-	const unsigned char *pixels;
-	qoi_rgba_t index[64];
-	qoi_rgba_t px, px_prev;
-
-	if (
-		data == NULL || out_len == NULL || desc == NULL ||
-		desc->width == 0 || desc->height == 0 ||
-		desc->channels < 3 || desc->channels > 4 ||
-		desc->colorspace > 1 ||
-		desc->height >= QOI_PIXELS_MAX / desc->width
-	) {
-		return NULL;
-	}
-
-	max_size =
-		desc->width * desc->height * (desc->channels + 1) +
-		QOI_HEADER_SIZE + sizeof(qoi_padding);
-
-	p = 0;
-	bytes = (unsigned char *) QOI_MALLOC(max_size);
-	if (!bytes) {
-		return NULL;
-	}
-
-	qoi_write_32(bytes, &p, QOI_MAGIC);
-	qoi_write_32(bytes, &p, desc->width);
-	qoi_write_32(bytes, &p, desc->height);
-	bytes[p++] = desc->channels;
-	bytes[p++] = desc->colorspace;
-
-
-	pixels = (const unsigned char *)data;
-
-	QOI_ZEROARR(index);
-
-	run = 0;
-	px_prev.rgba.r = 0;
-	px_prev.rgba.g = 0;
-	px_prev.rgba.b = 0;
-	px_prev.rgba.a = 255;
-	px = px_prev;
-
-	px_len = desc->width * desc->height * desc->channels;
-	px_end = px_len - desc->channels;
-	channels = desc->channels;
-
-	for (px_pos = 0; px_pos < px_len; px_pos += channels) {
-		px.rgba.r = pixels[px_pos + 0];
-		px.rgba.g = pixels[px_pos + 1];
-		px.rgba.b = pixels[px_pos + 2];
-
-		if (channels == 4) {
-			px.rgba.a = pixels[px_pos + 3];
-		}
-
-		if (px.v == px_prev.v) {
-			run++;
-			if (run == 62 || px_pos == px_end) {
-				bytes[p++] = QOI_OP_RUN | (run - 1);
-				run = 0;
-			}
-		}
-		else {
-			int index_pos;
-
-			if (run > 0) {
-				bytes[p++] = QOI_OP_RUN | (run - 1);
-				run = 0;
-			}
-
-			index_pos = QOI_COLOR_HASH(px) % 64;
-
-			if (index[index_pos].v == px.v) {
-				bytes[p++] = QOI_OP_INDEX | index_pos;
-			}
-			else {
-				index[index_pos] = px;
-
-				if (px.rgba.a == px_prev.rgba.a) {
-					signed char vr = px.rgba.r - px_prev.rgba.r;
-					signed char vg = px.rgba.g - px_prev.rgba.g;
-					signed char vb = px.rgba.b - px_prev.rgba.b;
-
-					signed char vg_r = vr - vg;
-					signed char vg_b = vb - vg;
-
-					if (
-						vr > -3 && vr < 2 &&
-						vg > -3 && vg < 2 &&
-						vb > -3 && vb < 2
-					) {
-						bytes[p++] = QOI_OP_DIFF | (vr + 2) << 4 | (vg + 2) << 2 | (vb + 2);
-					}
-					else if (
-						vg_r >  -9 && vg_r <  8 &&
-						vg   > -33 && vg   < 32 &&
-						vg_b >  -9 && vg_b <  8
-					) {
-						bytes[p++] = QOI_OP_LUMA     | (vg   + 32);
-						bytes[p++] = (vg_r + 8) << 4 | (vg_b +  8);
-					}
-					else {
-						bytes[p++] = QOI_OP_RGB;
-						bytes[p++] = px.rgba.r;
-						bytes[p++] = px.rgba.g;
-						bytes[p++] = px.rgba.b;
-					}
-				}
-				else {
-					bytes[p++] = QOI_OP_RGBA;
-					bytes[p++] = px.rgba.r;
-					bytes[p++] = px.rgba.g;
-					bytes[p++] = px.rgba.b;
-					bytes[p++] = px.rgba.a;
-				}
-			}
-		}
-		px_prev = px;
-	}
-
-	for (i = 0; i < (int)sizeof(qoi_padding); i++) {
-		bytes[p++] = qoi_padding[i];
-	}
-
-	*out_len = p;
-	return bytes;
-}
-
-void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels) {
-	const unsigned char *bytes;
-	unsigned int header_magic;
-	unsigned char *pixels;
-	qoi_rgba_t index[64];
-	qoi_rgba_t px;
-	int px_len, chunks_len, px_pos;
-	int p = 0, run = 0;
-
-	if (
-		data == NULL || desc == NULL ||
-		(channels != 0 && channels != 3 && channels != 4) ||
-		size < QOI_HEADER_SIZE + (int)sizeof(qoi_padding)
-	) {
-		return NULL;
-	}
-
-	bytes = (const unsigned char *)data;
-
-	header_magic = qoi_read_32(bytes, &p);
-	desc->width = qoi_read_32(bytes, &p);
-	desc->height = qoi_read_32(bytes, &p);
-	desc->channels = bytes[p++];
-	desc->colorspace = bytes[p++];
-
-	if (
-		desc->width == 0 || desc->height == 0 ||
-		desc->channels < 3 || desc->channels > 4 ||
-		desc->colorspace > 1 ||
-		header_magic != QOI_MAGIC ||
-		desc->height >= QOI_PIXELS_MAX / desc->width
-	) {
-		return NULL;
-	}
-
-	if (channels == 0) {
-		channels = desc->channels;
-	}
-
-	px_len = desc->width * desc->height * channels;
-	pixels = (unsigned char *) QOI_MALLOC(px_len);
-	if (!pixels) {
-		return NULL;
-	}
-
-	QOI_ZEROARR(index);
-	px.rgba.r = 0;
-	px.rgba.g = 0;
-	px.rgba.b = 0;
-	px.rgba.a = 255;
-
-	chunks_len = size - (int)sizeof(qoi_padding);
-	for (px_pos = 0; px_pos < px_len; px_pos += channels) {
-		if (run > 0) {
-			run--;
-		}
-		else if (p < chunks_len) {
-			int b1 = bytes[p++];
-
-			if (b1 == QOI_OP_RGB) {
-				px.rgba.r = bytes[p++];
-				px.rgba.g = bytes[p++];
-				px.rgba.b = bytes[p++];
-			}
-			else if (b1 == QOI_OP_RGBA) {
-				px.rgba.r = bytes[p++];
-				px.rgba.g = bytes[p++];
-				px.rgba.b = bytes[p++];
-				px.rgba.a = bytes[p++];
-			}
-			else if ((b1 & QOI_MASK_2) == QOI_OP_INDEX) {
-				px = index[b1];
-			}
-			else if ((b1 & QOI_MASK_2) == QOI_OP_DIFF) {
-				px.rgba.r += ((b1 >> 4) & 0x03) - 2;
-				px.rgba.g += ((b1 >> 2) & 0x03) - 2;
-				px.rgba.b += ( b1       & 0x03) - 2;
-			}
-			else if ((b1 & QOI_MASK_2) == QOI_OP_LUMA) {
-				int b2 = bytes[p++];
-				int vg = (b1 & 0x3f) - 32;
-				px.rgba.r += vg - 8 + ((b2 >> 4) & 0x0f);
-				px.rgba.g += vg;
-				px.rgba.b += vg - 8 +  (b2       & 0x0f);
-			}
-			else if ((b1 & QOI_MASK_2) == QOI_OP_RUN) {
-				run = (b1 & 0x3f);
-			}
-
-			index[QOI_COLOR_HASH(px) % 64] = px;
-		}
-
-		pixels[px_pos + 0] = px.rgba.r;
-		pixels[px_pos + 1] = px.rgba.g;
-		pixels[px_pos + 2] = px.rgba.b;
-		
-		if (channels == 4) {
-			pixels[px_pos + 3] = px.rgba.a;
-		}
-	}
-
-	return pixels;
-}
-
-#ifndef QOI_NO_STDIO
-#include <stdio.h>
-
-int qoi_write(const char *filename, const void *data, const qoi_desc *desc) {
-	FILE *f = fopen(filename, "wb");
-	int size, err;
-	void *encoded;
-
-	if (!f) {
-		return 0;
-	}
-
-	encoded = qoi_encode(data, desc, &size);
-	if (!encoded) {
-		fclose(f);
-		return 0;
-	}
-
-	fwrite(encoded, 1, size, f);
-	fflush(f);
-	err = ferror(f);
-	fclose(f);
-
-	QOI_FREE(encoded);
-	return err ? 0 : size;
-}
-
-void *qoi_read(const char *filename, qoi_desc *desc, int channels) {
-	FILE *f = fopen(filename, "rb");
-	int size, bytes_read;
-	void *pixels, *data;
-
-	if (!f) {
-		return NULL;
-	}
-
-	fseek(f, 0, SEEK_END);
-	size = ftell(f);
-	if (size <= 0 || fseek(f, 0, SEEK_SET) != 0) {
-		fclose(f);
-		return NULL;
-	}
-
-	data = QOI_MALLOC(size);
-	if (!data) {
-		fclose(f);
-		return NULL;
-	}
-
-	bytes_read = fread(data, 1, size, f);
-	fclose(f);
-	pixels = (bytes_read != size) ? NULL : qoi_decode(data, bytes_read, desc, channels);
-	QOI_FREE(data);
-	return pixels;
-}
-
-#endif /* QOI_NO_STDIO */
-#endif /* QOI_IMPLEMENTATION */
diff --git a/shared/audio_player.h b/shared/audio_player.h
index e60874c..9d1e016 100644
--- a/shared/audio_player.h
+++ b/shared/audio_player.h
@@ -43,6 +43,9 @@ private:
 
     void AudioThread();
 
+    int StartAudio(const std::string &filename);
+    void StopAudio();
+
 };
 
 
diff --git a/shared/audio_player_raudio_impl.cpp b/shared/audio_player_raudio_impl.cpp
new file mode 100644
index 0000000..0c8cdeb
--- /dev/null
+++ b/shared/audio_player_raudio_impl.cpp
@@ -0,0 +1,172 @@
+
+#include <functional>
+#include <stdio.h>
+#include "audio_player.h"
+
+// #define MINIAUDIO_IMPLEMENTATION
+// #include "miniaudio.h"
+#include <SDL3/SDL.h>
+#include <stdio.h>
+#include "raudio.h"
+
+static int audio_open = 0;
+static int next_track = 0;
+
+// static ma_result result;
+// static ma_decoder decoder;
+// static ma_device_config deviceConfig;
+// static ma_device device;
+
+SDL_AudioSpec spec;
+    
+
+static ThreadSafeQueue<AudioCommand> g_audioQueue;
+
+// void data_callback(ma_device* pDevice, void* pOutput, const void* pInput, ma_uint32 frameCount)
+// {
+//     ma_decoder* pDecoder = (ma_decoder*)pDevice->pUserData;
+//     if (pDecoder == NULL)
+//     {
+//         return;
+//     }
+
+//     if (ma_decoder_read_pcm_frames(pDecoder, pOutput, frameCount, NULL) != MA_SUCCESS)
+//     {
+//         g_audioQueue.push({"end", ""});  
+//     }
+
+
+//     (void)pInput;
+// }
+
+static Music music;
+static Sound sound;
+
+AudioPlayer::AudioPlayer(IAudioEvent &event)
+    : m_event(event)
+{
+    m_audioThread = std::thread( std::bind(&AudioPlayer::AudioThread, this) );
+}
+
+void AudioPlayer::Initialize()
+{
+    InitAudioDevice();
+    audio_open = 1;
+}
+
+void AudioPlayer::Play(const std::string &filename)
+{
+    // On coupe le son en cours
+    g_audioQueue.clear();
+    g_audioQueue.push({"play", filename});
+}
+
+AudioPlayer::~AudioPlayer()
+{
+    // Quit audio thread
+    g_audioQueue.clear();
+    g_audioQueue.push({"quit", ""});
+    if (m_audioThread.joinable())
+    {
+        m_audioThread.join();
+    }
+
+    CloseAudioDevice();
+}
+
+int AudioPlayer::StartAudio(const std::string &filename)
+{
+
+    // music = LoadMusicStream(filename.c_str());
+    // PlayMusicStream(music);
+
+    sound = LoadSound(filename.c_str());
+    PlaySound(sound);
+
+    /*
+    result = ma_decoder_init_file(filename.c_str(), NULL, &decoder);
+    if (result != MA_SUCCESS) 
+    {
+        // FIXME: show error
+        return -1;
+    }
+
+    deviceConfig = ma_device_config_init(ma_device_type_playback);
+    deviceConfig.playback.format   = decoder.outputFormat;
+    deviceConfig.playback.channels = decoder.outputChannels;
+    deviceConfig.sampleRate        = decoder.outputSampleRate;
+    deviceConfig.dataCallback      = data_callback;
+    deviceConfig.pUserData         = &decoder;
+
+    if (ma_device_init(NULL, &deviceConfig, &device) != MA_SUCCESS)
+    {
+        printf("Failed to open playback device.\n");
+        ma_decoder_uninit(&decoder);
+        return -3;
+    }
+
+    if (ma_device_start(&device) != MA_SUCCESS)
+    {
+        printf("Failed to start playback device.\n");
+        StopAudio();
+        return -4;
+    }
+    */
+
+    return 0;
+}
+
+void AudioPlayer::StopAudio()
+{
+    // ma_device_uninit(&device);
+    // ma_decoder_uninit(&decoder);
+
+    StopSound(sound);
+    UnloadSound(sound); 
+
+    // StopMusicStream(music);
+    // UnloadMusicStream(music);
+}
+
+ void AudioPlayer::Stop()
+ {
+    g_audioQueue.clear();
+    g_audioQueue.push({"end", ""});
+ }
+
+
+#define AUDIO_STATE_WAIT_PLAY  1
+#define AUDIO_STATE_WAIT_END  2
+
+void AudioPlayer::AudioThread()
+{
+    int state = AUDIO_STATE_WAIT_PLAY;
+    for (;;)
+    {
+        auto cmd = g_audioQueue.front();
+        g_audioQueue.pop();
+
+        if (cmd.order == "quit")
+        {
+            return;
+        }
+        else if (cmd.order == "play")
+        {
+            if (state == AUDIO_STATE_WAIT_PLAY)
+            {
+                state = AUDIO_STATE_WAIT_END;
+                StartAudio(cmd.filename);
+            }
+        }
+        else if (cmd.order == "end")
+        {
+            if (state == AUDIO_STATE_WAIT_END)
+            {
+                state = AUDIO_STATE_WAIT_PLAY;
+                StopAudio();
+                m_event.EndOfAudio();
+            }
+        }
+    }
+}
+
diff --git a/story-editor/src/base64.hpp b/shared/base64.hpp
similarity index 100%
rename from story-editor/src/base64.hpp
rename to shared/base64.hpp
diff --git a/shared/downloader.cpp b/shared/downloader.cpp
index f8ce708..53a5781 100644
--- a/shared/downloader.cpp
+++ b/shared/downloader.cpp
@@ -1,4 +1,4 @@
-#include <mbedtls/aes.h>
+// #include <mbedtls/aes.h>
 #include "downloader.h"
 #include "json.hpp"
 
diff --git a/story-editor/src/dr_mp3.h b/shared/dr_mp3.h
similarity index 100%
rename from story-editor/src/dr_mp3.h
rename to shared/dr_mp3.h
diff --git a/shared/external/dr_flac.h b/shared/external/dr_flac.h
new file mode 100644
index 0000000..14324cf
--- /dev/null
+++ b/shared/external/dr_flac.h
@@ -0,0 +1,12536 @@
+/*
+FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
+dr_flac - v0.12.42 - 2023-11-02
+
+David Reid - mackron@gmail.com
+
+GitHub: https://github.com/mackron/dr_libs
+*/
+
+/*
+RELEASE NOTES - v0.12.0
+=======================
+Version 0.12.0 has breaking API changes including changes to the existing API and the removal of deprecated APIs.
+
+
+Improved Client-Defined Memory Allocation
+-----------------------------------------
+The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
+existing system of DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE are still in place and will be used by default when no custom
+allocation callbacks are specified.
+
+To use the new system, you pass in a pointer to a drflac_allocation_callbacks object to drflac_open() and family, like this:
+
+    void* my_malloc(size_t sz, void* pUserData)
+    {
+        return malloc(sz);
+    }
+    void* my_realloc(void* p, size_t sz, void* pUserData)
+    {
+        return realloc(p, sz);
+    }
+    void my_free(void* p, void* pUserData)
+    {
+        free(p);
+    }
+
+    ...
+
+    drflac_allocation_callbacks allocationCallbacks;
+    allocationCallbacks.pUserData = &myData;
+    allocationCallbacks.onMalloc  = my_malloc;
+    allocationCallbacks.onRealloc = my_realloc;
+    allocationCallbacks.onFree    = my_free;
+    drflac* pFlac = drflac_open_file("my_file.flac", &allocationCallbacks);
+
+The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
+
+Passing in null for the allocation callbacks object will cause dr_flac to use defaults which is the same as DRFLAC_MALLOC,
+DRFLAC_REALLOC and DRFLAC_FREE and the equivalent of how it worked in previous versions.
+
+Every API that opens a drflac object now takes this extra parameter. These include the following:
+
+    drflac_open()
+    drflac_open_relaxed()
+    drflac_open_with_metadata()
+    drflac_open_with_metadata_relaxed()
+    drflac_open_file()
+    drflac_open_file_with_metadata()
+    drflac_open_memory()
+    drflac_open_memory_with_metadata()
+    drflac_open_and_read_pcm_frames_s32()
+    drflac_open_and_read_pcm_frames_s16()
+    drflac_open_and_read_pcm_frames_f32()
+    drflac_open_file_and_read_pcm_frames_s32()
+    drflac_open_file_and_read_pcm_frames_s16()
+    drflac_open_file_and_read_pcm_frames_f32()
+    drflac_open_memory_and_read_pcm_frames_s32()
+    drflac_open_memory_and_read_pcm_frames_s16()
+    drflac_open_memory_and_read_pcm_frames_f32()
+
+
+
+Optimizations
+-------------
+Seeking performance has been greatly improved. A new binary search based seeking algorithm has been introduced which significantly
+improves performance over the brute force method which was used when no seek table was present. Seek table based seeking also takes
+advantage of the new binary search seeking system to further improve performance there as well. Note that this depends on CRC which
+means it will be disabled when DR_FLAC_NO_CRC is used.
+
+The SSE4.1 pipeline has been cleaned up and optimized. You should see some improvements with decoding speed of 24-bit files in
+particular. 16-bit streams should also see some improvement.
+
+drflac_read_pcm_frames_s16() has been optimized. Previously this sat on top of drflac_read_pcm_frames_s32() and performed it's s32
+to s16 conversion in a second pass. This is now all done in a single pass. This includes SSE2 and ARM NEON optimized paths.
+
+A minor optimization has been implemented for drflac_read_pcm_frames_s32(). This will now use an SSE2 optimized pipeline for stereo
+channel reconstruction which is the last part of the decoding process.
+
+The ARM build has seen a few improvements. The CLZ (count leading zeroes) and REV (byte swap) instructions are now used when
+compiling with GCC and Clang which is achieved using inline assembly. The CLZ instruction requires ARM architecture version 5 at
+compile time and the REV instruction requires ARM architecture version 6.
+
+An ARM NEON optimized pipeline has been implemented. To enable this you'll need to add -mfpu=neon to the command line when compiling.
+
+
+Removed APIs
+------------
+The following APIs were deprecated in version 0.11.0 and have been completely removed in version 0.12.0:
+
+    drflac_read_s32()                   -> drflac_read_pcm_frames_s32()
+    drflac_read_s16()                   -> drflac_read_pcm_frames_s16()
+    drflac_read_f32()                   -> drflac_read_pcm_frames_f32()
+    drflac_seek_to_sample()             -> drflac_seek_to_pcm_frame()
+    drflac_open_and_decode_s32()        -> drflac_open_and_read_pcm_frames_s32()
+    drflac_open_and_decode_s16()        -> drflac_open_and_read_pcm_frames_s16()
+    drflac_open_and_decode_f32()        -> drflac_open_and_read_pcm_frames_f32()
+    drflac_open_and_decode_file_s32()   -> drflac_open_file_and_read_pcm_frames_s32()
+    drflac_open_and_decode_file_s16()   -> drflac_open_file_and_read_pcm_frames_s16()
+    drflac_open_and_decode_file_f32()   -> drflac_open_file_and_read_pcm_frames_f32()
+    drflac_open_and_decode_memory_s32() -> drflac_open_memory_and_read_pcm_frames_s32()
+    drflac_open_and_decode_memory_s16() -> drflac_open_memory_and_read_pcm_frames_s16()
+    drflac_open_and_decode_memory_f32() -> drflac_open_memroy_and_read_pcm_frames_f32()
+
+Prior versions of dr_flac operated on a per-sample basis whereas now it operates on PCM frames. The removed APIs all relate
+to the old per-sample APIs. You now need to use the "pcm_frame" versions.
+*/
+
+
+/*
+Introduction
+============
+dr_flac is a single file library. To use it, do something like the following in one .c file.
+
+    ```c
+    #define DR_FLAC_IMPLEMENTATION
+    #include "dr_flac.h"
+    ```
+
+You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, do something like the following:
+
+    ```c
+    drflac* pFlac = drflac_open_file("MySong.flac", NULL);
+    if (pFlac == NULL) {
+        // Failed to open FLAC file
+    }
+
+    drflac_int32* pSamples = malloc(pFlac->totalPCMFrameCount * pFlac->channels * sizeof(drflac_int32));
+    drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalPCMFrameCount, pSamples);
+    ```
+
+The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of channels and the bits per sample,
+should be directly accessible - just make sure you don't change their values. Samples are always output as interleaved signed 32-bit PCM. In the example above
+a native FLAC stream was opened, however dr_flac has seamless support for Ogg encapsulated FLAC streams as well.
+
+You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and the decoder will give you as many
+samples as it can, up to the amount requested. Later on when you need the next batch of samples, just call it again. Example:
+
+    ```c
+    while (drflac_read_pcm_frames_s32(pFlac, chunkSizeInPCMFrames, pChunkSamples) > 0) {
+        do_something();
+    }
+    ```
+
+You can seek to a specific PCM frame with `drflac_seek_to_pcm_frame()`.
+
+If you just want to quickly decode an entire FLAC file in one go you can do something like this:
+
+    ```c
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_int32* pSampleData = drflac_open_file_and_read_pcm_frames_s32("MySong.flac", &channels, &sampleRate, &totalPCMFrameCount, NULL);
+    if (pSampleData == NULL) {
+        // Failed to open and decode FLAC file.
+    }
+
+    ...
+
+    drflac_free(pSampleData, NULL);
+    ```
+
+You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs respectively, but note that these
+should be considered lossy.
+
+
+If you need access to metadata (album art, etc.), use `drflac_open_with_metadata()`, `drflac_open_file_with_metdata()` or `drflac_open_memory_with_metadata()`.
+The rationale for keeping these APIs separate is that they're slightly slower than the normal versions and also just a little bit harder to use. dr_flac
+reports metadata to the application through the use of a callback, and every metadata block is reported before `drflac_open_with_metdata()` returns.
+
+The main opening APIs (`drflac_open()`, etc.) will fail if the header is not present. The presents a problem in certain scenarios such as broadcast style
+streams or internet radio where the header may not be present because the user has started playback mid-stream. To handle this, use the relaxed APIs:
+    
+    `drflac_open_relaxed()`
+    `drflac_open_with_metadata_relaxed()`
+
+It is not recommended to use these APIs for file based streams because a missing header would usually indicate a corrupt or perverse file. In addition, these
+APIs can take a long time to initialize because they may need to spend a lot of time finding the first frame.
+
+
+
+Build Options
+=============
+#define these options before including this file.
+
+#define DR_FLAC_NO_STDIO
+  Disable `drflac_open_file()` and family.
+
+#define DR_FLAC_NO_OGG
+  Disables support for Ogg/FLAC streams.
+
+#define DR_FLAC_BUFFER_SIZE <number>
+  Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls back to the client for more data.
+  Larger values means more memory, but better performance. My tests show diminishing returns after about 4KB (which is the default). Consider reducing this if
+  you have a very efficient implementation of onRead(), or increase it if it's very inefficient. Must be a multiple of 8.
+
+#define DR_FLAC_NO_CRC
+  Disables CRC checks. This will offer a performance boost when CRC is unnecessary. This will disable binary search seeking. When seeking, the seek table will
+  be used if available. Otherwise the seek will be performed using brute force.
+
+#define DR_FLAC_NO_SIMD
+  Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having compatibility issues with your compiler.
+
+#define DR_FLAC_NO_WCHAR
+  Disables all functions ending with `_w`. Use this if your compiler does not provide wchar.h. Not required if DR_FLAC_NO_STDIO is also defined.
+
+
+
+Notes
+=====
+- dr_flac does not support changing the sample rate nor channel count mid stream.
+- dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization.
+- When using Ogg encapsulation, a corrupted metadata block will result in `drflac_open_with_metadata()` and `drflac_open()` returning inconsistent samples due
+  to differences in corrupted stream recorvery logic between the two APIs.
+*/
+
+#ifndef dr_flac_h
+#define dr_flac_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DRFLAC_STRINGIFY(x)      #x
+#define DRFLAC_XSTRINGIFY(x)     DRFLAC_STRINGIFY(x)
+
+#define DRFLAC_VERSION_MAJOR     0
+#define DRFLAC_VERSION_MINOR     12
+#define DRFLAC_VERSION_REVISION  42
+#define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
+
+#include <stddef.h> /* For size_t. */
+
+/* Sized Types */
+typedef   signed char           drflac_int8;
+typedef unsigned char           drflac_uint8;
+typedef   signed short          drflac_int16;
+typedef unsigned short          drflac_uint16;
+typedef   signed int            drflac_int32;
+typedef unsigned int            drflac_uint32;
+#if defined(_MSC_VER) && !defined(__clang__)
+    typedef   signed __int64    drflac_int64;
+    typedef unsigned __int64    drflac_uint64;
+#else
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+        #pragma GCC diagnostic push
+        #pragma GCC diagnostic ignored "-Wlong-long"
+        #if defined(__clang__)
+            #pragma GCC diagnostic ignored "-Wc++11-long-long"
+        #endif
+    #endif
+    typedef   signed long long  drflac_int64;
+    typedef unsigned long long  drflac_uint64;
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+        #pragma GCC diagnostic pop
+    #endif
+#endif
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined(_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__)
+    typedef drflac_uint64       drflac_uintptr;
+#else
+    typedef drflac_uint32       drflac_uintptr;
+#endif
+typedef drflac_uint8            drflac_bool8;
+typedef drflac_uint32           drflac_bool32;
+#define DRFLAC_TRUE             1
+#define DRFLAC_FALSE            0
+/* End Sized Types */
+
+/* Decorations */
+#if !defined(DRFLAC_API)
+    #if defined(DRFLAC_DLL)
+        #if defined(_WIN32)
+            #define DRFLAC_DLL_IMPORT  __declspec(dllimport)
+            #define DRFLAC_DLL_EXPORT  __declspec(dllexport)
+            #define DRFLAC_DLL_PRIVATE static
+        #else
+            #if defined(__GNUC__) && __GNUC__ >= 4
+                #define DRFLAC_DLL_IMPORT  __attribute__((visibility("default")))
+                #define DRFLAC_DLL_EXPORT  __attribute__((visibility("default")))
+                #define DRFLAC_DLL_PRIVATE __attribute__((visibility("hidden")))
+            #else
+                #define DRFLAC_DLL_IMPORT
+                #define DRFLAC_DLL_EXPORT
+                #define DRFLAC_DLL_PRIVATE static
+            #endif
+        #endif
+
+        #if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
+            #define DRFLAC_API  DRFLAC_DLL_EXPORT
+        #else
+            #define DRFLAC_API  DRFLAC_DLL_IMPORT
+        #endif
+        #define DRFLAC_PRIVATE DRFLAC_DLL_PRIVATE
+    #else
+        #define DRFLAC_API extern
+        #define DRFLAC_PRIVATE static
+    #endif
+#endif
+/* End Decorations */
+
+#if defined(_MSC_VER) && _MSC_VER >= 1700   /* Visual Studio 2012 */
+    #define DRFLAC_DEPRECATED       __declspec(deprecated)
+#elif (defined(__GNUC__) && __GNUC__ >= 4)  /* GCC 4 */
+    #define DRFLAC_DEPRECATED       __attribute__((deprecated))
+#elif defined(__has_feature)                /* Clang */
+    #if __has_feature(attribute_deprecated)
+        #define DRFLAC_DEPRECATED   __attribute__((deprecated))
+    #else
+        #define DRFLAC_DEPRECATED
+    #endif
+#else
+    #define DRFLAC_DEPRECATED
+#endif
+
+DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision);
+DRFLAC_API const char* drflac_version_string(void);
+
+/* Allocation Callbacks */
+typedef struct
+{
+    void* pUserData;
+    void* (* onMalloc)(size_t sz, void* pUserData);
+    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
+    void  (* onFree)(void* p, void* pUserData);
+} drflac_allocation_callbacks;
+/* End Allocation Callbacks */
+
+/*
+As data is read from the client it is placed into an internal buffer for fast access. This controls the size of that buffer. Larger values means more speed,
+but also more memory. In my testing there is diminishing returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8.
+*/
+#ifndef DR_FLAC_BUFFER_SIZE
+#define DR_FLAC_BUFFER_SIZE   4096
+#endif
+
+
+/* Architecture Detection */
+#if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
+#define DRFLAC_64BIT
+#endif
+
+#if defined(__x86_64__) || defined(_M_X64)
+    #define DRFLAC_X64
+#elif defined(__i386) || defined(_M_IX86)
+    #define DRFLAC_X86
+#elif defined(__arm__) || defined(_M_ARM) || defined(__arm64) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64)
+    #define DRFLAC_ARM
+#endif
+/* End Architecture Detection */
+
+
+#ifdef DRFLAC_64BIT
+typedef drflac_uint64 drflac_cache_t;
+#else
+typedef drflac_uint32 drflac_cache_t;
+#endif
+
+/* The various metadata block types. */
+#define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO       0
+#define DRFLAC_METADATA_BLOCK_TYPE_PADDING          1
+#define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION      2
+#define DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE        3
+#define DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT   4
+#define DRFLAC_METADATA_BLOCK_TYPE_CUESHEET         5
+#define DRFLAC_METADATA_BLOCK_TYPE_PICTURE          6
+#define DRFLAC_METADATA_BLOCK_TYPE_INVALID          127
+
+/* The various picture types specified in the PICTURE block. */
+#define DRFLAC_PICTURE_TYPE_OTHER                   0
+#define DRFLAC_PICTURE_TYPE_FILE_ICON               1
+#define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON         2
+#define DRFLAC_PICTURE_TYPE_COVER_FRONT             3
+#define DRFLAC_PICTURE_TYPE_COVER_BACK              4
+#define DRFLAC_PICTURE_TYPE_LEAFLET_PAGE            5
+#define DRFLAC_PICTURE_TYPE_MEDIA                   6
+#define DRFLAC_PICTURE_TYPE_LEAD_ARTIST             7
+#define DRFLAC_PICTURE_TYPE_ARTIST                  8
+#define DRFLAC_PICTURE_TYPE_CONDUCTOR               9
+#define DRFLAC_PICTURE_TYPE_BAND                    10
+#define DRFLAC_PICTURE_TYPE_COMPOSER                11
+#define DRFLAC_PICTURE_TYPE_LYRICIST                12
+#define DRFLAC_PICTURE_TYPE_RECORDING_LOCATION      13
+#define DRFLAC_PICTURE_TYPE_DURING_RECORDING        14
+#define DRFLAC_PICTURE_TYPE_DURING_PERFORMANCE      15
+#define DRFLAC_PICTURE_TYPE_SCREEN_CAPTURE          16
+#define DRFLAC_PICTURE_TYPE_BRIGHT_COLORED_FISH     17
+#define DRFLAC_PICTURE_TYPE_ILLUSTRATION            18
+#define DRFLAC_PICTURE_TYPE_BAND_LOGOTYPE           19
+#define DRFLAC_PICTURE_TYPE_PUBLISHER_LOGOTYPE      20
+
+typedef enum
+{
+    drflac_container_native,
+    drflac_container_ogg,
+    drflac_container_unknown
+} drflac_container;
+
+typedef enum
+{
+    drflac_seek_origin_start,
+    drflac_seek_origin_current
+} drflac_seek_origin;
+
+/* The order of members in this structure is important because we map this directly to the raw data within the SEEKTABLE metadata block. */
+typedef struct
+{
+    drflac_uint64 firstPCMFrame;
+    drflac_uint64 flacFrameOffset;   /* The offset from the first byte of the header of the first frame. */
+    drflac_uint16 pcmFrameCount;
+} drflac_seekpoint;
+
+typedef struct
+{
+    drflac_uint16 minBlockSizeInPCMFrames;
+    drflac_uint16 maxBlockSizeInPCMFrames;
+    drflac_uint32 minFrameSizeInPCMFrames;
+    drflac_uint32 maxFrameSizeInPCMFrames;
+    drflac_uint32 sampleRate;
+    drflac_uint8  channels;
+    drflac_uint8  bitsPerSample;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_uint8  md5[16];
+} drflac_streaminfo;
+
+typedef struct
+{
+    /*
+    The metadata type. Use this to know how to interpret the data below. Will be set to one of the
+    DRFLAC_METADATA_BLOCK_TYPE_* tokens.
+    */
+    drflac_uint32 type;
+
+    /*
+    A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to
+    not modify the contents of this buffer. Use the structures below for more meaningful and structured
+    information about the metadata. It's possible for this to be null.
+    */
+    const void* pRawData;
+
+    /* The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. */
+    drflac_uint32 rawDataSize;
+
+    union
+    {
+        drflac_streaminfo streaminfo;
+
+        struct
+        {
+            int unused;
+        } padding;
+
+        struct
+        {
+            drflac_uint32 id;
+            const void* pData;
+            drflac_uint32 dataSize;
+        } application;
+
+        struct
+        {
+            drflac_uint32 seekpointCount;
+            const drflac_seekpoint* pSeekpoints;
+        } seektable;
+
+        struct
+        {
+            drflac_uint32 vendorLength;
+            const char* vendor;
+            drflac_uint32 commentCount;
+            const void* pComments;
+        } vorbis_comment;
+
+        struct
+        {
+            char catalog[128];
+            drflac_uint64 leadInSampleCount;
+            drflac_bool32 isCD;
+            drflac_uint8 trackCount;
+            const void* pTrackData;
+        } cuesheet;
+
+        struct
+        {
+            drflac_uint32 type;
+            drflac_uint32 mimeLength;
+            const char* mime;
+            drflac_uint32 descriptionLength;
+            const char* description;
+            drflac_uint32 width;
+            drflac_uint32 height;
+            drflac_uint32 colorDepth;
+            drflac_uint32 indexColorCount;
+            drflac_uint32 pictureDataSize;
+            const drflac_uint8* pPictureData;
+        } picture;
+    } data;
+} drflac_metadata;
+
+
+/*
+Callback for when data needs to be read from the client.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+pBufferOut (out)
+    The output buffer.
+
+bytesToRead (in)
+    The number of bytes to read.
+
+
+Return Value
+------------
+The number of bytes actually read.
+
+
+Remarks
+-------
+A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until either the entire bytesToRead is filled or
+you have reached the end of the stream.
+*/
+typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
+
+/*
+Callback for when data needs to be seeked.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+offset (in)
+    The number of bytes to move, relative to the origin. Will never be negative.
+
+origin (in)
+    The origin of the seek - the current position or the start of the stream.
+
+
+Return Value
+------------
+Whether or not the seek was successful.
+
+
+Remarks
+-------
+The offset will never be negative. Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be
+either drflac_seek_origin_start or drflac_seek_origin_current.
+
+When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of the FLAC stream. This needs to be detected
+and handled by returning DRFLAC_FALSE.
+*/
+typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin);
+
+/*
+Callback for when a metadata block is read.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+pMetadata (in)
+    A pointer to a structure containing the data of the metadata block.
+
+
+Remarks
+-------
+Use pMetadata->type to determine which metadata block is being handled and how to read the data. This
+will be set to one of the DRFLAC_METADATA_BLOCK_TYPE_* tokens.
+*/
+typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata);
+
+
+/* Structure for internal use. Only used for decoders opened with drflac_open_memory. */
+typedef struct
+{
+    const drflac_uint8* data;
+    size_t dataSize;
+    size_t currentReadPos;
+} drflac__memory_stream;
+
+/* Structure for internal use. Used for bit streaming. */
+typedef struct
+{
+    /* The function to call when more data needs to be read. */
+    drflac_read_proc onRead;
+
+    /* The function to call when the current read position needs to be moved. */
+    drflac_seek_proc onSeek;
+
+    /* The user data to pass around to onRead and onSeek. */
+    void* pUserData;
+
+
+    /*
+    The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the
+    stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether
+    or not the bistreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t).
+    */
+    size_t unalignedByteCount;
+
+    /* The content of the unaligned bytes. */
+    drflac_cache_t unalignedCache;
+
+    /* The index of the next valid cache line in the "L2" cache. */
+    drflac_uint32 nextL2Line;
+
+    /* The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. */
+    drflac_uint32 consumedBits;
+
+    /*
+    The cached data which was most recently read from the client. There are two levels of cache. Data flows as such:
+    Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions.
+    */
+    drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)];
+    drflac_cache_t cache;
+
+    /*
+    CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this
+    is reset to 0 at the beginning of each frame.
+    */
+    drflac_uint16 crc16;
+    drflac_cache_t crc16Cache;              /* A cache for optimizing CRC calculations. This is filled when when the L1 cache is reloaded. */
+    drflac_uint32 crc16CacheIgnoredBytes;   /* The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. */
+} drflac_bs;
+
+typedef struct
+{
+    /* The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. */
+    drflac_uint8 subframeType;
+
+    /* The number of wasted bits per sample as specified by the sub-frame header. */
+    drflac_uint8 wastedBitsPerSample;
+
+    /* The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. */
+    drflac_uint8 lpcOrder;
+
+    /* A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. */
+    drflac_int32* pSamplesS32;
+} drflac_subframe;
+
+typedef struct
+{
+    /*
+    If the stream uses variable block sizes, this will be set to the index of the first PCM frame. If fixed block sizes are used, this will
+    always be set to 0. This is 64-bit because the decoded PCM frame number will be 36 bits.
+    */
+    drflac_uint64 pcmFrameNumber;
+
+    /*
+    If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. This
+    is 32-bit because in fixed block sizes, the maximum frame number will be 31 bits.
+    */
+    drflac_uint32 flacFrameNumber;
+
+    /* The sample rate of this frame. */
+    drflac_uint32 sampleRate;
+
+    /* The number of PCM frames in each sub-frame within this frame. */
+    drflac_uint16 blockSizeInPCMFrames;
+
+    /*
+    The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this
+    will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE.
+    */
+    drflac_uint8 channelAssignment;
+
+    /* The number of bits per sample within this frame. */
+    drflac_uint8 bitsPerSample;
+
+    /* The frame's CRC. */
+    drflac_uint8 crc8;
+} drflac_frame_header;
+
+typedef struct
+{
+    /* The header. */
+    drflac_frame_header header;
+
+    /*
+    The number of PCM frames left to be read in this FLAC frame. This is initially set to the block size. As PCM frames are read,
+    this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame.
+    */
+    drflac_uint32 pcmFramesRemaining;
+
+    /* The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. */
+    drflac_subframe subframes[8];
+} drflac_frame;
+
+typedef struct
+{
+    /* The function to call when a metadata block is read. */
+    drflac_meta_proc onMeta;
+
+    /* The user data posted to the metadata callback function. */
+    void* pUserDataMD;
+
+    /* Memory allocation callbacks. */
+    drflac_allocation_callbacks allocationCallbacks;
+
+
+    /* The sample rate. Will be set to something like 44100. */
+    drflac_uint32 sampleRate;
+
+    /*
+    The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the
+    value specified in the STREAMINFO block.
+    */
+    drflac_uint8 channels;
+
+    /* The bits per sample. Will be set to something like 16, 24, etc. */
+    drflac_uint8 bitsPerSample;
+
+    /* The maximum block size, in samples. This number represents the number of samples in each channel (not combined). */
+    drflac_uint16 maxBlockSizeInPCMFrames;
+
+    /*
+    The total number of PCM Frames making up the stream. Can be 0 in which case it's still a valid stream, but just means
+    the total PCM frame count is unknown. Likely the case with streams like internet radio.
+    */
+    drflac_uint64 totalPCMFrameCount;
+
+
+    /* The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. */
+    drflac_container container;
+
+    /* The number of seekpoints in the seektable. */
+    drflac_uint32 seekpointCount;
+
+
+    /* Information about the frame the decoder is currently sitting on. */
+    drflac_frame currentFLACFrame;
+
+
+    /* The index of the PCM frame the decoder is currently sitting on. This is only used for seeking. */
+    drflac_uint64 currentPCMFrame;
+
+    /* The position of the first FLAC frame in the stream. This is only ever used for seeking. */
+    drflac_uint64 firstFLACFramePosInBytes;
+
+
+    /* A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). */
+    drflac__memory_stream memoryStream;
+
+
+    /* A pointer to the decoded sample data. This is an offset of pExtraData. */
+    drflac_int32* pDecodedSamples;
+
+    /* A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. */
+    drflac_seekpoint* pSeekpoints;
+
+    /* Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. */
+    void* _oggbs;
+
+    /* Internal use only. Used for profiling and testing different seeking modes. */
+    drflac_bool32 _noSeekTableSeek    : 1;
+    drflac_bool32 _noBinarySearchSeek : 1;
+    drflac_bool32 _noBruteForceSeek   : 1;
+
+    /* The bit streamer. The raw FLAC data is fed through this object. */
+    drflac_bs bs;
+
+    /* Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. */
+    drflac_uint8 pExtraData[1];
+} drflac;
+
+
+/*
+Opens a FLAC decoder.
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead and onSeek.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+Returns a pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with `drflac_close()`.
+
+`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
+
+This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated FLAC, both of which should work seamlessly
+without any manual intervention. Ogg encapsulation also works with multiplexed streams which basically means it can play FLAC encoded audio tracks in videos.
+
+This is the lowest level function for opening a FLAC stream. You can also use `drflac_open_file()` and `drflac_open_memory()` to open the stream from a file or
+from a block of memory respectively.
+
+The STREAMINFO block must be present for this to succeed. Use `drflac_open_relaxed()` to open a FLAC stream where the header may not be present.
+
+Use `drflac_open_with_metadata()` if you need access to metadata.
+
+
+Seek Also
+---------
+drflac_open_file()
+drflac_open_memory()
+drflac_open_with_metadata()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC stream with relaxed validation of the header block.
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+container (in)
+    Whether or not the FLAC stream is encapsulated using standard FLAC encapsulation or Ogg encapsulation.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead and onSeek.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+The same as drflac_open(), except attempts to open the stream even when a header block is not present.
+
+Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do not set this to `drflac_container_unknown`
+as that is for internal use only.
+
+Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never found it will continue forever. To abort,
+force your `onRead` callback to return 0, which dr_flac will use as an indicator that the end of the stream was found.
+
+Use `drflac_open_with_metadata_relaxed()` if you need access to metadata.
+*/
+DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.).
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+onMeta (in)
+    The function to call for every metadata block.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead, onSeek and onMeta.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with `drflac_close()`.
+
+`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
+
+This is slower than `drflac_open()`, so avoid this one if you don't need metadata. Internally, this will allocate and free memory on the heap for every
+metadata block except for STREAMINFO and PADDING blocks.
+
+The caller is notified of the metadata via the `onMeta` callback. All metadata blocks will be handled before the function returns. This callback takes a
+pointer to a `drflac_metadata` object which is a union containing the data of all relevant metadata blocks. Use the `type` member to discriminate against
+the different metadata types.
+
+The STREAMINFO block must be present for this to succeed. Use `drflac_open_with_metadata_relaxed()` to open a FLAC stream where the header may not be present.
+
+Note that this will behave inconsistently with `drflac_open()` if the stream is an Ogg encapsulated stream and a metadata block is corrupted. This is due to
+the way the Ogg stream recovers from corrupted pages. When `drflac_open_with_metadata()` is being used, the open routine will try to read the contents of the
+metadata block, whereas `drflac_open()` will simply seek past it (for the sake of efficiency). This inconsistency can result in different samples being
+returned depending on whether or not the stream is being opened with metadata.
+
+
+Seek Also
+---------
+drflac_open_file_with_metadata()
+drflac_open_memory_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present.
+
+See Also
+--------
+drflac_open_with_metadata()
+drflac_open_relaxed()
+*/
+DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Closes the given FLAC decoder.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder to close.
+
+
+Remarks
+-------
+This will destroy the decoder object.
+
+
+See Also
+--------
+drflac_open()
+drflac_open_with_metadata()
+drflac_open_file()
+drflac_open_file_w()
+drflac_open_file_with_metadata()
+drflac_open_file_with_metadata_w()
+drflac_open_memory()
+drflac_open_memory_with_metadata()
+*/
+DRFLAC_API void drflac_close(drflac* pFlac);
+
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut);
+
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved signed 16-bit PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+
+Note that this is lossy for streams where the bits per sample is larger than 16.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut);
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved 32-bit floating point PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+
+Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly represent every possible number.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut);
+
+/*
+Seeks to the PCM frame at the given index.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+pcmFrameIndex (in)
+    The index of the PCM frame to seek to. See notes below.
+
+
+Return Value
+-------------
+`DRFLAC_TRUE` if successful; `DRFLAC_FALSE` otherwise.
+*/
+DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex);
+
+
+
+#ifndef DR_FLAC_NO_STDIO
+/*
+Opens a FLAC decoder from the file at the given path.
+
+
+Parameters
+----------
+pFileName (in)
+    The path of the file to open, either absolute or relative to the current directory.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with drflac_close().
+
+
+Remarks
+-------
+This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the number of files a process can have open
+at any given time, so keep this mind if you have many decoders open at the same time.
+
+
+See Also
+--------
+drflac_open_file_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.)
+
+
+Parameters
+----------
+pFileName (in)
+    The path of the file to open, either absolute or relative to the current directory.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+onMeta (in)
+    The callback to fire for each metadata block.
+
+pUserData (in)
+    A pointer to the user data to pass to the metadata callback.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Remarks
+-------
+Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+
+
+See Also
+--------
+drflac_open_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/*
+Opens a FLAC decoder from a pre-allocated block of memory
+
+
+Parameters
+----------
+pData (in)
+    A pointer to the raw encoded FLAC data.
+
+dataSize (in)
+    The size in bytes of `data`.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for the lifetime of the decoder.
+
+
+See Also
+--------
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.)
+
+
+Parameters
+----------
+pData (in)
+    A pointer to the raw encoded FLAC data.
+
+dataSize (in)
+    The size in bytes of `data`.
+
+onMeta (in)
+    The callback to fire for each metadata block.
+
+pUserData (in)
+    A pointer to the user data to pass to the metadata callback.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Remarks
+-------
+Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+
+
+See Also
+-------
+drflac_open_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+
+
+/* High Level APIs */
+
+/*
+Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a
+pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free().
+
+You can pass in custom memory allocation callbacks via the pAllocationCallbacks parameter. This can be NULL in which
+case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
+
+Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously
+read samples into a dynamically sized buffer on the heap until no samples are left.
+
+Do not call this function on a broadcast type of stream (like internet radio streams and whatnot).
+*/
+DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+#ifndef DR_FLAC_NO_STDIO
+/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. */
+DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. */
+DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Frees memory that was allocated internally by dr_flac.
+
+Set pAllocationCallbacks to the same object that was passed to drflac_open_*_and_read_pcm_frames_*(). If you originally passed in NULL, pass in NULL for this.
+*/
+DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+
+/* Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. */
+typedef struct
+{
+    drflac_uint32 countRemaining;
+    const char* pRunningData;
+} drflac_vorbis_comment_iterator;
+
+/*
+Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT
+metadata block.
+*/
+DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments);
+
+/*
+Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The
+returned string is NOT null terminated.
+*/
+DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut);
+
+
+/* Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. */
+typedef struct
+{
+    drflac_uint32 countRemaining;
+    const char* pRunningData;
+} drflac_cuesheet_track_iterator;
+
+/* The order of members here is important because we map this directly to the raw data within the CUESHEET metadata block. */
+typedef struct
+{
+    drflac_uint64 offset;
+    drflac_uint8 index;
+    drflac_uint8 reserved[3];
+} drflac_cuesheet_track_index;
+
+typedef struct
+{
+    drflac_uint64 offset;
+    drflac_uint8 trackNumber;
+    char ISRC[12];
+    drflac_bool8 isAudio;
+    drflac_bool8 preEmphasis;
+    drflac_uint8 indexCount;
+    const drflac_cuesheet_track_index* pIndexPoints;
+} drflac_cuesheet_track;
+
+/*
+Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata
+block.
+*/
+DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData);
+
+/* Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more comments. */
+DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* dr_flac_h */
+
+
+/************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************
+
+ IMPLEMENTATION
+
+ ************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************/
+#if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
+#ifndef dr_flac_c
+#define dr_flac_c
+
+/* Disable some annoying warnings. */
+#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+    #pragma GCC diagnostic push
+    #if __GNUC__ >= 7
+    #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+    #endif
+#endif
+
+#ifdef __linux__
+    #ifndef _BSD_SOURCE
+        #define _BSD_SOURCE
+    #endif
+    #ifndef _DEFAULT_SOURCE
+        #define _DEFAULT_SOURCE
+    #endif
+    #ifndef __USE_BSD
+        #define __USE_BSD
+    #endif
+    #include <endian.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Inline */
+#ifdef _MSC_VER
+    #define DRFLAC_INLINE __forceinline
+#elif defined(__GNUC__)
+    /*
+    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
+    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
+    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
+    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
+    I am using "__inline__" only when we're compiling in strict ANSI mode.
+    */
+    #if defined(__STRICT_ANSI__)
+        #define DRFLAC_GNUC_INLINE_HINT __inline__
+    #else
+        #define DRFLAC_GNUC_INLINE_HINT inline
+    #endif
+
+    #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__)
+        #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT __attribute__((always_inline))
+    #else
+        #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT
+    #endif
+#elif defined(__WATCOMC__)
+    #define DRFLAC_INLINE __inline
+#else
+    #define DRFLAC_INLINE
+#endif
+/* End Inline */
+
+/*
+Intrinsics Support
+
+There's a bug in GCC 4.2.x which results in an incorrect compilation error when using _mm_slli_epi32() where it complains with
+
+    "error: shift must be an immediate"
+
+Unfortuantely dr_flac depends on this for a few things so we're just going to disable SSE on GCC 4.2 and below.
+*/
+#if !defined(DR_FLAC_NO_SIMD)
+    #if defined(DRFLAC_X64) || defined(DRFLAC_X86)
+        #if defined(_MSC_VER) && !defined(__clang__)
+            /* MSVC. */
+            #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2)    /* 2005 */
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41)   /* 2010 */
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
+            /* Assume GNUC-style. */
+            #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2)
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41)
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #endif
+
+        /* If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. */
+        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
+            #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include(<emmintrin.h>)
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include(<smmintrin.h>)
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #endif
+
+        #if defined(DRFLAC_SUPPORT_SSE41)
+            #include <smmintrin.h>
+        #elif defined(DRFLAC_SUPPORT_SSE2)
+            #include <emmintrin.h>
+        #endif
+    #endif
+
+    #if defined(DRFLAC_ARM)
+        #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
+            #define DRFLAC_SUPPORT_NEON
+            #include <arm_neon.h>
+        #endif
+    #endif
+#endif
+
+/* Compile-time CPU feature support. */
+#if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
+    #if defined(_MSC_VER) && !defined(__clang__)
+        #if _MSC_VER >= 1400
+            #include <intrin.h>
+            static void drflac__cpuid(int info[4], int fid)
+            {
+                __cpuid(info, fid);
+            }
+        #else
+            #define DRFLAC_NO_CPUID
+        #endif
+    #else
+        #if defined(__GNUC__) || defined(__clang__)
+            static void drflac__cpuid(int info[4], int fid)
+            {
+                /*
+                It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the
+                specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for
+                supporting different assembly dialects.
+
+                What's basically happening is that we're saving and restoring the ebx register manually.
+                */
+                #if defined(DRFLAC_X86) && defined(__PIC__)
+                    __asm__ __volatile__ (
+                        "xchg{l} {%%}ebx, %k1;"
+                        "cpuid;"
+                        "xchg{l} {%%}ebx, %k1;"
+                        : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
+                    );
+                #else
+                    __asm__ __volatile__ (
+                        "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
+                    );
+                #endif
+            }
+        #else
+            #define DRFLAC_NO_CPUID
+        #endif
+    #endif
+#else
+    #define DRFLAC_NO_CPUID
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac_has_sse2(void)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2)
+        #if defined(DRFLAC_X64)
+            return DRFLAC_TRUE;    /* 64-bit targets always support SSE2. */
+        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__)
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE2 code we can assume support. */
+        #else
+            #if defined(DRFLAC_NO_CPUID)
+                return DRFLAC_FALSE;
+            #else
+                int info[4];
+                drflac__cpuid(info, 1);
+                return (info[3] & (1 << 26)) != 0;
+            #endif
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* SSE2 is only supported on x86 and x64 architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
+{
+#if defined(DRFLAC_SUPPORT_SSE41)
+    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41)
+        #if defined(__SSE4_1__) || defined(__AVX__)
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE41 code we can assume support. */
+        #else
+            #if defined(DRFLAC_NO_CPUID)
+                return DRFLAC_FALSE;
+            #else
+                int info[4];
+                drflac__cpuid(info, 1);
+                return (info[2] & (1 << 19)) != 0;
+            #endif
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* SSE41 is only supported on x86 and x64 architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) && !defined(__clang__)
+    #define DRFLAC_HAS_LZCNT_INTRINSIC
+#elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
+    #define DRFLAC_HAS_LZCNT_INTRINSIC
+#elif defined(__clang__)
+    #if defined(__has_builtin)
+        #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl)
+            #define DRFLAC_HAS_LZCNT_INTRINSIC
+        #endif
+    #endif
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(__clang__)
+    #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+#elif defined(__clang__)
+    #if defined(__has_builtin)
+        #if __has_builtin(__builtin_bswap16)
+            #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap32)
+            #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap64)
+            #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+        #endif
+    #endif
+#elif defined(__GNUC__)
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+        #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+        #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    #endif
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+        #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #endif
+#elif defined(__WATCOMC__) && defined(__386__)
+    #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    extern __inline drflac_uint16 _watcom_bswap16(drflac_uint16);
+    extern __inline drflac_uint32 _watcom_bswap32(drflac_uint32);
+    extern __inline drflac_uint64 _watcom_bswap64(drflac_uint64);
+#pragma aux _watcom_bswap16 = \
+    "xchg al, ah" \
+    parm  [ax]    \
+    value [ax]    \
+    modify nomemory;
+#pragma aux _watcom_bswap32 = \
+    "bswap eax" \
+    parm  [eax] \
+    value [eax] \
+    modify nomemory;
+#pragma aux _watcom_bswap64 = \
+    "bswap eax"     \
+    "bswap edx"     \
+    "xchg eax,edx"  \
+    parm [eax edx]  \
+    value [eax edx] \
+    modify nomemory;
+#endif
+
+
+/* Standard library stuff. */
+#ifndef DRFLAC_ASSERT
+#include <assert.h>
+#define DRFLAC_ASSERT(expression)           assert(expression)
+#endif
+#ifndef DRFLAC_MALLOC
+#define DRFLAC_MALLOC(sz)                   malloc((sz))
+#endif
+#ifndef DRFLAC_REALLOC
+#define DRFLAC_REALLOC(p, sz)               realloc((p), (sz))
+#endif
+#ifndef DRFLAC_FREE
+#define DRFLAC_FREE(p)                      free((p))
+#endif
+#ifndef DRFLAC_COPY_MEMORY
+#define DRFLAC_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
+#endif
+#ifndef DRFLAC_ZERO_MEMORY
+#define DRFLAC_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
+#endif
+#ifndef DRFLAC_ZERO_OBJECT
+#define DRFLAC_ZERO_OBJECT(p)               DRFLAC_ZERO_MEMORY((p), sizeof(*(p)))
+#endif
+
+#define DRFLAC_MAX_SIMD_VECTOR_SIZE                     64  /* 64 for AVX-512 in the future. */
+
+/* Result Codes */
+typedef drflac_int32 drflac_result;
+#define DRFLAC_SUCCESS                                   0
+#define DRFLAC_ERROR                                    -1   /* A generic error. */
+#define DRFLAC_INVALID_ARGS                             -2
+#define DRFLAC_INVALID_OPERATION                        -3
+#define DRFLAC_OUT_OF_MEMORY                            -4
+#define DRFLAC_OUT_OF_RANGE                             -5
+#define DRFLAC_ACCESS_DENIED                            -6
+#define DRFLAC_DOES_NOT_EXIST                           -7
+#define DRFLAC_ALREADY_EXISTS                           -8
+#define DRFLAC_TOO_MANY_OPEN_FILES                      -9
+#define DRFLAC_INVALID_FILE                             -10
+#define DRFLAC_TOO_BIG                                  -11
+#define DRFLAC_PATH_TOO_LONG                            -12
+#define DRFLAC_NAME_TOO_LONG                            -13
+#define DRFLAC_NOT_DIRECTORY                            -14
+#define DRFLAC_IS_DIRECTORY                             -15
+#define DRFLAC_DIRECTORY_NOT_EMPTY                      -16
+#define DRFLAC_END_OF_FILE                              -17
+#define DRFLAC_NO_SPACE                                 -18
+#define DRFLAC_BUSY                                     -19
+#define DRFLAC_IO_ERROR                                 -20
+#define DRFLAC_INTERRUPT                                -21
+#define DRFLAC_UNAVAILABLE                              -22
+#define DRFLAC_ALREADY_IN_USE                           -23
+#define DRFLAC_BAD_ADDRESS                              -24
+#define DRFLAC_BAD_SEEK                                 -25
+#define DRFLAC_BAD_PIPE                                 -26
+#define DRFLAC_DEADLOCK                                 -27
+#define DRFLAC_TOO_MANY_LINKS                           -28
+#define DRFLAC_NOT_IMPLEMENTED                          -29
+#define DRFLAC_NO_MESSAGE                               -30
+#define DRFLAC_BAD_MESSAGE                              -31
+#define DRFLAC_NO_DATA_AVAILABLE                        -32
+#define DRFLAC_INVALID_DATA                             -33
+#define DRFLAC_TIMEOUT                                  -34
+#define DRFLAC_NO_NETWORK                               -35
+#define DRFLAC_NOT_UNIQUE                               -36
+#define DRFLAC_NOT_SOCKET                               -37
+#define DRFLAC_NO_ADDRESS                               -38
+#define DRFLAC_BAD_PROTOCOL                             -39
+#define DRFLAC_PROTOCOL_UNAVAILABLE                     -40
+#define DRFLAC_PROTOCOL_NOT_SUPPORTED                   -41
+#define DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED            -42
+#define DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED             -43
+#define DRFLAC_SOCKET_NOT_SUPPORTED                     -44
+#define DRFLAC_CONNECTION_RESET                         -45
+#define DRFLAC_ALREADY_CONNECTED                        -46
+#define DRFLAC_NOT_CONNECTED                            -47
+#define DRFLAC_CONNECTION_REFUSED                       -48
+#define DRFLAC_NO_HOST                                  -49
+#define DRFLAC_IN_PROGRESS                              -50
+#define DRFLAC_CANCELLED                                -51
+#define DRFLAC_MEMORY_ALREADY_MAPPED                    -52
+#define DRFLAC_AT_END                                   -53
+
+#define DRFLAC_CRC_MISMATCH                             -100
+/* End Result Codes */
+
+
+#define DRFLAC_SUBFRAME_CONSTANT                        0
+#define DRFLAC_SUBFRAME_VERBATIM                        1
+#define DRFLAC_SUBFRAME_FIXED                           8
+#define DRFLAC_SUBFRAME_LPC                             32
+#define DRFLAC_SUBFRAME_RESERVED                        255
+
+#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE  0
+#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1
+
+#define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT           0
+#define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE             8
+#define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE            9
+#define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE              10
+
+#define DRFLAC_SEEKPOINT_SIZE_IN_BYTES                  18
+#define DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES             36
+#define DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES       12
+
+#define drflac_align(x, a)                              ((((x) + (a) - 1) / (a)) * (a))
+
+
+DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision)
+{
+    if (pMajor) {
+        *pMajor = DRFLAC_VERSION_MAJOR;
+    }
+
+    if (pMinor) {
+        *pMinor = DRFLAC_VERSION_MINOR;
+    }
+
+    if (pRevision) {
+        *pRevision = DRFLAC_VERSION_REVISION;
+    }
+}
+
+DRFLAC_API const char* drflac_version_string(void)
+{
+    return DRFLAC_VERSION_STRING;
+}
+
+
+/* CPU caps. */
+#if defined(__has_feature)
+    #if __has_feature(thread_sanitizer)
+        #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread")))
+    #else
+        #define DRFLAC_NO_THREAD_SANITIZE
+    #endif
+#else
+    #define DRFLAC_NO_THREAD_SANITIZE
+#endif
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE;
+#endif
+
+#ifndef DRFLAC_NO_CPUID
+static drflac_bool32 drflac__gIsSSE2Supported  = DRFLAC_FALSE;
+static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE;
+
+/*
+I've had a bug report that Clang's ThreadSanitizer presents a warning in this function. Having reviewed this, this does
+actually make sense. However, since CPU caps should never differ for a running process, I don't think the trade off of
+complicating internal API's by passing around CPU caps versus just disabling the warnings is worthwhile. I'm therefore
+just going to disable these warnings. This is disabled via the DRFLAC_NO_THREAD_SANITIZE attribute.
+*/
+DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
+{
+    static drflac_bool32 isCPUCapsInitialized = DRFLAC_FALSE;
+
+    if (!isCPUCapsInitialized) {
+        /* LZCNT */
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+        int info[4] = {0};
+        drflac__cpuid(info, 0x80000001);
+        drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0;
+#endif
+
+        /* SSE2 */
+        drflac__gIsSSE2Supported = drflac_has_sse2();
+
+        /* SSE4.1 */
+        drflac__gIsSSE41Supported = drflac_has_sse41();
+
+        /* Initialized. */
+        isCPUCapsInitialized = DRFLAC_TRUE;
+    }
+}
+#else
+static drflac_bool32 drflac__gIsNEONSupported  = DRFLAC_FALSE;
+
+static DRFLAC_INLINE drflac_bool32 drflac__has_neon(void)
+{
+#if defined(DRFLAC_SUPPORT_NEON)
+    #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON)
+        #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate NEON code we can assume support. */
+        #else
+            /* TODO: Runtime check. */
+            return DRFLAC_FALSE;
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* NEON is only supported on ARM architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
+{
+    drflac__gIsNEONSupported = drflac__has_neon();
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
+    drflac__gIsLZCNTSupported = DRFLAC_TRUE;
+#endif
+}
+#endif
+
+
+/* Endian Management */
+static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian(void)
+{
+#if defined(DRFLAC_X86) || defined(DRFLAC_X64)
+    return DRFLAC_TRUE;
+#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
+    return DRFLAC_TRUE;
+#else
+    int n = 1;
+    return (*(char*)&n) == 1;
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #if defined(_MSC_VER) && !defined(__clang__)
+        return _byteswap_ushort(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap16(n);
+    #elif defined(__WATCOMC__) && defined(__386__)
+        return _watcom_bswap16(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF00) >> 8) |
+           ((n & 0x00FF) << 8);
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #if defined(_MSC_VER) && !defined(__clang__)
+        return _byteswap_ulong(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(__ARM_ARCH_6M__) && !defined(DRFLAC_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
+            /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
+            drflac_uint32 r;
+            __asm__ __volatile__ (
+            #if defined(DRFLAC_64BIT)
+                "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
+            #else
+                "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n)
+            #endif
+            );
+            return r;
+        #else
+            return __builtin_bswap32(n);
+        #endif
+    #elif defined(__WATCOMC__) && defined(__386__)
+        return _watcom_bswap32(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF000000) >> 24) |
+           ((n & 0x00FF0000) >>  8) |
+           ((n & 0x0000FF00) <<  8) |
+           ((n & 0x000000FF) << 24);
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    #if defined(_MSC_VER) && !defined(__clang__)
+        return _byteswap_uint64(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap64(n);
+    #elif defined(__WATCOMC__) && defined(__386__)
+        return _watcom_bswap64(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
+    return ((n & ((drflac_uint64)0xFF000000 << 32)) >> 56) |
+           ((n & ((drflac_uint64)0x00FF0000 << 32)) >> 40) |
+           ((n & ((drflac_uint64)0x0000FF00 << 32)) >> 24) |
+           ((n & ((drflac_uint64)0x000000FF << 32)) >>  8) |
+           ((n & ((drflac_uint64)0xFF000000      )) <<  8) |
+           ((n & ((drflac_uint64)0x00FF0000      )) << 24) |
+           ((n & ((drflac_uint64)0x0000FF00      )) << 40) |
+           ((n & ((drflac_uint64)0x000000FF      )) << 56);
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint16(n);
+    }
+
+    return n;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint32(n);
+    }
+
+    return n;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__be2host_32_ptr_unaligned(const void* pData)
+{
+    const drflac_uint8* pNum = (drflac_uint8*)pData;
+    return *(pNum) << 24 | *(pNum+1) << 16 | *(pNum+2) << 8 | *(pNum+3);
+}
+
+static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint64(n);
+    }
+
+    return n;
+}
+
+
+static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
+{
+    if (!drflac__is_little_endian()) {
+        return drflac__swap_endian_uint32(n);
+    }
+
+    return n;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__le2host_32_ptr_unaligned(const void* pData)
+{
+    const drflac_uint8* pNum = (drflac_uint8*)pData;
+    return *pNum | *(pNum+1) << 8 |  *(pNum+2) << 16 | *(pNum+3) << 24;
+}
+
+
+static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
+{
+    drflac_uint32 result = 0;
+    result |= (n & 0x7F000000) >> 3;
+    result |= (n & 0x007F0000) >> 2;
+    result |= (n & 0x00007F00) >> 1;
+    result |= (n & 0x0000007F) >> 0;
+
+    return result;
+}
+
+
+
+/* The CRC code below is based on this document: http://zlib.net/crc_v3.txt */
+static drflac_uint8 drflac__crc8_table[] = {
+    0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
+    0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
+    0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
+    0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
+    0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
+    0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
+    0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
+    0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
+    0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
+    0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
+    0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
+    0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
+    0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
+    0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
+    0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
+    0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
+};
+
+static drflac_uint16 drflac__crc16_table[] = {
+    0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011,
+    0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022,
+    0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072,
+    0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041,
+    0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2,
+    0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1,
+    0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1,
+    0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 0x0082,
+    0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192,
+    0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1,
+    0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1,
+    0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2,
+    0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151,
+    0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162,
+    0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132,
+    0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101,
+    0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312,
+    0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321,
+    0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371,
+    0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342,
+    0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1,
+    0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2,
+    0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2,
+    0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381,
+    0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291,
+    0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2,
+    0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2,
+    0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1,
+    0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252,
+    0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261,
+    0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231,
+    0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202
+};
+
+static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint8 data)
+{
+    return drflac__crc8_table[crc ^ data];
+}
+
+static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+#if 0
+    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);") */
+    drflac_uint8 p = 0x07;
+    for (int i = count-1; i >= 0; --i) {
+        drflac_uint8 bit = (data & (1 << i)) >> i;
+        if (crc & 0x80) {
+            crc = ((crc << 1) | bit) ^ p;
+        } else {
+            crc = ((crc << 1) | bit);
+        }
+    }
+    return crc;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 32);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count - (wholeBytes*8);
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (drflac_uint8)((crc << leftoverBits) ^ drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)]);
+    }
+    return crc;
+#endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_byte(drflac_uint16 crc, drflac_uint8 data)
+{
+    return (crc << 8) ^ drflac__crc16_table[(drflac_uint8)(crc >> 8) ^ data];
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_cache(drflac_uint16 crc, drflac_cache_t data)
+{
+#ifdef DRFLAC_64BIT
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
+#endif
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
+
+    return crc;
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 byteCount)
+{
+    switch (byteCount)
+    {
+#ifdef DRFLAC_64BIT
+    case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
+    case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
+    case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
+    case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
+#endif
+    case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
+    case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
+    case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
+    case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
+    }
+
+    return crc;
+}
+
+#if 0
+static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+#if 0
+    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") */
+    drflac_uint16 p = 0x8005;
+    for (int i = count-1; i >= 0; --i) {
+        drflac_uint16 bit = (data & (1ULL << i)) >> i;
+        if (r & 0x8000) {
+            r = ((r << 1) | bit) ^ p;
+        } else {
+            r = ((r << 1) | bit);
+        }
+    }
+
+    return crc;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 64);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count & 7;
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        default:
+        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
+    }
+    return crc;
+#endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16__64bit(drflac_uint16 crc, drflac_uint64 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 64);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count & 7;
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        default:
+        case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 << 32) << leftoverBits)) >> (56 + leftoverBits)));    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
+        case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 << 32) << leftoverBits)) >> (48 + leftoverBits)));
+        case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 << 32) << leftoverBits)) >> (40 + leftoverBits)));
+        case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF << 32) << leftoverBits)) >> (32 + leftoverBits)));
+        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000      ) << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000      ) << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00      ) << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF      ) << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
+    }
+    return crc;
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 count)
+{
+#ifdef DRFLAC_64BIT
+    return drflac_crc16__64bit(crc, data, count);
+#else
+    return drflac_crc16__32bit(crc, data, count);
+#endif
+}
+#endif
+
+
+#ifdef DRFLAC_64BIT
+#define drflac__be2host__cache_line drflac__be2host_64
+#else
+#define drflac__be2host__cache_line drflac__be2host_32
+#endif
+
+/*
+BIT READING ATTEMPT #2
+
+This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting
+on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache
+is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an
+array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data
+from onRead() is read into.
+*/
+#define DRFLAC_CACHE_L1_SIZE_BYTES(bs)                      (sizeof((bs)->cache))
+#define DRFLAC_CACHE_L1_SIZE_BITS(bs)                       (sizeof((bs)->cache)*8)
+#define DRFLAC_CACHE_L1_BITS_REMAINING(bs)                  (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits)
+#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)           (~((~(drflac_cache_t)0) >> (_bitCount)))
+#define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount)      (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount))
+#define DRFLAC_CACHE_L1_SELECT(bs, _bitCount)               (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount))
+#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount)     (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >>  DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)))
+#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1)))
+#define DRFLAC_CACHE_L2_SIZE_BYTES(bs)                      (sizeof((bs)->cacheL2))
+#define DRFLAC_CACHE_L2_LINE_COUNT(bs)                      (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0]))
+#define DRFLAC_CACHE_L2_LINES_REMAINING(bs)                 (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line)
+
+
+#ifndef DR_FLAC_NO_CRC
+static DRFLAC_INLINE void drflac__reset_crc16(drflac_bs* bs)
+{
+    bs->crc16 = 0;
+    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+}
+
+static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs)
+{
+    if (bs->crc16CacheIgnoredBytes == 0) {
+        bs->crc16 = drflac_crc16_cache(bs->crc16, bs->crc16Cache);
+    } else {
+        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes);
+        bs->crc16CacheIgnoredBytes = 0;
+    }
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs)
+{
+    /* We should never be flushing in a situation where we are not aligned on a byte boundary. */
+    DRFLAC_ASSERT((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0);
+
+    /*
+    The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined
+    by the number of bits that have been consumed.
+    */
+    if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) {
+        drflac__update_crc16(bs);
+    } else {
+        /* We only accumulate the consumed bits. */
+        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes);
+
+        /*
+        The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated
+        so we can handle that later.
+        */
+        bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+    }
+
+    return bs->crc16;
+}
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs)
+{
+    size_t bytesRead;
+    size_t alignedL1LineCount;
+
+    /* Fast path. Try loading straight from L2. */
+    if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    }
+
+    /*
+    If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's
+    any left.
+    */
+    if (bs->unalignedByteCount > 0) {
+        return DRFLAC_FALSE;   /* If we have any unaligned bytes it means there's no more aligned bytes left in the client. */
+    }
+
+    bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs));
+
+    bs->nextL2Line = 0;
+    if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) {
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    }
+
+
+    /*
+    If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably
+    means we've just reached the end of the file. We need to move the valid data down to the end of the buffer
+    and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to
+    the size of the L1 so we'll need to seek backwards by any misaligned bytes.
+    */
+    alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs);
+
+    /* We need to keep track of any unaligned bytes for later use. */
+    bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs));
+    if (bs->unalignedByteCount > 0) {
+        bs->unalignedCache = bs->cacheL2[alignedL1LineCount];
+    }
+
+    if (alignedL1LineCount > 0) {
+        size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount;
+        size_t i;
+        for (i = alignedL1LineCount; i > 0; --i) {
+            bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1];
+        }
+
+        bs->nextL2Line = (drflac_uint32)offset;
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    } else {
+        /* If we get into this branch it means we weren't able to load any L1-aligned data. */
+        bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs);
+        return DRFLAC_FALSE;
+    }
+}
+
+static drflac_bool32 drflac__reload_cache(drflac_bs* bs)
+{
+    size_t bytesRead;
+
+#ifndef DR_FLAC_NO_CRC
+    drflac__update_crc16(bs);
+#endif
+
+    /* Fast path. Try just moving the next value in the L2 cache to the L1 cache. */
+    if (drflac__reload_l1_cache_from_l2(bs)) {
+        bs->cache = drflac__be2host__cache_line(bs->cache);
+        bs->consumedBits = 0;
+#ifndef DR_FLAC_NO_CRC
+        bs->crc16Cache = bs->cache;
+#endif
+        return DRFLAC_TRUE;
+    }
+
+    /* Slow path. */
+
+    /*
+    If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last
+    few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the
+    data from the unaligned cache.
+    */
+    bytesRead = bs->unalignedByteCount;
+    if (bytesRead == 0) {
+        bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- The stream has been exhausted, so marked the bits as consumed. */
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ASSERT(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs));
+    bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8;
+
+    bs->cache = drflac__be2host__cache_line(bs->unalignedCache);
+    bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs));    /* <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. */
+    bs->unalignedByteCount = 0;     /* <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. */
+
+#ifndef DR_FLAC_NO_CRC
+    bs->crc16Cache = bs->cache >> bs->consumedBits;
+    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+#endif
+    return DRFLAC_TRUE;
+}
+
+static void drflac__reset_cache(drflac_bs* bs)
+{
+    bs->nextL2Line   = DRFLAC_CACHE_L2_LINE_COUNT(bs);  /* <-- This clears the L2 cache. */
+    bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- This clears the L1 cache. */
+    bs->cache = 0;
+    bs->unalignedByteCount = 0;                         /* <-- This clears the trailing unaligned bytes. */
+    bs->unalignedCache = 0;
+
+#ifndef DR_FLAC_NO_CRC
+    bs->crc16Cache = 0;
+    bs->crc16CacheIgnoredBytes = 0;
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, drflac_uint32* pResultOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResultOut != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 32);
+
+    if (bs->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        /*
+        If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do
+        a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly
+        more optimal solution for this.
+        */
+#ifdef DRFLAC_64BIT
+        *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
+        bs->consumedBits += bitCount;
+        bs->cache <<= bitCount;
+#else
+        if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
+            bs->consumedBits += bitCount;
+            bs->cache <<= bitCount;
+        } else {
+            /* Cannot shift by 32-bits, so need to do it differently. */
+            *pResultOut = (drflac_uint32)bs->cache;
+            bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);
+            bs->cache = 0;
+        }
+#endif
+
+        return DRFLAC_TRUE;
+    } else {
+        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
+        drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        drflac_uint32 bitCountLo = bitCount - bitCountHi;
+        drflac_uint32 resultHi;
+
+        DRFLAC_ASSERT(bitCountHi > 0);
+        DRFLAC_ASSERT(bitCountHi < 32);
+        resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi);
+
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+        if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+            /* This happens when we get to end of stream */
+            return DRFLAC_FALSE;
+        }
+
+        *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
+        bs->consumedBits += bitCountLo;
+        bs->cache <<= bitCountLo;
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult)
+{
+    drflac_uint32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 32);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Do not attempt to shift by 32 as it's undefined. */
+    if (bitCount < 32) {
+        drflac_uint32 signbit;
+        signbit = ((result >> (bitCount-1)) & 0x01);
+        result |= (~signbit + 1) << bitCount;
+    }
+
+    *pResult = (drflac_int32)result;
+    return DRFLAC_TRUE;
+}
+
+#ifdef DRFLAC_64BIT
+static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut)
+{
+    drflac_uint32 resultHi;
+    drflac_uint32 resultLo;
+
+    DRFLAC_ASSERT(bitCount <= 64);
+    DRFLAC_ASSERT(bitCount >  32);
+
+    if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (!drflac__read_uint32(bs, 32, &resultLo)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResultOut = (((drflac_uint64)resultHi) << 32) | ((drflac_uint64)resultLo);
+    return DRFLAC_TRUE;
+}
+#endif
+
+/* Function below is unused, but leaving it here in case I need to quickly add it again. */
+#if 0
+static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, drflac_int64* pResultOut)
+{
+    drflac_uint64 result;
+    drflac_uint64 signbit;
+
+    DRFLAC_ASSERT(bitCount <= 64);
+
+    if (!drflac__read_uint64(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    signbit = ((result >> (bitCount-1)) & 0x01);
+    result |= (~signbit + 1) << bitCount;
+
+    *pResultOut = (drflac_int64)result;
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult)
+{
+    drflac_uint32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 16);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_uint16)result;
+    return DRFLAC_TRUE;
+}
+
+#if 0
+static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, drflac_int16* pResult)
+{
+    drflac_int32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 16);
+
+    if (!drflac__read_int32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_int16)result;
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult)
+{
+    drflac_uint32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 8);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_uint8)result;
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, drflac_int8* pResult)
+{
+    drflac_int32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 8);
+
+    if (!drflac__read_int32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_int8)result;
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
+{
+    if (bitsToSeek <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        bs->consumedBits += (drflac_uint32)bitsToSeek;
+        bs->cache <<= bitsToSeek;
+        return DRFLAC_TRUE;
+    } else {
+        /* It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. */
+        bitsToSeek       -= DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        bs->cache         = 0;
+
+        /* Simple case. Seek in groups of the same number as bits that fit within a cache line. */
+#ifdef DRFLAC_64BIT
+        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            drflac_uint64 bin;
+            if (!drflac__read_uint64(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        }
+#else
+        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            drflac_uint32 bin;
+            if (!drflac__read_uint32(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        }
+#endif
+
+        /* Whole leftover bytes. */
+        while (bitsToSeek >= 8) {
+            drflac_uint8 bin;
+            if (!drflac__read_uint8(bs, 8, &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= 8;
+        }
+
+        /* Leftover bits. */
+        if (bitsToSeek > 0) {
+            drflac_uint8 bin;
+            if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek = 0; /* <-- Necessary for the assert below. */
+        }
+
+        DRFLAC_ASSERT(bitsToSeek == 0);
+        return DRFLAC_TRUE;
+    }
+}
+
+
+/* This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16. */
+static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
+{
+    DRFLAC_ASSERT(bs != NULL);
+
+    /*
+    The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first
+    thing to do is align to the next byte.
+    */
+    if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
+        return DRFLAC_FALSE;
+    }
+
+    for (;;) {
+        drflac_uint8 hi;
+
+#ifndef DR_FLAC_NO_CRC
+        drflac__reset_crc16(bs);
+#endif
+
+        if (!drflac__read_uint8(bs, 8, &hi)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (hi == 0xFF) {
+            drflac_uint8 lo;
+            if (!drflac__read_uint8(bs, 6, &lo)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (lo == 0x3E) {
+                return DRFLAC_TRUE;
+            } else {
+                if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
+                    return DRFLAC_FALSE;
+                }
+            }
+        }
+    }
+
+    /* Should never get here. */
+    /*return DRFLAC_FALSE;*/
+}
+
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+#define DRFLAC_IMPLEMENT_CLZ_LZCNT
+#endif
+#if  defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(__clang__)
+#define DRFLAC_IMPLEMENT_CLZ_MSVC
+#endif
+#if  defined(__WATCOMC__) && defined(__386__)
+#define DRFLAC_IMPLEMENT_CLZ_WATCOM
+#endif
+#ifdef __MRC__
+#include <intrinsics.h>
+#define DRFLAC_IMPLEMENT_CLZ_MRC
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
+{
+    drflac_uint32 n;
+    static drflac_uint32 clz_table_4[] = {
+        0,
+        4,
+        3, 3,
+        2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1
+    };
+
+    if (x == 0) {
+        return sizeof(x)*8;
+    }
+
+    n = clz_table_4[x >> (sizeof(x)*8 - 4)];
+    if (n == 0) {
+#ifdef DRFLAC_64BIT
+        if ((x & ((drflac_uint64)0xFFFFFFFF << 32)) == 0) { n  = 32; x <<= 32; }
+        if ((x & ((drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; }
+        if ((x & ((drflac_uint64)0xFF000000 << 32)) == 0) { n += 8;  x <<= 8;  }
+        if ((x & ((drflac_uint64)0xF0000000 << 32)) == 0) { n += 4;  x <<= 4;  }
+#else
+        if ((x & 0xFFFF0000) == 0) { n  = 16; x <<= 16; }
+        if ((x & 0xFF000000) == 0) { n += 8;  x <<= 8;  }
+        if ((x & 0xF0000000) == 0) { n += 4;  x <<= 4;  }
+#endif
+        n += clz_table_4[x >> (sizeof(x)*8 - 4)];
+    }
+
+    return n - 1;
+}
+
+#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
+static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void)
+{
+    /* Fast compile time check for ARM. */
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
+    return DRFLAC_TRUE;
+#elif defined(__MRC__)
+    return DRFLAC_TRUE;
+#else
+    /* If the compiler itself does not support the intrinsic then we'll need to return false. */
+    #ifdef DRFLAC_HAS_LZCNT_INTRINSIC
+        return drflac__gIsLZCNTSupported;
+    #else
+        return DRFLAC_FALSE;
+    #endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
+{
+    /*
+    It's critical for competitive decoding performance that this function be highly optimal. With MSVC we can use the __lzcnt64() and __lzcnt() intrinsics
+    to achieve good performance, however on GCC and Clang it's a little bit more annoying. The __builtin_clzl() and __builtin_clzll() intrinsics leave
+    it undefined as to the return value when `x` is 0. We need this to be well defined as returning 32 or 64, depending on whether or not it's a 32- or
+    64-bit build. To work around this we would need to add a conditional to check for the x = 0 case, but this creates unnecessary inefficiency. To work
+    around this problem I have written some inline assembly to emit the LZCNT (x86) or CLZ (ARM) instruction directly which removes the need to include
+    the conditional. This has worked well in the past, but for some reason Clang's MSVC compatible driver, clang-cl, does not seem to be handling this
+    in the same way as the normal Clang driver. It seems that `clang-cl` is just outputting the wrong results sometimes, maybe due to some register
+    getting clobbered?
+
+    I'm not sure if this is a bug with dr_flac's inlined assembly (most likely), a bug in `clang-cl` or just a misunderstanding on my part with inline
+    assembly rules for `clang-cl`. If somebody can identify an error in dr_flac's inlined assembly I'm happy to get that fixed.
+
+    Fortunately there is an easy workaround for this. Clang implements MSVC-specific intrinsics for compatibility. It also defines _MSC_VER for extra
+    compatibility. We can therefore just check for _MSC_VER and use the MSVC intrinsic which, fortunately for us, Clang supports. It would still be nice
+    to know how to fix the inlined assembly for correctness sake, however.
+    */
+
+#if defined(_MSC_VER) /*&& !defined(__clang__)*/    /* <-- Intentionally wanting Clang to use the MSVC __lzcnt64/__lzcnt intrinsics due to above ^. */
+    #ifdef DRFLAC_64BIT
+        return (drflac_uint32)__lzcnt64(x);
+    #else
+        return (drflac_uint32)__lzcnt(x);
+    #endif
+#else
+    #if defined(__GNUC__) || defined(__clang__)
+        #if defined(DRFLAC_X64)
+            {
+                drflac_uint64 r;
+                __asm__ __volatile__ (
+                    "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc"
+                );
+
+                return (drflac_uint32)r;
+            }
+        #elif defined(DRFLAC_X86)
+            {
+                drflac_uint32 r;
+                __asm__ __volatile__ (
+                    "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc"
+                );
+
+                return r;
+            }
+        #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(__ARM_ARCH_6M__) && !defined(DRFLAC_64BIT)   /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */
+            {
+                unsigned int r;
+                __asm__ __volatile__ (
+                #if defined(DRFLAC_64BIT)
+                    "clz %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(x)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
+                #else
+                    "clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x)
+                #endif
+                );
+
+                return r;
+            }
+        #else
+            if (x == 0) {
+                return sizeof(x)*8;
+            }
+            #ifdef DRFLAC_64BIT
+                return (drflac_uint32)__builtin_clzll((drflac_uint64)x);
+            #else
+                return (drflac_uint32)__builtin_clzl((drflac_uint32)x);
+            #endif
+        #endif
+    #else
+        /* Unsupported compiler. */
+        #error "This compiler does not support the lzcnt intrinsic."
+    #endif
+#endif
+}
+#endif
+
+#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+#include <intrin.h> /* For BitScanReverse(). */
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x)
+{
+    drflac_uint32 n;
+
+    if (x == 0) {
+        return sizeof(x)*8;
+    }
+
+#ifdef DRFLAC_64BIT
+    _BitScanReverse64((unsigned long*)&n, x);
+#else
+    _BitScanReverse((unsigned long*)&n, x);
+#endif
+    return sizeof(x)*8 - n - 1;
+}
+#endif
+
+#ifdef DRFLAC_IMPLEMENT_CLZ_WATCOM
+static __inline drflac_uint32 drflac__clz_watcom (drflac_uint32);
+#ifdef DRFLAC_IMPLEMENT_CLZ_WATCOM_LZCNT
+/* Use the LZCNT instruction (only available on some processors since the 2010s). */
+#pragma aux drflac__clz_watcom_lzcnt = \
+    "db 0F3h, 0Fh, 0BDh, 0C0h" /* lzcnt eax, eax */ \
+    parm [eax] \
+    value [eax] \
+    modify nomemory;
+#else
+/* Use the 386+-compatible implementation. */
+#pragma aux drflac__clz_watcom = \
+    "bsr eax, eax" \
+    "xor eax, 31" \
+    parm [eax] nomemory \
+    value [eax] \
+    modify exact [eax] nomemory;
+#endif
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
+{
+#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
+    if (drflac__is_lzcnt_supported()) {
+        return drflac__clz_lzcnt(x);
+    } else
+#endif
+    {
+#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+        return drflac__clz_msvc(x);
+#elif defined(DRFLAC_IMPLEMENT_CLZ_WATCOM_LZCNT)
+        return drflac__clz_watcom_lzcnt(x);
+#elif defined(DRFLAC_IMPLEMENT_CLZ_WATCOM)
+        return (x == 0) ? sizeof(x)*8 : drflac__clz_watcom(x);
+#elif defined(__MRC__)
+        return __cntlzw(x);
+#else
+        return drflac__clz_software(x);
+#endif
+    }
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut)
+{
+    drflac_uint32 zeroCounter = 0;
+    drflac_uint32 setBitOffsetPlus1;
+
+    while (bs->cache == 0) {
+        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    if (bs->cache == 1) {
+        /* Not catching this would lead to undefined behaviour: a shift of a 32-bit number by 32 or more is undefined */
+        *pOffsetOut = zeroCounter + (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs) - 1;
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+
+        return DRFLAC_TRUE;
+    }
+
+    setBitOffsetPlus1 = drflac__clz(bs->cache);
+    setBitOffsetPlus1 += 1;
+
+    if (setBitOffsetPlus1 > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        /* This happens when we get to end of stream */
+        return DRFLAC_FALSE;
+    }
+
+    bs->consumedBits += setBitOffsetPlus1;
+    bs->cache <<= setBitOffsetPlus1;
+
+    *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1;
+    return DRFLAC_TRUE;
+}
+
+
+
+static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFromStart)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(offsetFromStart > 0);
+
+    /*
+    Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which
+    is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit.
+    To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder.
+    */
+    if (offsetFromStart > 0x7FFFFFFF) {
+        drflac_uint64 bytesRemaining = offsetFromStart;
+        if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+        bytesRemaining -= 0x7FFFFFFF;
+
+        while (bytesRemaining > 0x7FFFFFFF) {
+            if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            bytesRemaining -= 0x7FFFFFFF;
+        }
+
+        if (bytesRemaining > 0) {
+            if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+        }
+    } else {
+        if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    /* The cache should be reset to force a reload of fresh data from the client. */
+    drflac__reset_cache(bs);
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut)
+{
+    drflac_uint8 crc;
+    drflac_uint64 result;
+    drflac_uint8 utf8[7] = {0};
+    int byteCount;
+    int i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pNumberOut != NULL);
+    DRFLAC_ASSERT(pCRCOut != NULL);
+
+    crc = *pCRCOut;
+
+    if (!drflac__read_uint8(bs, 8, utf8)) {
+        *pNumberOut = 0;
+        return DRFLAC_AT_END;
+    }
+    crc = drflac_crc8(crc, utf8[0], 8);
+
+    if ((utf8[0] & 0x80) == 0) {
+        *pNumberOut = utf8[0];
+        *pCRCOut = crc;
+        return DRFLAC_SUCCESS;
+    }
+
+    /*byteCount = 1;*/
+    if ((utf8[0] & 0xE0) == 0xC0) {
+        byteCount = 2;
+    } else if ((utf8[0] & 0xF0) == 0xE0) {
+        byteCount = 3;
+    } else if ((utf8[0] & 0xF8) == 0xF0) {
+        byteCount = 4;
+    } else if ((utf8[0] & 0xFC) == 0xF8) {
+        byteCount = 5;
+    } else if ((utf8[0] & 0xFE) == 0xFC) {
+        byteCount = 6;
+    } else if ((utf8[0] & 0xFF) == 0xFE) {
+        byteCount = 7;
+    } else {
+        *pNumberOut = 0;
+        return DRFLAC_CRC_MISMATCH;     /* Bad UTF-8 encoding. */
+    }
+
+    /* Read extra bytes. */
+    DRFLAC_ASSERT(byteCount > 1);
+
+    result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1)));
+    for (i = 1; i < byteCount; ++i) {
+        if (!drflac__read_uint8(bs, 8, utf8 + i)) {
+            *pNumberOut = 0;
+            return DRFLAC_AT_END;
+        }
+        crc = drflac_crc8(crc, utf8[i], 8);
+
+        result = (result << 6) | (utf8[i] & 0x3F);
+    }
+
+    *pNumberOut = result;
+    *pCRCOut = crc;
+    return DRFLAC_SUCCESS;
+}
+
+
+static DRFLAC_INLINE drflac_uint32 drflac__ilog2_u32(drflac_uint32 x)
+{
+#if 1   /* Needs optimizing. */
+    drflac_uint32 result = 0;
+    while (x > 0) {
+        result += 1;
+        x >>= 1;
+    }
+
+    return result;
+#endif
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__use_64_bit_prediction(drflac_uint32 bitsPerSample, drflac_uint32 order, drflac_uint32 precision)
+{
+    /* https://web.archive.org/web/20220205005724/https://github.com/ietf-wg-cellar/flac-specification/blob/37a49aa48ba4ba12e8757badfc59c0df35435fec/rfc_backmatter.md */
+    return bitsPerSample + precision + drflac__ilog2_u32(order) > 32;
+}
+
+
+/*
+The next two functions are responsible for calculating the prediction.
+
+When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
+safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
+*/
+#if defined(__clang__)
+__attribute__((no_sanitize("signed-integer-overflow")))
+#endif
+static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_int32 prediction = 0;
+
+    DRFLAC_ASSERT(order <= 32);
+
+    /* 32-bit version. */
+
+    /* VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. */
+    switch (order)
+    {
+    case 32: prediction += coefficients[31] * pDecodedSamples[-32];
+    case 31: prediction += coefficients[30] * pDecodedSamples[-31];
+    case 30: prediction += coefficients[29] * pDecodedSamples[-30];
+    case 29: prediction += coefficients[28] * pDecodedSamples[-29];
+    case 28: prediction += coefficients[27] * pDecodedSamples[-28];
+    case 27: prediction += coefficients[26] * pDecodedSamples[-27];
+    case 26: prediction += coefficients[25] * pDecodedSamples[-26];
+    case 25: prediction += coefficients[24] * pDecodedSamples[-25];
+    case 24: prediction += coefficients[23] * pDecodedSamples[-24];
+    case 23: prediction += coefficients[22] * pDecodedSamples[-23];
+    case 22: prediction += coefficients[21] * pDecodedSamples[-22];
+    case 21: prediction += coefficients[20] * pDecodedSamples[-21];
+    case 20: prediction += coefficients[19] * pDecodedSamples[-20];
+    case 19: prediction += coefficients[18] * pDecodedSamples[-19];
+    case 18: prediction += coefficients[17] * pDecodedSamples[-18];
+    case 17: prediction += coefficients[16] * pDecodedSamples[-17];
+    case 16: prediction += coefficients[15] * pDecodedSamples[-16];
+    case 15: prediction += coefficients[14] * pDecodedSamples[-15];
+    case 14: prediction += coefficients[13] * pDecodedSamples[-14];
+    case 13: prediction += coefficients[12] * pDecodedSamples[-13];
+    case 12: prediction += coefficients[11] * pDecodedSamples[-12];
+    case 11: prediction += coefficients[10] * pDecodedSamples[-11];
+    case 10: prediction += coefficients[ 9] * pDecodedSamples[-10];
+    case  9: prediction += coefficients[ 8] * pDecodedSamples[- 9];
+    case  8: prediction += coefficients[ 7] * pDecodedSamples[- 8];
+    case  7: prediction += coefficients[ 6] * pDecodedSamples[- 7];
+    case  6: prediction += coefficients[ 5] * pDecodedSamples[- 6];
+    case  5: prediction += coefficients[ 4] * pDecodedSamples[- 5];
+    case  4: prediction += coefficients[ 3] * pDecodedSamples[- 4];
+    case  3: prediction += coefficients[ 2] * pDecodedSamples[- 3];
+    case  2: prediction += coefficients[ 1] * pDecodedSamples[- 2];
+    case  1: prediction += coefficients[ 0] * pDecodedSamples[- 1];
+    }
+
+    return (drflac_int32)(prediction >> shift);
+}
+
+static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_int64 prediction;
+
+    DRFLAC_ASSERT(order <= 32);
+
+    /* 64-bit version. */
+
+    /* This method is faster on the 32-bit build when compiling with VC++. See note below. */
+#ifndef DRFLAC_64BIT
+    if (order == 8)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8];
+    }
+    else if (order == 7)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
+    }
+    else if (order == 3)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+    }
+    else if (order == 6)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+    }
+    else if (order == 5)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+    }
+    else if (order == 4)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+    }
+    else if (order == 12)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+        prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
+    }
+    else if (order == 2)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+    }
+    else if (order == 1)
+    {
+        prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+    }
+    else if (order == 10)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+    }
+    else if (order == 9)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+    }
+    else if (order == 11)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+    }
+    else
+    {
+        int j;
+
+        prediction = 0;
+        for (j = 0; j < (int)order; ++j) {
+            prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1];
+        }
+    }
+#endif
+
+    /*
+    VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some
+    reason. The ugly version above is faster so we'll just switch between the two depending on the target platform.
+    */
+#ifdef DRFLAC_64BIT
+    prediction = 0;
+    switch (order)
+    {
+    case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32];
+    case 31: prediction += coefficients[30] * (drflac_int64)pDecodedSamples[-31];
+    case 30: prediction += coefficients[29] * (drflac_int64)pDecodedSamples[-30];
+    case 29: prediction += coefficients[28] * (drflac_int64)pDecodedSamples[-29];
+    case 28: prediction += coefficients[27] * (drflac_int64)pDecodedSamples[-28];
+    case 27: prediction += coefficients[26] * (drflac_int64)pDecodedSamples[-27];
+    case 26: prediction += coefficients[25] * (drflac_int64)pDecodedSamples[-26];
+    case 25: prediction += coefficients[24] * (drflac_int64)pDecodedSamples[-25];
+    case 24: prediction += coefficients[23] * (drflac_int64)pDecodedSamples[-24];
+    case 23: prediction += coefficients[22] * (drflac_int64)pDecodedSamples[-23];
+    case 22: prediction += coefficients[21] * (drflac_int64)pDecodedSamples[-22];
+    case 21: prediction += coefficients[20] * (drflac_int64)pDecodedSamples[-21];
+    case 20: prediction += coefficients[19] * (drflac_int64)pDecodedSamples[-20];
+    case 19: prediction += coefficients[18] * (drflac_int64)pDecodedSamples[-19];
+    case 18: prediction += coefficients[17] * (drflac_int64)pDecodedSamples[-18];
+    case 17: prediction += coefficients[16] * (drflac_int64)pDecodedSamples[-17];
+    case 16: prediction += coefficients[15] * (drflac_int64)pDecodedSamples[-16];
+    case 15: prediction += coefficients[14] * (drflac_int64)pDecodedSamples[-15];
+    case 14: prediction += coefficients[13] * (drflac_int64)pDecodedSamples[-14];
+    case 13: prediction += coefficients[12] * (drflac_int64)pDecodedSamples[-13];
+    case 12: prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
+    case 11: prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+    case 10: prediction += coefficients[ 9] * (drflac_int64)pDecodedSamples[-10];
+    case  9: prediction += coefficients[ 8] * (drflac_int64)pDecodedSamples[- 9];
+    case  8: prediction += coefficients[ 7] * (drflac_int64)pDecodedSamples[- 8];
+    case  7: prediction += coefficients[ 6] * (drflac_int64)pDecodedSamples[- 7];
+    case  6: prediction += coefficients[ 5] * (drflac_int64)pDecodedSamples[- 6];
+    case  5: prediction += coefficients[ 4] * (drflac_int64)pDecodedSamples[- 5];
+    case  4: prediction += coefficients[ 3] * (drflac_int64)pDecodedSamples[- 4];
+    case  3: prediction += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3];
+    case  2: prediction += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2];
+    case  1: prediction += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1];
+    }
+#endif
+
+    return (drflac_int32)(prediction >> shift);
+}
+
+
+#if 0
+/*
+Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
+sake of readability and should only be used as a reference.
+*/
+static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    for (i = 0; i < count; ++i) {
+        drflac_uint32 zeroCounter = 0;
+        for (;;) {
+            drflac_uint8 bit;
+            if (!drflac__read_uint8(bs, 1, &bit)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (bit == 0) {
+                zeroCounter += 1;
+            } else {
+                break;
+            }
+        }
+
+        drflac_uint32 decodedRice;
+        if (riceParam > 0) {
+            if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            decodedRice = 0;
+        }
+
+        decodedRice |= (zeroCounter << riceParam);
+        if ((decodedRice & 0x01)) {
+            decodedRice = ~(decodedRice >> 1);
+        } else {
+            decodedRice =  (decodedRice >> 1);
+        }
+
+
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
+        } else {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+#endif
+
+#if 0
+static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_uint32 zeroCounter = 0;
+    drflac_uint32 decodedRice;
+
+    for (;;) {
+        drflac_uint8 bit;
+        if (!drflac__read_uint8(bs, 1, &bit)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (bit == 0) {
+            zeroCounter += 1;
+        } else {
+            break;
+        }
+    }
+
+    if (riceParam > 0) {
+        if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
+            return DRFLAC_FALSE;
+        }
+    } else {
+        decodedRice = 0;
+    }
+
+    *pZeroCounterOut = zeroCounter;
+    *pRiceParamPartOut = decodedRice;
+    return DRFLAC_TRUE;
+}
+#endif
+
+#if 0
+static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_cache_t riceParamMask;
+    drflac_uint32 zeroCounter;
+    drflac_uint32 setBitOffsetPlus1;
+    drflac_uint32 riceParamPart;
+    drflac_uint32 riceLength;
+
+    DRFLAC_ASSERT(riceParam > 0);   /* <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. */
+
+    riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);
+
+    zeroCounter = 0;
+    while (bs->cache == 0) {
+        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    setBitOffsetPlus1 = drflac__clz(bs->cache);
+    zeroCounter += setBitOffsetPlus1;
+    setBitOffsetPlus1 += 1;
+
+    riceLength = setBitOffsetPlus1 + riceParam;
+    if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength));
+
+        bs->consumedBits += riceLength;
+        bs->cache <<= riceLength;
+    } else {
+        drflac_uint32 bitCountLo;
+        drflac_cache_t resultHi;
+
+        bs->consumedBits += riceLength;
+        bs->cache <<= setBitOffsetPlus1 & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1);    /* <-- Equivalent to "if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { bs->cache <<= setBitOffsetPlus1; }" */
+
+        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
+        bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, riceParam);  /* <-- Use DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE() if ever this function allows riceParam=0. */
+
+        if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+#ifndef DR_FLAC_NO_CRC
+            drflac__update_crc16(bs);
+#endif
+            bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+            bs->consumedBits = 0;
+#ifndef DR_FLAC_NO_CRC
+            bs->crc16Cache = bs->cache;
+#endif
+        } else {
+            /* Slow path. We need to fetch more data from the client. */
+            if (!drflac__reload_cache(bs)) {
+                return DRFLAC_FALSE;
+            }
+            if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+                /* This happens when we get to end of stream */
+                return DRFLAC_FALSE;
+            }
+        }
+
+        riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
+
+        bs->consumedBits += bitCountLo;
+        bs->cache <<= bitCountLo;
+    }
+
+    pZeroCounterOut[0] = zeroCounter;
+    pRiceParamPartOut[0] = riceParamPart;
+
+    return DRFLAC_TRUE;
+}
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_uint32  riceParamPlus1 = riceParam + 1;
+    /*drflac_cache_t riceParamPlus1Mask  = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/
+    drflac_uint32  riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1);
+    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
+
+    /*
+    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
+    no idea how this will work in practice...
+    */
+    drflac_cache_t bs_cache = bs->cache;
+    drflac_uint32  bs_consumedBits = bs->consumedBits;
+
+    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
+    drflac_uint32  lzcount = drflac__clz(bs_cache);
+    if (lzcount < sizeof(bs_cache)*8) {
+        pZeroCounterOut[0] = lzcount;
+
+        /*
+        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
+        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
+        outside of this function at a higher level.
+        */
+    extract_rice_param_part:
+        bs_cache       <<= lzcount;
+        bs_consumedBits += lzcount;
+
+        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
+            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
+            pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+            bs_cache       <<= riceParamPlus1;
+            bs_consumedBits += riceParamPlus1;
+        } else {
+            drflac_uint32 riceParamPartHi;
+            drflac_uint32 riceParamPartLo;
+            drflac_uint32 riceParamPartLoBitCount;
+
+            /*
+            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
+            line, reload the cache, and then combine it with the head of the next cache line.
+            */
+
+            /* Grab the high part of the rice parameter part. */
+            riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+
+            /* Before reloading the cache we need to grab the size in bits of the low part. */
+            riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
+            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
+
+            /* Now reload the cache. */
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = riceParamPartLoBitCount;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+                if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+                    /* This happens when we get to end of stream */
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
+            }
+
+            /* We should now have enough information to construct the rice parameter part. */
+            riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount)));
+            pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo;
+
+            bs_cache <<= riceParamPartLoBitCount;
+        }
+    } else {
+        /*
+        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
+        to drflac__clz() and we need to reload the cache.
+        */
+        drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits);
+        for (;;) {
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = 0;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits;
+            }
+
+            lzcount = drflac__clz(bs_cache);
+            zeroCounter += lzcount;
+
+            if (lzcount < sizeof(bs_cache)*8) {
+                break;
+            }
+        }
+
+        pZeroCounterOut[0] = zeroCounter;
+        goto extract_rice_param_part;
+    }
+
+    /* Make sure the cache is restored at the end of it all. */
+    bs->cache = bs_cache;
+    bs->consumedBits = bs_consumedBits;
+
+    return DRFLAC_TRUE;
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam)
+{
+    drflac_uint32  riceParamPlus1 = riceParam + 1;
+    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
+
+    /*
+    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
+    no idea how this will work in practice...
+    */
+    drflac_cache_t bs_cache = bs->cache;
+    drflac_uint32  bs_consumedBits = bs->consumedBits;
+
+    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
+    drflac_uint32  lzcount = drflac__clz(bs_cache);
+    if (lzcount < sizeof(bs_cache)*8) {
+        /*
+        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
+        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
+        outside of this function at a higher level.
+        */
+    extract_rice_param_part:
+        bs_cache       <<= lzcount;
+        bs_consumedBits += lzcount;
+
+        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
+            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
+            bs_cache       <<= riceParamPlus1;
+            bs_consumedBits += riceParamPlus1;
+        } else {
+            /*
+            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
+            line, reload the cache, and then combine it with the head of the next cache line.
+            */
+
+            /* Before reloading the cache we need to grab the size in bits of the low part. */
+            drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
+            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
+
+            /* Now reload the cache. */
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = riceParamPartLoBitCount;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+                    /* This happens when we get to end of stream */
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
+            }
+
+            bs_cache <<= riceParamPartLoBitCount;
+        }
+    } else {
+        /*
+        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
+        to drflac__clz() and we need to reload the cache.
+        */
+        for (;;) {
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = 0;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits;
+            }
+
+            lzcount = drflac__clz(bs_cache);
+            if (lzcount < sizeof(bs_cache)*8) {
+                break;
+            }
+        }
+
+        goto extract_rice_param_part;
+    }
+
+    /* Make sure the cache is restored at the end of it all. */
+    bs->cache = bs_cache;
+    bs->consumedBits = bs_consumedBits;
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorder(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+    drflac_uint32 zeroCountPart0;
+    drflac_uint32 riceParamPart0;
+    drflac_uint32 riceParamMask;
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    (void)bitsPerSample;
+    (void)order;
+    (void)shift;
+    (void)coefficients;
+
+    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
+
+    i = 0;
+    while (i < count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamPart0 &= riceParamMask;
+        riceParamPart0 |= (zeroCountPart0 << riceParam);
+        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+
+        pSamplesOut[i] = riceParamPart0;
+
+        i += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+    drflac_uint32 zeroCountPart0 = 0;
+    drflac_uint32 zeroCountPart1 = 0;
+    drflac_uint32 zeroCountPart2 = 0;
+    drflac_uint32 zeroCountPart3 = 0;
+    drflac_uint32 riceParamPart0 = 0;
+    drflac_uint32 riceParamPart1 = 0;
+    drflac_uint32 riceParamPart2 = 0;
+    drflac_uint32 riceParamPart3 = 0;
+    drflac_uint32 riceParamMask;
+    const drflac_int32* pSamplesOutEnd;
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    if (lpcOrder == 0) {
+        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
+    }
+
+    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
+    pSamplesOutEnd = pSamplesOut + (count & ~3);
+
+    if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+        while (pSamplesOut < pSamplesOutEnd) {
+            /*
+            Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
+            against an array. Not sure why, but perhaps it's making more efficient use of registers?
+            */
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
+                return DRFLAC_FALSE;
+            }
+
+            riceParamPart0 &= riceParamMask;
+            riceParamPart1 &= riceParamMask;
+            riceParamPart2 &= riceParamMask;
+            riceParamPart3 &= riceParamMask;
+
+            riceParamPart0 |= (zeroCountPart0 << riceParam);
+            riceParamPart1 |= (zeroCountPart1 << riceParam);
+            riceParamPart2 |= (zeroCountPart2 << riceParam);
+            riceParamPart3 |= (zeroCountPart3 << riceParam);
+
+            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
+            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
+            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
+
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
+
+            pSamplesOut += 4;
+        }
+    } else {
+        while (pSamplesOut < pSamplesOutEnd) {
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
+                return DRFLAC_FALSE;
+            }
+
+            riceParamPart0 &= riceParamMask;
+            riceParamPart1 &= riceParamMask;
+            riceParamPart2 &= riceParamMask;
+            riceParamPart3 &= riceParamMask;
+
+            riceParamPart0 |= (zeroCountPart0 << riceParam);
+            riceParamPart1 |= (zeroCountPart1 << riceParam);
+            riceParamPart2 |= (zeroCountPart2 << riceParam);
+            riceParamPart3 |= (zeroCountPart3 << riceParam);
+
+            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
+            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
+            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
+
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
+
+            pSamplesOut += 4;
+        }
+    }
+
+    i = (count & ~3);
+    while (i < count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamPart0 &= riceParamMask;
+        riceParamPart0 |= (zeroCountPart0 << riceParam);
+        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+        /*riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
+
+        /* Sample reconstruction. */
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
+        } else {
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
+        }
+
+        i += 1;
+        pSamplesOut += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b)
+{
+    __m128i r;
+
+    /* Pack. */
+    r = _mm_packs_epi32(a, b);
+
+    /* a3a2 a1a0 b3b2 b1b0 -> a3a2 b3b2 a1a0 b1b0 */
+    r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0));
+
+    /* a3a2 b3b2 a1a0 b1b0 -> a3b3 a2b2 a1b1 a0b0 */
+    r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
+    r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
+
+    return r;
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_SSE41)
+static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a)
+{
+    return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x)
+{
+    __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
+    __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2));
+    return _mm_add_epi32(x64, x32);
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x)
+{
+    return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_srai_epi64(__m128i x, int count)
+{
+    /*
+    To simplify this we are assuming count < 32. This restriction allows us to work on a low side and a high side. The low side
+    is shifted with zero bits, whereas the right side is shifted with sign bits.
+    */
+    __m128i lo = _mm_srli_epi64(x, count);
+    __m128i hi = _mm_srai_epi32(x, count);
+
+    hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0));    /* The high part needs to have the low part cleared. */
+
+    return _mm_or_si128(lo, hi);
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts0 = 0;
+    drflac_uint32 zeroCountParts1 = 0;
+    drflac_uint32 zeroCountParts2 = 0;
+    drflac_uint32 zeroCountParts3 = 0;
+    drflac_uint32 riceParamParts0 = 0;
+    drflac_uint32 riceParamParts1 = 0;
+    drflac_uint32 riceParamParts2 = 0;
+    drflac_uint32 riceParamParts3 = 0;
+    __m128i coefficients128_0;
+    __m128i coefficients128_4;
+    __m128i coefficients128_8;
+    __m128i samples128_0;
+    __m128i samples128_4;
+    __m128i samples128_8;
+    __m128i riceParamMask128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
+    riceParamMask128 = _mm_set1_epi32(riceParamMask);
+
+    /* Pre-load. */
+    coefficients128_0 = _mm_setzero_si128();
+    coefficients128_4 = _mm_setzero_si128();
+    coefficients128_8 = _mm_setzero_si128();
+
+    samples128_0 = _mm_setzero_si128();
+    samples128_4 = _mm_setzero_si128();
+    samples128_8 = _mm_setzero_si128();
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+#if 1
+    {
+        int runningOrder = order;
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
+            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
+                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
+                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
+            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
+                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
+                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
+            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
+                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
+                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
+    }
+#else
+    /* This causes strict-aliasing warnings with GCC. */
+    switch (order)
+    {
+    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
+    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
+    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
+    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
+    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
+    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
+    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
+    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
+    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
+    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
+    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
+    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
+    }
+#endif
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        __m128i prediction128;
+        __m128i zeroCountPart128;
+        __m128i riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
+        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
+
+        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
+        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
+        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01)));  /* <-- SSE2 compatible */
+        /*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01)), _mm_set1_epi32(0xFFFFFFFF)));*/   /* <-- Only supported from SSE4.1 and is slower in my testing... */
+
+        if (order <= 4) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        } else if (order <= 8) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                              _mm_mullo_epi32(coefficients128_4, samples128_4);
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        } else {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                              _mm_mullo_epi32(coefficients128_8, samples128_8);
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4));
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
+                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        }
+
+        /* We store samples in groups of 4. */
+        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts0 &= riceParamMask;
+        riceParamParts0 |= (zeroCountParts0 << riceParam);
+        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts0 = 0;
+    drflac_uint32 zeroCountParts1 = 0;
+    drflac_uint32 zeroCountParts2 = 0;
+    drflac_uint32 zeroCountParts3 = 0;
+    drflac_uint32 riceParamParts0 = 0;
+    drflac_uint32 riceParamParts1 = 0;
+    drflac_uint32 riceParamParts2 = 0;
+    drflac_uint32 riceParamParts3 = 0;
+    __m128i coefficients128_0;
+    __m128i coefficients128_4;
+    __m128i coefficients128_8;
+    __m128i samples128_0;
+    __m128i samples128_4;
+    __m128i samples128_8;
+    __m128i prediction128;
+    __m128i riceParamMask128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    DRFLAC_ASSERT(order <= 12);
+
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
+    riceParamMask128 = _mm_set1_epi32(riceParamMask);
+
+    prediction128 = _mm_setzero_si128();
+
+    /* Pre-load. */
+    coefficients128_0  = _mm_setzero_si128();
+    coefficients128_4  = _mm_setzero_si128();
+    coefficients128_8  = _mm_setzero_si128();
+
+    samples128_0  = _mm_setzero_si128();
+    samples128_4  = _mm_setzero_si128();
+    samples128_8  = _mm_setzero_si128();
+
+#if 1
+    {
+        int runningOrder = order;
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
+            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
+                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
+                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
+            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
+                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
+                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
+            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
+                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
+                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
+    }
+#else
+    switch (order)
+    {
+    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
+    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
+    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
+    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
+    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
+    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
+    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
+    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
+    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
+    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
+    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
+    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
+    }
+#endif
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        __m128i zeroCountPart128;
+        __m128i riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
+        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
+
+        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
+        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
+        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1)));
+
+        for (i = 0; i < 4; i += 1) {
+            prediction128 = _mm_xor_si128(prediction128, prediction128);    /* Reset to 0. */
+
+            switch (order)
+            {
+            case 12:
+            case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0))));
+            case 10:
+            case  9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2))));
+            case  8:
+            case  7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0))));
+            case  6:
+            case  5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2))));
+            case  4:
+            case  3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0))));
+            case  2:
+            case  1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2))));
+            }
+
+            /* Horizontal add and shift. */
+            prediction128 = drflac__mm_hadd_epi64(prediction128);
+            prediction128 = drflac__mm_srai_epi64(prediction128, shift);
+            prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+            /* Our value should be sitting in prediction128[0]. We need to combine this with our SSE samples. */
+            samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
+            samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+            samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+
+            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
+            riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+        }
+
+        /* We store samples in groups of 4. */
+        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts0 &= riceParamMask;
+        riceParamParts0 |= (zeroCountParts0 << riceParam);
+        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */
+    if (lpcOrder > 0 && lpcOrder <= 12) {
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
+        } else {
+            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
+        }
+    } else {
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac__vst2q_s32(drflac_int32* p, int32x4x2_t x)
+{
+    vst1q_s32(p+0, x.val[0]);
+    vst1q_s32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_u32(drflac_uint32* p, uint32x4x2_t x)
+{
+    vst1q_u32(p+0, x.val[0]);
+    vst1q_u32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_f32(float* p, float32x4x2_t x)
+{
+    vst1q_f32(p+0, x.val[0]);
+    vst1q_f32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_s16(drflac_int16* p, int16x4x2_t x)
+{
+    vst1q_s16(p, vcombine_s16(x.val[0], x.val[1]));
+}
+
+static DRFLAC_INLINE void drflac__vst2q_u16(drflac_uint16* p, uint16x4x2_t x)
+{
+    vst1q_u16(p, vcombine_u16(x.val[0], x.val[1]));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vdupq_n_s32x4(drflac_int32 x3, drflac_int32 x2, drflac_int32 x1, drflac_int32 x0)
+{
+    drflac_int32 x[4];
+    x[3] = x3;
+    x[2] = x2;
+    x[1] = x1;
+    x[0] = x0;
+    return vld1q_s32(x);
+}
+
+static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b)
+{
+    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
+
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(a, 0),
+        vgetq_lane_s32(b, 3),
+        vgetq_lane_s32(b, 2),
+        vgetq_lane_s32(b, 1)
+    );*/
+
+    return vextq_s32(b, a, 1);
+}
+
+static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b)
+{
+    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
+
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(a, 0),
+        vgetq_lane_s32(b, 3),
+        vgetq_lane_s32(b, 2),
+        vgetq_lane_s32(b, 1)
+    );*/
+
+    return vextq_u32(b, a, 1);
+}
+
+static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x)
+{
+    /* The sum must end up in position 0. */
+
+    /* Reference */
+    /*return vdupq_n_s32(
+        vgetq_lane_s32(x, 3) +
+        vgetq_lane_s32(x, 2) +
+        vgetq_lane_s32(x, 1) +
+        vgetq_lane_s32(x, 0)
+    );*/
+
+    int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x));
+    return vpadd_s32(r, r);
+}
+
+static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x)
+{
+    return vadd_s64(vget_high_s64(x), vget_low_s64(x));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x)
+{
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(x, 0),
+        vgetq_lane_s32(x, 1),
+        vgetq_lane_s32(x, 2),
+        vgetq_lane_s32(x, 3)
+    );*/
+
+    return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x)));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x)
+{
+    return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF));
+}
+
+static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x)
+{
+    return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF));
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts[4];
+    drflac_uint32 riceParamParts[4];
+    int32x4_t coefficients128_0;
+    int32x4_t coefficients128_4;
+    int32x4_t coefficients128_8;
+    int32x4_t samples128_0;
+    int32x4_t samples128_4;
+    int32x4_t samples128_8;
+    uint32x4_t riceParamMask128;
+    int32x4_t riceParam128;
+    int32x2_t shift64;
+    uint32x4_t one128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
+    riceParamMask128 = vdupq_n_u32(riceParamMask);
+
+    riceParam128 = vdupq_n_s32(riceParam);
+    shift64 = vdup_n_s32(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
+    one128 = vdupq_n_u32(1);
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+    {
+        int runningOrder = order;
+        drflac_int32 tempC[4] = {0, 0, 0, 0};
+        drflac_int32 tempS[4] = {0, 0, 0, 0};
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = vld1q_s32(coefficients + 0);
+            samples128_0      = vld1q_s32(pSamplesOut  - 4);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
+                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
+                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
+            }
+
+            coefficients128_0 = vld1q_s32(tempC);
+            samples128_0      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = vld1q_s32(coefficients + 4);
+            samples128_4      = vld1q_s32(pSamplesOut  - 8);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
+                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
+                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
+            }
+
+            coefficients128_4 = vld1q_s32(tempC);
+            samples128_4      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = vld1q_s32(coefficients + 8);
+            samples128_8      = vld1q_s32(pSamplesOut  - 12);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
+                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
+                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
+            }
+
+            coefficients128_8 = vld1q_s32(tempC);
+            samples128_8      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
+        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
+        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
+    }
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        int32x4_t prediction128;
+        int32x2_t prediction64;
+        uint32x4_t zeroCountPart128;
+        uint32x4_t riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = vld1q_u32(zeroCountParts);
+        riceParamPart128 = vld1q_u32(riceParamParts);
+
+        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
+        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
+        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
+
+        if (order <= 4) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 = vmulq_s32(coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        } else if (order <= 8) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                vmulq_s32(coefficients128_4, samples128_4);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        } else {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                vmulq_s32(coefficients128_8, samples128_8);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
+                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        }
+
+        /* We store samples in groups of 4. */
+        vst1q_s32(pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts[0] &= riceParamMask;
+        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
+        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts[4];
+    drflac_uint32 riceParamParts[4];
+    int32x4_t coefficients128_0;
+    int32x4_t coefficients128_4;
+    int32x4_t coefficients128_8;
+    int32x4_t samples128_0;
+    int32x4_t samples128_4;
+    int32x4_t samples128_8;
+    uint32x4_t riceParamMask128;
+    int32x4_t riceParam128;
+    int64x1_t shift64;
+    uint32x4_t one128;
+    int64x2_t prediction128 = { 0 };
+    uint32x4_t zeroCountPart128;
+    uint32x4_t riceParamPart128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
+    riceParamMask128 = vdupq_n_u32(riceParamMask);
+
+    riceParam128 = vdupq_n_s32(riceParam);
+    shift64 = vdup_n_s64(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
+    one128 = vdupq_n_u32(1);
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+    {
+        int runningOrder = order;
+        drflac_int32 tempC[4] = {0, 0, 0, 0};
+        drflac_int32 tempS[4] = {0, 0, 0, 0};
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = vld1q_s32(coefficients + 0);
+            samples128_0      = vld1q_s32(pSamplesOut  - 4);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
+                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
+                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
+            }
+
+            coefficients128_0 = vld1q_s32(tempC);
+            samples128_0      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = vld1q_s32(coefficients + 4);
+            samples128_4      = vld1q_s32(pSamplesOut  - 8);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
+                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
+                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
+            }
+
+            coefficients128_4 = vld1q_s32(tempC);
+            samples128_4      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = vld1q_s32(coefficients + 8);
+            samples128_8      = vld1q_s32(pSamplesOut  - 12);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
+                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
+                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
+            }
+
+            coefficients128_8 = vld1q_s32(tempC);
+            samples128_8      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
+        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
+        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
+    }
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = vld1q_u32(zeroCountParts);
+        riceParamPart128 = vld1q_u32(riceParamParts);
+
+        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
+        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
+        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
+
+        for (i = 0; i < 4; i += 1) {
+            int64x1_t prediction64;
+
+            prediction128 = veorq_s64(prediction128, prediction128);    /* Reset to 0. */
+            switch (order)
+            {
+            case 12:
+            case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8)));
+            case 10:
+            case  9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8)));
+            case  8:
+            case  7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4)));
+            case  6:
+            case  5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4)));
+            case  4:
+            case  3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0)));
+            case  2:
+            case  1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0)));
+            }
+
+            /* Horizontal add and shift. */
+            prediction64 = drflac__vhaddq_s64(prediction128);
+            prediction64 = vshl_s64(prediction64, shift64);
+            prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0)));
+
+            /* Our value should be sitting in prediction64[0]. We need to combine this with our SSE samples. */
+            samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
+            samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+            samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0);
+
+            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
+            riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+        }
+
+        /* We store samples in groups of 4. */
+        vst1q_s32(pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts[0] &= riceParamMask;
+        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
+        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */
+    if (lpcOrder > 0 && lpcOrder <= 12) {
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
+        } else {
+            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
+        }
+    } else {
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
+    }
+}
+#endif
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+#if defined(DRFLAC_SUPPORT_SSE41)
+    if (drflac__gIsSSE41Supported) {
+        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported) {
+        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+    #if 0
+        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
+    #else
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
+    #endif
+    }
+}
+
+/* Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. */
+static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+
+    for (i = 0; i < count; ++i) {
+        if (!drflac__seek_rice_parts(bs, riceParam)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+#if defined(__clang__)
+__attribute__((no_sanitize("signed-integer-overflow")))
+#endif
+static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(unencodedBitsPerSample <= 31);    /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    for (i = 0; i < count; ++i) {
+        if (unencodedBitsPerSample > 0) {
+            if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            pSamplesOut[i] = 0;
+        }
+
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            pSamplesOut[i] += drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
+        } else {
+            pSamplesOut[i] += drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+/*
+Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called
+when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
+<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+*/
+static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_uint8 residualMethod;
+    drflac_uint8 partitionOrder;
+    drflac_uint32 samplesInPartition;
+    drflac_uint32 partitionsRemaining;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(blockSize != 0);
+    DRFLAC_ASSERT(pDecodedSamples != NULL);       /* <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? */
+
+    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
+    }
+
+    /* Ignore the first <order> values. */
+    pDecodedSamples += lpcOrder;
+
+    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    From the FLAC spec:
+      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
+    */
+    if (partitionOrder > 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Validation check. */
+    if ((blockSize / (1 << partitionOrder)) < lpcOrder) {
+        return DRFLAC_FALSE;
+    }
+
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - lpcOrder;
+    partitionsRemaining = (1 << partitionOrder);
+    for (;;) {
+        drflac_uint8 riceParam = 0;
+        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
+            if (!drflac__read_uint8(bs, 4, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 15) {
+                riceParam = 0xFF;
+            }
+        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+            if (!drflac__read_uint8(bs, 5, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 31) {
+                riceParam = 0xFF;
+            }
+        }
+
+        if (riceParam != 0xFF) {
+            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            drflac_uint8 unencodedBitsPerSample = 0;
+            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        pDecodedSamples += samplesInPartition;
+
+        if (partitionsRemaining == 1) {
+            break;
+        }
+
+        partitionsRemaining -= 1;
+
+        if (partitionOrder != 0) {
+            samplesInPartition = blockSize / (1 << partitionOrder);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+/*
+Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called
+when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The
+<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+*/
+static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order)
+{
+    drflac_uint8 residualMethod;
+    drflac_uint8 partitionOrder;
+    drflac_uint32 samplesInPartition;
+    drflac_uint32 partitionsRemaining;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(blockSize != 0);
+
+    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
+    }
+
+    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    From the FLAC spec:
+      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
+    */
+    if (partitionOrder > 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Validation check. */
+    if ((blockSize / (1 << partitionOrder)) <= order) {
+        return DRFLAC_FALSE;
+    }
+
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    partitionsRemaining = (1 << partitionOrder);
+    for (;;)
+    {
+        drflac_uint8 riceParam = 0;
+        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
+            if (!drflac__read_uint8(bs, 4, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 15) {
+                riceParam = 0xFF;
+            }
+        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+            if (!drflac__read_uint8(bs, 5, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 31) {
+                riceParam = 0xFF;
+            }
+        }
+
+        if (riceParam != 0xFF) {
+            if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            drflac_uint8 unencodedBitsPerSample = 0;
+            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+
+        if (partitionsRemaining == 1) {
+            break;
+        }
+
+        partitionsRemaining -= 1;
+        samplesInPartition = blockSize / (1 << partitionOrder);
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    /* Only a single sample needs to be decoded here. */
+    drflac_int32 sample;
+    if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely)
+    we'll want to look at a more efficient way.
+    */
+    for (i = 0; i < blockSize; ++i) {
+        pDecodedSamples[i] = sample;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    for (i = 0; i < blockSize; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    static drflac_int32 lpcCoefficientsTable[5][4] = {
+        {0,  0, 0,  0},
+        {1,  0, 0,  0},
+        {2, -1, 0,  0},
+        {3, -3, 1,  0},
+        {4, -6, 4, -1}
+    };
+
+    /* Warm up samples and coefficients. */
+    for (i = 0; i < lpcOrder; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, 4, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
+        return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
+{
+    drflac_uint8 i;
+    drflac_uint8 lpcPrecision;
+    drflac_int8 lpcShift;
+    drflac_int32 coefficients[32];
+
+    /* Warm up samples. */
+    for (i = 0; i < lpcOrder; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
+        return DRFLAC_FALSE;
+    }
+    if (lpcPrecision == 15) {
+        return DRFLAC_FALSE;    /* Invalid. */
+    }
+    lpcPrecision += 1;
+
+    if (!drflac__read_int8(bs, 5, &lpcShift)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    From the FLAC specification:
+
+        Quantized linear predictor coefficient shift needed in bits (NOTE: this number is signed two's-complement)
+
+    Emphasis on the "signed two's-complement". In practice there does not seem to be any encoders nor decoders supporting negative shifts. For now dr_flac is
+    not going to support negative shifts as I don't have any reference files. However, when a reference file comes through I will consider adding support.
+    */
+    if (lpcShift < 0) {
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ZERO_MEMORY(coefficients, sizeof(coefficients));
+    for (i = 0; i < lpcOrder; ++i) {
+        if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
+        return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header)
+{
+    const drflac_uint32 sampleRateTable[12]  = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000};
+    const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1};   /* -1 = reserved. */
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(header != NULL);
+
+    /* Keep looping until we find a valid sync code. */
+    for (;;) {
+        drflac_uint8 crc8 = 0xCE; /* 0xCE = drflac_crc8(0, 0x3FFE, 14); */
+        drflac_uint8 reserved = 0;
+        drflac_uint8 blockingStrategy = 0;
+        drflac_uint8 blockSize = 0;
+        drflac_uint8 sampleRate = 0;
+        drflac_uint8 channelAssignment = 0;
+        drflac_uint8 bitsPerSample = 0;
+        drflac_bool32 isVariableBlockSize;
+
+        if (!drflac__find_and_seek_to_next_sync_code(bs)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (!drflac__read_uint8(bs, 1, &reserved)) {
+            return DRFLAC_FALSE;
+        }
+        if (reserved == 1) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, reserved, 1);
+
+        if (!drflac__read_uint8(bs, 1, &blockingStrategy)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, blockingStrategy, 1);
+
+        if (!drflac__read_uint8(bs, 4, &blockSize)) {
+            return DRFLAC_FALSE;
+        }
+        if (blockSize == 0) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, blockSize, 4);
+
+        if (!drflac__read_uint8(bs, 4, &sampleRate)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, sampleRate, 4);
+
+        if (!drflac__read_uint8(bs, 4, &channelAssignment)) {
+            return DRFLAC_FALSE;
+        }
+        if (channelAssignment > 10) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, channelAssignment, 4);
+
+        if (!drflac__read_uint8(bs, 3, &bitsPerSample)) {
+            return DRFLAC_FALSE;
+        }
+        if (bitsPerSample == 3 || bitsPerSample == 7) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, bitsPerSample, 3);
+
+
+        if (!drflac__read_uint8(bs, 1, &reserved)) {
+            return DRFLAC_FALSE;
+        }
+        if (reserved == 1) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, reserved, 1);
+
+
+        isVariableBlockSize = blockingStrategy == 1;
+        if (isVariableBlockSize) {
+            drflac_uint64 pcmFrameNumber;
+            drflac_result result = drflac__read_utf8_coded_number(bs, &pcmFrameNumber, &crc8);
+            if (result != DRFLAC_SUCCESS) {
+                if (result == DRFLAC_AT_END) {
+                    return DRFLAC_FALSE;
+                } else {
+                    continue;
+                }
+            }
+            header->flacFrameNumber  = 0;
+            header->pcmFrameNumber = pcmFrameNumber;
+        } else {
+            drflac_uint64 flacFrameNumber = 0;
+            drflac_result result = drflac__read_utf8_coded_number(bs, &flacFrameNumber, &crc8);
+            if (result != DRFLAC_SUCCESS) {
+                if (result == DRFLAC_AT_END) {
+                    return DRFLAC_FALSE;
+                } else {
+                    continue;
+                }
+            }
+            header->flacFrameNumber  = (drflac_uint32)flacFrameNumber;   /* <-- Safe cast. */
+            header->pcmFrameNumber = 0;
+        }
+
+
+        DRFLAC_ASSERT(blockSize > 0);
+        if (blockSize == 1) {
+            header->blockSizeInPCMFrames = 192;
+        } else if (blockSize <= 5) {
+            DRFLAC_ASSERT(blockSize >= 2);
+            header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2));
+        } else if (blockSize == 6) {
+            if (!drflac__read_uint16(bs, 8, &header->blockSizeInPCMFrames)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 8);
+            header->blockSizeInPCMFrames += 1;
+        } else if (blockSize == 7) {
+            if (!drflac__read_uint16(bs, 16, &header->blockSizeInPCMFrames)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16);
+            if (header->blockSizeInPCMFrames == 0xFFFF) {
+                return DRFLAC_FALSE;    /* Frame is too big. This is the size of the frame minus 1. The STREAMINFO block defines the max block size which is 16-bits. Adding one will make it 17 bits and therefore too big. */
+            }
+            header->blockSizeInPCMFrames += 1;
+        } else {
+            DRFLAC_ASSERT(blockSize >= 8);
+            header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8));
+        }
+
+
+        if (sampleRate <= 11) {
+            header->sampleRate = sampleRateTable[sampleRate];
+        } else if (sampleRate == 12) {
+            if (!drflac__read_uint32(bs, 8, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 8);
+            header->sampleRate *= 1000;
+        } else if (sampleRate == 13) {
+            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
+        } else if (sampleRate == 14) {
+            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
+            header->sampleRate *= 10;
+        } else {
+            continue;  /* Invalid. Assume an invalid block. */
+        }
+
+
+        header->channelAssignment = channelAssignment;
+
+        header->bitsPerSample = bitsPerSampleTable[bitsPerSample];
+        if (header->bitsPerSample == 0) {
+            header->bitsPerSample = streaminfoBitsPerSample;
+        }
+
+        if (header->bitsPerSample != streaminfoBitsPerSample) {
+            /* If this subframe has a different bitsPerSample then streaminfo or the first frame, reject it */
+            return DRFLAC_FALSE;
+        }
+
+        if (!drflac__read_uint8(bs, 8, &header->crc8)) {
+            return DRFLAC_FALSE;
+        }
+
+#ifndef DR_FLAC_NO_CRC
+        if (header->crc8 != crc8) {
+            continue;    /* CRC mismatch. Loop back to the top and find the next sync code. */
+        }
+#endif
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe)
+{
+    drflac_uint8 header;
+    int type;
+
+    if (!drflac__read_uint8(bs, 8, &header)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* First bit should always be 0. */
+    if ((header & 0x80) != 0) {
+        return DRFLAC_FALSE;
+    }
+
+    type = (header & 0x7E) >> 1;
+    if (type == 0) {
+        pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT;
+    } else if (type == 1) {
+        pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM;
+    } else {
+        if ((type & 0x20) != 0) {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_LPC;
+            pSubframe->lpcOrder = (drflac_uint8)(type & 0x1F) + 1;
+        } else if ((type & 0x08) != 0) {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED;
+            pSubframe->lpcOrder = (drflac_uint8)(type & 0x07);
+            if (pSubframe->lpcOrder > 4) {
+                pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
+                pSubframe->lpcOrder = 0;
+            }
+        } else {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
+        }
+    }
+
+    if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Wasted bits per sample. */
+    pSubframe->wastedBitsPerSample = 0;
+    if ((header & 0x01) == 1) {
+        unsigned int wastedBitsPerSample;
+        if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) {
+            return DRFLAC_FALSE;
+        }
+        pSubframe->wastedBitsPerSample = (drflac_uint8)wastedBitsPerSample + 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut)
+{
+    drflac_subframe* pSubframe;
+    drflac_uint32 subframeBitsPerSample;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(frame != NULL);
+
+    pSubframe = frame->subframes + subframeIndex;
+    if (!drflac__read_subframe_header(bs, pSubframe)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
+    subframeBitsPerSample = frame->header.bitsPerSample;
+    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
+        subframeBitsPerSample += 1;
+    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
+        subframeBitsPerSample += 1;
+    }
+
+    if (subframeBitsPerSample > 32) {
+        /* libFLAC and ffmpeg reject 33-bit subframes as well */
+        return DRFLAC_FALSE;
+    }
+
+    /* Need to handle wasted bits per sample. */
+    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
+        return DRFLAC_FALSE;
+    }
+    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
+
+    pSubframe->pSamplesS32 = pDecodedSamplesOut;
+
+    switch (pSubframe->subframeType)
+    {
+        case DRFLAC_SUBFRAME_CONSTANT:
+        {
+            drflac__decode_samples__constant(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_VERBATIM:
+        {
+            drflac__decode_samples__verbatim(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_FIXED:
+        {
+            drflac__decode_samples__fixed(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_LPC:
+        {
+            drflac__decode_samples__lpc(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
+        } break;
+
+        default: return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex)
+{
+    drflac_subframe* pSubframe;
+    drflac_uint32 subframeBitsPerSample;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(frame != NULL);
+
+    pSubframe = frame->subframes + subframeIndex;
+    if (!drflac__read_subframe_header(bs, pSubframe)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
+    subframeBitsPerSample = frame->header.bitsPerSample;
+    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
+        subframeBitsPerSample += 1;
+    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
+        subframeBitsPerSample += 1;
+    }
+
+    /* Need to handle wasted bits per sample. */
+    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
+        return DRFLAC_FALSE;
+    }
+    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
+
+    pSubframe->pSamplesS32 = NULL;
+
+    switch (pSubframe->subframeType)
+    {
+        case DRFLAC_SUBFRAME_CONSTANT:
+        {
+            if (!drflac__seek_bits(bs, subframeBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_VERBATIM:
+        {
+            unsigned int bitsToSeek = frame->header.blockSizeInPCMFrames * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_FIXED:
+        {
+            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_LPC:
+        {
+            drflac_uint8 lpcPrecision;
+
+            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
+                return DRFLAC_FALSE;
+            }
+            if (lpcPrecision == 15) {
+                return DRFLAC_FALSE;    /* Invalid. */
+            }
+            lpcPrecision += 1;
+
+
+            bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5;    /* +5 for shift. */
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        default: return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment)
+{
+    drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2};
+
+    DRFLAC_ASSERT(channelAssignment <= 10);
+    return lookup[channelAssignment];
+}
+
+static drflac_result drflac__decode_flac_frame(drflac* pFlac)
+{
+    int channelCount;
+    int i;
+    drflac_uint8 paddingSizeInBits;
+    drflac_uint16 desiredCRC16;
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16;
+#endif
+
+    /* This function should be called while the stream is sitting on the first byte after the frame header. */
+    DRFLAC_ZERO_MEMORY(pFlac->currentFLACFrame.subframes, sizeof(pFlac->currentFLACFrame.subframes));
+
+    /* The frame block size must never be larger than the maximum block size defined by the FLAC stream. */
+    if (pFlac->currentFLACFrame.header.blockSizeInPCMFrames > pFlac->maxBlockSizeInPCMFrames) {
+        return DRFLAC_ERROR;
+    }
+
+    /* The number of channels in the frame must match the channel count from the STREAMINFO block. */
+    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+    if (channelCount != (int)pFlac->channels) {
+        return DRFLAC_ERROR;
+    }
+
+    for (i = 0; i < channelCount; ++i) {
+        if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i, pFlac->pDecodedSamples + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames * i))) {
+            return DRFLAC_ERROR;
+        }
+    }
+
+    paddingSizeInBits = (drflac_uint8)(DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7);
+    if (paddingSizeInBits > 0) {
+        drflac_uint8 padding = 0;
+        if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) {
+            return DRFLAC_AT_END;
+        }
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
+#endif
+    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
+        return DRFLAC_AT_END;
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    if (actualCRC16 != desiredCRC16) {
+        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
+    }
+#endif
+
+    pFlac->currentFLACFrame.pcmFramesRemaining = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_result drflac__seek_flac_frame(drflac* pFlac)
+{
+    int channelCount;
+    int i;
+    drflac_uint16 desiredCRC16;
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16;
+#endif
+
+    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+    for (i = 0; i < channelCount; ++i) {
+        if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i)) {
+            return DRFLAC_ERROR;
+        }
+    }
+
+    /* Padding. */
+    if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) {
+        return DRFLAC_ERROR;
+    }
+
+    /* CRC. */
+#ifndef DR_FLAC_NO_CRC
+    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
+#endif
+    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
+        return DRFLAC_AT_END;
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    if (actualCRC16 != desiredCRC16) {
+        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    for (;;) {
+        drflac_result result;
+
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+
+        result = drflac__decode_flac_frame(pFlac);
+        if (result != DRFLAC_SUCCESS) {
+            if (result == DRFLAC_CRC_MISMATCH) {
+                continue;   /* CRC mismatch. Skip to the next frame. */
+            } else {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        return DRFLAC_TRUE;
+    }
+}
+
+static void drflac__get_pcm_frame_range_of_current_flac_frame(drflac* pFlac, drflac_uint64* pFirstPCMFrame, drflac_uint64* pLastPCMFrame)
+{
+    drflac_uint64 firstPCMFrame;
+    drflac_uint64 lastPCMFrame;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    firstPCMFrame = pFlac->currentFLACFrame.header.pcmFrameNumber;
+    if (firstPCMFrame == 0) {
+        firstPCMFrame = ((drflac_uint64)pFlac->currentFLACFrame.header.flacFrameNumber) * pFlac->maxBlockSizeInPCMFrames;
+    }
+
+    lastPCMFrame = firstPCMFrame + pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+    if (lastPCMFrame > 0) {
+        lastPCMFrame -= 1; /* Needs to be zero based. */
+    }
+
+    if (pFirstPCMFrame) {
+        *pFirstPCMFrame = firstPCMFrame;
+    }
+    if (pLastPCMFrame) {
+        *pLastPCMFrame = lastPCMFrame;
+    }
+}
+
+static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac)
+{
+    drflac_bool32 result;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes);
+
+    DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
+    pFlac->currentPCMFrame = 0;
+
+    return result;
+}
+
+static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac)
+{
+    /* This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. */
+    DRFLAC_ASSERT(pFlac != NULL);
+    return drflac__seek_flac_frame(pFlac);
+}
+
+
+static drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek)
+{
+    drflac_uint64 pcmFramesRead = 0;
+    while (pcmFramesToSeek > 0) {
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            if (pFlac->currentFLACFrame.pcmFramesRemaining > pcmFramesToSeek) {
+                pcmFramesRead   += pcmFramesToSeek;
+                pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)pcmFramesToSeek;   /* <-- Safe cast. Will always be < currentFrame.pcmFramesRemaining < 65536. */
+                pcmFramesToSeek  = 0;
+            } else {
+                pcmFramesRead   += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pcmFramesToSeek -= pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+            }
+        }
+    }
+
+    pFlac->currentPCMFrame += pcmFramesRead;
+    return pcmFramesRead;
+}
+
+
+static drflac_bool32 drflac__seek_to_pcm_frame__brute_force(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_bool32 isMidFrame = DRFLAC_FALSE;
+    drflac_uint64 runningPCMFrameCount;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    /* If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. */
+    if (pcmFrameIndex >= pFlac->currentPCMFrame) {
+        /* Seeking forward. Need to seek from the current position. */
+        runningPCMFrameCount = pFlac->currentPCMFrame;
+
+        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
+        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            isMidFrame = DRFLAC_TRUE;
+        }
+    } else {
+        /* Seeking backwards. Need to seek from the start of the file. */
+        runningPCMFrameCount = 0;
+
+        /* Move back to the start. */
+        if (!drflac__seek_to_first_frame(pFlac)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Decode the first frame in preparation for sample-exact seeking below. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    /*
+    We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its
+    header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame.
+    */
+    for (;;) {
+        drflac_uint64 pcmFrameCountInThisFLACFrame;
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
+            /*
+            The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
+
+            if (!isMidFrame) {
+                drflac_result result = drflac__decode_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            if (!isMidFrame) {
+                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /*
+                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
+                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                */
+                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                isMidFrame = DRFLAC_FALSE;
+            }
+
+            /* If we are seeking to the end of the file and we've just hit it, we're done. */
+            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
+                return DRFLAC_TRUE;
+            }
+        }
+
+    next_iteration:
+        /* Grab the next frame in preparation for the next iteration. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+}
+
+
+#if !defined(DR_FLAC_NO_CRC)
+/*
+We use an average compression ratio to determine our approximate start location. FLAC files are generally about 50%-70% the size of their
+uncompressed counterparts so we'll use this as a basis. I'm going to split the middle and use a factor of 0.6 to determine the starting
+location.
+*/
+#define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f
+
+static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFlac, drflac_uint64 targetByte, drflac_uint64 rangeLo, drflac_uint64 rangeHi, drflac_uint64* pLastSuccessfulSeekOffset)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+    DRFLAC_ASSERT(pLastSuccessfulSeekOffset != NULL);
+    DRFLAC_ASSERT(targetByte >= rangeLo);
+    DRFLAC_ASSERT(targetByte <= rangeHi);
+
+    *pLastSuccessfulSeekOffset = pFlac->firstFLACFramePosInBytes;
+
+    for (;;) {
+        /* After rangeLo == rangeHi == targetByte fails, we need to break out. */
+        drflac_uint64 lastTargetByte = targetByte;
+
+        /* When seeking to a byte, failure probably means we've attempted to seek beyond the end of the stream. To counter this we just halve it each attempt. */
+        if (!drflac__seek_to_byte(&pFlac->bs, targetByte)) {
+            /* If we couldn't even seek to the first byte in the stream we have a problem. Just abandon the whole thing. */
+            if (targetByte == 0) {
+                drflac__seek_to_first_frame(pFlac); /* Try to recover. */
+                return DRFLAC_FALSE;
+            }
+
+            /* Halve the byte location and continue. */
+            targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+            rangeHi = targetByte;
+        } else {
+            /* Getting here should mean that we have seeked to an appropriate byte. */
+
+            /* Clear the details of the FLAC frame so we don't misreport data. */
+            DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
+
+            /*
+            Now seek to the next FLAC frame. We need to decode the entire frame (not just the header) because it's possible for the header to incorrectly pass the
+            CRC check and return bad data. We need to decode the entire frame to be more certain. Although this seems unlikely, this has happened to me in testing
+            so it needs to stay this way for now.
+            */
+#if 1
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                /* Halve the byte location and continue. */
+                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+                rangeHi = targetByte;
+            } else {
+                break;
+            }
+#else
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                /* Halve the byte location and continue. */
+                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+                rangeHi = targetByte;
+            } else {
+                break;
+            }
+#endif
+        }
+
+        /* We already tried this byte and there are no more to try, break out. */
+        if(targetByte == lastTargetByte) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    /* The current PCM frame needs to be updated based on the frame we just seeked to. */
+    drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
+
+    DRFLAC_ASSERT(targetByte <= rangeHi);
+
+    *pLastSuccessfulSeekOffset = targetByte;
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 offset)
+{
+    /* This section of code would be used if we were only decoding the FLAC frame header when calling drflac__seek_to_approximate_flac_frame_to_byte(). */
+#if 0
+    if (drflac__decode_flac_frame(pFlac) != DRFLAC_SUCCESS) {
+        /* We failed to decode this frame which may be due to it being corrupt. We'll just use the next valid FLAC frame. */
+        if (drflac__read_and_decode_next_flac_frame(pFlac) == DRFLAC_FALSE) {
+            return DRFLAC_FALSE;
+        }
+    }
+#endif
+
+    return drflac__seek_forward_by_pcm_frames(pFlac, offset) == offset;
+}
+
+
+static drflac_bool32 drflac__seek_to_pcm_frame__binary_search_internal(drflac* pFlac, drflac_uint64 pcmFrameIndex, drflac_uint64 byteRangeLo, drflac_uint64 byteRangeHi)
+{
+    /* This assumes pFlac->currentPCMFrame is sitting on byteRangeLo upon entry. */
+
+    drflac_uint64 targetByte;
+    drflac_uint64 pcmRangeLo = pFlac->totalPCMFrameCount;
+    drflac_uint64 pcmRangeHi = 0;
+    drflac_uint64 lastSuccessfulSeekOffset = (drflac_uint64)-1;
+    drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo;
+    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
+
+    targetByte = byteRangeLo + (drflac_uint64)(((drflac_int64)((pcmFrameIndex - pFlac->currentPCMFrame) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO);
+    if (targetByte > byteRangeHi) {
+        targetByte = byteRangeHi;
+    }
+
+    for (;;) {
+        if (drflac__seek_to_approximate_flac_frame_to_byte(pFlac, targetByte, byteRangeLo, byteRangeHi, &lastSuccessfulSeekOffset)) {
+            /* We found a FLAC frame. We need to check if it contains the sample we're looking for. */
+            drflac_uint64 newPCMRangeLo;
+            drflac_uint64 newPCMRangeHi;
+            drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &newPCMRangeLo, &newPCMRangeHi);
+
+            /* If we selected the same frame, it means we should be pretty close. Just decode the rest. */
+            if (pcmRangeLo == newPCMRangeLo) {
+                if (!drflac__seek_to_approximate_flac_frame_to_byte(pFlac, closestSeekOffsetBeforeTargetPCMFrame, closestSeekOffsetBeforeTargetPCMFrame, byteRangeHi, &lastSuccessfulSeekOffset)) {
+                    break;  /* Failed to seek to closest frame. */
+                }
+
+                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
+                    return DRFLAC_TRUE;
+                } else {
+                    break;  /* Failed to seek forward. */
+                }
+            }
+
+            pcmRangeLo = newPCMRangeLo;
+            pcmRangeHi = newPCMRangeHi;
+
+            if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) {
+                /* The target PCM frame is in this FLAC frame. */
+                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame) ) {
+                    return DRFLAC_TRUE;
+                } else {
+                    break;  /* Failed to seek to FLAC frame. */
+                }
+            } else {
+                const float approxCompressionRatio = (drflac_int64)(lastSuccessfulSeekOffset - pFlac->firstFLACFramePosInBytes) / ((drflac_int64)(pcmRangeLo * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+
+                if (pcmRangeLo > pcmFrameIndex) {
+                    /* We seeked too far forward. We need to move our target byte backward and try again. */
+                    byteRangeHi = lastSuccessfulSeekOffset;
+                    if (byteRangeLo > byteRangeHi) {
+                        byteRangeLo = byteRangeHi;
+                    }
+
+                    targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2);
+                    if (targetByte < byteRangeLo) {
+                        targetByte = byteRangeLo;
+                    }
+                } else /*if (pcmRangeHi < pcmFrameIndex)*/ {
+                    /* We didn't seek far enough. We need to move our target byte forward and try again. */
+
+                    /* If we're close enough we can just seek forward. */
+                    if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) {
+                        if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
+                            return DRFLAC_TRUE;
+                        } else {
+                            break;  /* Failed to seek to FLAC frame. */
+                        }
+                    } else {
+                        byteRangeLo = lastSuccessfulSeekOffset;
+                        if (byteRangeHi < byteRangeLo) {
+                            byteRangeHi = byteRangeLo;
+                        }
+
+                        targetByte = lastSuccessfulSeekOffset + (drflac_uint64)(((drflac_int64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * approxCompressionRatio);
+                        if (targetByte > byteRangeHi) {
+                            targetByte = byteRangeHi;
+                        }
+
+                        if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) {
+                            closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset;
+                        }
+                    }
+                }
+            }
+        } else {
+            /* Getting here is really bad. We just recover as best we can, but moving to the first frame in the stream, and then abort. */
+            break;
+        }
+    }
+
+    drflac__seek_to_first_frame(pFlac); /* <-- Try to recover. */
+    return DRFLAC_FALSE;
+}
+
+static drflac_bool32 drflac__seek_to_pcm_frame__binary_search(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_uint64 byteRangeLo;
+    drflac_uint64 byteRangeHi;
+    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
+
+    /* Our algorithm currently assumes the FLAC stream is currently sitting at the start. */
+    if (drflac__seek_to_first_frame(pFlac) == DRFLAC_FALSE) {
+        return DRFLAC_FALSE;
+    }
+
+    /* If we're close enough to the start, just move to the start and seek forward. */
+    if (pcmFrameIndex < seekForwardThreshold) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, pcmFrameIndex) == pcmFrameIndex;
+    }
+
+    /*
+    Our starting byte range is the byte position of the first FLAC frame and the approximate end of the file as if it were completely uncompressed. This ensures
+    the entire file is included, even though most of the time it'll exceed the end of the actual stream. This is OK as the frame searching logic will handle it.
+    */
+    byteRangeLo = pFlac->firstFLACFramePosInBytes;
+    byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+
+    return drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi);
+}
+#endif  /* !DR_FLAC_NO_CRC */
+
+static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_uint32 iClosestSeekpoint = 0;
+    drflac_bool32 isMidFrame = DRFLAC_FALSE;
+    drflac_uint64 runningPCMFrameCount;
+    drflac_uint32 iSeekpoint;
+
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Do not use the seektable if pcmFramIndex is not coverd by it. */
+    if (pFlac->pSeekpoints[0].firstPCMFrame > pcmFrameIndex) {
+        return DRFLAC_FALSE;
+    }
+
+    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
+        if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) {
+            break;
+        }
+
+        iClosestSeekpoint = iSeekpoint;
+    }
+
+    /* There's been cases where the seek table contains only zeros. We need to do some basic validation on the closest seekpoint. */
+    if (pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount == 0 || pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount > pFlac->maxBlockSizeInPCMFrames) {
+        return DRFLAC_FALSE;
+    }
+    if (pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame > pFlac->totalPCMFrameCount && pFlac->totalPCMFrameCount > 0) {
+        return DRFLAC_FALSE;
+    }
+
+#if !defined(DR_FLAC_NO_CRC)
+    /* At this point we should know the closest seek point. We can use a binary search for this. We need to know the total sample count for this. */
+    if (pFlac->totalPCMFrameCount > 0) {
+        drflac_uint64 byteRangeLo;
+        drflac_uint64 byteRangeHi;
+
+        byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+        byteRangeLo = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset;
+
+        /*
+        If our closest seek point is not the last one, we only need to search between it and the next one. The section below calculates an appropriate starting
+        value for byteRangeHi which will clamp it appropriately.
+
+        Note that the next seekpoint must have an offset greater than the closest seekpoint because otherwise our binary search algorithm will break down. There
+        have been cases where a seektable consists of seek points where every byte offset is set to 0 which causes problems. If this happens we need to abort.
+        */
+        if (iClosestSeekpoint < pFlac->seekpointCount-1) {
+            drflac_uint32 iNextSeekpoint = iClosestSeekpoint + 1;
+
+            /* Basic validation on the seekpoints to ensure they're usable. */
+            if (pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset >= pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset || pFlac->pSeekpoints[iNextSeekpoint].pcmFrameCount == 0) {
+                return DRFLAC_FALSE;    /* The next seekpoint doesn't look right. The seek table cannot be trusted from here. Abort. */
+            }
+
+            if (pFlac->pSeekpoints[iNextSeekpoint].firstPCMFrame != (((drflac_uint64)0xFFFFFFFF << 32) | 0xFFFFFFFF)) { /* Make sure it's not a placeholder seekpoint. */
+                byteRangeHi = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset - 1; /* byteRangeHi must be zero based. */
+            }
+        }
+
+        if (drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
+            if (drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
+
+                if (drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi)) {
+                    return DRFLAC_TRUE;
+                }
+            }
+        }
+    }
+#endif  /* !DR_FLAC_NO_CRC */
+
+    /* Getting here means we need to use a slower algorithm because the binary search method failed or cannot be used. */
+
+    /*
+    If we are seeking forward and the closest seekpoint is _before_ the current sample, we just seek forward from where we are. Otherwise we start seeking
+    from the seekpoint's first sample.
+    */
+    if (pcmFrameIndex >= pFlac->currentPCMFrame && pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame <= pFlac->currentPCMFrame) {
+        /* Optimized case. Just seek forward from where we are. */
+        runningPCMFrameCount = pFlac->currentPCMFrame;
+
+        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
+        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            isMidFrame = DRFLAC_TRUE;
+        }
+    } else {
+        /* Slower case. Seek to the start of the seekpoint and then seek forward from there. */
+        runningPCMFrameCount = pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame;
+
+        if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    for (;;) {
+        drflac_uint64 pcmFrameCountInThisFLACFrame;
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
+            /*
+            The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
+
+            if (!isMidFrame) {
+                drflac_result result = drflac__decode_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            if (!isMidFrame) {
+                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /*
+                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
+                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                */
+                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                isMidFrame = DRFLAC_FALSE;
+            }
+
+            /* If we are seeking to the end of the file and we've just hit it, we're done. */
+            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
+                return DRFLAC_TRUE;
+            }
+        }
+
+    next_iteration:
+        /* Grab the next frame in preparation for the next iteration. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+}
+
+
+#ifndef DR_FLAC_NO_OGG
+typedef struct
+{
+    drflac_uint8 capturePattern[4];  /* Should be "OggS" */
+    drflac_uint8 structureVersion;   /* Always 0. */
+    drflac_uint8 headerType;
+    drflac_uint64 granulePosition;
+    drflac_uint32 serialNumber;
+    drflac_uint32 sequenceNumber;
+    drflac_uint32 checksum;
+    drflac_uint8 segmentCount;
+    drflac_uint8 segmentTable[255];
+} drflac_ogg_page_header;
+#endif
+
+typedef struct
+{
+    drflac_read_proc onRead;
+    drflac_seek_proc onSeek;
+    drflac_meta_proc onMeta;
+    drflac_container container;
+    void* pUserData;
+    void* pUserDataMD;
+    drflac_uint32 sampleRate;
+    drflac_uint8  channels;
+    drflac_uint8  bitsPerSample;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_uint16 maxBlockSizeInPCMFrames;
+    drflac_uint64 runningFilePos;
+    drflac_bool32 hasStreamInfoBlock;
+    drflac_bool32 hasMetadataBlocks;
+    drflac_bs bs;                           /* <-- A bit streamer is required for loading data during initialization. */
+    drflac_frame_header firstFrameHeader;   /* <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block. */
+
+#ifndef DR_FLAC_NO_OGG
+    drflac_uint32 oggSerial;
+    drflac_uint64 oggFirstBytePos;
+    drflac_ogg_page_header oggBosHeader;
+#endif
+} drflac_init_info;
+
+static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
+{
+    blockHeader = drflac__be2host_32(blockHeader);
+    *isLastBlock = (drflac_uint8)((blockHeader & 0x80000000UL) >> 31);
+    *blockType   = (drflac_uint8)((blockHeader & 0x7F000000UL) >> 24);
+    *blockSize   =                (blockHeader & 0x00FFFFFFUL);
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
+{
+    drflac_uint32 blockHeader;
+
+    *blockSize = 0;
+    if (onRead(pUserData, &blockHeader, 4) != 4) {
+        return DRFLAC_FALSE;
+    }
+
+    drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize);
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo)
+{
+    drflac_uint32 blockSizes;
+    drflac_uint64 frameSizes = 0;
+    drflac_uint64 importantProps;
+    drflac_uint8 md5[16];
+
+    /* min/max block size. */
+    if (onRead(pUserData, &blockSizes, 4) != 4) {
+        return DRFLAC_FALSE;
+    }
+
+    /* min/max frame size. */
+    if (onRead(pUserData, &frameSizes, 6) != 6) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Sample rate, channels, bits per sample and total sample count. */
+    if (onRead(pUserData, &importantProps, 8) != 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* MD5 */
+    if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) {
+        return DRFLAC_FALSE;
+    }
+
+    blockSizes     = drflac__be2host_32(blockSizes);
+    frameSizes     = drflac__be2host_64(frameSizes);
+    importantProps = drflac__be2host_64(importantProps);
+
+    pStreamInfo->minBlockSizeInPCMFrames = (drflac_uint16)((blockSizes & 0xFFFF0000) >> 16);
+    pStreamInfo->maxBlockSizeInPCMFrames = (drflac_uint16) (blockSizes & 0x0000FFFF);
+    pStreamInfo->minFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) << 24)) >> 40);
+    pStreamInfo->maxFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) <<  0)) >> 16);
+    pStreamInfo->sampleRate              = (drflac_uint32)((importantProps &  (((drflac_uint64)0x000FFFFF << 16) << 28)) >> 44);
+    pStreamInfo->channels                = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000000E << 16) << 24)) >> 41) + 1;
+    pStreamInfo->bitsPerSample           = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000001F << 16) << 20)) >> 36) + 1;
+    pStreamInfo->totalPCMFrameCount      =                ((importantProps & ((((drflac_uint64)0x0000000F << 16) << 16) | 0xFFFFFFFF)));
+    DRFLAC_COPY_MEMORY(pStreamInfo->md5, md5, sizeof(md5));
+
+    return DRFLAC_TRUE;
+}
+
+
+static void* drflac__malloc_default(size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRFLAC_MALLOC(sz);
+}
+
+static void* drflac__realloc_default(void* p, size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRFLAC_REALLOC(p, sz);
+}
+
+static void drflac__free_default(void* p, void* pUserData)
+{
+    (void)pUserData;
+    DRFLAC_FREE(p);
+}
+
+
+static void* drflac__malloc_from_callbacks(size_t sz, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onMalloc != NULL) {
+        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try using realloc(). */
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
+    }
+
+    return NULL;
+}
+
+static void* drflac__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try emulating realloc() in terms of malloc()/free(). */
+    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
+        void* p2;
+
+        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
+        if (p2 == NULL) {
+            return NULL;
+        }
+
+        if (p != NULL) {
+            DRFLAC_COPY_MEMORY(p2, p, szOld);
+            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+        }
+
+        return p2;
+    }
+
+    return NULL;
+}
+
+static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (p == NULL || pAllocationCallbacks == NULL) {
+        return;
+    }
+
+    if (pAllocationCallbacks->onFree != NULL) {
+        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+    }
+}
+
+
+static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeekpointCount, drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    /*
+    We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
+    we'll be sitting on byte 42.
+    */
+    drflac_uint64 runningFilePos = 42;
+    drflac_uint64 seektablePos   = 0;
+    drflac_uint32 seektableSize  = 0;
+
+    for (;;) {
+        drflac_metadata metadata;
+        drflac_uint8 isLastBlock = 0;
+        drflac_uint8 blockType = 0;
+        drflac_uint32 blockSize;
+        if (drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize) == DRFLAC_FALSE) {
+            return DRFLAC_FALSE;
+        }
+        runningFilePos += 4;
+
+        metadata.type = blockType;
+        metadata.pRawData = NULL;
+        metadata.rawDataSize = 0;
+
+        switch (blockType)
+        {
+            case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION:
+            {
+                if (blockSize < 4) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    metadata.data.application.id       = drflac__be2host_32(*(drflac_uint32*)pRawData);
+                    metadata.data.application.pData    = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32));
+                    metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32);
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE:
+            {
+                seektablePos  = runningFilePos;
+                seektableSize = blockSize;
+
+                if (onMeta) {
+                    drflac_uint32 seekpointCount;
+                    drflac_uint32 iSeekpoint;
+                    void* pRawData;
+
+                    seekpointCount = blockSize/DRFLAC_SEEKPOINT_SIZE_IN_BYTES;
+
+                    pRawData = drflac__malloc_from_callbacks(seekpointCount * sizeof(drflac_seekpoint), pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    /* We need to read seekpoint by seekpoint and do some processing. */
+                    for (iSeekpoint = 0; iSeekpoint < seekpointCount; ++iSeekpoint) {
+                        drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
+
+                        if (onRead(pUserData, pSeekpoint, DRFLAC_SEEKPOINT_SIZE_IN_BYTES) != DRFLAC_SEEKPOINT_SIZE_IN_BYTES) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        /* Endian swap. */
+                        pSeekpoint->firstPCMFrame   = drflac__be2host_64(pSeekpoint->firstPCMFrame);
+                        pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset);
+                        pSeekpoint->pcmFrameCount   = drflac__be2host_16(pSeekpoint->pcmFrameCount);
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    metadata.data.seektable.seekpointCount = seekpointCount;
+                    metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT:
+            {
+                if (blockSize < 8) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+                    drflac_uint32 i;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.vorbis_comment.vendor       = pRunningData;                                            pRunningData += metadata.data.vorbis_comment.vendorLength;
+                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+
+                    /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
+                    if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.vorbis_comment.pComments    = pRunningData;
+
+                    /* Check that the comments section is valid before passing it to the callback */
+                    for (i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) {
+                        drflac_uint32 commentLength;
+
+                        if (pRunningDataEnd - pRunningData < 4) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        commentLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                        if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+                        pRunningData += commentLength;
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET:
+            {
+                if (blockSize < 396) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+                    size_t bufferSize;
+                    drflac_uint8 iTrack;
+                    drflac_uint8 iIndex;
+                    void* pTrackData;
+
+                    /*
+                    This needs to be loaded in two passes. The first pass is used to calculate the size of the memory allocation
+                    we need for storing the necessary data. The second pass will fill that buffer with usable data.
+                    */
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    DRFLAC_COPY_MEMORY(metadata.data.cuesheet.catalog, pRunningData, 128);                              pRunningData += 128;
+                    metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8;
+                    metadata.data.cuesheet.isCD              = (pRunningData[0] & 0x80) != 0;                           pRunningData += 259;
+                    metadata.data.cuesheet.trackCount        = pRunningData[0];                                         pRunningData += 1;
+                    metadata.data.cuesheet.pTrackData        = NULL;    /* Will be filled later. */
+
+                    /* Pass 1: Calculate the size of the buffer for the track data. */
+                    {
+                        const char* pRunningDataSaved = pRunningData;   /* Will be restored at the end in preparation for the second pass. */
+
+                        bufferSize = metadata.data.cuesheet.trackCount * DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES;
+
+                        for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
+                            drflac_uint8 indexCount;
+                            drflac_uint32 indexPointSize;
+
+                            if (pRunningDataEnd - pRunningData < DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES) {
+                                drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                                return DRFLAC_FALSE;
+                            }
+
+                            /* Skip to the index point count */
+                            pRunningData += 35;
+                            
+                            indexCount = pRunningData[0];
+                            pRunningData += 1;
+                            
+                            bufferSize += indexCount * sizeof(drflac_cuesheet_track_index);
+
+                            /* Quick validation check. */
+                            indexPointSize = indexCount * DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES;
+                            if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) {
+                                drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                                return DRFLAC_FALSE;
+                            }
+
+                            pRunningData += indexPointSize;
+                        }
+
+                        pRunningData = pRunningDataSaved;
+                    }
+
+                    /* Pass 2: Allocate a buffer and fill the data. Validation was done in the step above so can be skipped. */
+                    {
+                        char* pRunningTrackData;
+
+                        pTrackData = drflac__malloc_from_callbacks(bufferSize, pAllocationCallbacks);
+                        if (pTrackData == NULL) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        pRunningTrackData = (char*)pTrackData;
+
+                        for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
+                            drflac_uint8 indexCount;
+
+                            DRFLAC_COPY_MEMORY(pRunningTrackData, pRunningData, DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES);
+                            pRunningData      += DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES-1; /* Skip forward, but not beyond the last byte in the CUESHEET_TRACK block which is the index count. */
+                            pRunningTrackData += DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES-1;
+
+                            /* Grab the index count for the next part. */
+                            indexCount = pRunningData[0];
+                            pRunningData      += 1;
+                            pRunningTrackData += 1;
+
+                            /* Extract each track index. */
+                            for (iIndex = 0; iIndex < indexCount; ++iIndex) {
+                                drflac_cuesheet_track_index* pTrackIndex = (drflac_cuesheet_track_index*)pRunningTrackData;
+
+                                DRFLAC_COPY_MEMORY(pRunningTrackData, pRunningData, DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES);
+                                pRunningData      += DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES;
+                                pRunningTrackData += sizeof(drflac_cuesheet_track_index);
+
+                                pTrackIndex->offset = drflac__be2host_64(pTrackIndex->offset);
+                            }
+                        }
+
+                        metadata.data.cuesheet.pTrackData = pTrackData;
+                    }
+
+                    /* The original data is no longer needed. */
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                    pRawData = NULL;
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pTrackData, pAllocationCallbacks);
+                    pTrackData = NULL;
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_PICTURE:
+            {
+                if (blockSize < 32) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    metadata.data.picture.type       = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.mimeLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.picture.mime              = pRunningData;                                   pRunningData += metadata.data.picture.mimeLength;
+                    metadata.data.picture.descriptionLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.picture.description     = pRunningData;                                   pRunningData += metadata.data.picture.descriptionLength;
+                    metadata.data.picture.width           = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.height          = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.colorDepth      = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.indexColorCount = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.pictureDataSize = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.pPictureData    = (const drflac_uint8*)pRunningData;
+
+                    /* Need space for the picture after the block */
+                    if (pRunningDataEnd - pRunningData < (drflac_int64)metadata.data.picture.pictureDataSize) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_PADDING:
+            {
+                if (onMeta) {
+                    metadata.data.padding.unused = 0;
+
+                    /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */
+                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
+                    } else {
+                        onMeta(pUserDataMD, &metadata);
+                    }
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_INVALID:
+            {
+                /* Invalid chunk. Just skip over this one. */
+                if (onMeta) {
+                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
+                    }
+                }
+            } break;
+
+            default:
+            {
+                /*
+                It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we
+                can at the very least report the chunk to the application and let it look at the raw data.
+                */
+                if (onMeta) {
+                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+        }
+
+        /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. */
+        if (onMeta == NULL && blockSize > 0) {
+            if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                isLastBlock = DRFLAC_TRUE;
+            }
+        }
+
+        runningFilePos += blockSize;
+        if (isLastBlock) {
+            break;
+        }
+    }
+
+    *pSeektablePos   = seektablePos;
+    *pSeekpointCount = seektableSize / DRFLAC_SEEKPOINT_SIZE_IN_BYTES;
+    *pFirstFramePos  = runningFilePos;
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
+{
+    /* Pre Condition: The bit stream should be sitting just past the 4-byte id header. */
+
+    drflac_uint8 isLastBlock;
+    drflac_uint8 blockType;
+    drflac_uint32 blockSize;
+
+    (void)onSeek;
+
+    pInit->container = drflac_container_native;
+
+    /* The first metadata block should be the STREAMINFO block. */
+    if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
+        if (!relaxed) {
+            /* We're opening in strict mode and the first block is not the STREAMINFO block. Error. */
+            return DRFLAC_FALSE;
+        } else {
+            /*
+            Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined
+            for that frame.
+            */
+            pInit->hasStreamInfoBlock = DRFLAC_FALSE;
+            pInit->hasMetadataBlocks  = DRFLAC_FALSE;
+
+            if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) {
+                return DRFLAC_FALSE;    /* Couldn't find a frame. */
+            }
+
+            if (pInit->firstFrameHeader.bitsPerSample == 0) {
+                return DRFLAC_FALSE;    /* Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. */
+            }
+
+            pInit->sampleRate              = pInit->firstFrameHeader.sampleRate;
+            pInit->channels                = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment);
+            pInit->bitsPerSample           = pInit->firstFrameHeader.bitsPerSample;
+            pInit->maxBlockSizeInPCMFrames = 65535;   /* <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo */
+            return DRFLAC_TRUE;
+        }
+    } else {
+        drflac_streaminfo streaminfo;
+        if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
+            return DRFLAC_FALSE;
+        }
+
+        pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
+        pInit->sampleRate              = streaminfo.sampleRate;
+        pInit->channels                = streaminfo.channels;
+        pInit->bitsPerSample           = streaminfo.bitsPerSample;
+        pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
+        pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;    /* Don't care about the min block size - only the max (used for determining the size of the memory allocation). */
+        pInit->hasMetadataBlocks       = !isLastBlock;
+
+        if (onMeta) {
+            drflac_metadata metadata;
+            metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
+            metadata.pRawData = NULL;
+            metadata.rawDataSize = 0;
+            metadata.data.streaminfo = streaminfo;
+            onMeta(pUserDataMD, &metadata);
+        }
+
+        return DRFLAC_TRUE;
+    }
+}
+
+#ifndef DR_FLAC_NO_OGG
+#define DRFLAC_OGG_MAX_PAGE_SIZE            65307
+#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32    1605413199  /* CRC-32 of "OggS". */
+
+typedef enum
+{
+    drflac_ogg_recover_on_crc_mismatch,
+    drflac_ogg_fail_on_crc_mismatch
+} drflac_ogg_crc_mismatch_recovery;
+
+#ifndef DR_FLAC_NO_CRC
+static drflac_uint32 drflac__crc32_table[] = {
+    0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L,
+    0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L,
+    0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L,
+    0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL,
+    0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L,
+    0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L,
+    0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L,
+    0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL,
+    0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L,
+    0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L,
+    0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L,
+    0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL,
+    0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L,
+    0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L,
+    0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L,
+    0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL,
+    0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL,
+    0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L,
+    0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L,
+    0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL,
+    0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL,
+    0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L,
+    0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L,
+    0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL,
+    0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL,
+    0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L,
+    0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L,
+    0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL,
+    0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL,
+    0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L,
+    0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L,
+    0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL,
+    0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L,
+    0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL,
+    0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL,
+    0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L,
+    0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L,
+    0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL,
+    0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL,
+    0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L,
+    0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L,
+    0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL,
+    0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL,
+    0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L,
+    0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L,
+    0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL,
+    0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL,
+    0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L,
+    0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L,
+    0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL,
+    0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L,
+    0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L,
+    0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L,
+    0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL,
+    0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L,
+    0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L,
+    0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L,
+    0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL,
+    0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L,
+    0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L,
+    0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L,
+    0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL,
+    0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L,
+    0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L
+};
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data)
+{
+#ifndef DR_FLAC_NO_CRC
+    return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data];
+#else
+    (void)data;
+    return crc32;
+#endif
+}
+
+#if 0
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint32(drflac_uint32 crc32, drflac_uint32 data)
+{
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 24) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 16) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  8) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  0) & 0xFF));
+    return crc32;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drflac_uint64 data)
+{
+    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 32) & 0xFFFFFFFF));
+    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >>  0) & 0xFFFFFFFF));
+    return crc32;
+}
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize)
+{
+    /* This can be optimized. */
+    drflac_uint32 i;
+    for (i = 0; i < dataSize; ++i) {
+        crc32 = drflac_crc32_byte(crc32, pData[i]);
+    }
+    return crc32;
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4])
+{
+    return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S';
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader)
+{
+    return 27 + pHeader->segmentCount;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader)
+{
+    drflac_uint32 pageBodySize = 0;
+    int i;
+
+    for (i = 0; i < pHeader->segmentCount; ++i) {
+        pageBodySize += pHeader->segmentTable[i];
+    }
+
+    return pageBodySize;
+}
+
+static drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
+{
+    drflac_uint8 data[23];
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32);
+
+    if (onRead(pUserData, data, 23) != 23) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += 23;
+
+    /*
+    It's not actually used, but set the capture pattern to 'OggS' for completeness. Not doing this will cause static analysers to complain about
+    us trying to access uninitialized data. We could alternatively just comment out this member of the drflac_ogg_page_header structure, but I
+    like to have it map to the structure of the underlying data.
+    */
+    pHeader->capturePattern[0] = 'O';
+    pHeader->capturePattern[1] = 'g';
+    pHeader->capturePattern[2] = 'g';
+    pHeader->capturePattern[3] = 'S';
+
+    pHeader->structureVersion = data[0];
+    pHeader->headerType       = data[1];
+    DRFLAC_COPY_MEMORY(&pHeader->granulePosition, &data[ 2], 8);
+    DRFLAC_COPY_MEMORY(&pHeader->serialNumber,    &data[10], 4);
+    DRFLAC_COPY_MEMORY(&pHeader->sequenceNumber,  &data[14], 4);
+    DRFLAC_COPY_MEMORY(&pHeader->checksum,        &data[18], 4);
+    pHeader->segmentCount     = data[22];
+
+    /* Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. */
+    data[18] = 0;
+    data[19] = 0;
+    data[20] = 0;
+    data[21] = 0;
+
+    for (i = 0; i < 23; ++i) {
+        *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]);
+    }
+
+
+    if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += pHeader->segmentCount;
+
+    for (i = 0; i < pHeader->segmentCount; ++i) {
+        *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]);
+    }
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
+{
+    drflac_uint8 id[4];
+
+    *pBytesRead = 0;
+
+    if (onRead(pUserData, id, 4) != 4) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += 4;
+
+    /* We need to read byte-by-byte until we find the OggS capture pattern. */
+    for (;;) {
+        if (drflac_ogg__is_capture_pattern(id)) {
+            drflac_result result;
+
+            *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
+
+            result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32);
+            if (result == DRFLAC_SUCCESS) {
+                return DRFLAC_SUCCESS;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;
+                } else {
+                    return result;
+                }
+            }
+        } else {
+            /* The first 4 bytes did not equal the capture pattern. Read the next byte and try again. */
+            id[0] = id[1];
+            id[1] = id[2];
+            id[2] = id[3];
+            if (onRead(pUserData, &id[3], 1) != 1) {
+                return DRFLAC_AT_END;
+            }
+            *pBytesRead += 1;
+        }
+    }
+}
+
+
+/*
+The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works
+in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed
+in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type
+dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from
+the physical Ogg bitstream are converted and delivered in native FLAC format.
+*/
+typedef struct
+{
+    drflac_read_proc onRead;                /* The original onRead callback from drflac_open() and family. */
+    drflac_seek_proc onSeek;                /* The original onSeek callback from drflac_open() and family. */
+    void* pUserData;                        /* The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. */
+    drflac_uint64 currentBytePos;           /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */
+    drflac_uint64 firstBytePos;             /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */
+    drflac_uint32 serialNumber;             /* The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. */
+    drflac_ogg_page_header bosPageHeader;   /* Used for seeking. */
+    drflac_ogg_page_header currentPageHeader;
+    drflac_uint32 bytesRemainingInPage;
+    drflac_uint32 pageDataSize;
+    drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE];
+} drflac_oggbs; /* oggbs = Ogg Bitstream */
+
+static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead)
+{
+    size_t bytesActuallyRead = oggbs->onRead(oggbs->pUserData, bufferOut, bytesToRead);
+    oggbs->currentBytePos += bytesActuallyRead;
+
+    return bytesActuallyRead;
+}
+
+static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin)
+{
+    if (origin == drflac_seek_origin_start) {
+        if (offset <= 0x7FFFFFFF) {
+            if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos = offset;
+
+            return DRFLAC_TRUE;
+        } else {
+            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos = offset;
+
+            return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current);
+        }
+    } else {
+        while (offset > 0x7FFFFFFF) {
+            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos += 0x7FFFFFFF;
+            offset -= 0x7FFFFFFF;
+        }
+
+        if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) {    /* <-- Safe cast thanks to the loop above. */
+            return DRFLAC_FALSE;
+        }
+        oggbs->currentBytePos += offset;
+
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod)
+{
+    drflac_ogg_page_header header;
+    for (;;) {
+        drflac_uint32 crc32 = 0;
+        drflac_uint32 bytesRead;
+        drflac_uint32 pageBodySize;
+#ifndef DR_FLAC_NO_CRC
+        drflac_uint32 actualCRC32;
+#endif
+
+        if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+            return DRFLAC_FALSE;
+        }
+        oggbs->currentBytePos += bytesRead;
+
+        pageBodySize = drflac_ogg__get_page_body_size(&header);
+        if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) {
+            continue;   /* Invalid page size. Assume it's corrupted and just move to the next page. */
+        }
+
+        if (header.serialNumber != oggbs->serialNumber) {
+            /* It's not a FLAC page. Skip it. */
+            if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            continue;
+        }
+
+
+        /* We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. */
+        if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) {
+            return DRFLAC_FALSE;
+        }
+        oggbs->pageDataSize = pageBodySize;
+
+#ifndef DR_FLAC_NO_CRC
+        actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize);
+        if (actualCRC32 != header.checksum) {
+            if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) {
+                continue;   /* CRC mismatch. Skip this page. */
+            } else {
+                /*
+                Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we
+                go to the next valid page to ensure we're in a good state, but return false to let the caller know that the
+                seek did not fully complete.
+                */
+                drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch);
+                return DRFLAC_FALSE;
+            }
+        }
+#else
+        (void)recoveryMethod;   /* <-- Silence a warning. */
+#endif
+
+        oggbs->currentPageHeader = header;
+        oggbs->bytesRemainingInPage = pageBodySize;
+        return DRFLAC_TRUE;
+    }
+}
+
+/* Function below is unused at the moment, but I might be re-adding it later. */
+#if 0
+static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg)
+{
+    drflac_uint32 bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage;
+    drflac_uint8 iSeg = 0;
+    drflac_uint32 iByte = 0;
+    while (iByte < bytesConsumedInPage) {
+        drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
+        if (iByte + segmentSize > bytesConsumedInPage) {
+            break;
+        } else {
+            iSeg += 1;
+            iByte += segmentSize;
+        }
+    }
+
+    *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (drflac_uint8)(bytesConsumedInPage - iByte);
+    return iSeg;
+}
+
+static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
+{
+    /* The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page. */
+    for (;;) {
+        drflac_bool32 atEndOfPage = DRFLAC_FALSE;
+
+        drflac_uint8 bytesRemainingInSeg;
+        drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg);
+
+        drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg;
+        for (drflac_uint8 iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) {
+            drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
+            if (segmentSize < 255) {
+                if (iSeg == oggbs->currentPageHeader.segmentCount-1) {
+                    atEndOfPage = DRFLAC_TRUE;
+                }
+
+                break;
+            }
+
+            bytesToEndOfPacketOrPage += segmentSize;
+        }
+
+        /*
+        At this point we will have found either the packet or the end of the page. If were at the end of the page we'll
+        want to load the next page and keep searching for the end of the packet.
+        */
+        drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current);
+        oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage;
+
+        if (atEndOfPage) {
+            /*
+            We're potentially at the next packet, but we need to check the next page first to be sure because the packet may
+            straddle pages.
+            */
+            if (!drflac_oggbs__goto_next_page(oggbs)) {
+                return DRFLAC_FALSE;
+            }
+
+            /* If it's a fresh packet it most likely means we're at the next packet. */
+            if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {
+                return DRFLAC_TRUE;
+            }
+        } else {
+            /* We're at the next packet. */
+            return DRFLAC_TRUE;
+        }
+    }
+}
+
+static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs)
+{
+    /* The bitstream should be sitting on the first byte just after the header of the frame. */
+
+    /* What we're actually doing here is seeking to the start of the next packet. */
+    return drflac_oggbs__seek_to_next_packet(oggbs);
+}
+#endif
+
+static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
+    drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut;
+    size_t bytesRead = 0;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+    DRFLAC_ASSERT(pRunningBufferOut != NULL);
+
+    /* Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one. */
+    while (bytesRead < bytesToRead) {
+        size_t bytesRemainingToRead = bytesToRead - bytesRead;
+
+        if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) {
+            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead);
+            bytesRead += bytesRemainingToRead;
+            oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead;
+            break;
+        }
+
+        /* If we get here it means some of the requested data is contained in the next pages. */
+        if (oggbs->bytesRemainingInPage > 0) {
+            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage);
+            bytesRead += oggbs->bytesRemainingInPage;
+            pRunningBufferOut += oggbs->bytesRemainingInPage;
+            oggbs->bytesRemainingInPage = 0;
+        }
+
+        DRFLAC_ASSERT(bytesRemainingToRead > 0);
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+            break;  /* Failed to go to the next page. Might have simply hit the end of the stream. */
+        }
+    }
+
+    return bytesRead;
+}
+
+static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
+    int bytesSeeked = 0;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
+
+    /* Seeking is always forward which makes things a lot simpler. */
+    if (origin == drflac_seek_origin_start) {
+        if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
+            return DRFLAC_FALSE;
+        }
+
+        return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current);
+    }
+
+    DRFLAC_ASSERT(origin == drflac_seek_origin_current);
+
+    while (bytesSeeked < offset) {
+        int bytesRemainingToSeek = offset - bytesSeeked;
+        DRFLAC_ASSERT(bytesRemainingToSeek >= 0);
+
+        if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) {
+            bytesSeeked += bytesRemainingToSeek;
+            (void)bytesSeeked;  /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */
+            oggbs->bytesRemainingInPage -= bytesRemainingToSeek;
+            break;
+        }
+
+        /* If we get here it means some of the requested data is contained in the next pages. */
+        if (oggbs->bytesRemainingInPage > 0) {
+            bytesSeeked += (int)oggbs->bytesRemainingInPage;
+            oggbs->bytesRemainingInPage = 0;
+        }
+
+        DRFLAC_ASSERT(bytesRemainingToSeek > 0);
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
+            /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */
+            return DRFLAC_FALSE;
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+    drflac_uint64 originalBytePos;
+    drflac_uint64 runningGranulePosition;
+    drflac_uint64 runningFrameBytePos;
+    drflac_uint64 runningPCMFrameCount;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+
+    originalBytePos = oggbs->currentBytePos;   /* For recovery. Points to the OggS identifier. */
+
+    /* First seek to the first frame. */
+    if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes)) {
+        return DRFLAC_FALSE;
+    }
+    oggbs->bytesRemainingInPage = 0;
+
+    runningGranulePosition = 0;
+    for (;;) {
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+            drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
+            return DRFLAC_FALSE;   /* Never did find that sample... */
+        }
+
+        runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize;
+        if (oggbs->currentPageHeader.granulePosition >= pcmFrameIndex) {
+            break; /* The sample is somewhere in the previous page. */
+        }
+
+        /*
+        At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we
+        disregard any pages that do not begin a fresh packet.
+        */
+        if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {    /* <-- Is it a fresh page? */
+            if (oggbs->currentPageHeader.segmentTable[0] >= 2) {
+                drflac_uint8 firstBytesInPage[2];
+                firstBytesInPage[0] = oggbs->pageData[0];
+                firstBytesInPage[1] = oggbs->pageData[1];
+
+                if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) {    /* <-- Does the page begin with a frame's sync code? */
+                    runningGranulePosition = oggbs->currentPageHeader.granulePosition;
+                }
+
+                continue;
+            }
+        }
+    }
+
+    /*
+    We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the
+    start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of
+    a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until
+    we find the one containing the target sample.
+    */
+    if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) {
+        return DRFLAC_FALSE;
+    }
+    if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep
+    looping over these frames until we find the one containing the sample we're after.
+    */
+    runningPCMFrameCount = runningGranulePosition;
+    for (;;) {
+        /*
+        There are two ways to find the sample and seek past irrelevant frames:
+          1) Use the native FLAC decoder.
+          2) Use Ogg's framing system.
+
+        Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to
+        do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code
+        duplication for the decoding of frame headers.
+
+        Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg
+        bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the
+        standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks
+        the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read
+        using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), need to be re-implemented so as to
+        avoid the use of the drflac_bs object.
+
+        Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons:
+          1) Seeking is already partially accelerated using Ogg's paging system in the code block above.
+          2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon.
+          3) Simplicity.
+        */
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+        drflac_uint64 pcmFrameCountInThisFrame;
+
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+
+        /* If we are seeking to the end of the file and we've just hit it, we're done. */
+        if (pcmFrameIndex == pFlac->totalPCMFrameCount && (runningPCMFrameCount + pcmFrameCountInThisFrame) == pFlac->totalPCMFrameCount) {
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                return DRFLAC_TRUE;
+            } else {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) {
+            /*
+            The sample should be in this FLAC frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount);    /* <-- Safe cast because the maximum number of samples in a frame is 65535. */
+                if (pcmFramesToDecode == 0) {
+                    return DRFLAC_TRUE;
+                }
+
+                pFlac->currentPCMFrame = runningPCMFrameCount;
+
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   /* CRC mismatch. Pretend this frame never existed. */
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                runningPCMFrameCount += pcmFrameCountInThisFrame;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   /* CRC mismatch. Pretend this frame never existed. */
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
+        }
+    }
+}
+
+
+
+static drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
+{
+    drflac_ogg_page_header header;
+    drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
+    drflac_uint32 bytesRead = 0;
+
+    /* Pre Condition: The bit stream should be sitting just past the 4-byte OggS capture pattern. */
+    (void)relaxed;
+
+    pInit->container = drflac_container_ogg;
+    pInit->oggFirstBytePos = 0;
+
+    /*
+    We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the
+    stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if
+    any match the FLAC specification. Important to keep in mind that the stream may be multiplexed.
+    */
+    if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+        return DRFLAC_FALSE;
+    }
+    pInit->runningFilePos += bytesRead;
+
+    for (;;) {
+        int pageBodySize;
+
+        /* Break if we're past the beginning of stream page. */
+        if ((header.headerType & 0x02) == 0) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Check if it's a FLAC header. */
+        pageBodySize = drflac_ogg__get_page_body_size(&header);
+        if (pageBodySize == 51) {   /* 51 = the lacing value of the FLAC header packet. */
+            /* It could be a FLAC page... */
+            drflac_uint32 bytesRemainingInPage = pageBodySize;
+            drflac_uint8 packetType;
+
+            if (onRead(pUserData, &packetType, 1) != 1) {
+                return DRFLAC_FALSE;
+            }
+
+            bytesRemainingInPage -= 1;
+            if (packetType == 0x7F) {
+                /* Increasingly more likely to be a FLAC page... */
+                drflac_uint8 sig[4];
+                if (onRead(pUserData, sig, 4) != 4) {
+                    return DRFLAC_FALSE;
+                }
+
+                bytesRemainingInPage -= 4;
+                if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') {
+                    /* Almost certainly a FLAC page... */
+                    drflac_uint8 mappingVersion[2];
+                    if (onRead(pUserData, mappingVersion, 2) != 2) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (mappingVersion[0] != 1) {
+                        return DRFLAC_FALSE;   /* Only supporting version 1.x of the Ogg mapping. */
+                    }
+
+                    /*
+                    The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to
+                    be handling it in a generic way based on the serial number and packet types.
+                    */
+                    if (!onSeek(pUserData, 2, drflac_seek_origin_current)) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    /* Expecting the native FLAC signature "fLaC". */
+                    if (onRead(pUserData, sig, 4) != 4) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') {
+                        /* The remaining data in the page should be the STREAMINFO block. */
+                        drflac_streaminfo streaminfo;
+                        drflac_uint8 isLastBlock;
+                        drflac_uint8 blockType;
+                        drflac_uint32 blockSize;
+                        if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
+                            return DRFLAC_FALSE;
+                        }
+
+                        if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
+                            return DRFLAC_FALSE;    /* Invalid block type. First block must be the STREAMINFO block. */
+                        }
+
+                        if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
+                            /* Success! */
+                            pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
+                            pInit->sampleRate              = streaminfo.sampleRate;
+                            pInit->channels                = streaminfo.channels;
+                            pInit->bitsPerSample           = streaminfo.bitsPerSample;
+                            pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
+                            pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;
+                            pInit->hasMetadataBlocks       = !isLastBlock;
+
+                            if (onMeta) {
+                                drflac_metadata metadata;
+                                metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
+                                metadata.pRawData = NULL;
+                                metadata.rawDataSize = 0;
+                                metadata.data.streaminfo = streaminfo;
+                                onMeta(pUserDataMD, &metadata);
+                            }
+
+                            pInit->runningFilePos  += pageBodySize;
+                            pInit->oggFirstBytePos  = pInit->runningFilePos - 79;   /* Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page. */
+                            pInit->oggSerial        = header.serialNumber;
+                            pInit->oggBosHeader     = header;
+                            break;
+                        } else {
+                            /* Failed to read STREAMINFO block. Aww, so close... */
+                            return DRFLAC_FALSE;
+                        }
+                    } else {
+                        /* Invalid file. */
+                        return DRFLAC_FALSE;
+                    }
+                } else {
+                    /* Not a FLAC header. Skip it. */
+                    if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* Not a FLAC header. Seek past the entire page and move on to the next. */
+                if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
+                    return DRFLAC_FALSE;
+                }
+            }
+        } else {
+            if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        pInit->runningFilePos += pageBodySize;
+
+
+        /* Read the header of the next page. */
+        if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+            return DRFLAC_FALSE;
+        }
+        pInit->runningFilePos += bytesRead;
+    }
+
+    /*
+    If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next
+    packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the
+    Ogg bistream object.
+    */
+    pInit->hasMetadataBlocks = DRFLAC_TRUE;    /* <-- Always have at least VORBIS_COMMENT metadata block. */
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
+{
+    drflac_bool32 relaxed;
+    drflac_uint8 id[4];
+
+    if (pInit == NULL || onRead == NULL || onSeek == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ZERO_MEMORY(pInit, sizeof(*pInit));
+    pInit->onRead       = onRead;
+    pInit->onSeek       = onSeek;
+    pInit->onMeta       = onMeta;
+    pInit->container    = container;
+    pInit->pUserData    = pUserData;
+    pInit->pUserDataMD  = pUserDataMD;
+
+    pInit->bs.onRead    = onRead;
+    pInit->bs.onSeek    = onSeek;
+    pInit->bs.pUserData = pUserData;
+    drflac__reset_cache(&pInit->bs);
+
+
+    /* If the container is explicitly defined then we can try opening in relaxed mode. */
+    relaxed = container != drflac_container_unknown;
+
+    /* Skip over any ID3 tags. */
+    for (;;) {
+        if (onRead(pUserData, id, 4) != 4) {
+            return DRFLAC_FALSE;    /* Ran out of data. */
+        }
+        pInit->runningFilePos += 4;
+
+        if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') {
+            drflac_uint8 header[6];
+            drflac_uint8 flags;
+            drflac_uint32 headerSize;
+
+            if (onRead(pUserData, header, 6) != 6) {
+                return DRFLAC_FALSE;    /* Ran out of data. */
+            }
+            pInit->runningFilePos += 6;
+
+            flags = header[1];
+
+            DRFLAC_COPY_MEMORY(&headerSize, header+2, 4);
+            headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize));
+            if (flags & 0x10) {
+                headerSize += 10;
+            }
+
+            if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;    /* Failed to seek past the tag. */
+            }
+            pInit->runningFilePos += headerSize;
+        } else {
+            break;
+        }
+    }
+
+    if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') {
+        return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+    }
+#ifndef DR_FLAC_NO_OGG
+    if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') {
+        return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+    }
+#endif
+
+    /* If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. */
+    if (relaxed) {
+        if (container == drflac_container_native) {
+            return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+        }
+#ifndef DR_FLAC_NO_OGG
+        if (container == drflac_container_ogg) {
+            return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+        }
+#endif
+    }
+
+    /* Unsupported container. */
+    return DRFLAC_FALSE;
+}
+
+static void drflac__init_from_info(drflac* pFlac, const drflac_init_info* pInit)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+    DRFLAC_ASSERT(pInit != NULL);
+
+    DRFLAC_ZERO_MEMORY(pFlac, sizeof(*pFlac));
+    pFlac->bs                      = pInit->bs;
+    pFlac->onMeta                  = pInit->onMeta;
+    pFlac->pUserDataMD             = pInit->pUserDataMD;
+    pFlac->maxBlockSizeInPCMFrames = pInit->maxBlockSizeInPCMFrames;
+    pFlac->sampleRate              = pInit->sampleRate;
+    pFlac->channels                = (drflac_uint8)pInit->channels;
+    pFlac->bitsPerSample           = (drflac_uint8)pInit->bitsPerSample;
+    pFlac->totalPCMFrameCount      = pInit->totalPCMFrameCount;
+    pFlac->container               = pInit->container;
+}
+
+
+static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac_init_info init;
+    drflac_uint32 allocationSize;
+    drflac_uint32 wholeSIMDVectorCountPerChannel;
+    drflac_uint32 decodedSamplesAllocationSize;
+#ifndef DR_FLAC_NO_OGG
+    drflac_oggbs* pOggbs = NULL;
+#endif
+    drflac_uint64 firstFramePos;
+    drflac_uint64 seektablePos;
+    drflac_uint32 seekpointCount;
+    drflac_allocation_callbacks allocationCallbacks;
+    drflac* pFlac;
+
+    /* CPU support first. */
+    drflac__init_cpu_caps();
+
+    if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks != NULL) {
+        allocationCallbacks = *pAllocationCallbacks;
+        if (allocationCallbacks.onFree == NULL || (allocationCallbacks.onMalloc == NULL && allocationCallbacks.onRealloc == NULL)) {
+            return NULL;    /* Invalid allocation callbacks. */
+        }
+    } else {
+        allocationCallbacks.pUserData = NULL;
+        allocationCallbacks.onMalloc  = drflac__malloc_default;
+        allocationCallbacks.onRealloc = drflac__realloc_default;
+        allocationCallbacks.onFree    = drflac__free_default;
+    }
+
+
+    /*
+    The size of the allocation for the drflac object needs to be large enough to fit the following:
+      1) The main members of the drflac structure
+      2) A block of memory large enough to store the decoded samples of the largest frame in the stream
+      3) If the container is Ogg, a drflac_oggbs object
+
+    The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration
+    the different SIMD instruction sets.
+    */
+    allocationSize = sizeof(drflac);
+
+    /*
+    The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector
+    we are supporting.
+    */
+    if ((init.maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) {
+        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32)));
+    } else {
+        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1;
+    }
+
+    decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels;
+
+    allocationSize += decodedSamplesAllocationSize;
+    allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE;  /* Allocate extra bytes to ensure we have enough for alignment. */
+
+#ifndef DR_FLAC_NO_OGG
+    /* There's additional data required for Ogg streams. */
+    if (init.container == drflac_container_ogg) {
+        allocationSize += sizeof(drflac_oggbs);
+
+        pOggbs = (drflac_oggbs*)drflac__malloc_from_callbacks(sizeof(*pOggbs), &allocationCallbacks);
+        if (pOggbs == NULL) {
+            return NULL; /*DRFLAC_OUT_OF_MEMORY;*/
+        }
+
+        DRFLAC_ZERO_MEMORY(pOggbs, sizeof(*pOggbs));
+        pOggbs->onRead = onRead;
+        pOggbs->onSeek = onSeek;
+        pOggbs->pUserData = pUserData;
+        pOggbs->currentBytePos = init.oggFirstBytePos;
+        pOggbs->firstBytePos = init.oggFirstBytePos;
+        pOggbs->serialNumber = init.oggSerial;
+        pOggbs->bosPageHeader = init.oggBosHeader;
+        pOggbs->bytesRemainingInPage = 0;
+    }
+#endif
+
+    /*
+    This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to
+    consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading
+    and decoding the metadata.
+    */
+    firstFramePos  = 42;   /* <-- We know we are at byte 42 at this point. */
+    seektablePos   = 0;
+    seekpointCount = 0;
+    if (init.hasMetadataBlocks) {
+        drflac_read_proc onReadOverride = onRead;
+        drflac_seek_proc onSeekOverride = onSeek;
+        void* pUserDataOverride = pUserData;
+
+#ifndef DR_FLAC_NO_OGG
+        if (init.container == drflac_container_ogg) {
+            onReadOverride = drflac__on_read_ogg;
+            onSeekOverride = drflac__on_seek_ogg;
+            pUserDataOverride = (void*)pOggbs;
+        }
+#endif
+
+        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seekpointCount, &allocationCallbacks)) {
+        #ifndef DR_FLAC_NO_OGG
+            drflac__free_from_callbacks(pOggbs, &allocationCallbacks);
+        #endif
+            return NULL;
+        }
+
+        allocationSize += seekpointCount * sizeof(drflac_seekpoint);
+    }
+
+
+    pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks);
+    if (pFlac == NULL) {
+    #ifndef DR_FLAC_NO_OGG
+        drflac__free_from_callbacks(pOggbs, &allocationCallbacks);
+    #endif
+        return NULL;
+    }
+
+    drflac__init_from_info(pFlac, &init);
+    pFlac->allocationCallbacks = allocationCallbacks;
+    pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE);
+
+#ifndef DR_FLAC_NO_OGG
+    if (init.container == drflac_container_ogg) {
+        drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + (seekpointCount * sizeof(drflac_seekpoint)));
+        DRFLAC_COPY_MEMORY(pInternalOggbs, pOggbs, sizeof(*pOggbs));
+
+        /* At this point the pOggbs object has been handed over to pInternalOggbs and can be freed. */
+        drflac__free_from_callbacks(pOggbs, &allocationCallbacks);
+        pOggbs = NULL;
+
+        /* The Ogg bistream needs to be layered on top of the original bitstream. */
+        pFlac->bs.onRead = drflac__on_read_ogg;
+        pFlac->bs.onSeek = drflac__on_seek_ogg;
+        pFlac->bs.pUserData = (void*)pInternalOggbs;
+        pFlac->_oggbs = (void*)pInternalOggbs;
+    }
+#endif
+
+    pFlac->firstFLACFramePosInBytes = firstFramePos;
+
+    /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */
+#ifndef DR_FLAC_NO_OGG
+    if (init.container == drflac_container_ogg)
+    {
+        pFlac->pSeekpoints = NULL;
+        pFlac->seekpointCount = 0;
+    }
+    else
+#endif
+    {
+        /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */
+        if (seektablePos != 0) {
+            pFlac->seekpointCount = seekpointCount;
+            pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize);
+
+            DRFLAC_ASSERT(pFlac->bs.onSeek != NULL);
+            DRFLAC_ASSERT(pFlac->bs.onRead != NULL);
+
+            /* Seek to the seektable, then just read directly into our seektable buffer. */
+            if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) {
+                drflac_uint32 iSeekpoint;
+
+                for (iSeekpoint = 0; iSeekpoint < seekpointCount; iSeekpoint += 1) {
+                    if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints + iSeekpoint, DRFLAC_SEEKPOINT_SIZE_IN_BYTES) == DRFLAC_SEEKPOINT_SIZE_IN_BYTES) {
+                        /* Endian swap. */
+                        pFlac->pSeekpoints[iSeekpoint].firstPCMFrame   = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame);
+                        pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset);
+                        pFlac->pSeekpoints[iSeekpoint].pcmFrameCount   = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount);
+                    } else {
+                        /* Failed to read the seektable. Pretend we don't have one. */
+                        pFlac->pSeekpoints = NULL;
+                        pFlac->seekpointCount = 0;
+                        break;
+                    }
+                }
+
+                /* We need to seek back to where we were. If this fails it's a critical error. */
+                if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, drflac_seek_origin_start)) {
+                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                    return NULL;
+                }
+            } else {
+                /* Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. */
+                pFlac->pSeekpoints = NULL;
+                pFlac->seekpointCount = 0;
+            }
+        }
+    }
+
+
+    /*
+    If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode
+    the first frame.
+    */
+    if (!init.hasStreamInfoBlock) {
+        pFlac->currentFLACFrame.header = init.firstFrameHeader;
+        for (;;) {
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                break;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                        drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                        return NULL;
+                    }
+                    continue;
+                } else {
+                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                    return NULL;
+                }
+            }
+        }
+    }
+
+    return pFlac;
+}
+
+
+
+#ifndef DR_FLAC_NO_STDIO
+#include <stdio.h>
+#ifndef DR_FLAC_NO_WCHAR
+#include <wchar.h>      /* For wcslen(), wcsrtombs() */
+#endif
+
+/* Errno */
+/* drflac_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
+#include <errno.h>
+static drflac_result drflac_result_from_errno(int e)
+{
+    switch (e)
+    {
+        case 0: return DRFLAC_SUCCESS;
+    #ifdef EPERM
+        case EPERM: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef ENOENT
+        case ENOENT: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef ESRCH
+        case ESRCH: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef EINTR
+        case EINTR: return DRFLAC_INTERRUPT;
+    #endif
+    #ifdef EIO
+        case EIO: return DRFLAC_IO_ERROR;
+    #endif
+    #ifdef ENXIO
+        case ENXIO: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef E2BIG
+        case E2BIG: return DRFLAC_INVALID_ARGS;
+    #endif
+    #ifdef ENOEXEC
+        case ENOEXEC: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef EBADF
+        case EBADF: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ECHILD
+        case ECHILD: return DRFLAC_ERROR;
+    #endif
+    #ifdef EAGAIN
+        case EAGAIN: return DRFLAC_UNAVAILABLE;
+    #endif
+    #ifdef ENOMEM
+        case ENOMEM: return DRFLAC_OUT_OF_MEMORY;
+    #endif
+    #ifdef EACCES
+        case EACCES: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef EFAULT
+        case EFAULT: return DRFLAC_BAD_ADDRESS;
+    #endif
+    #ifdef ENOTBLK
+        case ENOTBLK: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBUSY
+        case EBUSY: return DRFLAC_BUSY;
+    #endif
+    #ifdef EEXIST
+        case EEXIST: return DRFLAC_ALREADY_EXISTS;
+    #endif
+    #ifdef EXDEV
+        case EXDEV: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENODEV
+        case ENODEV: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef ENOTDIR
+        case ENOTDIR: return DRFLAC_NOT_DIRECTORY;
+    #endif
+    #ifdef EISDIR
+        case EISDIR: return DRFLAC_IS_DIRECTORY;
+    #endif
+    #ifdef EINVAL
+        case EINVAL: return DRFLAC_INVALID_ARGS;
+    #endif
+    #ifdef ENFILE
+        case ENFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef EMFILE
+        case EMFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef ENOTTY
+        case ENOTTY: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef ETXTBSY
+        case ETXTBSY: return DRFLAC_BUSY;
+    #endif
+    #ifdef EFBIG
+        case EFBIG: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef ENOSPC
+        case ENOSPC: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef ESPIPE
+        case ESPIPE: return DRFLAC_BAD_SEEK;
+    #endif
+    #ifdef EROFS
+        case EROFS: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef EMLINK
+        case EMLINK: return DRFLAC_TOO_MANY_LINKS;
+    #endif
+    #ifdef EPIPE
+        case EPIPE: return DRFLAC_BAD_PIPE;
+    #endif
+    #ifdef EDOM
+        case EDOM: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef ERANGE
+        case ERANGE: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef EDEADLK
+        case EDEADLK: return DRFLAC_DEADLOCK;
+    #endif
+    #ifdef ENAMETOOLONG
+        case ENAMETOOLONG: return DRFLAC_PATH_TOO_LONG;
+    #endif
+    #ifdef ENOLCK
+        case ENOLCK: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOSYS
+        case ENOSYS: return DRFLAC_NOT_IMPLEMENTED;
+    #endif
+    #ifdef ENOTEMPTY
+        case ENOTEMPTY: return DRFLAC_DIRECTORY_NOT_EMPTY;
+    #endif
+    #ifdef ELOOP
+        case ELOOP: return DRFLAC_TOO_MANY_LINKS;
+    #endif
+    #ifdef ENOMSG
+        case ENOMSG: return DRFLAC_NO_MESSAGE;
+    #endif
+    #ifdef EIDRM
+        case EIDRM: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECHRNG
+        case ECHRNG: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL2NSYNC
+        case EL2NSYNC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL3HLT
+        case EL3HLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL3RST
+        case EL3RST: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELNRNG
+        case ELNRNG: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef EUNATCH
+        case EUNATCH: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOCSI
+        case ENOCSI: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL2HLT
+        case EL2HLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADE
+        case EBADE: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADR
+        case EBADR: return DRFLAC_ERROR;
+    #endif
+    #ifdef EXFULL
+        case EXFULL: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOANO
+        case ENOANO: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADRQC
+        case EBADRQC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADSLT
+        case EBADSLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBFONT
+        case EBFONT: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ENOSTR
+        case ENOSTR: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENODATA
+        case ENODATA: return DRFLAC_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ETIME
+        case ETIME: return DRFLAC_TIMEOUT;
+    #endif
+    #ifdef ENOSR
+        case ENOSR: return DRFLAC_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ENONET
+        case ENONET: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENOPKG
+        case ENOPKG: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMOTE
+        case EREMOTE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOLINK
+        case ENOLINK: return DRFLAC_ERROR;
+    #endif
+    #ifdef EADV
+        case EADV: return DRFLAC_ERROR;
+    #endif
+    #ifdef ESRMNT
+        case ESRMNT: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECOMM
+        case ECOMM: return DRFLAC_ERROR;
+    #endif
+    #ifdef EPROTO
+        case EPROTO: return DRFLAC_ERROR;
+    #endif
+    #ifdef EMULTIHOP
+        case EMULTIHOP: return DRFLAC_ERROR;
+    #endif
+    #ifdef EDOTDOT
+        case EDOTDOT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADMSG
+        case EBADMSG: return DRFLAC_BAD_MESSAGE;
+    #endif
+    #ifdef EOVERFLOW
+        case EOVERFLOW: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef ENOTUNIQ
+        case ENOTUNIQ: return DRFLAC_NOT_UNIQUE;
+    #endif
+    #ifdef EBADFD
+        case EBADFD: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMCHG
+        case EREMCHG: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELIBACC
+        case ELIBACC: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef ELIBBAD
+        case ELIBBAD: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ELIBSCN
+        case ELIBSCN: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ELIBMAX
+        case ELIBMAX: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELIBEXEC
+        case ELIBEXEC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EILSEQ
+        case EILSEQ: return DRFLAC_INVALID_DATA;
+    #endif
+    #ifdef ERESTART
+        case ERESTART: return DRFLAC_ERROR;
+    #endif
+    #ifdef ESTRPIPE
+        case ESTRPIPE: return DRFLAC_ERROR;
+    #endif
+    #ifdef EUSERS
+        case EUSERS: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTSOCK
+        case ENOTSOCK: return DRFLAC_NOT_SOCKET;
+    #endif
+    #ifdef EDESTADDRREQ
+        case EDESTADDRREQ: return DRFLAC_NO_ADDRESS;
+    #endif
+    #ifdef EMSGSIZE
+        case EMSGSIZE: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef EPROTOTYPE
+        case EPROTOTYPE: return DRFLAC_BAD_PROTOCOL;
+    #endif
+    #ifdef ENOPROTOOPT
+        case ENOPROTOOPT: return DRFLAC_PROTOCOL_UNAVAILABLE;
+    #endif
+    #ifdef EPROTONOSUPPORT
+        case EPROTONOSUPPORT: return DRFLAC_PROTOCOL_NOT_SUPPORTED;
+    #endif
+    #ifdef ESOCKTNOSUPPORT
+        case ESOCKTNOSUPPORT: return DRFLAC_SOCKET_NOT_SUPPORTED;
+    #endif
+    #ifdef EOPNOTSUPP
+        case EOPNOTSUPP: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef EPFNOSUPPORT
+        case EPFNOSUPPORT: return DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EAFNOSUPPORT
+        case EAFNOSUPPORT: return DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EADDRINUSE
+        case EADDRINUSE: return DRFLAC_ALREADY_IN_USE;
+    #endif
+    #ifdef EADDRNOTAVAIL
+        case EADDRNOTAVAIL: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENETDOWN
+        case ENETDOWN: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENETUNREACH
+        case ENETUNREACH: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENETRESET
+        case ENETRESET: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ECONNABORTED
+        case ECONNABORTED: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ECONNRESET
+        case ECONNRESET: return DRFLAC_CONNECTION_RESET;
+    #endif
+    #ifdef ENOBUFS
+        case ENOBUFS: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef EISCONN
+        case EISCONN: return DRFLAC_ALREADY_CONNECTED;
+    #endif
+    #ifdef ENOTCONN
+        case ENOTCONN: return DRFLAC_NOT_CONNECTED;
+    #endif
+    #ifdef ESHUTDOWN
+        case ESHUTDOWN: return DRFLAC_ERROR;
+    #endif
+    #ifdef ETOOMANYREFS
+        case ETOOMANYREFS: return DRFLAC_ERROR;
+    #endif
+    #ifdef ETIMEDOUT
+        case ETIMEDOUT: return DRFLAC_TIMEOUT;
+    #endif
+    #ifdef ECONNREFUSED
+        case ECONNREFUSED: return DRFLAC_CONNECTION_REFUSED;
+    #endif
+    #ifdef EHOSTDOWN
+        case EHOSTDOWN: return DRFLAC_NO_HOST;
+    #endif
+    #ifdef EHOSTUNREACH
+        case EHOSTUNREACH: return DRFLAC_NO_HOST;
+    #endif
+    #ifdef EALREADY
+        case EALREADY: return DRFLAC_IN_PROGRESS;
+    #endif
+    #ifdef EINPROGRESS
+        case EINPROGRESS: return DRFLAC_IN_PROGRESS;
+    #endif
+    #ifdef ESTALE
+        case ESTALE: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef EUCLEAN
+        case EUCLEAN: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTNAM
+        case ENOTNAM: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENAVAIL
+        case ENAVAIL: return DRFLAC_ERROR;
+    #endif
+    #ifdef EISNAM
+        case EISNAM: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMOTEIO
+        case EREMOTEIO: return DRFLAC_IO_ERROR;
+    #endif
+    #ifdef EDQUOT
+        case EDQUOT: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef ENOMEDIUM
+        case ENOMEDIUM: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef EMEDIUMTYPE
+        case EMEDIUMTYPE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECANCELED
+        case ECANCELED: return DRFLAC_CANCELLED;
+    #endif
+    #ifdef ENOKEY
+        case ENOKEY: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYEXPIRED
+        case EKEYEXPIRED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYREVOKED
+        case EKEYREVOKED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYREJECTED
+        case EKEYREJECTED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EOWNERDEAD
+        case EOWNERDEAD: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTRECOVERABLE
+        case ENOTRECOVERABLE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ERFKILL
+        case ERFKILL: return DRFLAC_ERROR;
+    #endif
+    #ifdef EHWPOISON
+        case EHWPOISON: return DRFLAC_ERROR;
+    #endif
+        default: return DRFLAC_ERROR;
+    }
+}
+/* End Errno */
+
+/* fopen */
+static drflac_result drflac_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
+{
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    errno_t err;
+#endif
+
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRFLAC_INVALID_ARGS;
+    }
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    err = fopen_s(ppFile, pFilePath, pOpenMode);
+    if (err != 0) {
+        return drflac_result_from_errno(err);
+    }
+#else
+#if defined(_WIN32) || defined(__APPLE__)
+    *ppFile = fopen(pFilePath, pOpenMode);
+#else
+    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
+        *ppFile = fopen64(pFilePath, pOpenMode);
+    #else
+        *ppFile = fopen(pFilePath, pOpenMode);
+    #endif
+#endif
+    if (*ppFile == NULL) {
+        drflac_result result = drflac_result_from_errno(errno);
+        if (result == DRFLAC_SUCCESS) {
+            result = DRFLAC_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
+        }
+
+        return result;
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+/*
+_wfopen() isn't always available in all compilation environments.
+
+    * Windows only.
+    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
+    * MinGW-64 (both 32- and 64-bit) seems to support it.
+    * MinGW wraps it in !defined(__STRICT_ANSI__).
+    * OpenWatcom wraps it in !defined(_NO_EXT_KEYS).
+
+This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
+fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
+*/
+#if defined(_WIN32)
+    #if defined(_MSC_VER) || defined(__MINGW64__) || (!defined(__STRICT_ANSI__) && !defined(_NO_EXT_KEYS))
+        #define DRFLAC_HAS_WFOPEN
+    #endif
+#endif
+
+#ifndef DR_FLAC_NO_WCHAR
+static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRFLAC_INVALID_ARGS;
+    }
+
+#if defined(DRFLAC_HAS_WFOPEN)
+    {
+        /* Use _wfopen() on Windows. */
+    #if defined(_MSC_VER) && _MSC_VER >= 1400
+        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
+        if (err != 0) {
+            return drflac_result_from_errno(err);
+        }
+    #else
+        *ppFile = _wfopen(pFilePath, pOpenMode);
+        if (*ppFile == NULL) {
+            return drflac_result_from_errno(errno);
+        }
+    #endif
+        (void)pAllocationCallbacks;
+    }
+#else
+    /*
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because
+	fopen() is locale specific. The only real way I can think of to do this is with wcsrtombs(). Note
+	that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler
+	error I'll look into improving compatibility.
+    */
+
+	/*
+	Some compilers don't support wchar_t or wcsrtombs() which we're using below. In this case we just
+	need to abort with an error. If you encounter a compiler lacking such support, add it to this list
+	and submit a bug report and it'll be added to the library upstream.
+	*/
+	#if defined(__DJGPP__)
+	{
+		/* Nothing to do here. This will fall through to the error check below. */
+	}
+	#else
+    {
+        mbstate_t mbs;
+        size_t lenMB;
+        const wchar_t* pFilePathTemp = pFilePath;
+        char* pFilePathMB = NULL;
+        char pOpenModeMB[32] = {0};
+
+        /* Get the length first. */
+        DRFLAC_ZERO_OBJECT(&mbs);
+        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
+        if (lenMB == (size_t)-1) {
+            return drflac_result_from_errno(errno);
+        }
+
+        pFilePathMB = (char*)drflac__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
+        if (pFilePathMB == NULL) {
+            return DRFLAC_OUT_OF_MEMORY;
+        }
+
+        pFilePathTemp = pFilePath;
+        DRFLAC_ZERO_OBJECT(&mbs);
+        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
+
+        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
+        {
+            size_t i = 0;
+            for (;;) {
+                if (pOpenMode[i] == 0) {
+                    pOpenModeMB[i] = '\0';
+                    break;
+                }
+
+                pOpenModeMB[i] = (char)pOpenMode[i];
+                i += 1;
+            }
+        }
+
+        *ppFile = fopen(pFilePathMB, pOpenModeMB);
+
+        drflac__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
+    }
+	#endif
+
+    if (*ppFile == NULL) {
+        return DRFLAC_ERROR;
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+#endif
+/* End fopen */
+
+static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData);
+}
+
+static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
+
+    return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+}
+
+
+DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return NULL;
+    }
+
+    return pFlac;
+}
+
+#ifndef DR_FLAC_NO_WCHAR
+DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return NULL;
+    }
+
+    return pFlac;
+}
+#endif
+
+DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return pFlac;
+    }
+
+    return pFlac;
+}
+
+#ifndef DR_FLAC_NO_WCHAR
+DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return pFlac;
+    }
+
+    return pFlac;
+}
+#endif
+#endif  /* DR_FLAC_NO_STDIO */
+
+static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
+    size_t bytesRemaining;
+
+    DRFLAC_ASSERT(memoryStream != NULL);
+    DRFLAC_ASSERT(memoryStream->dataSize >= memoryStream->currentReadPos);
+
+    bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos;
+    if (bytesToRead > bytesRemaining) {
+        bytesToRead = bytesRemaining;
+    }
+
+    if (bytesToRead > 0) {
+        DRFLAC_COPY_MEMORY(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead);
+        memoryStream->currentReadPos += bytesToRead;
+    }
+
+    return bytesToRead;
+}
+
+static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
+
+    DRFLAC_ASSERT(memoryStream != NULL);
+    DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */
+
+    if (offset > (drflac_int64)memoryStream->dataSize) {
+        return DRFLAC_FALSE;
+    }
+
+    if (origin == drflac_seek_origin_current) {
+        if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) {
+            memoryStream->currentReadPos += offset;
+        } else {
+            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
+        }
+    } else {
+        if ((drflac_uint32)offset <= memoryStream->dataSize) {
+            memoryStream->currentReadPos = offset;
+        } else {
+            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac__memory_stream memoryStream;
+    drflac* pFlac;
+
+    memoryStream.data = (const drflac_uint8*)pData;
+    memoryStream.dataSize = dataSize;
+    memoryStream.currentReadPos = 0;
+    pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    pFlac->memoryStream = memoryStream;
+
+    /* This is an awful hack... */
+#ifndef DR_FLAC_NO_OGG
+    if (pFlac->container == drflac_container_ogg)
+    {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        oggbs->pUserData = &pFlac->memoryStream;
+    }
+    else
+#endif
+    {
+        pFlac->bs.pUserData = &pFlac->memoryStream;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac__memory_stream memoryStream;
+    drflac* pFlac;
+
+    memoryStream.data = (const drflac_uint8*)pData;
+    memoryStream.dataSize = dataSize;
+    memoryStream.currentReadPos = 0;
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    pFlac->memoryStream = memoryStream;
+
+    /* This is an awful hack... */
+#ifndef DR_FLAC_NO_OGG
+    if (pFlac->container == drflac_container_ogg)
+    {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        oggbs->pUserData = &pFlac->memoryStream;
+    }
+    else
+#endif
+    {
+        pFlac->bs.pUserData = &pFlac->memoryStream;
+    }
+
+    return pFlac;
+}
+
+
+
+DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
+}
+DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData, pAllocationCallbacks);
+}
+
+DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
+}
+DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData, pAllocationCallbacks);
+}
+
+DRFLAC_API void drflac_close(drflac* pFlac)
+{
+    if (pFlac == NULL) {
+        return;
+    }
+
+#ifndef DR_FLAC_NO_STDIO
+    /*
+    If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file()
+    was used by looking at the callbacks.
+    */
+    if (pFlac->bs.onRead == drflac__on_read_stdio) {
+        fclose((FILE*)pFlac->bs.pUserData);
+    }
+
+#ifndef DR_FLAC_NO_OGG
+    /* Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained. */
+    if (pFlac->container == drflac_container_ogg) {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        DRFLAC_ASSERT(pFlac->bs.onRead == drflac__on_read_ogg);
+
+        if (oggbs->onRead == drflac__on_read_stdio) {
+            fclose((FILE*)oggbs->pUserData);
+        }
+    }
+#endif
+#endif
+
+    drflac__free_from_callbacks(pFlac, &pFlac->allocationCallbacks);
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0;
+        pOutputSamples[i*8+1] = (drflac_int32)right0;
+        pOutputSamples[i*8+2] = (drflac_int32)left1;
+        pOutputSamples[i*8+3] = (drflac_int32)right1;
+        pOutputSamples[i*8+4] = (drflac_int32)left2;
+        pOutputSamples[i*8+5] = (drflac_int32)right2;
+        pOutputSamples[i*8+6] = (drflac_int32)left3;
+        pOutputSamples[i*8+7] = (drflac_int32)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+
+        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right = vsubq_u32(left, side);
+
+        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0;
+        pOutputSamples[i*8+1] = (drflac_int32)right0;
+        pOutputSamples[i*8+2] = (drflac_int32)left1;
+        pOutputSamples[i*8+3] = (drflac_int32)right1;
+        pOutputSamples[i*8+4] = (drflac_int32)left2;
+        pOutputSamples[i*8+5] = (drflac_int32)right2;
+        pOutputSamples[i*8+6] = (drflac_int32)left3;
+        pOutputSamples[i*8+7] = (drflac_int32)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+
+        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left  = vaddq_u32(right, side);
+
+        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
+            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+    int32x4_t  wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
+    int32x4_t  wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
+    uint32x4_t one4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+    one4         = vdupq_n_u32(1);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
+
+            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
+            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
+        }
+    } else {
+        int32x4_t shift4;
+
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
+
+            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample));
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample));
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        pOutputSamples[i*8+0] = (drflac_int32)tempL0;
+        pOutputSamples[i*8+1] = (drflac_int32)tempR0;
+        pOutputSamples[i*8+2] = (drflac_int32)tempL1;
+        pOutputSamples[i*8+3] = (drflac_int32)tempR1;
+        pOutputSamples[i*8+4] = (drflac_int32)tempL2;
+        pOutputSamples[i*8+5] = (drflac_int32)tempR2;
+        pOutputSamples[i*8+6] = (drflac_int32)tempL3;
+        pOutputSamples[i*8+7] = (drflac_int32)tempR3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    int32x4_t shift4_0 = vdupq_n_s32(shift0);
+    int32x4_t shift4_1 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t left;
+        int32x4_t right;
+
+        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift4_0));
+        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift4_1));
+
+        drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_s32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        pBufferOut[(i*channelCount)+j] = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        left0  >>= 16;
+        left1  >>= 16;
+        left2  >>= 16;
+        left3  >>= 16;
+
+        right0 >>= 16;
+        right1 >>= 16;
+        right2 >>= 16;
+        right3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)left0;
+        pOutputSamples[i*8+1] = (drflac_int16)right0;
+        pOutputSamples[i*8+2] = (drflac_int16)left1;
+        pOutputSamples[i*8+3] = (drflac_int16)right1;
+        pOutputSamples[i*8+4] = (drflac_int16)left2;
+        pOutputSamples[i*8+5] = (drflac_int16)right2;
+        pOutputSamples[i*8+6] = (drflac_int16)left3;
+        pOutputSamples[i*8+7] = (drflac_int16)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+
+        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right = vsubq_u32(left, side);
+
+        left  = vshrq_n_u32(left,  16);
+        right = vshrq_n_u32(right, 16);
+
+        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        left0  >>= 16;
+        left1  >>= 16;
+        left2  >>= 16;
+        left3  >>= 16;
+
+        right0 >>= 16;
+        right1 >>= 16;
+        right2 >>= 16;
+        right3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)left0;
+        pOutputSamples[i*8+1] = (drflac_int16)right0;
+        pOutputSamples[i*8+2] = (drflac_int16)left1;
+        pOutputSamples[i*8+3] = (drflac_int16)right1;
+        pOutputSamples[i*8+4] = (drflac_int16)left2;
+        pOutputSamples[i*8+5] = (drflac_int16)right2;
+        pOutputSamples[i*8+6] = (drflac_int16)left3;
+        pOutputSamples[i*8+7] = (drflac_int16)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+
+        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left  = vaddq_u32(right, side);
+
+        left  = vshrq_n_u32(left,  16);
+        right = vshrq_n_u32(right, 16);
+
+        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            temp0L >>= 16;
+            temp1L >>= 16;
+            temp2L >>= 16;
+            temp3L >>= 16;
+
+            temp0R >>= 16;
+            temp1R >>= 16;
+            temp2R >>= 16;
+            temp3R >>= 16;
+
+            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = ((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = ((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = ((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = ((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = ((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = ((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = ((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = ((drflac_int32)(mid3 - side3) >> 1);
+
+            temp0L >>= 16;
+            temp1L >>= 16;
+            temp2L >>= 16;
+            temp3L >>= 16;
+
+            temp0R >>= 16;
+            temp1R >>= 16;
+            temp2R >>= 16;
+            temp3R >>= 16;
+
+            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            left  = _mm_srai_epi32(left,  16);
+            right = _mm_srai_epi32(right, 16);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            left  = _mm_srai_epi32(left,  16);
+            right = _mm_srai_epi32(right, 16);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+    int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
+    int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            left  = vshrq_n_s32(left,  16);
+            right = vshrq_n_s32(right, 16);
+
+            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
+        }
+    } else {
+        int32x4_t shift4;
+
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            left  = vshrq_n_s32(left,  16);
+            right = vshrq_n_s32(right, 16);
+
+            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        tempL0 >>= 16;
+        tempL1 >>= 16;
+        tempL2 >>= 16;
+        tempL3 >>= 16;
+
+        tempR0 >>= 16;
+        tempR1 >>= 16;
+        tempR2 >>= 16;
+        tempR3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)tempL0;
+        pOutputSamples[i*8+1] = (drflac_int16)tempR0;
+        pOutputSamples[i*8+2] = (drflac_int16)tempL1;
+        pOutputSamples[i*8+3] = (drflac_int16)tempR1;
+        pOutputSamples[i*8+4] = (drflac_int16)tempL2;
+        pOutputSamples[i*8+5] = (drflac_int16)tempR2;
+        pOutputSamples[i*8+6] = (drflac_int16)tempL3;
+        pOutputSamples[i*8+7] = (drflac_int16)tempR3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        /* At this point we have results. We can now pack and interleave these into a single __m128i object and then store the in the output buffer. */
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    int32x4_t shift0_4 = vdupq_n_s32(shift0);
+    int32x4_t shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t left;
+        int32x4_t right;
+
+        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
+        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
+
+        left  = vshrq_n_s32(left,  16);
+        right = vshrq_n_s32(right, 16);
+
+        drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_s16__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                        pBufferOut[(i*channelCount)+j] = (drflac_int16)(sampleS32 >> 16);
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (float)((drflac_int32)left  / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)right / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    float factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    __m128 factor;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = _mm_set1_ps(1.0f / 8388608.0f);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
+        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    float32x4_t factor4;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        left   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right  = vsubq_u32(left, side);
+        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (float)((drflac_int32)left  / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)right / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    float factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    __m128 factor;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = _mm_set1_ps(1.0f / 8388608.0f);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
+        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    float32x4_t factor4;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        side   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left   = vaddq_u32(right, side);
+        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (float)((((drflac_int32)(mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((((drflac_int32)(mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+    float factor = 1 / 2147483648.0;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample - 8;
+    float factor;
+    __m128 factor128;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = 1.0f / 8388608.0f;
+    factor128 = _mm_set1_ps(factor);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i tempL;
+            __m128i tempR;
+            __m128  leftf;
+            __m128  rightf;
+
+            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            tempL  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            tempR  = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
+            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
+
+            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
+            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i tempL;
+            __m128i tempR;
+            __m128 leftf;
+            __m128 rightf;
+
+            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            tempL  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            tempR  = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
+            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
+
+            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample - 8;
+    float factor;
+    float32x4_t factor4;
+    int32x4_t shift4;
+    int32x4_t wbps0_4;  /* Wasted Bits Per Sample */
+    int32x4_t wbps1_4;  /* Wasted Bits Per Sample */
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor  = 1.0f / 8388608.0f;
+    factor4 = vdupq_n_f32(factor);
+    wbps0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbps1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            int32x4_t lefti;
+            int32x4_t righti;
+            float32x4_t leftf;
+            float32x4_t rightf;
+
+            uint32x4_t mid  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
+            uint32x4_t side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
+
+            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            lefti  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            righti = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
+            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
+        }
+    } else {
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t lefti;
+            int32x4_t righti;
+            float32x4_t leftf;
+            float32x4_t rightf;
+
+            mid    = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
+            side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
+
+            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            lefti  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            righti = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (float)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    float factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        pOutputSamples[i*8+0] = (drflac_int32)tempL0 * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)tempR0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)tempL1 * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)tempR1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)tempL2 * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)tempR2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)tempL3 * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)tempR3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+
+    float factor = 1.0f / 8388608.0f;
+    __m128 factor128 = _mm_set1_ps(factor);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i lefti;
+        __m128i righti;
+        __m128 leftf;
+        __m128 rightf;
+
+        lefti  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        righti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        leftf  = _mm_mul_ps(_mm_cvtepi32_ps(lefti),  factor128);
+        rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+
+    float factor = 1.0f / 8388608.0f;
+    float32x4_t factor4 = vdupq_n_f32(factor);
+    int32x4_t shift0_4  = vdupq_n_s32(shift0);
+    int32x4_t shift1_4  = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t lefti;
+        int32x4_t righti;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        lefti  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
+        righti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
+
+        leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                        pBufferOut[(i*channelCount)+j] = (float)(sampleS32 / 2147483648.0);
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (unsigned int)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    if (pFlac == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Don't do anything if we're already on the seek point. */
+    if (pFlac->currentPCMFrame == pcmFrameIndex) {
+        return DRFLAC_TRUE;
+    }
+
+    /*
+    If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present
+    when the decoder was opened.
+    */
+    if (pFlac->firstFLACFramePosInBytes == 0) {
+        return DRFLAC_FALSE;
+    }
+
+    if (pcmFrameIndex == 0) {
+        pFlac->currentPCMFrame = 0;
+        return drflac__seek_to_first_frame(pFlac);
+    } else {
+        drflac_bool32 wasSuccessful = DRFLAC_FALSE;
+        drflac_uint64 originalPCMFrame = pFlac->currentPCMFrame;
+
+        /* Clamp the sample to the end. */
+        if (pcmFrameIndex > pFlac->totalPCMFrameCount) {
+            pcmFrameIndex = pFlac->totalPCMFrameCount;
+        }
+
+        /* If the target sample and the current sample are in the same frame we just move the position forward. */
+        if (pcmFrameIndex > pFlac->currentPCMFrame) {
+            /* Forward. */
+            drflac_uint32 offset = (drflac_uint32)(pcmFrameIndex - pFlac->currentPCMFrame);
+            if (pFlac->currentFLACFrame.pcmFramesRemaining >  offset) {
+                pFlac->currentFLACFrame.pcmFramesRemaining -= offset;
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                return DRFLAC_TRUE;
+            }
+        } else {
+            /* Backward. */
+            drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentPCMFrame - pcmFrameIndex);
+            drflac_uint32 currentFLACFramePCMFrameCount = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+            drflac_uint32 currentFLACFramePCMFramesConsumed = currentFLACFramePCMFrameCount - pFlac->currentFLACFrame.pcmFramesRemaining;
+            if (currentFLACFramePCMFramesConsumed > offsetAbs) {
+                pFlac->currentFLACFrame.pcmFramesRemaining += offsetAbs;
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                return DRFLAC_TRUE;
+            }
+        }
+
+        /*
+        Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so
+        we'll instead use Ogg's natural seeking facility.
+        */
+#ifndef DR_FLAC_NO_OGG
+        if (pFlac->container == drflac_container_ogg)
+        {
+            wasSuccessful = drflac_ogg__seek_to_pcm_frame(pFlac, pcmFrameIndex);
+        }
+        else
+#endif
+        {
+            /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */
+            if (/*!wasSuccessful && */!pFlac->_noSeekTableSeek) {
+                wasSuccessful = drflac__seek_to_pcm_frame__seek_table(pFlac, pcmFrameIndex);
+            }
+
+#if !defined(DR_FLAC_NO_CRC)
+            /* Fall back to binary search if seek table seeking fails. This requires the length of the stream to be known. */
+            if (!wasSuccessful && !pFlac->_noBinarySearchSeek && pFlac->totalPCMFrameCount > 0) {
+                wasSuccessful = drflac__seek_to_pcm_frame__binary_search(pFlac, pcmFrameIndex);
+            }
+#endif
+
+            /* Fall back to brute force if all else fails. */
+            if (!wasSuccessful && !pFlac->_noBruteForceSeek) {
+                wasSuccessful = drflac__seek_to_pcm_frame__brute_force(pFlac, pcmFrameIndex);
+            }
+        }
+
+        if (wasSuccessful) {
+            pFlac->currentPCMFrame = pcmFrameIndex;
+        } else {
+            /* Seek failed. Try putting the decoder back to it's original state. */
+            if (drflac_seek_to_pcm_frame(pFlac, originalPCMFrame) == DRFLAC_FALSE) {
+                /* Failed to seek back to the original PCM frame. Fall back to 0. */
+                drflac_seek_to_pcm_frame(pFlac, 0);
+            }
+        }
+
+        return wasSuccessful;
+    }
+}
+
+
+
+/* High Level APIs */
+
+/* SIZE_MAX */
+#if defined(SIZE_MAX)
+    #define DRFLAC_SIZE_MAX  SIZE_MAX
+#else
+    #if defined(DRFLAC_64BIT)
+        #define DRFLAC_SIZE_MAX  ((drflac_uint64)0xFFFFFFFFFFFFFFFF)
+    #else
+        #define DRFLAC_SIZE_MAX  0xFFFFFFFF
+    #endif
+#endif
+/* End SIZE_MAX */
+
+
+/* Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. */
+#define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \
+static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\
+{                                                                                                                                                                   \
+    type* pSampleData = NULL;                                                                                                                                       \
+    drflac_uint64 totalPCMFrameCount;                                                                                                                               \
+                                                                                                                                                                    \
+    DRFLAC_ASSERT(pFlac != NULL);                                                                                                                                   \
+                                                                                                                                                                    \
+    totalPCMFrameCount = pFlac->totalPCMFrameCount;                                                                                                                 \
+                                                                                                                                                                    \
+    if (totalPCMFrameCount == 0) {                                                                                                                                  \
+        type buffer[4096];                                                                                                                                          \
+        drflac_uint64 pcmFramesRead;                                                                                                                                \
+        size_t sampleDataBufferSize = sizeof(buffer);                                                                                                               \
+                                                                                                                                                                    \
+        pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks);                                                      \
+        if (pSampleData == NULL) {                                                                                                                                  \
+            goto on_error;                                                                                                                                          \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) {          \
+            if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) {                                                   \
+                type* pNewSampleData;                                                                                                                               \
+                size_t newSampleDataBufferSize;                                                                                                                     \
+                                                                                                                                                                    \
+                newSampleDataBufferSize = sampleDataBufferSize * 2;                                                                                                 \
+                pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, &pFlac->allocationCallbacks);    \
+                if (pNewSampleData == NULL) {                                                                                                                       \
+                    drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks);                                                                          \
+                    goto on_error;                                                                                                                                  \
+                }                                                                                                                                                   \
+                                                                                                                                                                    \
+                sampleDataBufferSize = newSampleDataBufferSize;                                                                                                     \
+                pSampleData = pNewSampleData;                                                                                                                       \
+            }                                                                                                                                                       \
+                                                                                                                                                                    \
+            DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type)));                   \
+            totalPCMFrameCount += pcmFramesRead;                                                                                                                    \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to                                       \
+           protect those ears from random noise! */                                                                                                                 \
+        DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type)));   \
+    } else {                                                                                                                                                        \
+        drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type);                                                                                   \
+        if (dataSize > (drflac_uint64)DRFLAC_SIZE_MAX) {                                                                                                            \
+            goto on_error;  /* The decoded data is too big. */                                                                                                      \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks);    /* <-- Safe cast as per the check above. */           \
+        if (pSampleData == NULL) {                                                                                                                                  \
+            goto on_error;                                                                                                                                          \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData);                                                     \
+    }                                                                                                                                                               \
+                                                                                                                                                                    \
+    if (sampleRateOut) *sampleRateOut = pFlac->sampleRate;                                                                                                          \
+    if (channelsOut) *channelsOut = pFlac->channels;                                                                                                                \
+    if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount;                                                                                         \
+                                                                                                                                                                    \
+    drflac_close(pFlac);                                                                                                                                            \
+    return pSampleData;                                                                                                                                             \
+                                                                                                                                                                    \
+on_error:                                                                                                                                                           \
+    drflac_close(pFlac);                                                                                                                                            \
+    return NULL;                                                                                                                                                    \
+}
+
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32)
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16)
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, float)
+
+DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+#ifndef DR_FLAC_NO_STDIO
+DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+#endif
+
+DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+
+DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        drflac__free_from_callbacks(p, pAllocationCallbacks);
+    } else {
+        drflac__free_default(p, NULL);
+    }
+}
+
+
+
+
+DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments)
+{
+    if (pIter == NULL) {
+        return;
+    }
+
+    pIter->countRemaining = commentCount;
+    pIter->pRunningData   = (const char*)pComments;
+}
+
+DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut)
+{
+    drflac_int32 length;
+    const char* pComment;
+
+    /* Safety. */
+    if (pCommentLengthOut) {
+        *pCommentLengthOut = 0;
+    }
+
+    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
+        return NULL;
+    }
+
+    length = drflac__le2host_32_ptr_unaligned(pIter->pRunningData);
+    pIter->pRunningData += 4;
+
+    pComment = pIter->pRunningData;
+    pIter->pRunningData += length;
+    pIter->countRemaining -= 1;
+
+    if (pCommentLengthOut) {
+        *pCommentLengthOut = length;
+    }
+
+    return pComment;
+}
+
+
+
+
+DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData)
+{
+    if (pIter == NULL) {
+        return;
+    }
+
+    pIter->countRemaining = trackCount;
+    pIter->pRunningData   = (const char*)pTrackData;
+}
+
+DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack)
+{
+    drflac_cuesheet_track cuesheetTrack;
+    const char* pRunningData;
+    drflac_uint64 offsetHi;
+    drflac_uint64 offsetLo;
+
+    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    pRunningData = pIter->pRunningData;
+
+    offsetHi                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+    offsetLo                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+    cuesheetTrack.offset       = offsetLo | (offsetHi << 32);
+    cuesheetTrack.trackNumber  = pRunningData[0];                                         pRunningData += 1;
+    DRFLAC_COPY_MEMORY(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC));     pRunningData += 12;
+    cuesheetTrack.isAudio      = (pRunningData[0] & 0x80) != 0;
+    cuesheetTrack.preEmphasis  = (pRunningData[0] & 0x40) != 0;                           pRunningData += 14;
+    cuesheetTrack.indexCount   = pRunningData[0];                                         pRunningData += 1;
+    cuesheetTrack.pIndexPoints = (const drflac_cuesheet_track_index*)pRunningData;        pRunningData += cuesheetTrack.indexCount * sizeof(drflac_cuesheet_track_index);
+
+    pIter->pRunningData = pRunningData;
+    pIter->countRemaining -= 1;
+
+    if (pCuesheetTrack) {
+        *pCuesheetTrack = cuesheetTrack;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+    #pragma GCC diagnostic pop
+#endif
+#endif  /* dr_flac_c */
+#endif  /* DR_FLAC_IMPLEMENTATION */
+
+
+/*
+REVISION HISTORY
+================
+v0.12.42 - 2023-11-02
+  - Fix build for ARMv6-M.
+  - Fix a compilation warning with GCC.
+
+v0.12.41 - 2023-06-17
+  - Fix an incorrect date in revision history. No functional change.
+
+v0.12.40 - 2023-05-22
+  - Minor code restructure. No functional change.
+
+v0.12.39 - 2022-09-17
+  - Fix compilation with DJGPP.
+  - Fix compilation error with Visual Studio 2019 and the ARM build.
+  - Fix an error with SSE 4.1 detection.
+  - Add support for disabling wchar_t with DR_WAV_NO_WCHAR.
+  - Improve compatibility with compilers which lack support for explicit struct packing.
+  - Improve compatibility with low-end and embedded hardware by reducing the amount of stack
+    allocation when loading an Ogg encapsulated file.
+
+v0.12.38 - 2022-04-10
+  - Fix compilation error on older versions of GCC.
+
+v0.12.37 - 2022-02-12
+  - Improve ARM detection.
+
+v0.12.36 - 2022-02-07
+  - Fix a compilation error with the ARM build.
+
+v0.12.35 - 2022-02-06
+  - Fix a bug due to underestimating the amount of precision required for the prediction stage.
+  - Fix some bugs found from fuzz testing.
+
+v0.12.34 - 2022-01-07
+  - Fix some misalignment bugs when reading metadata.
+
+v0.12.33 - 2021-12-22
+  - Fix a bug with seeking when the seek table does not start at PCM frame 0.
+
+v0.12.32 - 2021-12-11
+  - Fix a warning with Clang.
+
+v0.12.31 - 2021-08-16
+  - Silence some warnings.
+
+v0.12.30 - 2021-07-31
+  - Fix platform detection for ARM64.
+
+v0.12.29 - 2021-04-02
+  - Fix a bug where the running PCM frame index is set to an invalid value when over-seeking.
+  - Fix a decoding error due to an incorrect validation check.
+
+v0.12.28 - 2021-02-21
+  - Fix a warning due to referencing _MSC_VER when it is undefined.
+
+v0.12.27 - 2021-01-31
+  - Fix a static analysis warning.
+
+v0.12.26 - 2021-01-17
+  - Fix a compilation warning due to _BSD_SOURCE being deprecated.
+
+v0.12.25 - 2020-12-26
+  - Update documentation.
+
+v0.12.24 - 2020-11-29
+  - Fix ARM64/NEON detection when compiling with MSVC.
+
+v0.12.23 - 2020-11-21
+  - Fix compilation with OpenWatcom.
+
+v0.12.22 - 2020-11-01
+  - Fix an error with the previous release.
+
+v0.12.21 - 2020-11-01
+  - Fix a possible deadlock when seeking.
+  - Improve compiler support for older versions of GCC.
+
+v0.12.20 - 2020-09-08
+  - Fix a compilation error on older compilers.
+
+v0.12.19 - 2020-08-30
+  - Fix a bug due to an undefined 32-bit shift.
+
+v0.12.18 - 2020-08-14
+  - Fix a crash when compiling with clang-cl.
+
+v0.12.17 - 2020-08-02
+  - Simplify sized types.
+
+v0.12.16 - 2020-07-25
+  - Fix a compilation warning.
+
+v0.12.15 - 2020-07-06
+  - Check for negative LPC shifts and return an error.
+
+v0.12.14 - 2020-06-23
+  - Add include guard for the implementation section.
+
+v0.12.13 - 2020-05-16
+  - Add compile-time and run-time version querying.
+    - DRFLAC_VERSION_MINOR
+    - DRFLAC_VERSION_MAJOR
+    - DRFLAC_VERSION_REVISION
+    - DRFLAC_VERSION_STRING
+    - drflac_version()
+    - drflac_version_string()
+
+v0.12.12 - 2020-04-30
+  - Fix compilation errors with VC6.
+
+v0.12.11 - 2020-04-19
+  - Fix some pedantic warnings.
+  - Fix some undefined behaviour warnings.
+
+v0.12.10 - 2020-04-10
+  - Fix some bugs when trying to seek with an invalid seek table.
+
+v0.12.9 - 2020-04-05
+  - Fix warnings.
+
+v0.12.8 - 2020-04-04
+  - Add drflac_open_file_w() and drflac_open_file_with_metadata_w().
+  - Fix some static analysis warnings.
+  - Minor documentation updates.
+
+v0.12.7 - 2020-03-14
+  - Fix compilation errors with VC6.
+
+v0.12.6 - 2020-03-07
+  - Fix compilation error with Visual Studio .NET 2003.
+
+v0.12.5 - 2020-01-30
+  - Silence some static analysis warnings.
+
+v0.12.4 - 2020-01-29
+  - Silence some static analysis warnings.
+
+v0.12.3 - 2019-12-02
+  - Fix some warnings when compiling with GCC and the -Og flag.
+  - Fix a crash in out-of-memory situations.
+  - Fix potential integer overflow bug.
+  - Fix some static analysis warnings.
+  - Fix a possible crash when using custom memory allocators without a custom realloc() implementation.
+  - Fix a bug with binary search seeking where the bits per sample is not a multiple of 8.
+
+v0.12.2 - 2019-10-07
+  - Internal code clean up.
+
+v0.12.1 - 2019-09-29
+  - Fix some Clang Static Analyzer warnings.
+  - Fix an unused variable warning.
+
+v0.12.0 - 2019-09-23
+  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
+    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
+    - drflac_open()
+    - drflac_open_relaxed()
+    - drflac_open_with_metadata()
+    - drflac_open_with_metadata_relaxed()
+    - drflac_open_file()
+    - drflac_open_file_with_metadata()
+    - drflac_open_memory()
+    - drflac_open_memory_with_metadata()
+    - drflac_open_and_read_pcm_frames_s32()
+    - drflac_open_and_read_pcm_frames_s16()
+    - drflac_open_and_read_pcm_frames_f32()
+    - drflac_open_file_and_read_pcm_frames_s32()
+    - drflac_open_file_and_read_pcm_frames_s16()
+    - drflac_open_file_and_read_pcm_frames_f32()
+    - drflac_open_memory_and_read_pcm_frames_s32()
+    - drflac_open_memory_and_read_pcm_frames_s16()
+    - drflac_open_memory_and_read_pcm_frames_f32()
+    Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use
+    DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
+  - Remove deprecated APIs:
+    - drflac_read_s32()
+    - drflac_read_s16()
+    - drflac_read_f32()
+    - drflac_seek_to_sample()
+    - drflac_open_and_decode_s32()
+    - drflac_open_and_decode_s16()
+    - drflac_open_and_decode_f32()
+    - drflac_open_and_decode_file_s32()
+    - drflac_open_and_decode_file_s16()
+    - drflac_open_and_decode_file_f32()
+    - drflac_open_and_decode_memory_s32()
+    - drflac_open_and_decode_memory_s16()
+    - drflac_open_and_decode_memory_f32()
+  - Remove drflac.totalSampleCount which is now replaced with drflac.totalPCMFrameCount. You can emulate drflac.totalSampleCount
+    by doing pFlac->totalPCMFrameCount*pFlac->channels.
+  - Rename drflac.currentFrame to drflac.currentFLACFrame to remove ambiguity with PCM frames.
+  - Fix errors when seeking to the end of a stream.
+  - Optimizations to seeking.
+  - SSE improvements and optimizations.
+  - ARM NEON optimizations.
+  - Optimizations to drflac_read_pcm_frames_s16().
+  - Optimizations to drflac_read_pcm_frames_s32().
+
+v0.11.10 - 2019-06-26
+  - Fix a compiler error.
+
+v0.11.9 - 2019-06-16
+  - Silence some ThreadSanitizer warnings.
+
+v0.11.8 - 2019-05-21
+  - Fix warnings.
+
+v0.11.7 - 2019-05-06
+  - C89 fixes.
+
+v0.11.6 - 2019-05-05
+  - Add support for C89.
+  - Fix a compiler warning when CRC is disabled.
+  - Change license to choice of public domain or MIT-0.
+
+v0.11.5 - 2019-04-19
+  - Fix a compiler error with GCC.
+
+v0.11.4 - 2019-04-17
+  - Fix some warnings with GCC when compiling with -std=c99.
+
+v0.11.3 - 2019-04-07
+  - Silence warnings with GCC.
+
+v0.11.2 - 2019-03-10
+  - Fix a warning.
+
+v0.11.1 - 2019-02-17
+  - Fix a potential bug with seeking.
+
+v0.11.0 - 2018-12-16
+  - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with
+    drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take
+    and return PCM frame counts instead of sample counts. To upgrade you will need to change the input count by
+    dividing it by the channel count, and then do the same with the return value.
+  - API_CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as
+    the changes to drflac_read_*() apply.
+  - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as
+    the changes to drflac_read_*() apply.
+  - Optimizations.
+
+v0.10.0 - 2018-09-11
+  - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you
+    need to do it yourself via the callback API.
+  - Fix the clang build.
+  - Fix undefined behavior.
+  - Fix errors with CUESHEET metdata blocks.
+  - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the
+    Vorbis comment API.
+  - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams.
+  - Minor optimizations.
+
+v0.9.11 - 2018-08-29
+  - Fix a bug with sample reconstruction.
+
+v0.9.10 - 2018-08-07
+  - Improve 64-bit detection.
+
+v0.9.9 - 2018-08-05
+  - Fix C++ build on older versions of GCC.
+
+v0.9.8 - 2018-07-24
+  - Fix compilation errors.
+
+v0.9.7 - 2018-07-05
+  - Fix a warning.
+
+v0.9.6 - 2018-06-29
+  - Fix some typos.
+
+v0.9.5 - 2018-06-23
+  - Fix some warnings.
+
+v0.9.4 - 2018-06-14
+  - Optimizations to seeking.
+  - Clean up.
+
+v0.9.3 - 2018-05-22
+  - Bug fix.
+
+v0.9.2 - 2018-05-12
+  - Fix a compilation error due to a missing break statement.
+
+v0.9.1 - 2018-04-29
+  - Fix compilation error with Clang.
+
+v0.9 - 2018-04-24
+  - Fix Clang build.
+  - Start using major.minor.revision versioning.
+
+v0.8g - 2018-04-19
+  - Fix build on non-x86/x64 architectures.
+
+v0.8f - 2018-02-02
+  - Stop pretending to support changing rate/channels mid stream.
+
+v0.8e - 2018-02-01
+  - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream.
+  - Fix a crash the the Rice partition order is invalid.
+
+v0.8d - 2017-09-22
+  - Add support for decoding streams with ID3 tags. ID3 tags are just skipped.
+
+v0.8c - 2017-09-07
+  - Fix warning on non-x86/x64 architectures.
+
+v0.8b - 2017-08-19
+  - Fix build on non-x86/x64 architectures.
+
+v0.8a - 2017-08-13
+  - A small optimization for the Clang build.
+
+v0.8 - 2017-08-12
+  - API CHANGE: Rename dr_* types to drflac_*.
+  - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation.
+  - Add support for custom implementations of malloc(), realloc(), etc.
+  - Add CRC checking to Ogg encapsulated streams.
+  - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported.
+  - Bug fixes.
+
+v0.7 - 2017-07-23
+  - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed().
+
+v0.6 - 2017-07-22
+  - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they
+    never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame.
+
+v0.5 - 2017-07-16
+  - Fix typos.
+  - Change drflac_bool* types to unsigned.
+  - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC.
+
+v0.4f - 2017-03-10
+  - Fix a couple of bugs with the bitstreaming code.
+
+v0.4e - 2017-02-17
+  - Fix some warnings.
+
+v0.4d - 2016-12-26
+  - Add support for 32-bit floating-point PCM decoding.
+  - Use drflac_int* and drflac_uint* sized types to improve compiler support.
+  - Minor improvements to documentation.
+
+v0.4c - 2016-12-26
+  - Add support for signed 16-bit integer PCM decoding.
+
+v0.4b - 2016-10-23
+  - A minor change to drflac_bool8 and drflac_bool32 types.
+
+v0.4a - 2016-10-11
+  - Rename drBool32 to drflac_bool32 for styling consistency.
+
+v0.4 - 2016-09-29
+  - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type.
+  - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32().
+  - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to
+    keep it consistent with drflac_audio.
+
+v0.3f - 2016-09-21
+  - Fix a warning with GCC.
+
+v0.3e - 2016-09-18
+  - Fixed a bug where GCC 4.3+ was not getting properly identified.
+  - Fixed a few typos.
+  - Changed date formats to ISO 8601 (YYYY-MM-DD).
+
+v0.3d - 2016-06-11
+  - Minor clean up.
+
+v0.3c - 2016-05-28
+  - Fixed compilation error.
+
+v0.3b - 2016-05-16
+  - Fixed Linux/GCC build.
+  - Updated documentation.
+
+v0.3a - 2016-05-15
+  - Minor fixes to documentation.
+
+v0.3 - 2016-05-11
+  - Optimizations. Now at about parity with the reference implementation on 32-bit builds.
+  - Lots of clean up.
+
+v0.2b - 2016-05-10
+  - Bug fixes.
+
+v0.2a - 2016-05-10
+  - Made drflac_open_and_decode() more robust.
+  - Removed an unused debugging variable
+
+v0.2 - 2016-05-09
+  - Added support for Ogg encapsulation.
+  - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek
+    should be relative to the start or the current position. Also changes the seeking rules such that
+    seeking offsets will never be negative.
+  - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count.
+
+v0.1b - 2016-05-07
+  - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize.
+  - Removed a stale comment.
+
+v0.1a - 2016-05-05
+  - Minor formatting changes.
+  - Fixed a warning on the GCC build.
+
+v0.1 - 2016-05-03
+  - Initial versioned release.
+*/
+
+/*
+This software is available as a choice of the following licenses. Choose
+whichever you prefer.
+
+===============================================================================
+ALTERNATIVE 1 - Public Domain (www.unlicense.org)
+===============================================================================
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
+
+===============================================================================
+ALTERNATIVE 2 - MIT No Attribution
+===============================================================================
+Copyright 2023 David Reid
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
diff --git a/shared/external/dr_mp3.h b/shared/external/dr_mp3.h
new file mode 100644
index 0000000..84849ee
--- /dev/null
+++ b/shared/external/dr_mp3.h
@@ -0,0 +1,4834 @@
+/*
+MP3 audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
+dr_mp3 - v0.6.38 - 2023-11-02
+
+David Reid - mackron@gmail.com
+
+GitHub: https://github.com/mackron/dr_libs
+
+Based on minimp3 (https://github.com/lieff/minimp3) which is where the real work was done. See the bottom of this file for differences between minimp3 and dr_mp3.
+*/
+
+/*
+RELEASE NOTES - VERSION 0.6
+===========================
+Version 0.6 includes breaking changes with the configuration of decoders. The ability to customize the number of output channels and the sample rate has been
+removed. You must now use the channel count and sample rate reported by the MP3 stream itself, and all channel and sample rate conversion must be done
+yourself.
+
+
+Changes to Initialization
+-------------------------
+Previously, `drmp3_init()`, etc. took a pointer to a `drmp3_config` object that allowed you to customize the output channels and sample rate. This has been
+removed. If you need the old behaviour you will need to convert the data yourself or just not upgrade. The following APIs have changed.
+
+    `drmp3_init()`
+    `drmp3_init_memory()`
+    `drmp3_init_file()`
+
+
+Miscellaneous Changes
+---------------------
+Support for loading a file from a `wchar_t` string has been added via the `drmp3_init_file_w()` API.
+*/
+
+/*
+Introducation
+=============
+dr_mp3 is a single file library. To use it, do something like the following in one .c file.
+
+    ```c
+    #define DR_MP3_IMPLEMENTATION
+    #include "dr_mp3.h"
+    ```
+
+You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, do something like the following:
+
+    ```c
+    drmp3 mp3;
+    if (!drmp3_init_file(&mp3, "MySong.mp3", NULL)) {
+        // Failed to open file
+    }
+
+    ...
+
+    drmp3_uint64 framesRead = drmp3_read_pcm_frames_f32(pMP3, framesToRead, pFrames);
+    ```
+
+The drmp3 object is transparent so you can get access to the channel count and sample rate like so:
+
+    ```
+    drmp3_uint32 channels = mp3.channels;
+    drmp3_uint32 sampleRate = mp3.sampleRate;
+    ```
+
+The example above initializes a decoder from a file, but you can also initialize it from a block of memory and read and seek callbacks with
+`drmp3_init_memory()` and `drmp3_init()` respectively.
+
+You do not need to do any annoying memory management when reading PCM frames - this is all managed internally. You can request any number of PCM frames in each
+call to `drmp3_read_pcm_frames_f32()` and it will return as many PCM frames as it can, up to the requested amount.
+
+You can also decode an entire file in one go with `drmp3_open_and_read_pcm_frames_f32()`, `drmp3_open_memory_and_read_pcm_frames_f32()` and
+`drmp3_open_file_and_read_pcm_frames_f32()`.
+
+
+Build Options
+=============
+#define these options before including this file.
+
+#define DR_MP3_NO_STDIO
+  Disable drmp3_init_file(), etc.
+
+#define DR_MP3_NO_SIMD
+  Disable SIMD optimizations.
+*/
+
+#ifndef dr_mp3_h
+#define dr_mp3_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DRMP3_STRINGIFY(x)      #x
+#define DRMP3_XSTRINGIFY(x)     DRMP3_STRINGIFY(x)
+
+#define DRMP3_VERSION_MAJOR     0
+#define DRMP3_VERSION_MINOR     6
+#define DRMP3_VERSION_REVISION  38
+#define DRMP3_VERSION_STRING    DRMP3_XSTRINGIFY(DRMP3_VERSION_MAJOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_MINOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_REVISION)
+
+#include <stddef.h> /* For size_t. */
+
+/* Sized Types */
+typedef   signed char           drmp3_int8;
+typedef unsigned char           drmp3_uint8;
+typedef   signed short          drmp3_int16;
+typedef unsigned short          drmp3_uint16;
+typedef   signed int            drmp3_int32;
+typedef unsigned int            drmp3_uint32;
+#if defined(_MSC_VER) && !defined(__clang__)
+    typedef   signed __int64    drmp3_int64;
+    typedef unsigned __int64    drmp3_uint64;
+#else
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+        #pragma GCC diagnostic push
+        #pragma GCC diagnostic ignored "-Wlong-long"
+        #if defined(__clang__)
+            #pragma GCC diagnostic ignored "-Wc++11-long-long"
+        #endif
+    #endif
+    typedef   signed long long  drmp3_int64;
+    typedef unsigned long long  drmp3_uint64;
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+        #pragma GCC diagnostic pop
+    #endif
+#endif
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__)
+    typedef drmp3_uint64        drmp3_uintptr;
+#else
+    typedef drmp3_uint32        drmp3_uintptr;
+#endif
+typedef drmp3_uint8             drmp3_bool8;
+typedef drmp3_uint32            drmp3_bool32;
+#define DRMP3_TRUE              1
+#define DRMP3_FALSE             0
+/* End Sized Types */
+
+/* Decorations */
+#if !defined(DRMP3_API)
+    #if defined(DRMP3_DLL)
+        #if defined(_WIN32)
+            #define DRMP3_DLL_IMPORT  __declspec(dllimport)
+            #define DRMP3_DLL_EXPORT  __declspec(dllexport)
+            #define DRMP3_DLL_PRIVATE static
+        #else
+            #if defined(__GNUC__) && __GNUC__ >= 4
+                #define DRMP3_DLL_IMPORT  __attribute__((visibility("default")))
+                #define DRMP3_DLL_EXPORT  __attribute__((visibility("default")))
+                #define DRMP3_DLL_PRIVATE __attribute__((visibility("hidden")))
+            #else
+                #define DRMP3_DLL_IMPORT
+                #define DRMP3_DLL_EXPORT
+                #define DRMP3_DLL_PRIVATE static
+            #endif
+        #endif
+
+        #if defined(DR_MP3_IMPLEMENTATION) || defined(DRMP3_IMPLEMENTATION)
+            #define DRMP3_API  DRMP3_DLL_EXPORT
+        #else
+            #define DRMP3_API  DRMP3_DLL_IMPORT
+        #endif
+        #define DRMP3_PRIVATE DRMP3_DLL_PRIVATE
+    #else
+        #define DRMP3_API extern
+        #define DRMP3_PRIVATE static
+    #endif
+#endif
+/* End Decorations */
+
+/* Result Codes */
+typedef drmp3_int32 drmp3_result;
+#define DRMP3_SUCCESS                        0
+#define DRMP3_ERROR                         -1   /* A generic error. */
+#define DRMP3_INVALID_ARGS                  -2
+#define DRMP3_INVALID_OPERATION             -3
+#define DRMP3_OUT_OF_MEMORY                 -4
+#define DRMP3_OUT_OF_RANGE                  -5
+#define DRMP3_ACCESS_DENIED                 -6
+#define DRMP3_DOES_NOT_EXIST                -7
+#define DRMP3_ALREADY_EXISTS                -8
+#define DRMP3_TOO_MANY_OPEN_FILES           -9
+#define DRMP3_INVALID_FILE                  -10
+#define DRMP3_TOO_BIG                       -11
+#define DRMP3_PATH_TOO_LONG                 -12
+#define DRMP3_NAME_TOO_LONG                 -13
+#define DRMP3_NOT_DIRECTORY                 -14
+#define DRMP3_IS_DIRECTORY                  -15
+#define DRMP3_DIRECTORY_NOT_EMPTY           -16
+#define DRMP3_END_OF_FILE                   -17
+#define DRMP3_NO_SPACE                      -18
+#define DRMP3_BUSY                          -19
+#define DRMP3_IO_ERROR                      -20
+#define DRMP3_INTERRUPT                     -21
+#define DRMP3_UNAVAILABLE                   -22
+#define DRMP3_ALREADY_IN_USE                -23
+#define DRMP3_BAD_ADDRESS                   -24
+#define DRMP3_BAD_SEEK                      -25
+#define DRMP3_BAD_PIPE                      -26
+#define DRMP3_DEADLOCK                      -27
+#define DRMP3_TOO_MANY_LINKS                -28
+#define DRMP3_NOT_IMPLEMENTED               -29
+#define DRMP3_NO_MESSAGE                    -30
+#define DRMP3_BAD_MESSAGE                   -31
+#define DRMP3_NO_DATA_AVAILABLE             -32
+#define DRMP3_INVALID_DATA                  -33
+#define DRMP3_TIMEOUT                       -34
+#define DRMP3_NO_NETWORK                    -35
+#define DRMP3_NOT_UNIQUE                    -36
+#define DRMP3_NOT_SOCKET                    -37
+#define DRMP3_NO_ADDRESS                    -38
+#define DRMP3_BAD_PROTOCOL                  -39
+#define DRMP3_PROTOCOL_UNAVAILABLE          -40
+#define DRMP3_PROTOCOL_NOT_SUPPORTED        -41
+#define DRMP3_PROTOCOL_FAMILY_NOT_SUPPORTED -42
+#define DRMP3_ADDRESS_FAMILY_NOT_SUPPORTED  -43
+#define DRMP3_SOCKET_NOT_SUPPORTED          -44
+#define DRMP3_CONNECTION_RESET              -45
+#define DRMP3_ALREADY_CONNECTED             -46
+#define DRMP3_NOT_CONNECTED                 -47
+#define DRMP3_CONNECTION_REFUSED            -48
+#define DRMP3_NO_HOST                       -49
+#define DRMP3_IN_PROGRESS                   -50
+#define DRMP3_CANCELLED                     -51
+#define DRMP3_MEMORY_ALREADY_MAPPED         -52
+#define DRMP3_AT_END                        -53
+/* End Result Codes */
+
+#define DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME  1152
+#define DRMP3_MAX_SAMPLES_PER_FRAME         (DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME*2)
+
+/* Inline */
+#ifdef _MSC_VER
+    #define DRMP3_INLINE __forceinline
+#elif defined(__GNUC__)
+    /*
+    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
+    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
+    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
+    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
+    I am using "__inline__" only when we're compiling in strict ANSI mode.
+    */
+    #if defined(__STRICT_ANSI__)
+        #define DRMP3_GNUC_INLINE_HINT __inline__
+    #else
+        #define DRMP3_GNUC_INLINE_HINT inline
+    #endif
+
+    #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__)
+        #define DRMP3_INLINE DRMP3_GNUC_INLINE_HINT __attribute__((always_inline))
+    #else
+        #define DRMP3_INLINE DRMP3_GNUC_INLINE_HINT
+    #endif
+#elif defined(__WATCOMC__)
+    #define DRMP3_INLINE __inline
+#else
+    #define DRMP3_INLINE
+#endif
+/* End Inline */
+
+
+DRMP3_API void drmp3_version(drmp3_uint32* pMajor, drmp3_uint32* pMinor, drmp3_uint32* pRevision);
+DRMP3_API const char* drmp3_version_string(void);
+
+
+/* Allocation Callbacks */
+typedef struct
+{
+    void* pUserData;
+    void* (* onMalloc)(size_t sz, void* pUserData);
+    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
+    void  (* onFree)(void* p, void* pUserData);
+} drmp3_allocation_callbacks;
+/* End Allocation Callbacks */
+
+
+/*
+Low Level Push API
+==================
+*/
+typedef struct
+{
+    int frame_bytes, channels, hz, layer, bitrate_kbps;
+} drmp3dec_frame_info;
+
+typedef struct
+{
+    float mdct_overlap[2][9*32], qmf_state[15*2*32];
+    int reserv, free_format_bytes;
+    drmp3_uint8 header[4], reserv_buf[511];
+} drmp3dec;
+
+/* Initializes a low level decoder. */
+DRMP3_API void drmp3dec_init(drmp3dec *dec);
+
+/* Reads a frame from a low level decoder. */
+DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int mp3_bytes, void *pcm, drmp3dec_frame_info *info);
+
+/* Helper for converting between f32 and s16. */
+DRMP3_API void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, size_t num_samples);
+
+
+
+/*
+Main API (Pull API)
+===================
+*/
+typedef enum
+{
+    drmp3_seek_origin_start,
+    drmp3_seek_origin_current
+} drmp3_seek_origin;
+
+typedef struct
+{
+    drmp3_uint64 seekPosInBytes;        /* Points to the first byte of an MP3 frame. */
+    drmp3_uint64 pcmFrameIndex;         /* The index of the PCM frame this seek point targets. */
+    drmp3_uint16 mp3FramesToDiscard;    /* The number of whole MP3 frames to be discarded before pcmFramesToDiscard. */
+    drmp3_uint16 pcmFramesToDiscard;    /* The number of leading samples to read and discard. These are discarded after mp3FramesToDiscard. */
+} drmp3_seek_point;
+
+/*
+Callback for when data is read. Return value is the number of bytes actually read.
+
+pUserData   [in]  The user data that was passed to drmp3_init(), drmp3_open() and family.
+pBufferOut  [out] The output buffer.
+bytesToRead [in]  The number of bytes to read.
+
+Returns the number of bytes actually read.
+
+A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
+either the entire bytesToRead is filled or you have reached the end of the stream.
+*/
+typedef size_t (* drmp3_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
+
+/*
+Callback for when data needs to be seeked.
+
+pUserData [in] The user data that was passed to drmp3_init(), drmp3_open() and family.
+offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
+origin    [in] The origin of the seek - the current position or the start of the stream.
+
+Returns whether or not the seek was successful.
+
+Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which
+will be either drmp3_seek_origin_start or drmp3_seek_origin_current.
+*/
+typedef drmp3_bool32 (* drmp3_seek_proc)(void* pUserData, int offset, drmp3_seek_origin origin);
+
+typedef struct
+{
+    drmp3_uint32 channels;
+    drmp3_uint32 sampleRate;
+} drmp3_config;
+
+typedef struct
+{
+    drmp3dec decoder;
+    drmp3_uint32 channels;
+    drmp3_uint32 sampleRate;
+    drmp3_read_proc onRead;
+    drmp3_seek_proc onSeek;
+    void* pUserData;
+    drmp3_allocation_callbacks allocationCallbacks;
+    drmp3_uint32 mp3FrameChannels;      /* The number of channels in the currently loaded MP3 frame. Internal use only. */
+    drmp3_uint32 mp3FrameSampleRate;    /* The sample rate of the currently loaded MP3 frame. Internal use only. */
+    drmp3_uint32 pcmFramesConsumedInMP3Frame;
+    drmp3_uint32 pcmFramesRemainingInMP3Frame;
+    drmp3_uint8 pcmFrames[sizeof(float)*DRMP3_MAX_SAMPLES_PER_FRAME];  /* <-- Multipled by sizeof(float) to ensure there's enough room for DR_MP3_FLOAT_OUTPUT. */
+    drmp3_uint64 currentPCMFrame;       /* The current PCM frame, globally, based on the output sample rate. Mainly used for seeking. */
+    drmp3_uint64 streamCursor;          /* The current byte the decoder is sitting on in the raw stream. */
+    drmp3_seek_point* pSeekPoints;      /* NULL by default. Set with drmp3_bind_seek_table(). Memory is owned by the client. dr_mp3 will never attempt to free this pointer. */
+    drmp3_uint32 seekPointCount;        /* The number of items in pSeekPoints. When set to 0 assumes to no seek table. Defaults to zero. */
+    size_t dataSize;
+    size_t dataCapacity;
+    size_t dataConsumed;
+    drmp3_uint8* pData;
+    drmp3_bool32 atEnd : 1;
+    struct
+    {
+        const drmp3_uint8* pData;
+        size_t dataSize;
+        size_t currentReadPos;
+    } memory;   /* Only used for decoders that were opened against a block of memory. */
+} drmp3;
+
+/*
+Initializes an MP3 decoder.
+
+onRead    [in]           The function to call when data needs to be read from the client.
+onSeek    [in]           The function to call when the read position of the client data needs to move.
+pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
+
+Returns true if successful; false otherwise.
+
+Close the loader with drmp3_uninit().
+
+See also: drmp3_init_file(), drmp3_init_memory(), drmp3_uninit()
+*/
+DRMP3_API drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Initializes an MP3 decoder from a block of memory.
+
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
+the lifetime of the drmp3 object.
+
+The buffer should contain the contents of the entire MP3 file.
+*/
+DRMP3_API drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t dataSize, const drmp3_allocation_callbacks* pAllocationCallbacks);
+
+#ifndef DR_MP3_NO_STDIO
+/*
+Initializes an MP3 decoder from a file.
+
+This holds the internal FILE object until drmp3_uninit() is called. Keep this in mind if you're caching drmp3
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
+DRMP3_API drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks);
+DRMP3_API drmp3_bool32 drmp3_init_file_w(drmp3* pMP3, const wchar_t* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/*
+Uninitializes an MP3 decoder.
+*/
+DRMP3_API void drmp3_uninit(drmp3* pMP3);
+
+/*
+Reads PCM frames as interleaved 32-bit IEEE floating point PCM.
+
+Note that framesToRead specifies the number of PCM frames to read, _not_ the number of MP3 frames.
+*/
+DRMP3_API drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut);
+
+/*
+Reads PCM frames as interleaved signed 16-bit integer PCM.
+
+Note that framesToRead specifies the number of PCM frames to read, _not_ the number of MP3 frames.
+*/
+DRMP3_API drmp3_uint64 drmp3_read_pcm_frames_s16(drmp3* pMP3, drmp3_uint64 framesToRead, drmp3_int16* pBufferOut);
+
+/*
+Seeks to a specific frame.
+
+Note that this is _not_ an MP3 frame, but rather a PCM frame.
+*/
+DRMP3_API drmp3_bool32 drmp3_seek_to_pcm_frame(drmp3* pMP3, drmp3_uint64 frameIndex);
+
+/*
+Calculates the total number of PCM frames in the MP3 stream. Cannot be used for infinite streams such as internet
+radio. Runs in linear time. Returns 0 on error.
+*/
+DRMP3_API drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3);
+
+/*
+Calculates the total number of MP3 frames in the MP3 stream. Cannot be used for infinite streams such as internet
+radio. Runs in linear time. Returns 0 on error.
+*/
+DRMP3_API drmp3_uint64 drmp3_get_mp3_frame_count(drmp3* pMP3);
+
+/*
+Calculates the total number of MP3 and PCM frames in the MP3 stream. Cannot be used for infinite streams such as internet
+radio. Runs in linear time. Returns 0 on error.
+
+This is equivalent to calling drmp3_get_mp3_frame_count() and drmp3_get_pcm_frame_count() except that it's more efficient.
+*/
+DRMP3_API drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint64* pMP3FrameCount, drmp3_uint64* pPCMFrameCount);
+
+/*
+Calculates the seekpoints based on PCM frames. This is slow.
+
+pSeekpoint count is a pointer to a uint32 containing the seekpoint count. On input it contains the desired count.
+On output it contains the actual count. The reason for this design is that the client may request too many
+seekpoints, in which case dr_mp3 will return a corrected count.
+
+Note that seektable seeking is not quite sample exact when the MP3 stream contains inconsistent sample rates.
+*/
+DRMP3_API drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pSeekPointCount, drmp3_seek_point* pSeekPoints);
+
+/*
+Binds a seek table to the decoder.
+
+This does _not_ make a copy of pSeekPoints - it only references it. It is up to the application to ensure this
+remains valid while it is bound to the decoder.
+
+Use drmp3_calculate_seek_points() to calculate the seek points.
+*/
+DRMP3_API drmp3_bool32 drmp3_bind_seek_table(drmp3* pMP3, drmp3_uint32 seekPointCount, drmp3_seek_point* pSeekPoints);
+
+
+/*
+Opens an decodes an entire MP3 stream as a single operation.
+
+On output pConfig will receive the channel count and sample rate of the stream.
+
+Free the returned pointer with drmp3_free().
+*/
+DRMP3_API float* drmp3_open_and_read_pcm_frames_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
+DRMP3_API drmp3_int16* drmp3_open_and_read_pcm_frames_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
+
+DRMP3_API float* drmp3_open_memory_and_read_pcm_frames_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
+DRMP3_API drmp3_int16* drmp3_open_memory_and_read_pcm_frames_s16(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
+
+#ifndef DR_MP3_NO_STDIO
+DRMP3_API float* drmp3_open_file_and_read_pcm_frames_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
+DRMP3_API drmp3_int16* drmp3_open_file_and_read_pcm_frames_s16(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/*
+Allocates a block of memory on the heap.
+*/
+DRMP3_API void* drmp3_malloc(size_t sz, const drmp3_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Frees any memory that was allocated by a public drmp3 API.
+*/
+DRMP3_API void drmp3_free(void* p, const drmp3_allocation_callbacks* pAllocationCallbacks);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* dr_mp3_h */
+
+
+/************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************
+
+ IMPLEMENTATION
+
+ ************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************/
+#if defined(DR_MP3_IMPLEMENTATION) || defined(DRMP3_IMPLEMENTATION)
+#ifndef dr_mp3_c
+#define dr_mp3_c
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h> /* For INT_MAX */
+
+DRMP3_API void drmp3_version(drmp3_uint32* pMajor, drmp3_uint32* pMinor, drmp3_uint32* pRevision)
+{
+    if (pMajor) {
+        *pMajor = DRMP3_VERSION_MAJOR;
+    }
+
+    if (pMinor) {
+        *pMinor = DRMP3_VERSION_MINOR;
+    }
+
+    if (pRevision) {
+        *pRevision = DRMP3_VERSION_REVISION;
+    }
+}
+
+DRMP3_API const char* drmp3_version_string(void)
+{
+    return DRMP3_VERSION_STRING;
+}
+
+/* Disable SIMD when compiling with TCC for now. */
+#if defined(__TINYC__)
+#define DR_MP3_NO_SIMD
+#endif
+
+#define DRMP3_OFFSET_PTR(p, offset) ((void*)((drmp3_uint8*)(p) + (offset)))
+
+#define DRMP3_MAX_FREE_FORMAT_FRAME_SIZE  2304    /* more than ISO spec's */
+#ifndef DRMP3_MAX_FRAME_SYNC_MATCHES
+#define DRMP3_MAX_FRAME_SYNC_MATCHES      10
+#endif
+
+#define DRMP3_MAX_L3_FRAME_PAYLOAD_BYTES  DRMP3_MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */
+
+#define DRMP3_MAX_BITRESERVOIR_BYTES      511
+#define DRMP3_SHORT_BLOCK_TYPE            2
+#define DRMP3_STOP_BLOCK_TYPE             3
+#define DRMP3_MODE_MONO                   3
+#define DRMP3_MODE_JOINT_STEREO           1
+#define DRMP3_HDR_SIZE                    4
+#define DRMP3_HDR_IS_MONO(h)              (((h[3]) & 0xC0) == 0xC0)
+#define DRMP3_HDR_IS_MS_STEREO(h)         (((h[3]) & 0xE0) == 0x60)
+#define DRMP3_HDR_IS_FREE_FORMAT(h)       (((h[2]) & 0xF0) == 0)
+#define DRMP3_HDR_IS_CRC(h)               (!((h[1]) & 1))
+#define DRMP3_HDR_TEST_PADDING(h)         ((h[2]) & 0x2)
+#define DRMP3_HDR_TEST_MPEG1(h)           ((h[1]) & 0x8)
+#define DRMP3_HDR_TEST_NOT_MPEG25(h)      ((h[1]) & 0x10)
+#define DRMP3_HDR_TEST_I_STEREO(h)        ((h[3]) & 0x10)
+#define DRMP3_HDR_TEST_MS_STEREO(h)       ((h[3]) & 0x20)
+#define DRMP3_HDR_GET_STEREO_MODE(h)      (((h[3]) >> 6) & 3)
+#define DRMP3_HDR_GET_STEREO_MODE_EXT(h)  (((h[3]) >> 4) & 3)
+#define DRMP3_HDR_GET_LAYER(h)            (((h[1]) >> 1) & 3)
+#define DRMP3_HDR_GET_BITRATE(h)          ((h[2]) >> 4)
+#define DRMP3_HDR_GET_SAMPLE_RATE(h)      (((h[2]) >> 2) & 3)
+#define DRMP3_HDR_GET_MY_SAMPLE_RATE(h)   (DRMP3_HDR_GET_SAMPLE_RATE(h) + (((h[1] >> 3) & 1) + ((h[1] >> 4) & 1))*3)
+#define DRMP3_HDR_IS_FRAME_576(h)         ((h[1] & 14) == 2)
+#define DRMP3_HDR_IS_LAYER_1(h)           ((h[1] & 6) == 6)
+
+#define DRMP3_BITS_DEQUANTIZER_OUT        -1
+#define DRMP3_MAX_SCF                     (255 + DRMP3_BITS_DEQUANTIZER_OUT*4 - 210)
+#define DRMP3_MAX_SCFI                    ((DRMP3_MAX_SCF + 3) & ~3)
+
+#define DRMP3_MIN(a, b)           ((a) > (b) ? (b) : (a))
+#define DRMP3_MAX(a, b)           ((a) < (b) ? (b) : (a))
+
+#if !defined(DR_MP3_NO_SIMD)
+
+#if !defined(DR_MP3_ONLY_SIMD) && (defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__) || defined(_M_ARM64))
+/* x64 always have SSE2, arm64 always have neon, no need for generic code */
+#define DR_MP3_ONLY_SIMD
+#endif
+
+#if ((defined(_MSC_VER) && _MSC_VER >= 1400) && defined(_M_X64)) || ((defined(__i386) || defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)) && ((defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__)))
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+#include <emmintrin.h>
+#define DRMP3_HAVE_SSE 1
+#define DRMP3_HAVE_SIMD 1
+#define DRMP3_VSTORE _mm_storeu_ps
+#define DRMP3_VLD _mm_loadu_ps
+#define DRMP3_VSET _mm_set1_ps
+#define DRMP3_VADD _mm_add_ps
+#define DRMP3_VSUB _mm_sub_ps
+#define DRMP3_VMUL _mm_mul_ps
+#define DRMP3_VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y))
+#define DRMP3_VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y))
+#define DRMP3_VMUL_S(x, s)  _mm_mul_ps(x, _mm_set1_ps(s))
+#define DRMP3_VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3))
+typedef __m128 drmp3_f4;
+#if defined(_MSC_VER) || defined(DR_MP3_ONLY_SIMD)
+#define drmp3_cpuid __cpuid
+#else
+static __inline__ __attribute__((always_inline)) void drmp3_cpuid(int CPUInfo[], const int InfoType)
+{
+#if defined(__PIC__)
+    __asm__ __volatile__(
+#if defined(__x86_64__)
+        "push %%rbx\n"
+        "cpuid\n"
+        "xchgl %%ebx, %1\n"
+        "pop  %%rbx\n"
+#else
+        "xchgl %%ebx, %1\n"
+        "cpuid\n"
+        "xchgl %%ebx, %1\n"
+#endif
+        : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
+        : "a" (InfoType));
+#else
+    __asm__ __volatile__(
+        "cpuid"
+        : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
+        : "a" (InfoType));
+#endif
+}
+#endif
+static int drmp3_have_simd(void)
+{
+#ifdef DR_MP3_ONLY_SIMD
+    return 1;
+#else
+    static int g_have_simd;
+    int CPUInfo[4];
+#ifdef MINIMP3_TEST
+    static int g_counter;
+    if (g_counter++ > 100)
+        return 0;
+#endif
+    if (g_have_simd)
+        goto end;
+    drmp3_cpuid(CPUInfo, 0);
+    if (CPUInfo[0] > 0)
+    {
+        drmp3_cpuid(CPUInfo, 1);
+        g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */
+        return g_have_simd - 1;
+    }
+
+end:
+    return g_have_simd - 1;
+#endif
+}
+#elif defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)
+#include <arm_neon.h>
+#define DRMP3_HAVE_SSE 0
+#define DRMP3_HAVE_SIMD 1
+#define DRMP3_VSTORE vst1q_f32
+#define DRMP3_VLD vld1q_f32
+#define DRMP3_VSET vmovq_n_f32
+#define DRMP3_VADD vaddq_f32
+#define DRMP3_VSUB vsubq_f32
+#define DRMP3_VMUL vmulq_f32
+#define DRMP3_VMAC(a, x, y) vmlaq_f32(a, x, y)
+#define DRMP3_VMSB(a, x, y) vmlsq_f32(a, x, y)
+#define DRMP3_VMUL_S(x, s)  vmulq_f32(x, vmovq_n_f32(s))
+#define DRMP3_VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x)))
+typedef float32x4_t drmp3_f4;
+static int drmp3_have_simd(void)
+{   /* TODO: detect neon for !DR_MP3_ONLY_SIMD */
+    return 1;
+}
+#else
+#define DRMP3_HAVE_SSE 0
+#define DRMP3_HAVE_SIMD 0
+#ifdef DR_MP3_ONLY_SIMD
+#error DR_MP3_ONLY_SIMD used, but SSE/NEON not enabled
+#endif
+#endif
+
+#else
+
+#define DRMP3_HAVE_SIMD 0
+
+#endif
+
+#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64) && !defined(__ARM_ARCH_6M__)
+#define DRMP3_HAVE_ARMV6 1
+static __inline__ __attribute__((always_inline)) drmp3_int32 drmp3_clip_int16_arm(drmp3_int32 a)
+{
+    drmp3_int32 x = 0;
+    __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
+    return x;
+}
+#else
+#define DRMP3_HAVE_ARMV6 0
+#endif
+
+
+/* Standard library stuff. */
+#ifndef DRMP3_ASSERT
+#include <assert.h>
+#define DRMP3_ASSERT(expression) assert(expression)
+#endif
+#ifndef DRMP3_COPY_MEMORY
+#define DRMP3_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz))
+#endif
+#ifndef DRMP3_MOVE_MEMORY
+#define DRMP3_MOVE_MEMORY(dst, src, sz) memmove((dst), (src), (sz))
+#endif
+#ifndef DRMP3_ZERO_MEMORY
+#define DRMP3_ZERO_MEMORY(p, sz) memset((p), 0, (sz))
+#endif
+#define DRMP3_ZERO_OBJECT(p) DRMP3_ZERO_MEMORY((p), sizeof(*(p)))
+#ifndef DRMP3_MALLOC
+#define DRMP3_MALLOC(sz) malloc((sz))
+#endif
+#ifndef DRMP3_REALLOC
+#define DRMP3_REALLOC(p, sz) realloc((p), (sz))
+#endif
+#ifndef DRMP3_FREE
+#define DRMP3_FREE(p) free((p))
+#endif
+
+typedef struct
+{
+    const drmp3_uint8 *buf;
+    int pos, limit;
+} drmp3_bs;
+
+typedef struct
+{
+    float scf[3*64];
+    drmp3_uint8 total_bands, stereo_bands, bitalloc[64], scfcod[64];
+} drmp3_L12_scale_info;
+
+typedef struct
+{
+    drmp3_uint8 tab_offset, code_tab_width, band_count;
+} drmp3_L12_subband_alloc;
+
+typedef struct
+{
+    const drmp3_uint8 *sfbtab;
+    drmp3_uint16 part_23_length, big_values, scalefac_compress;
+    drmp3_uint8 global_gain, block_type, mixed_block_flag, n_long_sfb, n_short_sfb;
+    drmp3_uint8 table_select[3], region_count[3], subblock_gain[3];
+    drmp3_uint8 preflag, scalefac_scale, count1_table, scfsi;
+} drmp3_L3_gr_info;
+
+typedef struct
+{
+    drmp3_bs bs;
+    drmp3_uint8 maindata[DRMP3_MAX_BITRESERVOIR_BYTES + DRMP3_MAX_L3_FRAME_PAYLOAD_BYTES];
+    drmp3_L3_gr_info gr_info[4];
+    float grbuf[2][576], scf[40], syn[18 + 15][2*32];
+    drmp3_uint8 ist_pos[2][39];
+} drmp3dec_scratch;
+
+static void drmp3_bs_init(drmp3_bs *bs, const drmp3_uint8 *data, int bytes)
+{
+    bs->buf   = data;
+    bs->pos   = 0;
+    bs->limit = bytes*8;
+}
+
+static drmp3_uint32 drmp3_bs_get_bits(drmp3_bs *bs, int n)
+{
+    drmp3_uint32 next, cache = 0, s = bs->pos & 7;
+    int shl = n + s;
+    const drmp3_uint8 *p = bs->buf + (bs->pos >> 3);
+    if ((bs->pos += n) > bs->limit)
+        return 0;
+    next = *p++ & (255 >> s);
+    while ((shl -= 8) > 0)
+    {
+        cache |= next << shl;
+        next = *p++;
+    }
+    return cache | (next >> -shl);
+}
+
+static int drmp3_hdr_valid(const drmp3_uint8 *h)
+{
+    return h[0] == 0xff &&
+        ((h[1] & 0xF0) == 0xf0 || (h[1] & 0xFE) == 0xe2) &&
+        (DRMP3_HDR_GET_LAYER(h) != 0) &&
+        (DRMP3_HDR_GET_BITRATE(h) != 15) &&
+        (DRMP3_HDR_GET_SAMPLE_RATE(h) != 3);
+}
+
+static int drmp3_hdr_compare(const drmp3_uint8 *h1, const drmp3_uint8 *h2)
+{
+    return drmp3_hdr_valid(h2) &&
+        ((h1[1] ^ h2[1]) & 0xFE) == 0 &&
+        ((h1[2] ^ h2[2]) & 0x0C) == 0 &&
+        !(DRMP3_HDR_IS_FREE_FORMAT(h1) ^ DRMP3_HDR_IS_FREE_FORMAT(h2));
+}
+
+static unsigned drmp3_hdr_bitrate_kbps(const drmp3_uint8 *h)
+{
+    static const drmp3_uint8 halfrate[2][3][15] = {
+        { { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } },
+        { { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } },
+    };
+    return 2*halfrate[!!DRMP3_HDR_TEST_MPEG1(h)][DRMP3_HDR_GET_LAYER(h) - 1][DRMP3_HDR_GET_BITRATE(h)];
+}
+
+static unsigned drmp3_hdr_sample_rate_hz(const drmp3_uint8 *h)
+{
+    static const unsigned g_hz[3] = { 44100, 48000, 32000 };
+    return g_hz[DRMP3_HDR_GET_SAMPLE_RATE(h)] >> (int)!DRMP3_HDR_TEST_MPEG1(h) >> (int)!DRMP3_HDR_TEST_NOT_MPEG25(h);
+}
+
+static unsigned drmp3_hdr_frame_samples(const drmp3_uint8 *h)
+{
+    return DRMP3_HDR_IS_LAYER_1(h) ? 384 : (1152 >> (int)DRMP3_HDR_IS_FRAME_576(h));
+}
+
+static int drmp3_hdr_frame_bytes(const drmp3_uint8 *h, int free_format_size)
+{
+    int frame_bytes = drmp3_hdr_frame_samples(h)*drmp3_hdr_bitrate_kbps(h)*125/drmp3_hdr_sample_rate_hz(h);
+    if (DRMP3_HDR_IS_LAYER_1(h))
+    {
+        frame_bytes &= ~3; /* slot align */
+    }
+    return frame_bytes ? frame_bytes : free_format_size;
+}
+
+static int drmp3_hdr_padding(const drmp3_uint8 *h)
+{
+    return DRMP3_HDR_TEST_PADDING(h) ? (DRMP3_HDR_IS_LAYER_1(h) ? 4 : 1) : 0;
+}
+
+#ifndef DR_MP3_ONLY_MP3
+static const drmp3_L12_subband_alloc *drmp3_L12_subband_alloc_table(const drmp3_uint8 *hdr, drmp3_L12_scale_info *sci)
+{
+    const drmp3_L12_subband_alloc *alloc;
+    int mode = DRMP3_HDR_GET_STEREO_MODE(hdr);
+    int nbands, stereo_bands = (mode == DRMP3_MODE_MONO) ? 0 : (mode == DRMP3_MODE_JOINT_STEREO) ? (DRMP3_HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32;
+
+    if (DRMP3_HDR_IS_LAYER_1(hdr))
+    {
+        static const drmp3_L12_subband_alloc g_alloc_L1[] = { { 76, 4, 32 } };
+        alloc = g_alloc_L1;
+        nbands = 32;
+    } else if (!DRMP3_HDR_TEST_MPEG1(hdr))
+    {
+        static const drmp3_L12_subband_alloc g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } };
+        alloc = g_alloc_L2M2;
+        nbands = 30;
+    } else
+    {
+        static const drmp3_L12_subband_alloc g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } };
+        int sample_rate_idx = DRMP3_HDR_GET_SAMPLE_RATE(hdr);
+        unsigned kbps = drmp3_hdr_bitrate_kbps(hdr) >> (int)(mode != DRMP3_MODE_MONO);
+        if (!kbps) /* free-format */
+        {
+            kbps = 192;
+        }
+
+        alloc = g_alloc_L2M1;
+        nbands = 27;
+        if (kbps < 56)
+        {
+            static const drmp3_L12_subband_alloc g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } };
+            alloc = g_alloc_L2M1_lowrate;
+            nbands = sample_rate_idx == 2 ? 12 : 8;
+        } else if (kbps >= 96 && sample_rate_idx != 1)
+        {
+            nbands = 30;
+        }
+    }
+
+    sci->total_bands = (drmp3_uint8)nbands;
+    sci->stereo_bands = (drmp3_uint8)DRMP3_MIN(stereo_bands, nbands);
+
+    return alloc;
+}
+
+static void drmp3_L12_read_scalefactors(drmp3_bs *bs, drmp3_uint8 *pba, drmp3_uint8 *scfcod, int bands, float *scf)
+{
+    static const float g_deq_L12[18*3] = {
+#define DRMP3_DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x
+        DRMP3_DQ(3),DRMP3_DQ(7),DRMP3_DQ(15),DRMP3_DQ(31),DRMP3_DQ(63),DRMP3_DQ(127),DRMP3_DQ(255),DRMP3_DQ(511),DRMP3_DQ(1023),DRMP3_DQ(2047),DRMP3_DQ(4095),DRMP3_DQ(8191),DRMP3_DQ(16383),DRMP3_DQ(32767),DRMP3_DQ(65535),DRMP3_DQ(3),DRMP3_DQ(5),DRMP3_DQ(9)
+    };
+    int i, m;
+    for (i = 0; i < bands; i++)
+    {
+        float s = 0;
+        int ba = *pba++;
+        int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0;
+        for (m = 4; m; m >>= 1)
+        {
+            if (mask & m)
+            {
+                int b = drmp3_bs_get_bits(bs, 6);
+                s = g_deq_L12[ba*3 - 6 + b % 3]*(int)(1 << 21 >> b/3);
+            }
+            *scf++ = s;
+        }
+    }
+}
+
+static void drmp3_L12_read_scale_info(const drmp3_uint8 *hdr, drmp3_bs *bs, drmp3_L12_scale_info *sci)
+{
+    static const drmp3_uint8 g_bitalloc_code_tab[] = {
+        0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16,
+        0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16,
+        0,17,18, 3,19,4,5,16,
+        0,17,18,16,
+        0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15,
+        0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14,
+        0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16
+    };
+    const drmp3_L12_subband_alloc *subband_alloc = drmp3_L12_subband_alloc_table(hdr, sci);
+
+    int i, k = 0, ba_bits = 0;
+    const drmp3_uint8 *ba_code_tab = g_bitalloc_code_tab;
+
+    for (i = 0; i < sci->total_bands; i++)
+    {
+        drmp3_uint8 ba;
+        if (i == k)
+        {
+            k += subband_alloc->band_count;
+            ba_bits = subband_alloc->code_tab_width;
+            ba_code_tab = g_bitalloc_code_tab + subband_alloc->tab_offset;
+            subband_alloc++;
+        }
+        ba = ba_code_tab[drmp3_bs_get_bits(bs, ba_bits)];
+        sci->bitalloc[2*i] = ba;
+        if (i < sci->stereo_bands)
+        {
+            ba = ba_code_tab[drmp3_bs_get_bits(bs, ba_bits)];
+        }
+        sci->bitalloc[2*i + 1] = sci->stereo_bands ? ba : 0;
+    }
+
+    for (i = 0; i < 2*sci->total_bands; i++)
+    {
+        sci->scfcod[i] = (drmp3_uint8)(sci->bitalloc[i] ? DRMP3_HDR_IS_LAYER_1(hdr) ? 2 : drmp3_bs_get_bits(bs, 2) : 6);
+    }
+
+    drmp3_L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf);
+
+    for (i = sci->stereo_bands; i < sci->total_bands; i++)
+    {
+        sci->bitalloc[2*i + 1] = 0;
+    }
+}
+
+static int drmp3_L12_dequantize_granule(float *grbuf, drmp3_bs *bs, drmp3_L12_scale_info *sci, int group_size)
+{
+    int i, j, k, choff = 576;
+    for (j = 0; j < 4; j++)
+    {
+        float *dst = grbuf + group_size*j;
+        for (i = 0; i < 2*sci->total_bands; i++)
+        {
+            int ba = sci->bitalloc[i];
+            if (ba != 0)
+            {
+                if (ba < 17)
+                {
+                    int half = (1 << (ba - 1)) - 1;
+                    for (k = 0; k < group_size; k++)
+                    {
+                        dst[k] = (float)((int)drmp3_bs_get_bits(bs, ba) - half);
+                    }
+                } else
+                {
+                    unsigned mod = (2 << (ba - 17)) + 1;    /* 3, 5, 9 */
+                    unsigned code = drmp3_bs_get_bits(bs, mod + 2 - (mod >> 3));  /* 5, 7, 10 */
+                    for (k = 0; k < group_size; k++, code /= mod)
+                    {
+                        dst[k] = (float)((int)(code % mod - mod/2));
+                    }
+                }
+            }
+            dst += choff;
+            choff = 18 - choff;
+        }
+    }
+    return group_size*4;
+}
+
+static void drmp3_L12_apply_scf_384(drmp3_L12_scale_info *sci, const float *scf, float *dst)
+{
+    int i, k;
+    DRMP3_COPY_MEMORY(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float));
+    for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6)
+    {
+        for (k = 0; k < 12; k++)
+        {
+            dst[k + 0]   *= scf[0];
+            dst[k + 576] *= scf[3];
+        }
+    }
+}
+#endif
+
+static int drmp3_L3_read_side_info(drmp3_bs *bs, drmp3_L3_gr_info *gr, const drmp3_uint8 *hdr)
+{
+    static const drmp3_uint8 g_scf_long[8][23] = {
+        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
+        { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 },
+        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
+        { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 },
+        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
+        { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 },
+        { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 },
+        { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 }
+    };
+    static const drmp3_uint8 g_scf_short[8][40] = {
+        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
+        { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
+        { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
+        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
+        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
+        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
+        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
+        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
+    };
+    static const drmp3_uint8 g_scf_mixed[8][40] = {
+        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
+        { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
+        { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
+        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
+        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
+        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
+        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
+        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
+    };
+
+    unsigned tables, scfsi = 0;
+    int main_data_begin, part_23_sum = 0;
+    int gr_count = DRMP3_HDR_IS_MONO(hdr) ? 1 : 2;
+    int sr_idx = DRMP3_HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0);
+
+    if (DRMP3_HDR_TEST_MPEG1(hdr))
+    {
+        gr_count *= 2;
+        main_data_begin = drmp3_bs_get_bits(bs, 9);
+        scfsi = drmp3_bs_get_bits(bs, 7 + gr_count);
+    } else
+    {
+        main_data_begin = drmp3_bs_get_bits(bs, 8 + gr_count) >> gr_count;
+    }
+
+    do
+    {
+        if (DRMP3_HDR_IS_MONO(hdr))
+        {
+            scfsi <<= 4;
+        }
+        gr->part_23_length = (drmp3_uint16)drmp3_bs_get_bits(bs, 12);
+        part_23_sum += gr->part_23_length;
+        gr->big_values = (drmp3_uint16)drmp3_bs_get_bits(bs,  9);
+        if (gr->big_values > 288)
+        {
+            return -1;
+        }
+        gr->global_gain = (drmp3_uint8)drmp3_bs_get_bits(bs, 8);
+        gr->scalefac_compress = (drmp3_uint16)drmp3_bs_get_bits(bs, DRMP3_HDR_TEST_MPEG1(hdr) ? 4 : 9);
+        gr->sfbtab = g_scf_long[sr_idx];
+        gr->n_long_sfb  = 22;
+        gr->n_short_sfb = 0;
+        if (drmp3_bs_get_bits(bs, 1))
+        {
+            gr->block_type = (drmp3_uint8)drmp3_bs_get_bits(bs, 2);
+            if (!gr->block_type)
+            {
+                return -1;
+            }
+            gr->mixed_block_flag = (drmp3_uint8)drmp3_bs_get_bits(bs, 1);
+            gr->region_count[0] = 7;
+            gr->region_count[1] = 255;
+            if (gr->block_type == DRMP3_SHORT_BLOCK_TYPE)
+            {
+                scfsi &= 0x0F0F;
+                if (!gr->mixed_block_flag)
+                {
+                    gr->region_count[0] = 8;
+                    gr->sfbtab = g_scf_short[sr_idx];
+                    gr->n_long_sfb = 0;
+                    gr->n_short_sfb = 39;
+                } else
+                {
+                    gr->sfbtab = g_scf_mixed[sr_idx];
+                    gr->n_long_sfb = DRMP3_HDR_TEST_MPEG1(hdr) ? 8 : 6;
+                    gr->n_short_sfb = 30;
+                }
+            }
+            tables = drmp3_bs_get_bits(bs, 10);
+            tables <<= 5;
+            gr->subblock_gain[0] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3);
+            gr->subblock_gain[1] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3);
+            gr->subblock_gain[2] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3);
+        } else
+        {
+            gr->block_type = 0;
+            gr->mixed_block_flag = 0;
+            tables = drmp3_bs_get_bits(bs, 15);
+            gr->region_count[0] = (drmp3_uint8)drmp3_bs_get_bits(bs, 4);
+            gr->region_count[1] = (drmp3_uint8)drmp3_bs_get_bits(bs, 3);
+            gr->region_count[2] = 255;
+        }
+        gr->table_select[0] = (drmp3_uint8)(tables >> 10);
+        gr->table_select[1] = (drmp3_uint8)((tables >> 5) & 31);
+        gr->table_select[2] = (drmp3_uint8)((tables) & 31);
+        gr->preflag = (drmp3_uint8)(DRMP3_HDR_TEST_MPEG1(hdr) ? drmp3_bs_get_bits(bs, 1) : (gr->scalefac_compress >= 500));
+        gr->scalefac_scale = (drmp3_uint8)drmp3_bs_get_bits(bs, 1);
+        gr->count1_table = (drmp3_uint8)drmp3_bs_get_bits(bs, 1);
+        gr->scfsi = (drmp3_uint8)((scfsi >> 12) & 15);
+        scfsi <<= 4;
+        gr++;
+    } while(--gr_count);
+
+    if (part_23_sum + bs->pos > bs->limit + main_data_begin*8)
+    {
+        return -1;
+    }
+
+    return main_data_begin;
+}
+
+static void drmp3_L3_read_scalefactors(drmp3_uint8 *scf, drmp3_uint8 *ist_pos, const drmp3_uint8 *scf_size, const drmp3_uint8 *scf_count, drmp3_bs *bitbuf, int scfsi)
+{
+    int i, k;
+    for (i = 0; i < 4 && scf_count[i]; i++, scfsi *= 2)
+    {
+        int cnt = scf_count[i];
+        if (scfsi & 8)
+        {
+            DRMP3_COPY_MEMORY(scf, ist_pos, cnt);
+        } else
+        {
+            int bits = scf_size[i];
+            if (!bits)
+            {
+                DRMP3_ZERO_MEMORY(scf, cnt);
+                DRMP3_ZERO_MEMORY(ist_pos, cnt);
+            } else
+            {
+                int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1;
+                for (k = 0; k < cnt; k++)
+                {
+                    int s = drmp3_bs_get_bits(bitbuf, bits);
+                    ist_pos[k] = (drmp3_uint8)(s == max_scf ? -1 : s);
+                    scf[k] = (drmp3_uint8)s;
+                }
+            }
+        }
+        ist_pos += cnt;
+        scf += cnt;
+    }
+    scf[0] = scf[1] = scf[2] = 0;
+}
+
+static float drmp3_L3_ldexp_q2(float y, int exp_q2)
+{
+    static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f };
+    int e;
+    do
+    {
+        e = DRMP3_MIN(30*4, exp_q2);
+        y *= g_expfrac[e & 3]*(1 << 30 >> (e >> 2));
+    } while ((exp_q2 -= e) > 0);
+    return y;
+}
+
+static void drmp3_L3_decode_scalefactors(const drmp3_uint8 *hdr, drmp3_uint8 *ist_pos, drmp3_bs *bs, const drmp3_L3_gr_info *gr, float *scf, int ch)
+{
+    static const drmp3_uint8 g_scf_partitions[3][28] = {
+        { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 },
+        { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 },
+        { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 }
+    };
+    const drmp3_uint8 *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb];
+    drmp3_uint8 scf_size[4], iscf[40];
+    int i, scf_shift = gr->scalefac_scale + 1, gain_exp, scfsi = gr->scfsi;
+    float gain;
+
+    if (DRMP3_HDR_TEST_MPEG1(hdr))
+    {
+        static const drmp3_uint8 g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 };
+        int part = g_scfc_decode[gr->scalefac_compress];
+        scf_size[1] = scf_size[0] = (drmp3_uint8)(part >> 2);
+        scf_size[3] = scf_size[2] = (drmp3_uint8)(part & 3);
+    } else
+    {
+        static const drmp3_uint8 g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 };
+        int k, modprod, sfc, ist = DRMP3_HDR_TEST_I_STEREO(hdr) && ch;
+        sfc = gr->scalefac_compress >> ist;
+        for (k = ist*3*4; sfc >= 0; sfc -= modprod, k += 4)
+        {
+            for (modprod = 1, i = 3; i >= 0; i--)
+            {
+                scf_size[i] = (drmp3_uint8)(sfc / modprod % g_mod[k + i]);
+                modprod *= g_mod[k + i];
+            }
+        }
+        scf_partition += k;
+        scfsi = -16;
+    }
+    drmp3_L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi);
+
+    if (gr->n_short_sfb)
+    {
+        int sh = 3 - scf_shift;
+        for (i = 0; i < gr->n_short_sfb; i += 3)
+        {
+            iscf[gr->n_long_sfb + i + 0] = (drmp3_uint8)(iscf[gr->n_long_sfb + i + 0] + (gr->subblock_gain[0] << sh));
+            iscf[gr->n_long_sfb + i + 1] = (drmp3_uint8)(iscf[gr->n_long_sfb + i + 1] + (gr->subblock_gain[1] << sh));
+            iscf[gr->n_long_sfb + i + 2] = (drmp3_uint8)(iscf[gr->n_long_sfb + i + 2] + (gr->subblock_gain[2] << sh));
+        }
+    } else if (gr->preflag)
+    {
+        static const drmp3_uint8 g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 };
+        for (i = 0; i < 10; i++)
+        {
+            iscf[11 + i] = (drmp3_uint8)(iscf[11 + i] + g_preamp[i]);
+        }
+    }
+
+    gain_exp = gr->global_gain + DRMP3_BITS_DEQUANTIZER_OUT*4 - 210 - (DRMP3_HDR_IS_MS_STEREO(hdr) ? 2 : 0);
+    gain = drmp3_L3_ldexp_q2(1 << (DRMP3_MAX_SCFI/4),  DRMP3_MAX_SCFI - gain_exp);
+    for (i = 0; i < (int)(gr->n_long_sfb + gr->n_short_sfb); i++)
+    {
+        scf[i] = drmp3_L3_ldexp_q2(gain, iscf[i] << scf_shift);
+    }
+}
+
+static const float g_drmp3_pow43[129 + 16] = {
+    0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f,
+    0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f
+};
+
+static float drmp3_L3_pow_43(int x)
+{
+    float frac;
+    int sign, mult = 256;
+
+    if (x < 129)
+    {
+        return g_drmp3_pow43[16 + x];
+    }
+
+    if (x < 1024)
+    {
+        mult = 16;
+        x <<= 3;
+    }
+
+    sign = 2*x & 64;
+    frac = (float)((x & 63) - sign) / ((x & ~63) + sign);
+    return g_drmp3_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult;
+}
+
+static void drmp3_L3_huffman(float *dst, drmp3_bs *bs, const drmp3_L3_gr_info *gr_info, const float *scf, int layer3gr_limit)
+{
+    static const drmp3_int16 tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
+        -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288,
+        -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288,
+        -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258,
+        -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259,
+        -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258,
+        -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258,
+        -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259,
+        -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258,
+        -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290,
+        -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259,
+        -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258,
+        -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259,
+        -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258,
+        -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 };
+    static const drmp3_uint8 tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205};
+    static const drmp3_uint8 tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 };
+    static const drmp3_int16 tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 };
+    static const drmp3_uint8 g_linbits[] =  { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 };
+
+#define DRMP3_PEEK_BITS(n)    (bs_cache >> (32 - (n)))
+#define DRMP3_FLUSH_BITS(n)   { bs_cache <<= (n); bs_sh += (n); }
+#define DRMP3_CHECK_BITS      while (bs_sh >= 0) { bs_cache |= (drmp3_uint32)*bs_next_ptr++ << bs_sh; bs_sh -= 8; }
+#define DRMP3_BSPOS           ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh)
+
+    float one = 0.0f;
+    int ireg = 0, big_val_cnt = gr_info->big_values;
+    const drmp3_uint8 *sfb = gr_info->sfbtab;
+    const drmp3_uint8 *bs_next_ptr = bs->buf + bs->pos/8;
+    drmp3_uint32 bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7);
+    int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8;
+    bs_next_ptr += 4;
+
+    while (big_val_cnt > 0)
+    {
+        int tab_num = gr_info->table_select[ireg];
+        int sfb_cnt = gr_info->region_count[ireg++];
+        const drmp3_int16 *codebook = tabs + tabindex[tab_num];
+        int linbits = g_linbits[tab_num];
+        if (linbits)
+        {
+            do
+            {
+                np = *sfb++ / 2;
+                pairs_to_decode = DRMP3_MIN(big_val_cnt, np);
+                one = *scf++;
+                do
+                {
+                    int j, w = 5;
+                    int leaf = codebook[DRMP3_PEEK_BITS(w)];
+                    while (leaf < 0)
+                    {
+                        DRMP3_FLUSH_BITS(w);
+                        w = leaf & 7;
+                        leaf = codebook[DRMP3_PEEK_BITS(w) - (leaf >> 3)];
+                    }
+                    DRMP3_FLUSH_BITS(leaf >> 8);
+
+                    for (j = 0; j < 2; j++, dst++, leaf >>= 4)
+                    {
+                        int lsb = leaf & 0x0F;
+                        if (lsb == 15)
+                        {
+                            lsb += DRMP3_PEEK_BITS(linbits);
+                            DRMP3_FLUSH_BITS(linbits);
+                            DRMP3_CHECK_BITS;
+                            *dst = one*drmp3_L3_pow_43(lsb)*((drmp3_int32)bs_cache < 0 ? -1: 1);
+                        } else
+                        {
+                            *dst = g_drmp3_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
+                        }
+                        DRMP3_FLUSH_BITS(lsb ? 1 : 0);
+                    }
+                    DRMP3_CHECK_BITS;
+                } while (--pairs_to_decode);
+            } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
+        } else
+        {
+            do
+            {
+                np = *sfb++ / 2;
+                pairs_to_decode = DRMP3_MIN(big_val_cnt, np);
+                one = *scf++;
+                do
+                {
+                    int j, w = 5;
+                    int leaf = codebook[DRMP3_PEEK_BITS(w)];
+                    while (leaf < 0)
+                    {
+                        DRMP3_FLUSH_BITS(w);
+                        w = leaf & 7;
+                        leaf = codebook[DRMP3_PEEK_BITS(w) - (leaf >> 3)];
+                    }
+                    DRMP3_FLUSH_BITS(leaf >> 8);
+
+                    for (j = 0; j < 2; j++, dst++, leaf >>= 4)
+                    {
+                        int lsb = leaf & 0x0F;
+                        *dst = g_drmp3_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
+                        DRMP3_FLUSH_BITS(lsb ? 1 : 0);
+                    }
+                    DRMP3_CHECK_BITS;
+                } while (--pairs_to_decode);
+            } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
+        }
+    }
+
+    for (np = 1 - big_val_cnt;; dst += 4)
+    {
+        const drmp3_uint8 *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32;
+        int leaf = codebook_count1[DRMP3_PEEK_BITS(4)];
+        if (!(leaf & 8))
+        {
+            leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))];
+        }
+        DRMP3_FLUSH_BITS(leaf & 7);
+        if (DRMP3_BSPOS > layer3gr_limit)
+        {
+            break;
+        }
+#define DRMP3_RELOAD_SCALEFACTOR  if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; }
+#define DRMP3_DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((drmp3_int32)bs_cache < 0) ? -one : one; DRMP3_FLUSH_BITS(1) }
+        DRMP3_RELOAD_SCALEFACTOR;
+        DRMP3_DEQ_COUNT1(0);
+        DRMP3_DEQ_COUNT1(1);
+        DRMP3_RELOAD_SCALEFACTOR;
+        DRMP3_DEQ_COUNT1(2);
+        DRMP3_DEQ_COUNT1(3);
+        DRMP3_CHECK_BITS;
+    }
+
+    bs->pos = layer3gr_limit;
+}
+
+static void drmp3_L3_midside_stereo(float *left, int n)
+{
+    int i = 0;
+    float *right = left + 576;
+#if DRMP3_HAVE_SIMD
+    if (drmp3_have_simd())
+    {
+        for (; i < n - 3; i += 4)
+        {
+            drmp3_f4 vl = DRMP3_VLD(left + i);
+            drmp3_f4 vr = DRMP3_VLD(right + i);
+            DRMP3_VSTORE(left + i, DRMP3_VADD(vl, vr));
+            DRMP3_VSTORE(right + i, DRMP3_VSUB(vl, vr));
+        }
+#ifdef __GNUC__
+        /* Workaround for spurious -Waggressive-loop-optimizations warning from gcc.
+         * For more info see: https://github.com/lieff/minimp3/issues/88
+         */
+        if (__builtin_constant_p(n % 4 == 0) && n % 4 == 0)
+            return;
+#endif
+    }
+#endif
+    for (; i < n; i++)
+    {
+        float a = left[i];
+        float b = right[i];
+        left[i] = a + b;
+        right[i] = a - b;
+    }
+}
+
+static void drmp3_L3_intensity_stereo_band(float *left, int n, float kl, float kr)
+{
+    int i;
+    for (i = 0; i < n; i++)
+    {
+        left[i + 576] = left[i]*kr;
+        left[i] = left[i]*kl;
+    }
+}
+
+static void drmp3_L3_stereo_top_band(const float *right, const drmp3_uint8 *sfb, int nbands, int max_band[3])
+{
+    int i, k;
+
+    max_band[0] = max_band[1] = max_band[2] = -1;
+
+    for (i = 0; i < nbands; i++)
+    {
+        for (k = 0; k < sfb[i]; k += 2)
+        {
+            if (right[k] != 0 || right[k + 1] != 0)
+            {
+                max_band[i % 3] = i;
+                break;
+            }
+        }
+        right += sfb[i];
+    }
+}
+
+static void drmp3_L3_stereo_process(float *left, const drmp3_uint8 *ist_pos, const drmp3_uint8 *sfb, const drmp3_uint8 *hdr, int max_band[3], int mpeg2_sh)
+{
+    static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 };
+    unsigned i, max_pos = DRMP3_HDR_TEST_MPEG1(hdr) ? 7 : 64;
+
+    for (i = 0; sfb[i]; i++)
+    {
+        unsigned ipos = ist_pos[i];
+        if ((int)i > max_band[i % 3] && ipos < max_pos)
+        {
+            float kl, kr, s = DRMP3_HDR_TEST_MS_STEREO(hdr) ? 1.41421356f : 1;
+            if (DRMP3_HDR_TEST_MPEG1(hdr))
+            {
+                kl = g_pan[2*ipos];
+                kr = g_pan[2*ipos + 1];
+            } else
+            {
+                kl = 1;
+                kr = drmp3_L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);
+                if (ipos & 1)
+                {
+                    kl = kr;
+                    kr = 1;
+                }
+            }
+            drmp3_L3_intensity_stereo_band(left, sfb[i], kl*s, kr*s);
+        } else if (DRMP3_HDR_TEST_MS_STEREO(hdr))
+        {
+            drmp3_L3_midside_stereo(left, sfb[i]);
+        }
+        left += sfb[i];
+    }
+}
+
+static void drmp3_L3_intensity_stereo(float *left, drmp3_uint8 *ist_pos, const drmp3_L3_gr_info *gr, const drmp3_uint8 *hdr)
+{
+    int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb;
+    int i, max_blocks = gr->n_short_sfb ? 3 : 1;
+
+    drmp3_L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band);
+    if (gr->n_long_sfb)
+    {
+        max_band[0] = max_band[1] = max_band[2] = DRMP3_MAX(DRMP3_MAX(max_band[0], max_band[1]), max_band[2]);
+    }
+    for (i = 0; i < max_blocks; i++)
+    {
+        int default_pos = DRMP3_HDR_TEST_MPEG1(hdr) ? 3 : 0;
+        int itop = n_sfb - max_blocks + i;
+        int prev = itop - max_blocks;
+        ist_pos[itop] = (drmp3_uint8)(max_band[i] >= prev ? default_pos : ist_pos[prev]);
+    }
+    drmp3_L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1);
+}
+
+static void drmp3_L3_reorder(float *grbuf, float *scratch, const drmp3_uint8 *sfb)
+{
+    int i, len;
+    float *src = grbuf, *dst = scratch;
+
+    for (;0 != (len = *sfb); sfb += 3, src += 2*len)
+    {
+        for (i = 0; i < len; i++, src++)
+        {
+            *dst++ = src[0*len];
+            *dst++ = src[1*len];
+            *dst++ = src[2*len];
+        }
+    }
+    DRMP3_COPY_MEMORY(grbuf, scratch, (dst - scratch)*sizeof(float));
+}
+
+static void drmp3_L3_antialias(float *grbuf, int nbands)
+{
+    static const float g_aa[2][8] = {
+        {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f},
+        {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f}
+    };
+
+    for (; nbands > 0; nbands--, grbuf += 18)
+    {
+        int i = 0;
+#if DRMP3_HAVE_SIMD
+        if (drmp3_have_simd()) for (; i < 8; i += 4)
+        {
+            drmp3_f4 vu = DRMP3_VLD(grbuf + 18 + i);
+            drmp3_f4 vd = DRMP3_VLD(grbuf + 14 - i);
+            drmp3_f4 vc0 = DRMP3_VLD(g_aa[0] + i);
+            drmp3_f4 vc1 = DRMP3_VLD(g_aa[1] + i);
+            vd = DRMP3_VREV(vd);
+            DRMP3_VSTORE(grbuf + 18 + i, DRMP3_VSUB(DRMP3_VMUL(vu, vc0), DRMP3_VMUL(vd, vc1)));
+            vd = DRMP3_VADD(DRMP3_VMUL(vu, vc1), DRMP3_VMUL(vd, vc0));
+            DRMP3_VSTORE(grbuf + 14 - i, DRMP3_VREV(vd));
+        }
+#endif
+#ifndef DR_MP3_ONLY_SIMD
+        for(; i < 8; i++)
+        {
+            float u = grbuf[18 + i];
+            float d = grbuf[17 - i];
+            grbuf[18 + i] = u*g_aa[0][i] - d*g_aa[1][i];
+            grbuf[17 - i] = u*g_aa[1][i] + d*g_aa[0][i];
+        }
+#endif
+    }
+}
+
+static void drmp3_L3_dct3_9(float *y)
+{
+    float s0, s1, s2, s3, s4, s5, s6, s7, s8, t0, t2, t4;
+
+    s0 = y[0]; s2 = y[2]; s4 = y[4]; s6 = y[6]; s8 = y[8];
+    t0 = s0 + s6*0.5f;
+    s0 -= s6;
+    t4 = (s4 + s2)*0.93969262f;
+    t2 = (s8 + s2)*0.76604444f;
+    s6 = (s4 - s8)*0.17364818f;
+    s4 += s8 - s2;
+
+    s2 = s0 - s4*0.5f;
+    y[4] = s4 + s0;
+    s8 = t0 - t2 + s6;
+    s0 = t0 - t4 + t2;
+    s4 = t0 + t4 - s6;
+
+    s1 = y[1]; s3 = y[3]; s5 = y[5]; s7 = y[7];
+
+    s3 *= 0.86602540f;
+    t0 = (s5 + s1)*0.98480775f;
+    t4 = (s5 - s7)*0.34202014f;
+    t2 = (s1 + s7)*0.64278761f;
+    s1 = (s1 - s5 - s7)*0.86602540f;
+
+    s5 = t0 - s3 - t2;
+    s7 = t4 - s3 - t0;
+    s3 = t4 + s3 - t2;
+
+    y[0] = s4 - s7;
+    y[1] = s2 + s1;
+    y[2] = s0 - s3;
+    y[3] = s8 + s5;
+    y[5] = s8 - s5;
+    y[6] = s0 + s3;
+    y[7] = s2 - s1;
+    y[8] = s4 + s7;
+}
+
+static void drmp3_L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands)
+{
+    int i, j;
+    static const float g_twid9[18] = {
+        0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f
+    };
+
+    for (j = 0; j < nbands; j++, grbuf += 18, overlap += 9)
+    {
+        float co[9], si[9];
+        co[0] = -grbuf[0];
+        si[0] = grbuf[17];
+        for (i = 0; i < 4; i++)
+        {
+            si[8 - 2*i] =   grbuf[4*i + 1] - grbuf[4*i + 2];
+            co[1 + 2*i] =   grbuf[4*i + 1] + grbuf[4*i + 2];
+            si[7 - 2*i] =   grbuf[4*i + 4] - grbuf[4*i + 3];
+            co[2 + 2*i] = -(grbuf[4*i + 3] + grbuf[4*i + 4]);
+        }
+        drmp3_L3_dct3_9(co);
+        drmp3_L3_dct3_9(si);
+
+        si[1] = -si[1];
+        si[3] = -si[3];
+        si[5] = -si[5];
+        si[7] = -si[7];
+
+        i = 0;
+
+#if DRMP3_HAVE_SIMD
+        if (drmp3_have_simd()) for (; i < 8; i += 4)
+        {
+            drmp3_f4 vovl = DRMP3_VLD(overlap + i);
+            drmp3_f4 vc = DRMP3_VLD(co + i);
+            drmp3_f4 vs = DRMP3_VLD(si + i);
+            drmp3_f4 vr0 = DRMP3_VLD(g_twid9 + i);
+            drmp3_f4 vr1 = DRMP3_VLD(g_twid9 + 9 + i);
+            drmp3_f4 vw0 = DRMP3_VLD(window + i);
+            drmp3_f4 vw1 = DRMP3_VLD(window + 9 + i);
+            drmp3_f4 vsum = DRMP3_VADD(DRMP3_VMUL(vc, vr1), DRMP3_VMUL(vs, vr0));
+            DRMP3_VSTORE(overlap + i, DRMP3_VSUB(DRMP3_VMUL(vc, vr0), DRMP3_VMUL(vs, vr1)));
+            DRMP3_VSTORE(grbuf + i, DRMP3_VSUB(DRMP3_VMUL(vovl, vw0), DRMP3_VMUL(vsum, vw1)));
+            vsum = DRMP3_VADD(DRMP3_VMUL(vovl, vw1), DRMP3_VMUL(vsum, vw0));
+            DRMP3_VSTORE(grbuf + 14 - i, DRMP3_VREV(vsum));
+        }
+#endif
+        for (; i < 9; i++)
+        {
+            float ovl  = overlap[i];
+            float sum  = co[i]*g_twid9[9 + i] + si[i]*g_twid9[0 + i];
+            overlap[i] = co[i]*g_twid9[0 + i] - si[i]*g_twid9[9 + i];
+            grbuf[i]      = ovl*window[0 + i] - sum*window[9 + i];
+            grbuf[17 - i] = ovl*window[9 + i] + sum*window[0 + i];
+        }
+    }
+}
+
+static void drmp3_L3_idct3(float x0, float x1, float x2, float *dst)
+{
+    float m1 = x1*0.86602540f;
+    float a1 = x0 - x2*0.5f;
+    dst[1] = x0 + x2;
+    dst[0] = a1 + m1;
+    dst[2] = a1 - m1;
+}
+
+static void drmp3_L3_imdct12(float *x, float *dst, float *overlap)
+{
+    static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f };
+    float co[3], si[3];
+    int i;
+
+    drmp3_L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co);
+    drmp3_L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si);
+    si[1] = -si[1];
+
+    for (i = 0; i < 3; i++)
+    {
+        float ovl  = overlap[i];
+        float sum  = co[i]*g_twid3[3 + i] + si[i]*g_twid3[0 + i];
+        overlap[i] = co[i]*g_twid3[0 + i] - si[i]*g_twid3[3 + i];
+        dst[i]     = ovl*g_twid3[2 - i] - sum*g_twid3[5 - i];
+        dst[5 - i] = ovl*g_twid3[5 - i] + sum*g_twid3[2 - i];
+    }
+}
+
+static void drmp3_L3_imdct_short(float *grbuf, float *overlap, int nbands)
+{
+    for (;nbands > 0; nbands--, overlap += 9, grbuf += 18)
+    {
+        float tmp[18];
+        DRMP3_COPY_MEMORY(tmp, grbuf, sizeof(tmp));
+        DRMP3_COPY_MEMORY(grbuf, overlap, 6*sizeof(float));
+        drmp3_L3_imdct12(tmp, grbuf + 6, overlap + 6);
+        drmp3_L3_imdct12(tmp + 1, grbuf + 12, overlap + 6);
+        drmp3_L3_imdct12(tmp + 2, overlap, overlap + 6);
+    }
+}
+
+static void drmp3_L3_change_sign(float *grbuf)
+{
+    int b, i;
+    for (b = 0, grbuf += 18; b < 32; b += 2, grbuf += 36)
+        for (i = 1; i < 18; i += 2)
+            grbuf[i] = -grbuf[i];
+}
+
+static void drmp3_L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands)
+{
+    static const float g_mdct_window[2][18] = {
+        { 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f },
+        { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f }
+    };
+    if (n_long_bands)
+    {
+        drmp3_L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands);
+        grbuf += 18*n_long_bands;
+        overlap += 9*n_long_bands;
+    }
+    if (block_type == DRMP3_SHORT_BLOCK_TYPE)
+        drmp3_L3_imdct_short(grbuf, overlap, 32 - n_long_bands);
+    else
+        drmp3_L3_imdct36(grbuf, overlap, g_mdct_window[block_type == DRMP3_STOP_BLOCK_TYPE], 32 - n_long_bands);
+}
+
+static void drmp3_L3_save_reservoir(drmp3dec *h, drmp3dec_scratch *s)
+{
+    int pos = (s->bs.pos + 7)/8u;
+    int remains = s->bs.limit/8u - pos;
+    if (remains > DRMP3_MAX_BITRESERVOIR_BYTES)
+    {
+        pos += remains - DRMP3_MAX_BITRESERVOIR_BYTES;
+        remains = DRMP3_MAX_BITRESERVOIR_BYTES;
+    }
+    if (remains > 0)
+    {
+        DRMP3_MOVE_MEMORY(h->reserv_buf, s->maindata + pos, remains);
+    }
+    h->reserv = remains;
+}
+
+static int drmp3_L3_restore_reservoir(drmp3dec *h, drmp3_bs *bs, drmp3dec_scratch *s, int main_data_begin)
+{
+    int frame_bytes = (bs->limit - bs->pos)/8;
+    int bytes_have = DRMP3_MIN(h->reserv, main_data_begin);
+    DRMP3_COPY_MEMORY(s->maindata, h->reserv_buf + DRMP3_MAX(0, h->reserv - main_data_begin), DRMP3_MIN(h->reserv, main_data_begin));
+    DRMP3_COPY_MEMORY(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);
+    drmp3_bs_init(&s->bs, s->maindata, bytes_have + frame_bytes);
+    return h->reserv >= main_data_begin;
+}
+
+static void drmp3_L3_decode(drmp3dec *h, drmp3dec_scratch *s, drmp3_L3_gr_info *gr_info, int nch)
+{
+    int ch;
+
+    for (ch = 0; ch < nch; ch++)
+    {
+        int layer3gr_limit = s->bs.pos + gr_info[ch].part_23_length;
+        drmp3_L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr_info + ch, s->scf, ch);
+        drmp3_L3_huffman(s->grbuf[ch], &s->bs, gr_info + ch, s->scf, layer3gr_limit);
+    }
+
+    if (DRMP3_HDR_TEST_I_STEREO(h->header))
+    {
+        drmp3_L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header);
+    } else if (DRMP3_HDR_IS_MS_STEREO(h->header))
+    {
+        drmp3_L3_midside_stereo(s->grbuf[0], 576);
+    }
+
+    for (ch = 0; ch < nch; ch++, gr_info++)
+    {
+        int aa_bands = 31;
+        int n_long_bands = (gr_info->mixed_block_flag ? 2 : 0) << (int)(DRMP3_HDR_GET_MY_SAMPLE_RATE(h->header) == 2);
+
+        if (gr_info->n_short_sfb)
+        {
+            aa_bands = n_long_bands - 1;
+            drmp3_L3_reorder(s->grbuf[ch] + n_long_bands*18, s->syn[0], gr_info->sfbtab + gr_info->n_long_sfb);
+        }
+
+        drmp3_L3_antialias(s->grbuf[ch], aa_bands);
+        drmp3_L3_imdct_gr(s->grbuf[ch], h->mdct_overlap[ch], gr_info->block_type, n_long_bands);
+        drmp3_L3_change_sign(s->grbuf[ch]);
+    }
+}
+
+static void drmp3d_DCT_II(float *grbuf, int n)
+{
+    static const float g_sec[24] = {
+        10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f
+    };
+    int i, k = 0;
+#if DRMP3_HAVE_SIMD
+    if (drmp3_have_simd()) for (; k < n; k += 4)
+    {
+        drmp3_f4 t[4][8], *x;
+        float *y = grbuf + k;
+
+        for (x = t[0], i = 0; i < 8; i++, x++)
+        {
+            drmp3_f4 x0 = DRMP3_VLD(&y[i*18]);
+            drmp3_f4 x1 = DRMP3_VLD(&y[(15 - i)*18]);
+            drmp3_f4 x2 = DRMP3_VLD(&y[(16 + i)*18]);
+            drmp3_f4 x3 = DRMP3_VLD(&y[(31 - i)*18]);
+            drmp3_f4 t0 = DRMP3_VADD(x0, x3);
+            drmp3_f4 t1 = DRMP3_VADD(x1, x2);
+            drmp3_f4 t2 = DRMP3_VMUL_S(DRMP3_VSUB(x1, x2), g_sec[3*i + 0]);
+            drmp3_f4 t3 = DRMP3_VMUL_S(DRMP3_VSUB(x0, x3), g_sec[3*i + 1]);
+            x[0] = DRMP3_VADD(t0, t1);
+            x[8] = DRMP3_VMUL_S(DRMP3_VSUB(t0, t1), g_sec[3*i + 2]);
+            x[16] = DRMP3_VADD(t3, t2);
+            x[24] = DRMP3_VMUL_S(DRMP3_VSUB(t3, t2), g_sec[3*i + 2]);
+        }
+        for (x = t[0], i = 0; i < 4; i++, x += 8)
+        {
+            drmp3_f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
+            xt = DRMP3_VSUB(x0, x7); x0 = DRMP3_VADD(x0, x7);
+            x7 = DRMP3_VSUB(x1, x6); x1 = DRMP3_VADD(x1, x6);
+            x6 = DRMP3_VSUB(x2, x5); x2 = DRMP3_VADD(x2, x5);
+            x5 = DRMP3_VSUB(x3, x4); x3 = DRMP3_VADD(x3, x4);
+            x4 = DRMP3_VSUB(x0, x3); x0 = DRMP3_VADD(x0, x3);
+            x3 = DRMP3_VSUB(x1, x2); x1 = DRMP3_VADD(x1, x2);
+            x[0] = DRMP3_VADD(x0, x1);
+            x[4] = DRMP3_VMUL_S(DRMP3_VSUB(x0, x1), 0.70710677f);
+            x5 = DRMP3_VADD(x5, x6);
+            x6 = DRMP3_VMUL_S(DRMP3_VADD(x6, x7), 0.70710677f);
+            x7 = DRMP3_VADD(x7, xt);
+            x3 = DRMP3_VMUL_S(DRMP3_VADD(x3, x4), 0.70710677f);
+            x5 = DRMP3_VSUB(x5, DRMP3_VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */
+            x7 = DRMP3_VADD(x7, DRMP3_VMUL_S(x5, 0.382683432f));
+            x5 = DRMP3_VSUB(x5, DRMP3_VMUL_S(x7, 0.198912367f));
+            x0 = DRMP3_VSUB(xt, x6); xt = DRMP3_VADD(xt, x6);
+            x[1] = DRMP3_VMUL_S(DRMP3_VADD(xt, x7), 0.50979561f);
+            x[2] = DRMP3_VMUL_S(DRMP3_VADD(x4, x3), 0.54119611f);
+            x[3] = DRMP3_VMUL_S(DRMP3_VSUB(x0, x5), 0.60134488f);
+            x[5] = DRMP3_VMUL_S(DRMP3_VADD(x0, x5), 0.89997619f);
+            x[6] = DRMP3_VMUL_S(DRMP3_VSUB(x4, x3), 1.30656302f);
+            x[7] = DRMP3_VMUL_S(DRMP3_VSUB(xt, x7), 2.56291556f);
+        }
+
+        if (k > n - 3)
+        {
+#if DRMP3_HAVE_SSE
+#define DRMP3_VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v)
+#else
+#define DRMP3_VSAVE2(i, v) vst1_f32((float32_t *)&y[(i)*18],  vget_low_f32(v))
+#endif
+            for (i = 0; i < 7; i++, y += 4*18)
+            {
+                drmp3_f4 s = DRMP3_VADD(t[3][i], t[3][i + 1]);
+                DRMP3_VSAVE2(0, t[0][i]);
+                DRMP3_VSAVE2(1, DRMP3_VADD(t[2][i], s));
+                DRMP3_VSAVE2(2, DRMP3_VADD(t[1][i], t[1][i + 1]));
+                DRMP3_VSAVE2(3, DRMP3_VADD(t[2][1 + i], s));
+            }
+            DRMP3_VSAVE2(0, t[0][7]);
+            DRMP3_VSAVE2(1, DRMP3_VADD(t[2][7], t[3][7]));
+            DRMP3_VSAVE2(2, t[1][7]);
+            DRMP3_VSAVE2(3, t[3][7]);
+        } else
+        {
+#define DRMP3_VSAVE4(i, v) DRMP3_VSTORE(&y[(i)*18], v)
+            for (i = 0; i < 7; i++, y += 4*18)
+            {
+                drmp3_f4 s = DRMP3_VADD(t[3][i], t[3][i + 1]);
+                DRMP3_VSAVE4(0, t[0][i]);
+                DRMP3_VSAVE4(1, DRMP3_VADD(t[2][i], s));
+                DRMP3_VSAVE4(2, DRMP3_VADD(t[1][i], t[1][i + 1]));
+                DRMP3_VSAVE4(3, DRMP3_VADD(t[2][1 + i], s));
+            }
+            DRMP3_VSAVE4(0, t[0][7]);
+            DRMP3_VSAVE4(1, DRMP3_VADD(t[2][7], t[3][7]));
+            DRMP3_VSAVE4(2, t[1][7]);
+            DRMP3_VSAVE4(3, t[3][7]);
+        }
+    } else
+#endif
+#ifdef DR_MP3_ONLY_SIMD
+    {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */
+#else
+    for (; k < n; k++)
+    {
+        float t[4][8], *x, *y = grbuf + k;
+
+        for (x = t[0], i = 0; i < 8; i++, x++)
+        {
+            float x0 = y[i*18];
+            float x1 = y[(15 - i)*18];
+            float x2 = y[(16 + i)*18];
+            float x3 = y[(31 - i)*18];
+            float t0 = x0 + x3;
+            float t1 = x1 + x2;
+            float t2 = (x1 - x2)*g_sec[3*i + 0];
+            float t3 = (x0 - x3)*g_sec[3*i + 1];
+            x[0] = t0 + t1;
+            x[8] = (t0 - t1)*g_sec[3*i + 2];
+            x[16] = t3 + t2;
+            x[24] = (t3 - t2)*g_sec[3*i + 2];
+        }
+        for (x = t[0], i = 0; i < 4; i++, x += 8)
+        {
+            float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
+            xt = x0 - x7; x0 += x7;
+            x7 = x1 - x6; x1 += x6;
+            x6 = x2 - x5; x2 += x5;
+            x5 = x3 - x4; x3 += x4;
+            x4 = x0 - x3; x0 += x3;
+            x3 = x1 - x2; x1 += x2;
+            x[0] = x0 + x1;
+            x[4] = (x0 - x1)*0.70710677f;
+            x5 =  x5 + x6;
+            x6 = (x6 + x7)*0.70710677f;
+            x7 =  x7 + xt;
+            x3 = (x3 + x4)*0.70710677f;
+            x5 -= x7*0.198912367f;  /* rotate by PI/8 */
+            x7 += x5*0.382683432f;
+            x5 -= x7*0.198912367f;
+            x0 = xt - x6; xt += x6;
+            x[1] = (xt + x7)*0.50979561f;
+            x[2] = (x4 + x3)*0.54119611f;
+            x[3] = (x0 - x5)*0.60134488f;
+            x[5] = (x0 + x5)*0.89997619f;
+            x[6] = (x4 - x3)*1.30656302f;
+            x[7] = (xt - x7)*2.56291556f;
+
+        }
+        for (i = 0; i < 7; i++, y += 4*18)
+        {
+            y[0*18] = t[0][i];
+            y[1*18] = t[2][i] + t[3][i] + t[3][i + 1];
+            y[2*18] = t[1][i] + t[1][i + 1];
+            y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1];
+        }
+        y[0*18] = t[0][7];
+        y[1*18] = t[2][7] + t[3][7];
+        y[2*18] = t[1][7];
+        y[3*18] = t[3][7];
+    }
+#endif
+}
+
+#ifndef DR_MP3_FLOAT_OUTPUT
+typedef drmp3_int16 drmp3d_sample_t;
+
+static drmp3_int16 drmp3d_scale_pcm(float sample)
+{
+    drmp3_int16 s;
+#if DRMP3_HAVE_ARMV6
+    drmp3_int32 s32 = (drmp3_int32)(sample + .5f);
+    s32 -= (s32 < 0);
+    s = (drmp3_int16)drmp3_clip_int16_arm(s32);
+#else
+    if (sample >=  32766.5) return (drmp3_int16) 32767;
+    if (sample <= -32767.5) return (drmp3_int16)-32768;
+    s = (drmp3_int16)(sample + .5f);
+    s -= (s < 0);   /* away from zero, to be compliant */
+#endif
+    return s;
+}
+#else
+typedef float drmp3d_sample_t;
+
+static float drmp3d_scale_pcm(float sample)
+{
+    return sample*(1.f/32768.f);
+}
+#endif
+
+static void drmp3d_synth_pair(drmp3d_sample_t *pcm, int nch, const float *z)
+{
+    float a;
+    a  = (z[14*64] - z[    0]) * 29;
+    a += (z[ 1*64] + z[13*64]) * 213;
+    a += (z[12*64] - z[ 2*64]) * 459;
+    a += (z[ 3*64] + z[11*64]) * 2037;
+    a += (z[10*64] - z[ 4*64]) * 5153;
+    a += (z[ 5*64] + z[ 9*64]) * 6574;
+    a += (z[ 8*64] - z[ 6*64]) * 37489;
+    a +=  z[ 7*64]             * 75038;
+    pcm[0] = drmp3d_scale_pcm(a);
+
+    z += 2;
+    a  = z[14*64] * 104;
+    a += z[12*64] * 1567;
+    a += z[10*64] * 9727;
+    a += z[ 8*64] * 64019;
+    a += z[ 6*64] * -9975;
+    a += z[ 4*64] * -45;
+    a += z[ 2*64] * 146;
+    a += z[ 0*64] * -5;
+    pcm[16*nch] = drmp3d_scale_pcm(a);
+}
+
+static void drmp3d_synth(float *xl, drmp3d_sample_t *dstl, int nch, float *lins)
+{
+    int i;
+    float *xr = xl + 576*(nch - 1);
+    drmp3d_sample_t *dstr = dstl + (nch - 1);
+
+    static const float g_win[] = {
+        -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,
+        -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856,
+        -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630,
+        -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313,
+        -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908,
+        -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415,
+        -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835,
+        -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169,
+        -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420,
+        -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590,
+        -3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679,
+        -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692,
+        -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629,
+        -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494,
+        -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290
+    };
+    float *zlin = lins + 15*64;
+    const float *w = g_win;
+
+    zlin[4*15]     = xl[18*16];
+    zlin[4*15 + 1] = xr[18*16];
+    zlin[4*15 + 2] = xl[0];
+    zlin[4*15 + 3] = xr[0];
+
+    zlin[4*31]     = xl[1 + 18*16];
+    zlin[4*31 + 1] = xr[1 + 18*16];
+    zlin[4*31 + 2] = xl[1];
+    zlin[4*31 + 3] = xr[1];
+
+    drmp3d_synth_pair(dstr, nch, lins + 4*15 + 1);
+    drmp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1);
+    drmp3d_synth_pair(dstl, nch, lins + 4*15);
+    drmp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64);
+
+#if DRMP3_HAVE_SIMD
+    if (drmp3_have_simd()) for (i = 14; i >= 0; i--)
+    {
+#define DRMP3_VLOAD(k) drmp3_f4 w0 = DRMP3_VSET(*w++); drmp3_f4 w1 = DRMP3_VSET(*w++); drmp3_f4 vz = DRMP3_VLD(&zlin[4*i - 64*k]); drmp3_f4 vy = DRMP3_VLD(&zlin[4*i - 64*(15 - k)]);
+#define DRMP3_V0(k) { DRMP3_VLOAD(k) b =               DRMP3_VADD(DRMP3_VMUL(vz, w1), DRMP3_VMUL(vy, w0)) ; a =               DRMP3_VSUB(DRMP3_VMUL(vz, w0), DRMP3_VMUL(vy, w1));  }
+#define DRMP3_V1(k) { DRMP3_VLOAD(k) b = DRMP3_VADD(b, DRMP3_VADD(DRMP3_VMUL(vz, w1), DRMP3_VMUL(vy, w0))); a = DRMP3_VADD(a, DRMP3_VSUB(DRMP3_VMUL(vz, w0), DRMP3_VMUL(vy, w1))); }
+#define DRMP3_V2(k) { DRMP3_VLOAD(k) b = DRMP3_VADD(b, DRMP3_VADD(DRMP3_VMUL(vz, w1), DRMP3_VMUL(vy, w0))); a = DRMP3_VADD(a, DRMP3_VSUB(DRMP3_VMUL(vy, w1), DRMP3_VMUL(vz, w0))); }
+        drmp3_f4 a, b;
+        zlin[4*i]     = xl[18*(31 - i)];
+        zlin[4*i + 1] = xr[18*(31 - i)];
+        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
+        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
+        zlin[4*i + 64] = xl[1 + 18*(1 + i)];
+        zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)];
+        zlin[4*i - 64 + 2] = xl[18*(1 + i)];
+        zlin[4*i - 64 + 3] = xr[18*(1 + i)];
+
+        DRMP3_V0(0) DRMP3_V2(1) DRMP3_V1(2) DRMP3_V2(3) DRMP3_V1(4) DRMP3_V2(5) DRMP3_V1(6) DRMP3_V2(7)
+
+        {
+#ifndef DR_MP3_FLOAT_OUTPUT
+#if DRMP3_HAVE_SSE
+            static const drmp3_f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
+            static const drmp3_f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
+            __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
+                                           _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
+            dstr[(15 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 1);
+            dstr[(17 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 5);
+            dstl[(15 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 0);
+            dstl[(17 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 4);
+            dstr[(47 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 3);
+            dstr[(49 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 7);
+            dstl[(47 - i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 2);
+            dstl[(49 + i)*nch] = (drmp3_int16)_mm_extract_epi16(pcm8, 6);
+#else
+            int16x4_t pcma, pcmb;
+            a = DRMP3_VADD(a, DRMP3_VSET(0.5f));
+            b = DRMP3_VADD(b, DRMP3_VSET(0.5f));
+            pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, DRMP3_VSET(0)))));
+            pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, DRMP3_VSET(0)))));
+            vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1);
+            vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1);
+            vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0);
+            vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0);
+            vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3);
+            vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3);
+            vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2);
+            vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);
+#endif
+#else
+        #if DRMP3_HAVE_SSE
+            static const drmp3_f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
+        #else
+            const drmp3_f4 g_scale = vdupq_n_f32(1.0f/32768.0f);
+        #endif
+            a = DRMP3_VMUL(a, g_scale);
+            b = DRMP3_VMUL(b, g_scale);
+#if DRMP3_HAVE_SSE
+            _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
+            _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
+            _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
+            _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
+            _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
+            _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
+            _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
+            _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
+#else
+            vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);
+            vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);
+            vst1q_lane_f32(dstl + (15 - i)*nch, a, 0);
+            vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);
+            vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);
+            vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);
+            vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);
+            vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);
+#endif
+#endif /* DR_MP3_FLOAT_OUTPUT */
+        }
+    } else
+#endif
+#ifdef DR_MP3_ONLY_SIMD
+    {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */
+#else
+    for (i = 14; i >= 0; i--)
+    {
+#define DRMP3_LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64];
+#define DRMP3_S0(k) { int j; DRMP3_LOAD(k); for (j = 0; j < 4; j++) b[j]  = vz[j]*w1 + vy[j]*w0, a[j]  = vz[j]*w0 - vy[j]*w1; }
+#define DRMP3_S1(k) { int j; DRMP3_LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; }
+#define DRMP3_S2(k) { int j; DRMP3_LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; }
+        float a[4], b[4];
+
+        zlin[4*i]     = xl[18*(31 - i)];
+        zlin[4*i + 1] = xr[18*(31 - i)];
+        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
+        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
+        zlin[4*(i + 16)]   = xl[1 + 18*(1 + i)];
+        zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)];
+        zlin[4*(i - 16) + 2] = xl[18*(1 + i)];
+        zlin[4*(i - 16) + 3] = xr[18*(1 + i)];
+
+        DRMP3_S0(0) DRMP3_S2(1) DRMP3_S1(2) DRMP3_S2(3) DRMP3_S1(4) DRMP3_S2(5) DRMP3_S1(6) DRMP3_S2(7)
+
+        dstr[(15 - i)*nch] = drmp3d_scale_pcm(a[1]);
+        dstr[(17 + i)*nch] = drmp3d_scale_pcm(b[1]);
+        dstl[(15 - i)*nch] = drmp3d_scale_pcm(a[0]);
+        dstl[(17 + i)*nch] = drmp3d_scale_pcm(b[0]);
+        dstr[(47 - i)*nch] = drmp3d_scale_pcm(a[3]);
+        dstr[(49 + i)*nch] = drmp3d_scale_pcm(b[3]);
+        dstl[(47 - i)*nch] = drmp3d_scale_pcm(a[2]);
+        dstl[(49 + i)*nch] = drmp3d_scale_pcm(b[2]);
+    }
+#endif
+}
+
+static void drmp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, drmp3d_sample_t *pcm, float *lins)
+{
+    int i;
+    for (i = 0; i < nch; i++)
+    {
+        drmp3d_DCT_II(grbuf + 576*i, nbands);
+    }
+
+    DRMP3_COPY_MEMORY(lins, qmf_state, sizeof(float)*15*64);
+
+    for (i = 0; i < nbands; i += 2)
+    {
+        drmp3d_synth(grbuf + i, pcm + 32*nch*i, nch, lins + i*64);
+    }
+#ifndef DR_MP3_NONSTANDARD_BUT_LOGICAL
+    if (nch == 1)
+    {
+        for (i = 0; i < 15*64; i += 2)
+        {
+            qmf_state[i] = lins[nbands*64 + i];
+        }
+    } else
+#endif
+    {
+        DRMP3_COPY_MEMORY(qmf_state, lins + nbands*64, sizeof(float)*15*64);
+    }
+}
+
+static int drmp3d_match_frame(const drmp3_uint8 *hdr, int mp3_bytes, int frame_bytes)
+{
+    int i, nmatch;
+    for (i = 0, nmatch = 0; nmatch < DRMP3_MAX_FRAME_SYNC_MATCHES; nmatch++)
+    {
+        i += drmp3_hdr_frame_bytes(hdr + i, frame_bytes) + drmp3_hdr_padding(hdr + i);
+        if (i + DRMP3_HDR_SIZE > mp3_bytes)
+            return nmatch > 0;
+        if (!drmp3_hdr_compare(hdr, hdr + i))
+            return 0;
+    }
+    return 1;
+}
+
+static int drmp3d_find_frame(const drmp3_uint8 *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes)
+{
+    int i, k;
+    for (i = 0; i < mp3_bytes - DRMP3_HDR_SIZE; i++, mp3++)
+    {
+        if (drmp3_hdr_valid(mp3))
+        {
+            int frame_bytes = drmp3_hdr_frame_bytes(mp3, *free_format_bytes);
+            int frame_and_padding = frame_bytes + drmp3_hdr_padding(mp3);
+
+            for (k = DRMP3_HDR_SIZE; !frame_bytes && k < DRMP3_MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - DRMP3_HDR_SIZE; k++)
+            {
+                if (drmp3_hdr_compare(mp3, mp3 + k))
+                {
+                    int fb = k - drmp3_hdr_padding(mp3);
+                    int nextfb = fb + drmp3_hdr_padding(mp3 + k);
+                    if (i + k + nextfb + DRMP3_HDR_SIZE > mp3_bytes || !drmp3_hdr_compare(mp3, mp3 + k + nextfb))
+                        continue;
+                    frame_and_padding = k;
+                    frame_bytes = fb;
+                    *free_format_bytes = fb;
+                }
+            }
+
+            if ((frame_bytes && i + frame_and_padding <= mp3_bytes &&
+                drmp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) ||
+                (!i && frame_and_padding == mp3_bytes))
+            {
+                *ptr_frame_bytes = frame_and_padding;
+                return i;
+            }
+            *free_format_bytes = 0;
+        }
+    }
+    *ptr_frame_bytes = 0;
+    return mp3_bytes;
+}
+
+DRMP3_API void drmp3dec_init(drmp3dec *dec)
+{
+    dec->header[0] = 0;
+}
+
+DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int mp3_bytes, void *pcm, drmp3dec_frame_info *info)
+{
+    int i = 0, igr, frame_size = 0, success = 1;
+    const drmp3_uint8 *hdr;
+    drmp3_bs bs_frame[1];
+    drmp3dec_scratch scratch;
+
+    if (mp3_bytes > 4 && dec->header[0] == 0xff && drmp3_hdr_compare(dec->header, mp3))
+    {
+        frame_size = drmp3_hdr_frame_bytes(mp3, dec->free_format_bytes) + drmp3_hdr_padding(mp3);
+        if (frame_size != mp3_bytes && (frame_size + DRMP3_HDR_SIZE > mp3_bytes || !drmp3_hdr_compare(mp3, mp3 + frame_size)))
+        {
+            frame_size = 0;
+        }
+    }
+    if (!frame_size)
+    {
+        DRMP3_ZERO_MEMORY(dec, sizeof(drmp3dec));
+        i = drmp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
+        if (!frame_size || i + frame_size > mp3_bytes)
+        {
+            info->frame_bytes = i;
+            return 0;
+        }
+    }
+
+    hdr = mp3 + i;
+    DRMP3_COPY_MEMORY(dec->header, hdr, DRMP3_HDR_SIZE);
+    info->frame_bytes = i + frame_size;
+    info->channels = DRMP3_HDR_IS_MONO(hdr) ? 1 : 2;
+    info->hz = drmp3_hdr_sample_rate_hz(hdr);
+    info->layer = 4 - DRMP3_HDR_GET_LAYER(hdr);
+    info->bitrate_kbps = drmp3_hdr_bitrate_kbps(hdr);
+
+    drmp3_bs_init(bs_frame, hdr + DRMP3_HDR_SIZE, frame_size - DRMP3_HDR_SIZE);
+    if (DRMP3_HDR_IS_CRC(hdr))
+    {
+        drmp3_bs_get_bits(bs_frame, 16);
+    }
+
+    if (info->layer == 3)
+    {
+        int main_data_begin = drmp3_L3_read_side_info(bs_frame, scratch.gr_info, hdr);
+        if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit)
+        {
+            drmp3dec_init(dec);
+            return 0;
+        }
+        success = drmp3_L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);
+        if (success && pcm != NULL)
+        {
+            for (igr = 0; igr < (DRMP3_HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*576*info->channels))
+            {
+                DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
+                drmp3_L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);
+                drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]);
+            }
+        }
+        drmp3_L3_save_reservoir(dec, &scratch);
+    } else
+    {
+#ifdef DR_MP3_ONLY_MP3
+        return 0;
+#else
+        drmp3_L12_scale_info sci[1];
+
+        if (pcm == NULL) {
+            return drmp3_hdr_frame_samples(hdr);
+        }
+
+        drmp3_L12_read_scale_info(hdr, bs_frame, sci);
+
+        DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
+        for (i = 0, igr = 0; igr < 3; igr++)
+        {
+            if (12 == (i += drmp3_L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))
+            {
+                i = 0;
+                drmp3_L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);
+                drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]);
+                DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
+                pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*384*info->channels);
+            }
+            if (bs_frame->pos > bs_frame->limit)
+            {
+                drmp3dec_init(dec);
+                return 0;
+            }
+        }
+#endif
+    }
+
+    return success*drmp3_hdr_frame_samples(dec->header);
+}
+
+DRMP3_API void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, size_t num_samples)
+{
+    size_t i = 0;
+#if DRMP3_HAVE_SIMD
+    size_t aligned_count = num_samples & ~7;
+    for(; i < aligned_count; i+=8)
+    {
+        drmp3_f4 scale = DRMP3_VSET(32768.0f);
+        drmp3_f4 a = DRMP3_VMUL(DRMP3_VLD(&in[i  ]), scale);
+        drmp3_f4 b = DRMP3_VMUL(DRMP3_VLD(&in[i+4]), scale);
+#if DRMP3_HAVE_SSE
+        drmp3_f4 s16max = DRMP3_VSET( 32767.0f);
+        drmp3_f4 s16min = DRMP3_VSET(-32768.0f);
+        __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, s16max), s16min)),
+                                        _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, s16max), s16min)));
+        out[i  ] = (drmp3_int16)_mm_extract_epi16(pcm8, 0);
+        out[i+1] = (drmp3_int16)_mm_extract_epi16(pcm8, 1);
+        out[i+2] = (drmp3_int16)_mm_extract_epi16(pcm8, 2);
+        out[i+3] = (drmp3_int16)_mm_extract_epi16(pcm8, 3);
+        out[i+4] = (drmp3_int16)_mm_extract_epi16(pcm8, 4);
+        out[i+5] = (drmp3_int16)_mm_extract_epi16(pcm8, 5);
+        out[i+6] = (drmp3_int16)_mm_extract_epi16(pcm8, 6);
+        out[i+7] = (drmp3_int16)_mm_extract_epi16(pcm8, 7);
+#else
+        int16x4_t pcma, pcmb;
+        a = DRMP3_VADD(a, DRMP3_VSET(0.5f));
+        b = DRMP3_VADD(b, DRMP3_VSET(0.5f));
+        pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, DRMP3_VSET(0)))));
+        pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, DRMP3_VSET(0)))));
+        vst1_lane_s16(out+i  , pcma, 0);
+        vst1_lane_s16(out+i+1, pcma, 1);
+        vst1_lane_s16(out+i+2, pcma, 2);
+        vst1_lane_s16(out+i+3, pcma, 3);
+        vst1_lane_s16(out+i+4, pcmb, 0);
+        vst1_lane_s16(out+i+5, pcmb, 1);
+        vst1_lane_s16(out+i+6, pcmb, 2);
+        vst1_lane_s16(out+i+7, pcmb, 3);
+#endif
+    }
+#endif
+    for(; i < num_samples; i++)
+    {
+        float sample = in[i] * 32768.0f;
+        if (sample >=  32766.5)
+            out[i] = (drmp3_int16) 32767;
+        else if (sample <= -32767.5)
+            out[i] = (drmp3_int16)-32768;
+        else
+        {
+            short s = (drmp3_int16)(sample + .5f);
+            s -= (s < 0);   /* away from zero, to be compliant */
+            out[i] = s;
+        }
+    }
+}
+
+
+
+/************************************************************************************************************************************************************
+
+ Main Public API
+
+ ************************************************************************************************************************************************************/
+/* SIZE_MAX */
+#if defined(SIZE_MAX)
+    #define DRMP3_SIZE_MAX  SIZE_MAX
+#else
+    #if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
+        #define DRMP3_SIZE_MAX  ((drmp3_uint64)0xFFFFFFFFFFFFFFFF)
+    #else
+        #define DRMP3_SIZE_MAX  0xFFFFFFFF
+    #endif
+#endif
+/* End SIZE_MAX */
+
+/* Options. */
+#ifndef DRMP3_SEEK_LEADING_MP3_FRAMES
+#define DRMP3_SEEK_LEADING_MP3_FRAMES   2
+#endif
+
+#define DRMP3_MIN_DATA_CHUNK_SIZE   16384
+
+/* The size in bytes of each chunk of data to read from the MP3 stream. minimp3 recommends at least 16K, but in an attempt to reduce data movement I'm making this slightly larger. */
+#ifndef DRMP3_DATA_CHUNK_SIZE
+#define DRMP3_DATA_CHUNK_SIZE  (DRMP3_MIN_DATA_CHUNK_SIZE*4)
+#endif
+
+
+#define DRMP3_COUNTOF(x)        (sizeof(x) / sizeof(x[0]))
+#define DRMP3_CLAMP(x, lo, hi)  (DRMP3_MAX(lo, DRMP3_MIN(x, hi)))
+
+#ifndef DRMP3_PI_D
+#define DRMP3_PI_D    3.14159265358979323846264
+#endif
+
+#define DRMP3_DEFAULT_RESAMPLER_LPF_ORDER   2
+
+static DRMP3_INLINE float drmp3_mix_f32(float x, float y, float a)
+{
+    return x*(1-a) + y*a;
+}
+static DRMP3_INLINE float drmp3_mix_f32_fast(float x, float y, float a)
+{
+    float r0 = (y - x);
+    float r1 = r0*a;
+    return x + r1;
+    /*return x + (y - x)*a;*/
+}
+
+
+/*
+Greatest common factor using Euclid's algorithm iteratively.
+*/
+static DRMP3_INLINE drmp3_uint32 drmp3_gcf_u32(drmp3_uint32 a, drmp3_uint32 b)
+{
+    for (;;) {
+        if (b == 0) {
+            break;
+        } else {
+            drmp3_uint32 t = a;
+            a = b;
+            b = t % a;
+        }
+    }
+
+    return a;
+}
+
+
+static void* drmp3__malloc_default(size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRMP3_MALLOC(sz);
+}
+
+static void* drmp3__realloc_default(void* p, size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRMP3_REALLOC(p, sz);
+}
+
+static void drmp3__free_default(void* p, void* pUserData)
+{
+    (void)pUserData;
+    DRMP3_FREE(p);
+}
+
+
+static void* drmp3__malloc_from_callbacks(size_t sz, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onMalloc != NULL) {
+        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try using realloc(). */
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
+    }
+
+    return NULL;
+}
+
+static void* drmp3__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try emulating realloc() in terms of malloc()/free(). */
+    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
+        void* p2;
+
+        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
+        if (p2 == NULL) {
+            return NULL;
+        }
+
+        if (p != NULL) {
+            DRMP3_COPY_MEMORY(p2, p, szOld);
+            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+        }
+
+        return p2;
+    }
+
+    return NULL;
+}
+
+static void drmp3__free_from_callbacks(void* p, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    if (p == NULL || pAllocationCallbacks == NULL) {
+        return;
+    }
+
+    if (pAllocationCallbacks->onFree != NULL) {
+        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+    }
+}
+
+
+static drmp3_allocation_callbacks drmp3_copy_allocation_callbacks_or_defaults(const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        /* Copy. */
+        return *pAllocationCallbacks;
+    } else {
+        /* Defaults. */
+        drmp3_allocation_callbacks allocationCallbacks;
+        allocationCallbacks.pUserData = NULL;
+        allocationCallbacks.onMalloc  = drmp3__malloc_default;
+        allocationCallbacks.onRealloc = drmp3__realloc_default;
+        allocationCallbacks.onFree    = drmp3__free_default;
+        return allocationCallbacks;
+    }
+}
+
+
+
+static size_t drmp3__on_read(drmp3* pMP3, void* pBufferOut, size_t bytesToRead)
+{
+    size_t bytesRead = pMP3->onRead(pMP3->pUserData, pBufferOut, bytesToRead);
+    pMP3->streamCursor += bytesRead;
+    return bytesRead;
+}
+
+static drmp3_bool32 drmp3__on_seek(drmp3* pMP3, int offset, drmp3_seek_origin origin)
+{
+    DRMP3_ASSERT(offset >= 0);
+
+    if (!pMP3->onSeek(pMP3->pUserData, offset, origin)) {
+        return DRMP3_FALSE;
+    }
+
+    if (origin == drmp3_seek_origin_start) {
+        pMP3->streamCursor = (drmp3_uint64)offset;
+    } else {
+        pMP3->streamCursor += offset;
+    }
+
+    return DRMP3_TRUE;
+}
+
+static drmp3_bool32 drmp3__on_seek_64(drmp3* pMP3, drmp3_uint64 offset, drmp3_seek_origin origin)
+{
+    if (offset <= 0x7FFFFFFF) {
+        return drmp3__on_seek(pMP3, (int)offset, origin);
+    }
+
+
+    /* Getting here "offset" is too large for a 32-bit integer. We just keep seeking forward until we hit the offset. */
+    if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, drmp3_seek_origin_start)) {
+        return DRMP3_FALSE;
+    }
+
+    offset -= 0x7FFFFFFF;
+    while (offset > 0) {
+        if (offset <= 0x7FFFFFFF) {
+            if (!drmp3__on_seek(pMP3, (int)offset, drmp3_seek_origin_current)) {
+                return DRMP3_FALSE;
+            }
+            offset = 0;
+        } else {
+            if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, drmp3_seek_origin_current)) {
+                return DRMP3_FALSE;
+            }
+            offset -= 0x7FFFFFFF;
+        }
+    }
+
+    return DRMP3_TRUE;
+}
+
+
+static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sample_t* pPCMFrames)
+{
+    drmp3_uint32 pcmFramesRead = 0;
+
+    DRMP3_ASSERT(pMP3 != NULL);
+    DRMP3_ASSERT(pMP3->onRead != NULL);
+
+    if (pMP3->atEnd) {
+        return 0;
+    }
+
+    for (;;) {
+        drmp3dec_frame_info info;
+
+        /* minimp3 recommends doing data submission in chunks of at least 16K. If we don't have at least 16K bytes available, get more. */
+        if (pMP3->dataSize < DRMP3_MIN_DATA_CHUNK_SIZE) {
+            size_t bytesRead;
+
+            /* First we need to move the data down. */
+            if (pMP3->pData != NULL) {
+                DRMP3_MOVE_MEMORY(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize);
+            }
+
+            pMP3->dataConsumed = 0;
+
+            if (pMP3->dataCapacity < DRMP3_DATA_CHUNK_SIZE) {
+                drmp3_uint8* pNewData;
+                size_t newDataCap;
+
+                newDataCap = DRMP3_DATA_CHUNK_SIZE;
+
+                pNewData = (drmp3_uint8*)drmp3__realloc_from_callbacks(pMP3->pData, newDataCap, pMP3->dataCapacity, &pMP3->allocationCallbacks);
+                if (pNewData == NULL) {
+                    return 0; /* Out of memory. */
+                }
+
+                pMP3->pData = pNewData;
+                pMP3->dataCapacity = newDataCap;
+            }
+
+            bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize));
+            if (bytesRead == 0) {
+                if (pMP3->dataSize == 0) {
+                    pMP3->atEnd = DRMP3_TRUE;
+                    return 0; /* No data. */
+                }
+            }
+
+            pMP3->dataSize += bytesRead;
+        }
+
+        if (pMP3->dataSize > INT_MAX) {
+            pMP3->atEnd = DRMP3_TRUE;
+            return 0; /* File too big. */
+        }
+
+        DRMP3_ASSERT(pMP3->pData != NULL);
+        DRMP3_ASSERT(pMP3->dataCapacity > 0);
+
+        /* Do a runtime check here to try silencing a false-positive from clang-analyzer. */
+        if (pMP3->pData == NULL) {
+            return 0;
+        }
+
+        pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->pData + pMP3->dataConsumed, (int)pMP3->dataSize, pPCMFrames, &info);    /* <-- Safe size_t -> int conversion thanks to the check above. */
+
+        /* Consume the data. */
+        if (info.frame_bytes > 0) {
+            pMP3->dataConsumed += (size_t)info.frame_bytes;
+            pMP3->dataSize     -= (size_t)info.frame_bytes;
+        }
+
+        /* pcmFramesRead will be equal to 0 if decoding failed. If it is zero and info.frame_bytes > 0 then we have successfully decoded the frame. */
+        if (pcmFramesRead > 0) {
+            pcmFramesRead = drmp3_hdr_frame_samples(pMP3->decoder.header);
+            pMP3->pcmFramesConsumedInMP3Frame = 0;
+            pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead;
+            pMP3->mp3FrameChannels = info.channels;
+            pMP3->mp3FrameSampleRate = info.hz;
+            break;
+        } else if (info.frame_bytes == 0) {
+            /* Need more data. minimp3 recommends doing data submission in 16K chunks. */
+            size_t bytesRead;
+
+            /* First we need to move the data down. */
+            DRMP3_MOVE_MEMORY(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize);
+            pMP3->dataConsumed = 0;
+
+            if (pMP3->dataCapacity == pMP3->dataSize) {
+                /* No room. Expand. */
+                drmp3_uint8* pNewData;
+                size_t newDataCap;
+
+                newDataCap = pMP3->dataCapacity + DRMP3_DATA_CHUNK_SIZE;
+
+                pNewData = (drmp3_uint8*)drmp3__realloc_from_callbacks(pMP3->pData, newDataCap, pMP3->dataCapacity, &pMP3->allocationCallbacks);
+                if (pNewData == NULL) {
+                    return 0; /* Out of memory. */
+                }
+
+                pMP3->pData = pNewData;
+                pMP3->dataCapacity = newDataCap;
+            }
+
+            /* Fill in a chunk. */
+            bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize));
+            if (bytesRead == 0) {
+                pMP3->atEnd = DRMP3_TRUE;
+                return 0; /* Error reading more data. */
+            }
+
+            pMP3->dataSize += bytesRead;
+        }
+    };
+
+    return pcmFramesRead;
+}
+
+static drmp3_uint32 drmp3_decode_next_frame_ex__memory(drmp3* pMP3, drmp3d_sample_t* pPCMFrames)
+{
+    drmp3_uint32 pcmFramesRead = 0;
+    drmp3dec_frame_info info;
+
+    DRMP3_ASSERT(pMP3 != NULL);
+    DRMP3_ASSERT(pMP3->memory.pData != NULL);
+
+    if (pMP3->atEnd) {
+        return 0;
+    }
+
+    for (;;) {
+        pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->memory.pData + pMP3->memory.currentReadPos, (int)(pMP3->memory.dataSize - pMP3->memory.currentReadPos), pPCMFrames, &info);
+        if (pcmFramesRead > 0) {
+            pcmFramesRead = drmp3_hdr_frame_samples(pMP3->decoder.header);
+            pMP3->pcmFramesConsumedInMP3Frame  = 0;
+            pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead;
+            pMP3->mp3FrameChannels             = info.channels;
+            pMP3->mp3FrameSampleRate           = info.hz;
+            break;
+        } else if (info.frame_bytes > 0) {
+            /* No frames were read, but it looks like we skipped past one. Read the next MP3 frame. */
+            pMP3->memory.currentReadPos += (size_t)info.frame_bytes;
+        } else {
+            /* Nothing at all was read. Abort. */
+            break;
+        }
+    }
+
+    /* Consume the data. */
+    pMP3->memory.currentReadPos += (size_t)info.frame_bytes;
+
+    return pcmFramesRead;
+}
+
+static drmp3_uint32 drmp3_decode_next_frame_ex(drmp3* pMP3, drmp3d_sample_t* pPCMFrames)
+{
+    if (pMP3->memory.pData != NULL && pMP3->memory.dataSize > 0) {
+        return drmp3_decode_next_frame_ex__memory(pMP3, pPCMFrames);
+    } else {
+        return drmp3_decode_next_frame_ex__callbacks(pMP3, pPCMFrames);
+    }
+}
+
+static drmp3_uint32 drmp3_decode_next_frame(drmp3* pMP3)
+{
+    DRMP3_ASSERT(pMP3 != NULL);
+    return drmp3_decode_next_frame_ex(pMP3, (drmp3d_sample_t*)pMP3->pcmFrames);
+}
+
+#if 0
+static drmp3_uint32 drmp3_seek_next_frame(drmp3* pMP3)
+{
+    drmp3_uint32 pcmFrameCount;
+
+    DRMP3_ASSERT(pMP3 != NULL);
+
+    pcmFrameCount = drmp3_decode_next_frame_ex(pMP3, NULL);
+    if (pcmFrameCount == 0) {
+        return 0;
+    }
+
+    /* We have essentially just skipped past the frame, so just set the remaining samples to 0. */
+    pMP3->currentPCMFrame             += pcmFrameCount;
+    pMP3->pcmFramesConsumedInMP3Frame  = pcmFrameCount;
+    pMP3->pcmFramesRemainingInMP3Frame = 0;
+
+    return pcmFrameCount;
+}
+#endif
+
+static drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    DRMP3_ASSERT(pMP3 != NULL);
+    DRMP3_ASSERT(onRead != NULL);
+
+    /* This function assumes the output object has already been reset to 0. Do not do that here, otherwise things will break. */
+    drmp3dec_init(&pMP3->decoder);
+
+    pMP3->onRead = onRead;
+    pMP3->onSeek = onSeek;
+    pMP3->pUserData = pUserData;
+    pMP3->allocationCallbacks = drmp3_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
+
+    if (pMP3->allocationCallbacks.onFree == NULL || (pMP3->allocationCallbacks.onMalloc == NULL && pMP3->allocationCallbacks.onRealloc == NULL)) {
+        return DRMP3_FALSE;    /* Invalid allocation callbacks. */
+    }
+
+    /* Decode the first frame to confirm that it is indeed a valid MP3 stream. */
+    if (drmp3_decode_next_frame(pMP3) == 0) {
+        drmp3__free_from_callbacks(pMP3->pData, &pMP3->allocationCallbacks);    /* The call above may have allocated memory. Need to make sure it's freed before aborting. */
+        return DRMP3_FALSE; /* Not a valid MP3 stream. */
+    }
+
+    pMP3->channels   = pMP3->mp3FrameChannels;
+    pMP3->sampleRate = pMP3->mp3FrameSampleRate;
+
+    return DRMP3_TRUE;
+}
+
+DRMP3_API drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pMP3 == NULL || onRead == NULL) {
+        return DRMP3_FALSE;
+    }
+
+    DRMP3_ZERO_OBJECT(pMP3);
+    return drmp3_init_internal(pMP3, onRead, onSeek, pUserData, pAllocationCallbacks);
+}
+
+
+static size_t drmp3__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead)
+{
+    drmp3* pMP3 = (drmp3*)pUserData;
+    size_t bytesRemaining;
+
+    DRMP3_ASSERT(pMP3 != NULL);
+    DRMP3_ASSERT(pMP3->memory.dataSize >= pMP3->memory.currentReadPos);
+
+    bytesRemaining = pMP3->memory.dataSize - pMP3->memory.currentReadPos;
+    if (bytesToRead > bytesRemaining) {
+        bytesToRead = bytesRemaining;
+    }
+
+    if (bytesToRead > 0) {
+        DRMP3_COPY_MEMORY(pBufferOut, pMP3->memory.pData + pMP3->memory.currentReadPos, bytesToRead);
+        pMP3->memory.currentReadPos += bytesToRead;
+    }
+
+    return bytesToRead;
+}
+
+static drmp3_bool32 drmp3__on_seek_memory(void* pUserData, int byteOffset, drmp3_seek_origin origin)
+{
+    drmp3* pMP3 = (drmp3*)pUserData;
+
+    DRMP3_ASSERT(pMP3 != NULL);
+
+    if (origin == drmp3_seek_origin_current) {
+        if (byteOffset > 0) {
+            if (pMP3->memory.currentReadPos + byteOffset > pMP3->memory.dataSize) {
+                byteOffset = (int)(pMP3->memory.dataSize - pMP3->memory.currentReadPos);  /* Trying to seek too far forward. */
+            }
+        } else {
+            if (pMP3->memory.currentReadPos < (size_t)-byteOffset) {
+                byteOffset = -(int)pMP3->memory.currentReadPos;  /* Trying to seek too far backwards. */
+            }
+        }
+
+        /* This will never underflow thanks to the clamps above. */
+        pMP3->memory.currentReadPos += byteOffset;
+    } else {
+        if ((drmp3_uint32)byteOffset <= pMP3->memory.dataSize) {
+            pMP3->memory.currentReadPos = byteOffset;
+        } else {
+            pMP3->memory.currentReadPos = pMP3->memory.dataSize;  /* Trying to seek too far forward. */
+        }
+    }
+
+    return DRMP3_TRUE;
+}
+
+DRMP3_API drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t dataSize, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pMP3 == NULL) {
+        return DRMP3_FALSE;
+    }
+
+    DRMP3_ZERO_OBJECT(pMP3);
+
+    if (pData == NULL || dataSize == 0) {
+        return DRMP3_FALSE;
+    }
+
+    pMP3->memory.pData = (const drmp3_uint8*)pData;
+    pMP3->memory.dataSize = dataSize;
+    pMP3->memory.currentReadPos = 0;
+
+    return drmp3_init_internal(pMP3, drmp3__on_read_memory, drmp3__on_seek_memory, pMP3, pAllocationCallbacks);
+}
+
+
+#ifndef DR_MP3_NO_STDIO
+#include <stdio.h>
+#include <wchar.h>      /* For wcslen(), wcsrtombs() */
+
+/* Errno */
+/* drmp3_result_from_errno() is only used inside DR_MP3_NO_STDIO for now. Move this out if it's ever used elsewhere. */
+#include <errno.h>
+static drmp3_result drmp3_result_from_errno(int e)
+{
+    switch (e)
+    {
+        case 0: return DRMP3_SUCCESS;
+    #ifdef EPERM
+        case EPERM: return DRMP3_INVALID_OPERATION;
+    #endif
+    #ifdef ENOENT
+        case ENOENT: return DRMP3_DOES_NOT_EXIST;
+    #endif
+    #ifdef ESRCH
+        case ESRCH: return DRMP3_DOES_NOT_EXIST;
+    #endif
+    #ifdef EINTR
+        case EINTR: return DRMP3_INTERRUPT;
+    #endif
+    #ifdef EIO
+        case EIO: return DRMP3_IO_ERROR;
+    #endif
+    #ifdef ENXIO
+        case ENXIO: return DRMP3_DOES_NOT_EXIST;
+    #endif
+    #ifdef E2BIG
+        case E2BIG: return DRMP3_INVALID_ARGS;
+    #endif
+    #ifdef ENOEXEC
+        case ENOEXEC: return DRMP3_INVALID_FILE;
+    #endif
+    #ifdef EBADF
+        case EBADF: return DRMP3_INVALID_FILE;
+    #endif
+    #ifdef ECHILD
+        case ECHILD: return DRMP3_ERROR;
+    #endif
+    #ifdef EAGAIN
+        case EAGAIN: return DRMP3_UNAVAILABLE;
+    #endif
+    #ifdef ENOMEM
+        case ENOMEM: return DRMP3_OUT_OF_MEMORY;
+    #endif
+    #ifdef EACCES
+        case EACCES: return DRMP3_ACCESS_DENIED;
+    #endif
+    #ifdef EFAULT
+        case EFAULT: return DRMP3_BAD_ADDRESS;
+    #endif
+    #ifdef ENOTBLK
+        case ENOTBLK: return DRMP3_ERROR;
+    #endif
+    #ifdef EBUSY
+        case EBUSY: return DRMP3_BUSY;
+    #endif
+    #ifdef EEXIST
+        case EEXIST: return DRMP3_ALREADY_EXISTS;
+    #endif
+    #ifdef EXDEV
+        case EXDEV: return DRMP3_ERROR;
+    #endif
+    #ifdef ENODEV
+        case ENODEV: return DRMP3_DOES_NOT_EXIST;
+    #endif
+    #ifdef ENOTDIR
+        case ENOTDIR: return DRMP3_NOT_DIRECTORY;
+    #endif
+    #ifdef EISDIR
+        case EISDIR: return DRMP3_IS_DIRECTORY;
+    #endif
+    #ifdef EINVAL
+        case EINVAL: return DRMP3_INVALID_ARGS;
+    #endif
+    #ifdef ENFILE
+        case ENFILE: return DRMP3_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef EMFILE
+        case EMFILE: return DRMP3_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef ENOTTY
+        case ENOTTY: return DRMP3_INVALID_OPERATION;
+    #endif
+    #ifdef ETXTBSY
+        case ETXTBSY: return DRMP3_BUSY;
+    #endif
+    #ifdef EFBIG
+        case EFBIG: return DRMP3_TOO_BIG;
+    #endif
+    #ifdef ENOSPC
+        case ENOSPC: return DRMP3_NO_SPACE;
+    #endif
+    #ifdef ESPIPE
+        case ESPIPE: return DRMP3_BAD_SEEK;
+    #endif
+    #ifdef EROFS
+        case EROFS: return DRMP3_ACCESS_DENIED;
+    #endif
+    #ifdef EMLINK
+        case EMLINK: return DRMP3_TOO_MANY_LINKS;
+    #endif
+    #ifdef EPIPE
+        case EPIPE: return DRMP3_BAD_PIPE;
+    #endif
+    #ifdef EDOM
+        case EDOM: return DRMP3_OUT_OF_RANGE;
+    #endif
+    #ifdef ERANGE
+        case ERANGE: return DRMP3_OUT_OF_RANGE;
+    #endif
+    #ifdef EDEADLK
+        case EDEADLK: return DRMP3_DEADLOCK;
+    #endif
+    #ifdef ENAMETOOLONG
+        case ENAMETOOLONG: return DRMP3_PATH_TOO_LONG;
+    #endif
+    #ifdef ENOLCK
+        case ENOLCK: return DRMP3_ERROR;
+    #endif
+    #ifdef ENOSYS
+        case ENOSYS: return DRMP3_NOT_IMPLEMENTED;
+    #endif
+    #ifdef ENOTEMPTY
+        case ENOTEMPTY: return DRMP3_DIRECTORY_NOT_EMPTY;
+    #endif
+    #ifdef ELOOP
+        case ELOOP: return DRMP3_TOO_MANY_LINKS;
+    #endif
+    #ifdef ENOMSG
+        case ENOMSG: return DRMP3_NO_MESSAGE;
+    #endif
+    #ifdef EIDRM
+        case EIDRM: return DRMP3_ERROR;
+    #endif
+    #ifdef ECHRNG
+        case ECHRNG: return DRMP3_ERROR;
+    #endif
+    #ifdef EL2NSYNC
+        case EL2NSYNC: return DRMP3_ERROR;
+    #endif
+    #ifdef EL3HLT
+        case EL3HLT: return DRMP3_ERROR;
+    #endif
+    #ifdef EL3RST
+        case EL3RST: return DRMP3_ERROR;
+    #endif
+    #ifdef ELNRNG
+        case ELNRNG: return DRMP3_OUT_OF_RANGE;
+    #endif
+    #ifdef EUNATCH
+        case EUNATCH: return DRMP3_ERROR;
+    #endif
+    #ifdef ENOCSI
+        case ENOCSI: return DRMP3_ERROR;
+    #endif
+    #ifdef EL2HLT
+        case EL2HLT: return DRMP3_ERROR;
+    #endif
+    #ifdef EBADE
+        case EBADE: return DRMP3_ERROR;
+    #endif
+    #ifdef EBADR
+        case EBADR: return DRMP3_ERROR;
+    #endif
+    #ifdef EXFULL
+        case EXFULL: return DRMP3_ERROR;
+    #endif
+    #ifdef ENOANO
+        case ENOANO: return DRMP3_ERROR;
+    #endif
+    #ifdef EBADRQC
+        case EBADRQC: return DRMP3_ERROR;
+    #endif
+    #ifdef EBADSLT
+        case EBADSLT: return DRMP3_ERROR;
+    #endif
+    #ifdef EBFONT
+        case EBFONT: return DRMP3_INVALID_FILE;
+    #endif
+    #ifdef ENOSTR
+        case ENOSTR: return DRMP3_ERROR;
+    #endif
+    #ifdef ENODATA
+        case ENODATA: return DRMP3_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ETIME
+        case ETIME: return DRMP3_TIMEOUT;
+    #endif
+    #ifdef ENOSR
+        case ENOSR: return DRMP3_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ENONET
+        case ENONET: return DRMP3_NO_NETWORK;
+    #endif
+    #ifdef ENOPKG
+        case ENOPKG: return DRMP3_ERROR;
+    #endif
+    #ifdef EREMOTE
+        case EREMOTE: return DRMP3_ERROR;
+    #endif
+    #ifdef ENOLINK
+        case ENOLINK: return DRMP3_ERROR;
+    #endif
+    #ifdef EADV
+        case EADV: return DRMP3_ERROR;
+    #endif
+    #ifdef ESRMNT
+        case ESRMNT: return DRMP3_ERROR;
+    #endif
+    #ifdef ECOMM
+        case ECOMM: return DRMP3_ERROR;
+    #endif
+    #ifdef EPROTO
+        case EPROTO: return DRMP3_ERROR;
+    #endif
+    #ifdef EMULTIHOP
+        case EMULTIHOP: return DRMP3_ERROR;
+    #endif
+    #ifdef EDOTDOT
+        case EDOTDOT: return DRMP3_ERROR;
+    #endif
+    #ifdef EBADMSG
+        case EBADMSG: return DRMP3_BAD_MESSAGE;
+    #endif
+    #ifdef EOVERFLOW
+        case EOVERFLOW: return DRMP3_TOO_BIG;
+    #endif
+    #ifdef ENOTUNIQ
+        case ENOTUNIQ: return DRMP3_NOT_UNIQUE;
+    #endif
+    #ifdef EBADFD
+        case EBADFD: return DRMP3_ERROR;
+    #endif
+    #ifdef EREMCHG
+        case EREMCHG: return DRMP3_ERROR;
+    #endif
+    #ifdef ELIBACC
+        case ELIBACC: return DRMP3_ACCESS_DENIED;
+    #endif
+    #ifdef ELIBBAD
+        case ELIBBAD: return DRMP3_INVALID_FILE;
+    #endif
+    #ifdef ELIBSCN
+        case ELIBSCN: return DRMP3_INVALID_FILE;
+    #endif
+    #ifdef ELIBMAX
+        case ELIBMAX: return DRMP3_ERROR;
+    #endif
+    #ifdef ELIBEXEC
+        case ELIBEXEC: return DRMP3_ERROR;
+    #endif
+    #ifdef EILSEQ
+        case EILSEQ: return DRMP3_INVALID_DATA;
+    #endif
+    #ifdef ERESTART
+        case ERESTART: return DRMP3_ERROR;
+    #endif
+    #ifdef ESTRPIPE
+        case ESTRPIPE: return DRMP3_ERROR;
+    #endif
+    #ifdef EUSERS
+        case EUSERS: return DRMP3_ERROR;
+    #endif
+    #ifdef ENOTSOCK
+        case ENOTSOCK: return DRMP3_NOT_SOCKET;
+    #endif
+    #ifdef EDESTADDRREQ
+        case EDESTADDRREQ: return DRMP3_NO_ADDRESS;
+    #endif
+    #ifdef EMSGSIZE
+        case EMSGSIZE: return DRMP3_TOO_BIG;
+    #endif
+    #ifdef EPROTOTYPE
+        case EPROTOTYPE: return DRMP3_BAD_PROTOCOL;
+    #endif
+    #ifdef ENOPROTOOPT
+        case ENOPROTOOPT: return DRMP3_PROTOCOL_UNAVAILABLE;
+    #endif
+    #ifdef EPROTONOSUPPORT
+        case EPROTONOSUPPORT: return DRMP3_PROTOCOL_NOT_SUPPORTED;
+    #endif
+    #ifdef ESOCKTNOSUPPORT
+        case ESOCKTNOSUPPORT: return DRMP3_SOCKET_NOT_SUPPORTED;
+    #endif
+    #ifdef EOPNOTSUPP
+        case EOPNOTSUPP: return DRMP3_INVALID_OPERATION;
+    #endif
+    #ifdef EPFNOSUPPORT
+        case EPFNOSUPPORT: return DRMP3_PROTOCOL_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EAFNOSUPPORT
+        case EAFNOSUPPORT: return DRMP3_ADDRESS_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EADDRINUSE
+        case EADDRINUSE: return DRMP3_ALREADY_IN_USE;
+    #endif
+    #ifdef EADDRNOTAVAIL
+        case EADDRNOTAVAIL: return DRMP3_ERROR;
+    #endif
+    #ifdef ENETDOWN
+        case ENETDOWN: return DRMP3_NO_NETWORK;
+    #endif
+    #ifdef ENETUNREACH
+        case ENETUNREACH: return DRMP3_NO_NETWORK;
+    #endif
+    #ifdef ENETRESET
+        case ENETRESET: return DRMP3_NO_NETWORK;
+    #endif
+    #ifdef ECONNABORTED
+        case ECONNABORTED: return DRMP3_NO_NETWORK;
+    #endif
+    #ifdef ECONNRESET
+        case ECONNRESET: return DRMP3_CONNECTION_RESET;
+    #endif
+    #ifdef ENOBUFS
+        case ENOBUFS: return DRMP3_NO_SPACE;
+    #endif
+    #ifdef EISCONN
+        case EISCONN: return DRMP3_ALREADY_CONNECTED;
+    #endif
+    #ifdef ENOTCONN
+        case ENOTCONN: return DRMP3_NOT_CONNECTED;
+    #endif
+    #ifdef ESHUTDOWN
+        case ESHUTDOWN: return DRMP3_ERROR;
+    #endif
+    #ifdef ETOOMANYREFS
+        case ETOOMANYREFS: return DRMP3_ERROR;
+    #endif
+    #ifdef ETIMEDOUT
+        case ETIMEDOUT: return DRMP3_TIMEOUT;
+    #endif
+    #ifdef ECONNREFUSED
+        case ECONNREFUSED: return DRMP3_CONNECTION_REFUSED;
+    #endif
+    #ifdef EHOSTDOWN
+        case EHOSTDOWN: return DRMP3_NO_HOST;
+    #endif
+    #ifdef EHOSTUNREACH
+        case EHOSTUNREACH: return DRMP3_NO_HOST;
+    #endif
+    #ifdef EALREADY
+        case EALREADY: return DRMP3_IN_PROGRESS;
+    #endif
+    #ifdef EINPROGRESS
+        case EINPROGRESS: return DRMP3_IN_PROGRESS;
+    #endif
+    #ifdef ESTALE
+        case ESTALE: return DRMP3_INVALID_FILE;
+    #endif
+    #ifdef EUCLEAN
+        case EUCLEAN: return DRMP3_ERROR;
+    #endif
+    #ifdef ENOTNAM
+        case ENOTNAM: return DRMP3_ERROR;
+    #endif
+    #ifdef ENAVAIL
+        case ENAVAIL: return DRMP3_ERROR;
+    #endif
+    #ifdef EISNAM
+        case EISNAM: return DRMP3_ERROR;
+    #endif
+    #ifdef EREMOTEIO
+        case EREMOTEIO: return DRMP3_IO_ERROR;
+    #endif
+    #ifdef EDQUOT
+        case EDQUOT: return DRMP3_NO_SPACE;
+    #endif
+    #ifdef ENOMEDIUM
+        case ENOMEDIUM: return DRMP3_DOES_NOT_EXIST;
+    #endif
+    #ifdef EMEDIUMTYPE
+        case EMEDIUMTYPE: return DRMP3_ERROR;
+    #endif
+    #ifdef ECANCELED
+        case ECANCELED: return DRMP3_CANCELLED;
+    #endif
+    #ifdef ENOKEY
+        case ENOKEY: return DRMP3_ERROR;
+    #endif
+    #ifdef EKEYEXPIRED
+        case EKEYEXPIRED: return DRMP3_ERROR;
+    #endif
+    #ifdef EKEYREVOKED
+        case EKEYREVOKED: return DRMP3_ERROR;
+    #endif
+    #ifdef EKEYREJECTED
+        case EKEYREJECTED: return DRMP3_ERROR;
+    #endif
+    #ifdef EOWNERDEAD
+        case EOWNERDEAD: return DRMP3_ERROR;
+    #endif
+    #ifdef ENOTRECOVERABLE
+        case ENOTRECOVERABLE: return DRMP3_ERROR;
+    #endif
+    #ifdef ERFKILL
+        case ERFKILL: return DRMP3_ERROR;
+    #endif
+    #ifdef EHWPOISON
+        case EHWPOISON: return DRMP3_ERROR;
+    #endif
+        default: return DRMP3_ERROR;
+    }
+}
+/* End Errno */
+
+/* fopen */
+static drmp3_result drmp3_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
+{
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    errno_t err;
+#endif
+
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRMP3_INVALID_ARGS;
+    }
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    err = fopen_s(ppFile, pFilePath, pOpenMode);
+    if (err != 0) {
+        return drmp3_result_from_errno(err);
+    }
+#else
+#if defined(_WIN32) || defined(__APPLE__)
+    *ppFile = fopen(pFilePath, pOpenMode);
+#else
+    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
+        *ppFile = fopen64(pFilePath, pOpenMode);
+    #else
+        *ppFile = fopen(pFilePath, pOpenMode);
+    #endif
+#endif
+    if (*ppFile == NULL) {
+        drmp3_result result = drmp3_result_from_errno(errno);
+        if (result == DRMP3_SUCCESS) {
+            result = DRMP3_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
+        }
+
+        return result;
+    }
+#endif
+
+    return DRMP3_SUCCESS;
+}
+
+/*
+_wfopen() isn't always available in all compilation environments.
+
+    * Windows only.
+    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
+    * MinGW-64 (both 32- and 64-bit) seems to support it.
+    * MinGW wraps it in !defined(__STRICT_ANSI__).
+    * OpenWatcom wraps it in !defined(_NO_EXT_KEYS).
+
+This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
+fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
+*/
+#if defined(_WIN32)
+    #if defined(_MSC_VER) || defined(__MINGW64__) || (!defined(__STRICT_ANSI__) && !defined(_NO_EXT_KEYS))
+        #define DRMP3_HAS_WFOPEN
+    #endif
+#endif
+
+static drmp3_result drmp3_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRMP3_INVALID_ARGS;
+    }
+
+#if defined(DRMP3_HAS_WFOPEN)
+    {
+        /* Use _wfopen() on Windows. */
+    #if defined(_MSC_VER) && _MSC_VER >= 1400
+        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
+        if (err != 0) {
+            return drmp3_result_from_errno(err);
+        }
+    #else
+        *ppFile = _wfopen(pFilePath, pOpenMode);
+        if (*ppFile == NULL) {
+            return drmp3_result_from_errno(errno);
+        }
+    #endif
+        (void)pAllocationCallbacks;
+    }
+#else
+    /*
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because
+	fopen() is locale specific. The only real way I can think of to do this is with wcsrtombs(). Note
+	that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler
+	error I'll look into improving compatibility.
+    */
+
+	/*
+	Some compilers don't support wchar_t or wcsrtombs() which we're using below. In this case we just
+	need to abort with an error. If you encounter a compiler lacking such support, add it to this list
+	and submit a bug report and it'll be added to the library upstream.
+	*/
+	#if defined(__DJGPP__)
+	{
+		/* Nothing to do here. This will fall through to the error check below. */
+	}
+	#else
+    {
+        mbstate_t mbs;
+        size_t lenMB;
+        const wchar_t* pFilePathTemp = pFilePath;
+        char* pFilePathMB = NULL;
+        char pOpenModeMB[32] = {0};
+
+        /* Get the length first. */
+        DRMP3_ZERO_OBJECT(&mbs);
+        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
+        if (lenMB == (size_t)-1) {
+            return drmp3_result_from_errno(errno);
+        }
+
+        pFilePathMB = (char*)drmp3__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
+        if (pFilePathMB == NULL) {
+            return DRMP3_OUT_OF_MEMORY;
+        }
+
+        pFilePathTemp = pFilePath;
+        DRMP3_ZERO_OBJECT(&mbs);
+        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
+
+        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
+        {
+            size_t i = 0;
+            for (;;) {
+                if (pOpenMode[i] == 0) {
+                    pOpenModeMB[i] = '\0';
+                    break;
+                }
+
+                pOpenModeMB[i] = (char)pOpenMode[i];
+                i += 1;
+            }
+        }
+
+        *ppFile = fopen(pFilePathMB, pOpenModeMB);
+
+        drmp3__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
+    }
+	#endif
+
+    if (*ppFile == NULL) {
+        return DRMP3_ERROR;
+    }
+#endif
+
+    return DRMP3_SUCCESS;
+}
+/* End fopen */
+
+
+static size_t drmp3__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead)
+{
+    return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData);
+}
+
+static drmp3_bool32 drmp3__on_seek_stdio(void* pUserData, int offset, drmp3_seek_origin origin)
+{
+    return fseek((FILE*)pUserData, offset, (origin == drmp3_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+}
+
+DRMP3_API drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    drmp3_bool32 result;
+    FILE* pFile;
+
+    if (drmp3_fopen(&pFile, pFilePath, "rb") != DRMP3_SUCCESS) {
+        return DRMP3_FALSE;
+    }
+
+    result = drmp3_init(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (result != DRMP3_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    return DRMP3_TRUE;
+}
+
+DRMP3_API drmp3_bool32 drmp3_init_file_w(drmp3* pMP3, const wchar_t* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    drmp3_bool32 result;
+    FILE* pFile;
+
+    if (drmp3_wfopen(&pFile, pFilePath, L"rb", pAllocationCallbacks) != DRMP3_SUCCESS) {
+        return DRMP3_FALSE;
+    }
+
+    result = drmp3_init(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (result != DRMP3_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    return DRMP3_TRUE;
+}
+#endif
+
+DRMP3_API void drmp3_uninit(drmp3* pMP3)
+{
+    if (pMP3 == NULL) {
+        return;
+    }
+    
+#ifndef DR_MP3_NO_STDIO
+    if (pMP3->onRead == drmp3__on_read_stdio) {
+        FILE* pFile = (FILE*)pMP3->pUserData;
+        if (pFile != NULL) {
+            fclose(pFile);
+            pMP3->pUserData = NULL; /* Make sure the file handle is cleared to NULL to we don't attempt to close it a second time. */
+        }
+    }
+#endif
+
+    drmp3__free_from_callbacks(pMP3->pData, &pMP3->allocationCallbacks);
+}
+
+#if defined(DR_MP3_FLOAT_OUTPUT)
+static void drmp3_f32_to_s16(drmp3_int16* dst, const float* src, drmp3_uint64 sampleCount)
+{
+    drmp3_uint64 i;
+    drmp3_uint64 i4;
+    drmp3_uint64 sampleCount4;
+
+    /* Unrolled. */
+    i = 0;
+    sampleCount4 = sampleCount >> 2;
+    for (i4 = 0; i4 < sampleCount4; i4 += 1) {
+        float x0 = src[i+0];
+        float x1 = src[i+1];
+        float x2 = src[i+2];
+        float x3 = src[i+3];
+
+        x0 = ((x0 < -1) ? -1 : ((x0 > 1) ? 1 : x0));
+        x1 = ((x1 < -1) ? -1 : ((x1 > 1) ? 1 : x1));
+        x2 = ((x2 < -1) ? -1 : ((x2 > 1) ? 1 : x2));
+        x3 = ((x3 < -1) ? -1 : ((x3 > 1) ? 1 : x3));
+
+        x0 = x0 * 32767.0f;
+        x1 = x1 * 32767.0f;
+        x2 = x2 * 32767.0f;
+        x3 = x3 * 32767.0f;
+
+        dst[i+0] = (drmp3_int16)x0;
+        dst[i+1] = (drmp3_int16)x1;
+        dst[i+2] = (drmp3_int16)x2;
+        dst[i+3] = (drmp3_int16)x3;
+
+        i += 4;
+    }
+
+    /* Leftover. */
+    for (; i < sampleCount; i += 1) {
+        float x = src[i];
+        x = ((x < -1) ? -1 : ((x > 1) ? 1 : x));    /* clip */
+        x = x * 32767.0f;                           /* -1..1 to -32767..32767 */
+
+        dst[i] = (drmp3_int16)x;
+    }
+}
+#endif
+
+#if !defined(DR_MP3_FLOAT_OUTPUT)
+static void drmp3_s16_to_f32(float* dst, const drmp3_int16* src, drmp3_uint64 sampleCount)
+{
+    drmp3_uint64 i;
+    for (i = 0; i < sampleCount; i += 1) {
+        float x = (float)src[i];
+        x = x * 0.000030517578125f;         /* -32768..32767 to -1..0.999969482421875 */
+        dst[i] = x;
+    }
+}
+#endif
+
+
+static drmp3_uint64 drmp3_read_pcm_frames_raw(drmp3* pMP3, drmp3_uint64 framesToRead, void* pBufferOut)
+{
+    drmp3_uint64 totalFramesRead = 0;
+
+    DRMP3_ASSERT(pMP3 != NULL);
+    DRMP3_ASSERT(pMP3->onRead != NULL);
+
+    while (framesToRead > 0) {
+        drmp3_uint32 framesToConsume = (drmp3_uint32)DRMP3_MIN(pMP3->pcmFramesRemainingInMP3Frame, framesToRead);
+        if (pBufferOut != NULL) {
+        #if defined(DR_MP3_FLOAT_OUTPUT)
+            /* f32 */
+            float* pFramesOutF32 = (float*)DRMP3_OFFSET_PTR(pBufferOut,          sizeof(float) * totalFramesRead                   * pMP3->channels);
+            float* pFramesInF32  = (float*)DRMP3_OFFSET_PTR(&pMP3->pcmFrames[0], sizeof(float) * pMP3->pcmFramesConsumedInMP3Frame * pMP3->mp3FrameChannels);
+            DRMP3_COPY_MEMORY(pFramesOutF32, pFramesInF32, sizeof(float) * framesToConsume * pMP3->channels);
+        #else
+            /* s16 */
+            drmp3_int16* pFramesOutS16 = (drmp3_int16*)DRMP3_OFFSET_PTR(pBufferOut,          sizeof(drmp3_int16) * totalFramesRead                   * pMP3->channels);
+            drmp3_int16* pFramesInS16  = (drmp3_int16*)DRMP3_OFFSET_PTR(&pMP3->pcmFrames[0], sizeof(drmp3_int16) * pMP3->pcmFramesConsumedInMP3Frame * pMP3->mp3FrameChannels);
+            DRMP3_COPY_MEMORY(pFramesOutS16, pFramesInS16, sizeof(drmp3_int16) * framesToConsume * pMP3->channels);
+        #endif
+        }
+
+        pMP3->currentPCMFrame              += framesToConsume;
+        pMP3->pcmFramesConsumedInMP3Frame  += framesToConsume;
+        pMP3->pcmFramesRemainingInMP3Frame -= framesToConsume;
+        totalFramesRead                    += framesToConsume;
+        framesToRead                       -= framesToConsume;
+
+        if (framesToRead == 0) {
+            break;
+        }
+
+        DRMP3_ASSERT(pMP3->pcmFramesRemainingInMP3Frame == 0);
+
+        /*
+        At this point we have exhausted our in-memory buffer so we need to re-fill. Note that the sample rate may have changed
+        at this point which means we'll also need to update our sample rate conversion pipeline.
+        */
+        if (drmp3_decode_next_frame(pMP3) == 0) {
+            break;
+        }
+    }
+
+    return totalFramesRead;
+}
+
+
+DRMP3_API drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut)
+{
+    if (pMP3 == NULL || pMP3->onRead == NULL) {
+        return 0;
+    }
+
+#if defined(DR_MP3_FLOAT_OUTPUT)
+    /* Fast path. No conversion required. */
+    return drmp3_read_pcm_frames_raw(pMP3, framesToRead, pBufferOut);
+#else
+    /* Slow path. Convert from s16 to f32. */
+    {
+        drmp3_int16 pTempS16[8192];
+        drmp3_uint64 totalPCMFramesRead = 0;
+
+        while (totalPCMFramesRead < framesToRead) {
+            drmp3_uint64 framesJustRead;
+            drmp3_uint64 framesRemaining = framesToRead - totalPCMFramesRead;
+            drmp3_uint64 framesToReadNow = DRMP3_COUNTOF(pTempS16) / pMP3->channels;
+            if (framesToReadNow > framesRemaining) {
+                framesToReadNow = framesRemaining;
+            }
+
+            framesJustRead = drmp3_read_pcm_frames_raw(pMP3, framesToReadNow, pTempS16);
+            if (framesJustRead == 0) {
+                break;
+            }
+
+            drmp3_s16_to_f32((float*)DRMP3_OFFSET_PTR(pBufferOut, sizeof(float) * totalPCMFramesRead * pMP3->channels), pTempS16, framesJustRead * pMP3->channels);
+            totalPCMFramesRead += framesJustRead;
+        }
+
+        return totalPCMFramesRead;
+    }
+#endif
+}
+
+DRMP3_API drmp3_uint64 drmp3_read_pcm_frames_s16(drmp3* pMP3, drmp3_uint64 framesToRead, drmp3_int16* pBufferOut)
+{
+    if (pMP3 == NULL || pMP3->onRead == NULL) {
+        return 0;
+    }
+
+#if !defined(DR_MP3_FLOAT_OUTPUT)
+    /* Fast path. No conversion required. */
+    return drmp3_read_pcm_frames_raw(pMP3, framesToRead, pBufferOut);
+#else
+    /* Slow path. Convert from f32 to s16. */
+    {
+        float pTempF32[4096];
+        drmp3_uint64 totalPCMFramesRead = 0;
+
+        while (totalPCMFramesRead < framesToRead) {
+            drmp3_uint64 framesJustRead;
+            drmp3_uint64 framesRemaining = framesToRead - totalPCMFramesRead;
+            drmp3_uint64 framesToReadNow = DRMP3_COUNTOF(pTempF32) / pMP3->channels;
+            if (framesToReadNow > framesRemaining) {
+                framesToReadNow = framesRemaining;
+            }
+
+            framesJustRead = drmp3_read_pcm_frames_raw(pMP3, framesToReadNow, pTempF32);
+            if (framesJustRead == 0) {
+                break;
+            }
+
+            drmp3_f32_to_s16((drmp3_int16*)DRMP3_OFFSET_PTR(pBufferOut, sizeof(drmp3_int16) * totalPCMFramesRead * pMP3->channels), pTempF32, framesJustRead * pMP3->channels);
+            totalPCMFramesRead += framesJustRead;
+        }
+
+        return totalPCMFramesRead;
+    }
+#endif
+}
+
+static void drmp3_reset(drmp3* pMP3)
+{
+    DRMP3_ASSERT(pMP3 != NULL);
+
+    pMP3->pcmFramesConsumedInMP3Frame = 0;
+    pMP3->pcmFramesRemainingInMP3Frame = 0;
+    pMP3->currentPCMFrame = 0;
+    pMP3->dataSize = 0;
+    pMP3->atEnd = DRMP3_FALSE;
+    drmp3dec_init(&pMP3->decoder);
+}
+
+static drmp3_bool32 drmp3_seek_to_start_of_stream(drmp3* pMP3)
+{
+    DRMP3_ASSERT(pMP3 != NULL);
+    DRMP3_ASSERT(pMP3->onSeek != NULL);
+
+    /* Seek to the start of the stream to begin with. */
+    if (!drmp3__on_seek(pMP3, 0, drmp3_seek_origin_start)) {
+        return DRMP3_FALSE;
+    }
+
+    /* Clear any cached data. */
+    drmp3_reset(pMP3);
+    return DRMP3_TRUE;
+}
+
+
+static drmp3_bool32 drmp3_seek_forward_by_pcm_frames__brute_force(drmp3* pMP3, drmp3_uint64 frameOffset)
+{
+    drmp3_uint64 framesRead;
+
+    /*
+    Just using a dumb read-and-discard for now. What would be nice is to parse only the header of the MP3 frame, and then skip over leading
+    frames without spending the time doing a full decode. I cannot see an easy way to do this in minimp3, however, so it may involve some
+    kind of manual processing.
+    */
+#if defined(DR_MP3_FLOAT_OUTPUT)
+    framesRead = drmp3_read_pcm_frames_f32(pMP3, frameOffset, NULL);
+#else
+    framesRead = drmp3_read_pcm_frames_s16(pMP3, frameOffset, NULL);
+#endif
+    if (framesRead != frameOffset) {
+        return DRMP3_FALSE;
+    }
+
+    return DRMP3_TRUE;
+}
+
+static drmp3_bool32 drmp3_seek_to_pcm_frame__brute_force(drmp3* pMP3, drmp3_uint64 frameIndex)
+{
+    DRMP3_ASSERT(pMP3 != NULL);
+
+    if (frameIndex == pMP3->currentPCMFrame) {
+        return DRMP3_TRUE;
+    }
+
+    /*
+    If we're moving foward we just read from where we're at. Otherwise we need to move back to the start of
+    the stream and read from the beginning.
+    */
+    if (frameIndex < pMP3->currentPCMFrame) {
+        /* Moving backward. Move to the start of the stream and then move forward. */
+        if (!drmp3_seek_to_start_of_stream(pMP3)) {
+            return DRMP3_FALSE;
+        }
+    }
+
+    DRMP3_ASSERT(frameIndex >= pMP3->currentPCMFrame);
+    return drmp3_seek_forward_by_pcm_frames__brute_force(pMP3, (frameIndex - pMP3->currentPCMFrame));
+}
+
+static drmp3_bool32 drmp3_find_closest_seek_point(drmp3* pMP3, drmp3_uint64 frameIndex, drmp3_uint32* pSeekPointIndex)
+{
+    drmp3_uint32 iSeekPoint;
+
+    DRMP3_ASSERT(pSeekPointIndex != NULL);
+
+    *pSeekPointIndex = 0;
+
+    if (frameIndex < pMP3->pSeekPoints[0].pcmFrameIndex) {
+        return DRMP3_FALSE;
+    }
+
+    /* Linear search for simplicity to begin with while I'm getting this thing working. Once it's all working change this to a binary search. */
+    for (iSeekPoint = 0; iSeekPoint < pMP3->seekPointCount; ++iSeekPoint) {
+        if (pMP3->pSeekPoints[iSeekPoint].pcmFrameIndex > frameIndex) {
+            break;  /* Found it. */
+        }
+
+        *pSeekPointIndex = iSeekPoint;
+    }
+
+    return DRMP3_TRUE;
+}
+
+static drmp3_bool32 drmp3_seek_to_pcm_frame__seek_table(drmp3* pMP3, drmp3_uint64 frameIndex)
+{
+    drmp3_seek_point seekPoint;
+    drmp3_uint32 priorSeekPointIndex;
+    drmp3_uint16 iMP3Frame;
+    drmp3_uint64 leftoverFrames;
+
+    DRMP3_ASSERT(pMP3 != NULL);
+    DRMP3_ASSERT(pMP3->pSeekPoints != NULL);
+    DRMP3_ASSERT(pMP3->seekPointCount > 0);
+
+    /* If there is no prior seekpoint it means the target PCM frame comes before the first seek point. Just assume a seekpoint at the start of the file in this case. */
+    if (drmp3_find_closest_seek_point(pMP3, frameIndex, &priorSeekPointIndex)) {
+        seekPoint = pMP3->pSeekPoints[priorSeekPointIndex];
+    } else {
+        seekPoint.seekPosInBytes     = 0;
+        seekPoint.pcmFrameIndex      = 0;
+        seekPoint.mp3FramesToDiscard = 0;
+        seekPoint.pcmFramesToDiscard = 0;
+    }
+
+    /* First thing to do is seek to the first byte of the relevant MP3 frame. */
+    if (!drmp3__on_seek_64(pMP3, seekPoint.seekPosInBytes, drmp3_seek_origin_start)) {
+        return DRMP3_FALSE; /* Failed to seek. */
+    }
+
+    /* Clear any cached data. */
+    drmp3_reset(pMP3);
+
+    /* Whole MP3 frames need to be discarded first. */
+    for (iMP3Frame = 0; iMP3Frame < seekPoint.mp3FramesToDiscard; ++iMP3Frame) {
+        drmp3_uint32 pcmFramesRead;
+        drmp3d_sample_t* pPCMFrames;
+
+        /* Pass in non-null for the last frame because we want to ensure the sample rate converter is preloaded correctly. */
+        pPCMFrames = NULL;
+        if (iMP3Frame == seekPoint.mp3FramesToDiscard-1) {
+            pPCMFrames = (drmp3d_sample_t*)pMP3->pcmFrames;
+        }
+
+        /* We first need to decode the next frame. */
+        pcmFramesRead = drmp3_decode_next_frame_ex(pMP3, pPCMFrames);
+        if (pcmFramesRead == 0) {
+            return DRMP3_FALSE;
+        }
+    }
+
+    /* We seeked to an MP3 frame in the raw stream so we need to make sure the current PCM frame is set correctly. */
+    pMP3->currentPCMFrame = seekPoint.pcmFrameIndex - seekPoint.pcmFramesToDiscard;
+
+    /*
+    Now at this point we can follow the same process as the brute force technique where we just skip over unnecessary MP3 frames and then
+    read-and-discard at least 2 whole MP3 frames.
+    */
+    leftoverFrames = frameIndex - pMP3->currentPCMFrame;
+    return drmp3_seek_forward_by_pcm_frames__brute_force(pMP3, leftoverFrames);
+}
+
+DRMP3_API drmp3_bool32 drmp3_seek_to_pcm_frame(drmp3* pMP3, drmp3_uint64 frameIndex)
+{
+    if (pMP3 == NULL || pMP3->onSeek == NULL) {
+        return DRMP3_FALSE;
+    }
+
+    if (frameIndex == 0) {
+        return drmp3_seek_to_start_of_stream(pMP3);
+    }
+
+    /* Use the seek table if we have one. */
+    if (pMP3->pSeekPoints != NULL && pMP3->seekPointCount > 0) {
+        return drmp3_seek_to_pcm_frame__seek_table(pMP3, frameIndex);
+    } else {
+        return drmp3_seek_to_pcm_frame__brute_force(pMP3, frameIndex);
+    }
+}
+
+DRMP3_API drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint64* pMP3FrameCount, drmp3_uint64* pPCMFrameCount)
+{
+    drmp3_uint64 currentPCMFrame;
+    drmp3_uint64 totalPCMFrameCount;
+    drmp3_uint64 totalMP3FrameCount;
+
+    if (pMP3 == NULL) {
+        return DRMP3_FALSE;
+    }
+
+    /*
+    The way this works is we move back to the start of the stream, iterate over each MP3 frame and calculate the frame count based
+    on our output sample rate, the seek back to the PCM frame we were sitting on before calling this function.
+    */
+
+    /* The stream must support seeking for this to work. */
+    if (pMP3->onSeek == NULL) {
+        return DRMP3_FALSE;
+    }
+
+    /* We'll need to seek back to where we were, so grab the PCM frame we're currently sitting on so we can restore later. */
+    currentPCMFrame = pMP3->currentPCMFrame;
+    
+    if (!drmp3_seek_to_start_of_stream(pMP3)) {
+        return DRMP3_FALSE;
+    }
+
+    totalPCMFrameCount = 0;
+    totalMP3FrameCount = 0;
+
+    for (;;) {
+        drmp3_uint32 pcmFramesInCurrentMP3Frame;
+
+        pcmFramesInCurrentMP3Frame = drmp3_decode_next_frame_ex(pMP3, NULL);
+        if (pcmFramesInCurrentMP3Frame == 0) {
+            break;
+        }
+
+        totalPCMFrameCount += pcmFramesInCurrentMP3Frame;
+        totalMP3FrameCount += 1;
+    }
+
+    /* Finally, we need to seek back to where we were. */
+    if (!drmp3_seek_to_start_of_stream(pMP3)) {
+        return DRMP3_FALSE;
+    }
+
+    if (!drmp3_seek_to_pcm_frame(pMP3, currentPCMFrame)) {
+        return DRMP3_FALSE;
+    }
+
+    if (pMP3FrameCount != NULL) {
+        *pMP3FrameCount = totalMP3FrameCount;
+    }
+    if (pPCMFrameCount != NULL) {
+        *pPCMFrameCount = totalPCMFrameCount;
+    }
+
+    return DRMP3_TRUE;
+}
+
+DRMP3_API drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3)
+{
+    drmp3_uint64 totalPCMFrameCount;
+    if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, NULL, &totalPCMFrameCount)) {
+        return 0;
+    }
+
+    return totalPCMFrameCount;
+}
+
+DRMP3_API drmp3_uint64 drmp3_get_mp3_frame_count(drmp3* pMP3)
+{
+    drmp3_uint64 totalMP3FrameCount;
+    if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, &totalMP3FrameCount, NULL)) {
+        return 0;
+    }
+
+    return totalMP3FrameCount;
+}
+
+static void drmp3__accumulate_running_pcm_frame_count(drmp3* pMP3, drmp3_uint32 pcmFrameCountIn, drmp3_uint64* pRunningPCMFrameCount, float* pRunningPCMFrameCountFractionalPart)
+{
+    float srcRatio;
+    float pcmFrameCountOutF;
+    drmp3_uint32 pcmFrameCountOut;
+
+    srcRatio = (float)pMP3->mp3FrameSampleRate / (float)pMP3->sampleRate;
+    DRMP3_ASSERT(srcRatio > 0);
+
+    pcmFrameCountOutF = *pRunningPCMFrameCountFractionalPart + (pcmFrameCountIn / srcRatio);
+    pcmFrameCountOut  = (drmp3_uint32)pcmFrameCountOutF;
+    *pRunningPCMFrameCountFractionalPart = pcmFrameCountOutF - pcmFrameCountOut;
+    *pRunningPCMFrameCount += pcmFrameCountOut;
+}
+
+typedef struct
+{
+    drmp3_uint64 bytePos;
+    drmp3_uint64 pcmFrameIndex; /* <-- After sample rate conversion. */
+} drmp3__seeking_mp3_frame_info;
+
+DRMP3_API drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pSeekPointCount, drmp3_seek_point* pSeekPoints)
+{
+    drmp3_uint32 seekPointCount;
+    drmp3_uint64 currentPCMFrame;
+    drmp3_uint64 totalMP3FrameCount;
+    drmp3_uint64 totalPCMFrameCount;
+
+    if (pMP3 == NULL || pSeekPointCount == NULL || pSeekPoints == NULL) {
+        return DRMP3_FALSE; /* Invalid args. */
+    }
+
+    seekPointCount = *pSeekPointCount;
+    if (seekPointCount == 0) {
+        return DRMP3_FALSE;  /* The client has requested no seek points. Consider this to be invalid arguments since the client has probably not intended this. */
+    }
+
+    /* We'll need to seek back to the current sample after calculating the seekpoints so we need to go ahead and grab the current location at the top. */
+    currentPCMFrame = pMP3->currentPCMFrame;
+    
+    /* We never do more than the total number of MP3 frames and we limit it to 32-bits. */
+    if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, &totalMP3FrameCount, &totalPCMFrameCount)) {
+        return DRMP3_FALSE;
+    }
+
+    /* If there's less than DRMP3_SEEK_LEADING_MP3_FRAMES+1 frames we just report 1 seek point which will be the very start of the stream. */
+    if (totalMP3FrameCount < DRMP3_SEEK_LEADING_MP3_FRAMES+1) {
+        seekPointCount = 1;
+        pSeekPoints[0].seekPosInBytes     = 0;
+        pSeekPoints[0].pcmFrameIndex      = 0;
+        pSeekPoints[0].mp3FramesToDiscard = 0;
+        pSeekPoints[0].pcmFramesToDiscard = 0;
+    } else {
+        drmp3_uint64 pcmFramesBetweenSeekPoints;
+        drmp3__seeking_mp3_frame_info mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES+1];
+        drmp3_uint64 runningPCMFrameCount = 0;
+        float runningPCMFrameCountFractionalPart = 0;
+        drmp3_uint64 nextTargetPCMFrame;
+        drmp3_uint32 iMP3Frame;
+        drmp3_uint32 iSeekPoint;
+
+        if (seekPointCount > totalMP3FrameCount-1) {
+            seekPointCount = (drmp3_uint32)totalMP3FrameCount-1;
+        }
+
+        pcmFramesBetweenSeekPoints = totalPCMFrameCount / (seekPointCount+1);
+
+        /*
+        Here is where we actually calculate the seek points. We need to start by moving the start of the stream. We then enumerate over each
+        MP3 frame.
+        */
+        if (!drmp3_seek_to_start_of_stream(pMP3)) {
+            return DRMP3_FALSE;
+        }
+
+        /*
+        We need to cache the byte positions of the previous MP3 frames. As a new MP3 frame is iterated, we cycle the byte positions in this
+        array. The value in the first item in this array is the byte position that will be reported in the next seek point.
+        */
+
+        /* We need to initialize the array of MP3 byte positions for the leading MP3 frames. */
+        for (iMP3Frame = 0; iMP3Frame < DRMP3_SEEK_LEADING_MP3_FRAMES+1; ++iMP3Frame) {
+            drmp3_uint32 pcmFramesInCurrentMP3FrameIn;
+
+            /* The byte position of the next frame will be the stream's cursor position, minus whatever is sitting in the buffer. */
+            DRMP3_ASSERT(pMP3->streamCursor >= pMP3->dataSize);
+            mp3FrameInfo[iMP3Frame].bytePos       = pMP3->streamCursor - pMP3->dataSize;
+            mp3FrameInfo[iMP3Frame].pcmFrameIndex = runningPCMFrameCount;
+
+            /* We need to get information about this frame so we can know how many samples it contained. */
+            pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL);
+            if (pcmFramesInCurrentMP3FrameIn == 0) {
+                return DRMP3_FALSE; /* This should never happen. */
+            }
+
+            drmp3__accumulate_running_pcm_frame_count(pMP3, pcmFramesInCurrentMP3FrameIn, &runningPCMFrameCount, &runningPCMFrameCountFractionalPart);
+        }
+
+        /*
+        At this point we will have extracted the byte positions of the leading MP3 frames. We can now start iterating over each seek point and
+        calculate them.
+        */
+        nextTargetPCMFrame = 0;
+        for (iSeekPoint = 0; iSeekPoint < seekPointCount; ++iSeekPoint) {
+            nextTargetPCMFrame += pcmFramesBetweenSeekPoints;
+
+            for (;;) {
+                if (nextTargetPCMFrame < runningPCMFrameCount) {
+                    /* The next seek point is in the current MP3 frame. */
+                    pSeekPoints[iSeekPoint].seekPosInBytes     = mp3FrameInfo[0].bytePos;
+                    pSeekPoints[iSeekPoint].pcmFrameIndex      = nextTargetPCMFrame;
+                    pSeekPoints[iSeekPoint].mp3FramesToDiscard = DRMP3_SEEK_LEADING_MP3_FRAMES;
+                    pSeekPoints[iSeekPoint].pcmFramesToDiscard = (drmp3_uint16)(nextTargetPCMFrame - mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES-1].pcmFrameIndex);
+                    break;
+                } else {
+                    size_t i;
+                    drmp3_uint32 pcmFramesInCurrentMP3FrameIn;
+
+                    /*
+                    The next seek point is not in the current MP3 frame, so continue on to the next one. The first thing to do is cycle the cached
+                    MP3 frame info.
+                    */
+                    for (i = 0; i < DRMP3_COUNTOF(mp3FrameInfo)-1; ++i) {
+                        mp3FrameInfo[i] = mp3FrameInfo[i+1];
+                    }
+
+                    /* Cache previous MP3 frame info. */
+                    mp3FrameInfo[DRMP3_COUNTOF(mp3FrameInfo)-1].bytePos       = pMP3->streamCursor - pMP3->dataSize;
+                    mp3FrameInfo[DRMP3_COUNTOF(mp3FrameInfo)-1].pcmFrameIndex = runningPCMFrameCount;
+
+                    /*
+                    Go to the next MP3 frame. This shouldn't ever fail, but just in case it does we just set the seek point and break. If it happens, it
+                    should only ever do it for the last seek point.
+                    */
+                    pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL);
+                    if (pcmFramesInCurrentMP3FrameIn == 0) {
+                        pSeekPoints[iSeekPoint].seekPosInBytes     = mp3FrameInfo[0].bytePos;
+                        pSeekPoints[iSeekPoint].pcmFrameIndex      = nextTargetPCMFrame;
+                        pSeekPoints[iSeekPoint].mp3FramesToDiscard = DRMP3_SEEK_LEADING_MP3_FRAMES;
+                        pSeekPoints[iSeekPoint].pcmFramesToDiscard = (drmp3_uint16)(nextTargetPCMFrame - mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES-1].pcmFrameIndex);
+                        break;
+                    }
+
+                    drmp3__accumulate_running_pcm_frame_count(pMP3, pcmFramesInCurrentMP3FrameIn, &runningPCMFrameCount, &runningPCMFrameCountFractionalPart);
+                }
+            }
+        }
+
+        /* Finally, we need to seek back to where we were. */
+        if (!drmp3_seek_to_start_of_stream(pMP3)) {
+            return DRMP3_FALSE;
+        }
+        if (!drmp3_seek_to_pcm_frame(pMP3, currentPCMFrame)) {
+            return DRMP3_FALSE;
+        }
+    }
+
+    *pSeekPointCount = seekPointCount;
+    return DRMP3_TRUE;
+}
+
+DRMP3_API drmp3_bool32 drmp3_bind_seek_table(drmp3* pMP3, drmp3_uint32 seekPointCount, drmp3_seek_point* pSeekPoints)
+{
+    if (pMP3 == NULL) {
+        return DRMP3_FALSE;
+    }
+
+    if (seekPointCount == 0 || pSeekPoints == NULL) {
+        /* Unbinding. */
+        pMP3->seekPointCount = 0;
+        pMP3->pSeekPoints = NULL;
+    } else {
+        /* Binding. */
+        pMP3->seekPointCount = seekPointCount;
+        pMP3->pSeekPoints = pSeekPoints;
+    }
+
+    return DRMP3_TRUE;
+}
+
+
+static float* drmp3__full_read_and_close_f32(drmp3* pMP3, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount)
+{
+    drmp3_uint64 totalFramesRead = 0;
+    drmp3_uint64 framesCapacity = 0;
+    float* pFrames = NULL;
+    float temp[4096];
+
+    DRMP3_ASSERT(pMP3 != NULL);
+
+    for (;;) {
+        drmp3_uint64 framesToReadRightNow = DRMP3_COUNTOF(temp) / pMP3->channels;
+        drmp3_uint64 framesJustRead = drmp3_read_pcm_frames_f32(pMP3, framesToReadRightNow, temp);
+        if (framesJustRead == 0) {
+            break;
+        }
+
+        /* Reallocate the output buffer if there's not enough room. */
+        if (framesCapacity < totalFramesRead + framesJustRead) {
+            drmp3_uint64 oldFramesBufferSize;
+            drmp3_uint64 newFramesBufferSize;
+            drmp3_uint64 newFramesCap;
+            float* pNewFrames;
+
+            newFramesCap = framesCapacity * 2;
+            if (newFramesCap < totalFramesRead + framesJustRead) {
+                newFramesCap = totalFramesRead + framesJustRead;
+            }
+
+            oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(float);
+            newFramesBufferSize = newFramesCap   * pMP3->channels * sizeof(float);
+            if (newFramesBufferSize > (drmp3_uint64)DRMP3_SIZE_MAX) {
+                break;
+            }
+
+            pNewFrames = (float*)drmp3__realloc_from_callbacks(pFrames, (size_t)newFramesBufferSize, (size_t)oldFramesBufferSize, &pMP3->allocationCallbacks);
+            if (pNewFrames == NULL) {
+                drmp3__free_from_callbacks(pFrames, &pMP3->allocationCallbacks);
+                break;
+            }
+
+            pFrames = pNewFrames;
+            framesCapacity = newFramesCap;
+        }
+
+        DRMP3_COPY_MEMORY(pFrames + totalFramesRead*pMP3->channels, temp, (size_t)(framesJustRead*pMP3->channels*sizeof(float)));
+        totalFramesRead += framesJustRead;
+
+        /* If the number of frames we asked for is less that what we actually read it means we've reached the end. */
+        if (framesJustRead != framesToReadRightNow) {
+            break;
+        }
+    }
+
+    if (pConfig != NULL) {
+        pConfig->channels   = pMP3->channels;
+        pConfig->sampleRate = pMP3->sampleRate;
+    }
+
+    drmp3_uninit(pMP3);
+
+    if (pTotalFrameCount) {
+        *pTotalFrameCount = totalFramesRead;
+    }
+
+    return pFrames;
+}
+
+static drmp3_int16* drmp3__full_read_and_close_s16(drmp3* pMP3, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount)
+{
+    drmp3_uint64 totalFramesRead = 0;
+    drmp3_uint64 framesCapacity = 0;
+    drmp3_int16* pFrames = NULL;
+    drmp3_int16 temp[4096];
+
+    DRMP3_ASSERT(pMP3 != NULL);
+
+    for (;;) {
+        drmp3_uint64 framesToReadRightNow = DRMP3_COUNTOF(temp) / pMP3->channels;
+        drmp3_uint64 framesJustRead = drmp3_read_pcm_frames_s16(pMP3, framesToReadRightNow, temp);
+        if (framesJustRead == 0) {
+            break;
+        }
+
+        /* Reallocate the output buffer if there's not enough room. */
+        if (framesCapacity < totalFramesRead + framesJustRead) {
+            drmp3_uint64 newFramesBufferSize;
+            drmp3_uint64 oldFramesBufferSize;
+            drmp3_uint64 newFramesCap;
+            drmp3_int16* pNewFrames;
+
+            newFramesCap = framesCapacity * 2;
+            if (newFramesCap < totalFramesRead + framesJustRead) {
+                newFramesCap = totalFramesRead + framesJustRead;
+            }
+
+            oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(drmp3_int16);
+            newFramesBufferSize = newFramesCap   * pMP3->channels * sizeof(drmp3_int16);
+            if (newFramesBufferSize > (drmp3_uint64)DRMP3_SIZE_MAX) {
+                break;
+            }
+
+            pNewFrames = (drmp3_int16*)drmp3__realloc_from_callbacks(pFrames, (size_t)newFramesBufferSize, (size_t)oldFramesBufferSize, &pMP3->allocationCallbacks);
+            if (pNewFrames == NULL) {
+                drmp3__free_from_callbacks(pFrames, &pMP3->allocationCallbacks);
+                break;
+            }
+
+            pFrames = pNewFrames;
+            framesCapacity = newFramesCap;
+        }
+
+        DRMP3_COPY_MEMORY(pFrames + totalFramesRead*pMP3->channels, temp, (size_t)(framesJustRead*pMP3->channels*sizeof(drmp3_int16)));
+        totalFramesRead += framesJustRead;
+
+        /* If the number of frames we asked for is less that what we actually read it means we've reached the end. */
+        if (framesJustRead != framesToReadRightNow) {
+            break;
+        }
+    }
+
+    if (pConfig != NULL) {
+        pConfig->channels   = pMP3->channels;
+        pConfig->sampleRate = pMP3->sampleRate;
+    }
+
+    drmp3_uninit(pMP3);
+
+    if (pTotalFrameCount) {
+        *pTotalFrameCount = totalFramesRead;
+    }
+
+    return pFrames;
+}
+
+
+DRMP3_API float* drmp3_open_and_read_pcm_frames_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    drmp3 mp3;
+    if (!drmp3_init(&mp3, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount);
+}
+
+DRMP3_API drmp3_int16* drmp3_open_and_read_pcm_frames_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    drmp3 mp3;
+    if (!drmp3_init(&mp3, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount);
+}
+
+
+DRMP3_API float* drmp3_open_memory_and_read_pcm_frames_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    drmp3 mp3;
+    if (!drmp3_init_memory(&mp3, pData, dataSize, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount);
+}
+
+DRMP3_API drmp3_int16* drmp3_open_memory_and_read_pcm_frames_s16(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    drmp3 mp3;
+    if (!drmp3_init_memory(&mp3, pData, dataSize, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount);
+}
+
+
+#ifndef DR_MP3_NO_STDIO
+DRMP3_API float* drmp3_open_file_and_read_pcm_frames_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    drmp3 mp3;
+    if (!drmp3_init_file(&mp3, filePath, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount);
+}
+
+DRMP3_API drmp3_int16* drmp3_open_file_and_read_pcm_frames_s16(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    drmp3 mp3;
+    if (!drmp3_init_file(&mp3, filePath, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drmp3__full_read_and_close_s16(&mp3, pConfig, pTotalFrameCount);
+}
+#endif
+
+DRMP3_API void* drmp3_malloc(size_t sz, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        return drmp3__malloc_from_callbacks(sz, pAllocationCallbacks);
+    } else {
+        return drmp3__malloc_default(sz, NULL);
+    }
+}
+
+DRMP3_API void drmp3_free(void* p, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        drmp3__free_from_callbacks(p, pAllocationCallbacks);
+    } else {
+        drmp3__free_default(p, NULL);
+    }
+}
+
+#endif  /* dr_mp3_c */
+#endif  /*DR_MP3_IMPLEMENTATION*/
+
+/*
+DIFFERENCES BETWEEN minimp3 AND dr_mp3
+======================================
+- First, keep in mind that minimp3 (https://github.com/lieff/minimp3) is where all the real work was done. All of the
+  code relating to the actual decoding remains mostly unmodified, apart from some namespacing changes.
+- dr_mp3 adds a pulling style API which allows you to deliver raw data via callbacks. So, rather than pushing data
+  to the decoder, the decoder _pulls_ data from your callbacks.
+- In addition to callbacks, a decoder can be initialized from a block of memory and a file.
+- The dr_mp3 pull API reads PCM frames rather than whole MP3 frames.
+- dr_mp3 adds convenience APIs for opening and decoding entire files in one go.
+- dr_mp3 is fully namespaced, including the implementation section, which is more suitable when compiling projects
+  as a single translation unit (aka unity builds). At the time of writing this, a unity build is not possible when
+  using minimp3 in conjunction with stb_vorbis. dr_mp3 addresses this.
+*/
+
+/*
+RELEASE NOTES - v0.5.0
+=======================
+Version 0.5.0 has breaking API changes.
+
+Improved Client-Defined Memory Allocation
+-----------------------------------------
+The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
+existing system of DRMP3_MALLOC, DRMP3_REALLOC and DRMP3_FREE are still in place and will be used by default when no custom
+allocation callbacks are specified.
+
+To use the new system, you pass in a pointer to a drmp3_allocation_callbacks object to drmp3_init() and family, like this:
+
+    void* my_malloc(size_t sz, void* pUserData)
+    {
+        return malloc(sz);
+    }
+    void* my_realloc(void* p, size_t sz, void* pUserData)
+    {
+        return realloc(p, sz);
+    }
+    void my_free(void* p, void* pUserData)
+    {
+        free(p);
+    }
+
+    ...
+
+    drmp3_allocation_callbacks allocationCallbacks;
+    allocationCallbacks.pUserData = &myData;
+    allocationCallbacks.onMalloc  = my_malloc;
+    allocationCallbacks.onRealloc = my_realloc;
+    allocationCallbacks.onFree    = my_free;
+    drmp3_init_file(&mp3, "my_file.mp3", NULL, &allocationCallbacks);
+
+The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
+
+Passing in null for the allocation callbacks object will cause dr_mp3 to use defaults which is the same as DRMP3_MALLOC,
+DRMP3_REALLOC and DRMP3_FREE and the equivalent of how it worked in previous versions.
+
+Every API that opens a drmp3 object now takes this extra parameter. These include the following:
+
+    drmp3_init()
+    drmp3_init_file()
+    drmp3_init_memory()
+    drmp3_open_and_read_pcm_frames_f32()
+    drmp3_open_and_read_pcm_frames_s16()
+    drmp3_open_memory_and_read_pcm_frames_f32()
+    drmp3_open_memory_and_read_pcm_frames_s16()
+    drmp3_open_file_and_read_pcm_frames_f32()
+    drmp3_open_file_and_read_pcm_frames_s16()
+
+Renamed APIs
+------------
+The following APIs have been renamed for consistency with other dr_* libraries and to make it clear that they return PCM frame
+counts rather than sample counts.
+
+    drmp3_open_and_read_f32()        -> drmp3_open_and_read_pcm_frames_f32()
+    drmp3_open_and_read_s16()        -> drmp3_open_and_read_pcm_frames_s16()
+    drmp3_open_memory_and_read_f32() -> drmp3_open_memory_and_read_pcm_frames_f32()
+    drmp3_open_memory_and_read_s16() -> drmp3_open_memory_and_read_pcm_frames_s16()
+    drmp3_open_file_and_read_f32()   -> drmp3_open_file_and_read_pcm_frames_f32()
+    drmp3_open_file_and_read_s16()   -> drmp3_open_file_and_read_pcm_frames_s16()
+*/
+
+/*
+REVISION HISTORY
+================
+v0.6.38 - 2023-11-02
+  - Fix build for ARMv6-M.
+
+v0.6.37 - 2023-07-07
+  - Silence a static analysis warning.
+
+v0.6.36 - 2023-06-17
+  - Fix an incorrect date in revision history. No functional change.
+
+v0.6.35 - 2023-05-22
+  - Minor code restructure. No functional change.
+
+v0.6.34 - 2022-09-17
+  - Fix compilation with DJGPP.
+  - Fix compilation when compiling with x86 with no SSE2.
+  - Remove an unnecessary variable from the drmp3 structure.
+
+v0.6.33 - 2022-04-10
+  - Fix compilation error with the MSVC ARM64 build.
+  - Fix compilation error on older versions of GCC.
+  - Remove some unused functions.
+
+v0.6.32 - 2021-12-11
+  - Fix a warning with Clang.
+
+v0.6.31 - 2021-08-22
+  - Fix a bug when loading from memory.
+
+v0.6.30 - 2021-08-16
+  - Silence some warnings.
+  - Replace memory operations with DRMP3_* macros.
+
+v0.6.29 - 2021-08-08
+  - Bring up to date with minimp3.
+
+v0.6.28 - 2021-07-31
+  - Fix platform detection for ARM64.
+  - Fix a compilation error with C89.
+
+v0.6.27 - 2021-02-21
+  - Fix a warning due to referencing _MSC_VER when it is undefined.
+
+v0.6.26 - 2021-01-31
+  - Bring up to date with minimp3.
+
+v0.6.25 - 2020-12-26
+  - Remove DRMP3_DEFAULT_CHANNELS and DRMP3_DEFAULT_SAMPLE_RATE which are leftovers from some removed APIs.
+
+v0.6.24 - 2020-12-07
+  - Fix a typo in version date for 0.6.23.
+
+v0.6.23 - 2020-12-03
+  - Fix an error where a file can be closed twice when initialization of the decoder fails.
+
+v0.6.22 - 2020-12-02
+  - Fix an error where it's possible for a file handle to be left open when initialization of the decoder fails.
+
+v0.6.21 - 2020-11-28
+  - Bring up to date with minimp3.
+
+v0.6.20 - 2020-11-21
+  - Fix compilation with OpenWatcom.
+
+v0.6.19 - 2020-11-13
+  - Minor code clean up.
+
+v0.6.18 - 2020-11-01
+  - Improve compiler support for older versions of GCC.
+
+v0.6.17 - 2020-09-28
+  - Bring up to date with minimp3.
+
+v0.6.16 - 2020-08-02
+  - Simplify sized types.
+
+v0.6.15 - 2020-07-25
+  - Fix a compilation warning.
+
+v0.6.14 - 2020-07-23
+  - Fix undefined behaviour with memmove().
+
+v0.6.13 - 2020-07-06
+  - Fix a bug when converting from s16 to f32 in drmp3_read_pcm_frames_f32().
+
+v0.6.12 - 2020-06-23
+  - Add include guard for the implementation section.
+
+v0.6.11 - 2020-05-26
+  - Fix use of uninitialized variable error.
+
+v0.6.10 - 2020-05-16
+  - Add compile-time and run-time version querying.
+    - DRMP3_VERSION_MINOR
+    - DRMP3_VERSION_MAJOR
+    - DRMP3_VERSION_REVISION
+    - DRMP3_VERSION_STRING
+    - drmp3_version()
+    - drmp3_version_string()
+
+v0.6.9 - 2020-04-30
+  - Change the `pcm` parameter of drmp3dec_decode_frame() to a `const drmp3_uint8*` for consistency with internal APIs.
+
+v0.6.8 - 2020-04-26
+  - Optimizations to decoding when initializing from memory.
+
+v0.6.7 - 2020-04-25
+  - Fix a compilation error with DR_MP3_NO_STDIO
+  - Optimization to decoding by reducing some data movement.
+
+v0.6.6 - 2020-04-23
+  - Fix a minor bug with the running PCM frame counter.
+
+v0.6.5 - 2020-04-19
+  - Fix compilation error on ARM builds.
+
+v0.6.4 - 2020-04-19
+  - Bring up to date with changes to minimp3.
+
+v0.6.3 - 2020-04-13
+  - Fix some pedantic warnings.
+
+v0.6.2 - 2020-04-10
+  - Fix a crash in drmp3_open_*_and_read_pcm_frames_*() if the output config object is NULL.
+
+v0.6.1 - 2020-04-05
+  - Fix warnings.
+
+v0.6.0 - 2020-04-04
+  - API CHANGE: Remove the pConfig parameter from the following APIs:
+    - drmp3_init()
+    - drmp3_init_memory()
+    - drmp3_init_file()
+  - Add drmp3_init_file_w() for opening a file from a wchar_t encoded path.
+
+v0.5.6 - 2020-02-12
+  - Bring up to date with minimp3.
+
+v0.5.5 - 2020-01-29
+  - Fix a memory allocation bug in high level s16 decoding APIs.
+
+v0.5.4 - 2019-12-02
+  - Fix a possible null pointer dereference when using custom memory allocators for realloc().
+
+v0.5.3 - 2019-11-14
+  - Fix typos in documentation.
+
+v0.5.2 - 2019-11-02
+  - Bring up to date with minimp3.
+
+v0.5.1 - 2019-10-08
+  - Fix a warning with GCC.
+
+v0.5.0 - 2019-10-07
+  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
+    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
+    - drmp3_init()
+    - drmp3_init_file()
+    - drmp3_init_memory()
+    - drmp3_open_and_read_pcm_frames_f32()
+    - drmp3_open_and_read_pcm_frames_s16()
+    - drmp3_open_memory_and_read_pcm_frames_f32()
+    - drmp3_open_memory_and_read_pcm_frames_s16()
+    - drmp3_open_file_and_read_pcm_frames_f32()
+    - drmp3_open_file_and_read_pcm_frames_s16()
+  - API CHANGE: Renamed the following APIs:
+    - drmp3_open_and_read_f32()        -> drmp3_open_and_read_pcm_frames_f32()
+    - drmp3_open_and_read_s16()        -> drmp3_open_and_read_pcm_frames_s16()
+    - drmp3_open_memory_and_read_f32() -> drmp3_open_memory_and_read_pcm_frames_f32()
+    - drmp3_open_memory_and_read_s16() -> drmp3_open_memory_and_read_pcm_frames_s16()
+    - drmp3_open_file_and_read_f32()   -> drmp3_open_file_and_read_pcm_frames_f32()
+    - drmp3_open_file_and_read_s16()   -> drmp3_open_file_and_read_pcm_frames_s16()
+
+v0.4.7 - 2019-07-28
+  - Fix a compiler error.
+
+v0.4.6 - 2019-06-14
+  - Fix a compiler error.
+
+v0.4.5 - 2019-06-06
+  - Bring up to date with minimp3.
+
+v0.4.4 - 2019-05-06
+  - Fixes to the VC6 build.
+
+v0.4.3 - 2019-05-05
+  - Use the channel count and/or sample rate of the first MP3 frame instead of DRMP3_DEFAULT_CHANNELS and
+    DRMP3_DEFAULT_SAMPLE_RATE when they are set to 0. To use the old behaviour, just set the relevant property to
+    DRMP3_DEFAULT_CHANNELS or DRMP3_DEFAULT_SAMPLE_RATE.
+  - Add s16 reading APIs
+    - drmp3_read_pcm_frames_s16
+    - drmp3_open_memory_and_read_pcm_frames_s16
+    - drmp3_open_and_read_pcm_frames_s16
+    - drmp3_open_file_and_read_pcm_frames_s16
+  - Add drmp3_get_mp3_and_pcm_frame_count() to the public header section.
+  - Add support for C89.
+  - Change license to choice of public domain or MIT-0.
+
+v0.4.2 - 2019-02-21
+  - Fix a warning.
+
+v0.4.1 - 2018-12-30
+  - Fix a warning.
+
+v0.4.0 - 2018-12-16
+  - API CHANGE: Rename some APIs:
+    - drmp3_read_f32 -> to drmp3_read_pcm_frames_f32
+    - drmp3_seek_to_frame -> drmp3_seek_to_pcm_frame
+    - drmp3_open_and_decode_f32 -> drmp3_open_and_read_pcm_frames_f32
+    - drmp3_open_and_decode_memory_f32 -> drmp3_open_memory_and_read_pcm_frames_f32
+    - drmp3_open_and_decode_file_f32 -> drmp3_open_file_and_read_pcm_frames_f32
+  - Add drmp3_get_pcm_frame_count().
+  - Add drmp3_get_mp3_frame_count().
+  - Improve seeking performance.
+
+v0.3.2 - 2018-09-11
+  - Fix a couple of memory leaks.
+  - Bring up to date with minimp3.
+
+v0.3.1 - 2018-08-25
+  - Fix C++ build.
+
+v0.3.0 - 2018-08-25
+  - Bring up to date with minimp3. This has a minor API change: the "pcm" parameter of drmp3dec_decode_frame() has
+    been changed from short* to void* because it can now output both s16 and f32 samples, depending on whether or
+    not the DR_MP3_FLOAT_OUTPUT option is set.
+
+v0.2.11 - 2018-08-08
+  - Fix a bug where the last part of a file is not read.
+
+v0.2.10 - 2018-08-07
+  - Improve 64-bit detection.
+
+v0.2.9 - 2018-08-05
+  - Fix C++ build on older versions of GCC.
+  - Bring up to date with minimp3.
+
+v0.2.8 - 2018-08-02
+  - Fix compilation errors with older versions of GCC.
+
+v0.2.7 - 2018-07-13
+  - Bring up to date with minimp3.
+
+v0.2.6 - 2018-07-12
+  - Bring up to date with minimp3.
+
+v0.2.5 - 2018-06-22
+  - Bring up to date with minimp3.
+
+v0.2.4 - 2018-05-12
+  - Bring up to date with minimp3.
+
+v0.2.3 - 2018-04-29
+  - Fix TCC build.
+
+v0.2.2 - 2018-04-28
+  - Fix bug when opening a decoder from memory.
+
+v0.2.1 - 2018-04-27
+  - Efficiency improvements when the decoder reaches the end of the stream.
+
+v0.2 - 2018-04-21
+  - Bring up to date with minimp3.
+  - Start using major.minor.revision versioning.
+
+v0.1d - 2018-03-30
+  - Bring up to date with minimp3.
+
+v0.1c - 2018-03-11
+  - Fix C++ build error.
+
+v0.1b - 2018-03-07
+  - Bring up to date with minimp3.
+
+v0.1a - 2018-02-28
+  - Fix compilation error on GCC/Clang.
+  - Fix some warnings.
+
+v0.1 - 2018-02-xx
+  - Initial versioned release.
+*/
+
+/*
+This software is available as a choice of the following licenses. Choose
+whichever you prefer.
+
+===============================================================================
+ALTERNATIVE 1 - Public Domain (www.unlicense.org)
+===============================================================================
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
+
+===============================================================================
+ALTERNATIVE 2 - MIT No Attribution
+===============================================================================
+Copyright 2023 David Reid
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+/*
+    https://github.com/lieff/minimp3
+    To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
+    This software is distributed without any warranty.
+    See <http://creativecommons.org/publicdomain/zero/1.0/>.
+*/
diff --git a/shared/external/dr_wav.h b/shared/external/dr_wav.h
new file mode 100644
index 0000000..36451b5
--- /dev/null
+++ b/shared/external/dr_wav.h
@@ -0,0 +1,8803 @@
+/*
+WAV audio loader and writer. Choice of public domain or MIT-0. See license statements at the end of this file.
+dr_wav - v0.13.13 - 2023-11-02
+
+David Reid - mackron@gmail.com
+
+GitHub: https://github.com/mackron/dr_libs
+*/
+
+/*
+Introduction
+============
+This is a single file library. To use it, do something like the following in one .c file.
+
+    ```c
+    #define DR_WAV_IMPLEMENTATION
+    #include "dr_wav.h"
+    ```
+
+You can then #include this file in other parts of the program as you would with any other header file. Do something like the following to read audio data:
+
+    ```c
+    drwav wav;
+    if (!drwav_init_file(&wav, "my_song.wav", NULL)) {
+        // Error opening WAV file.
+    }
+
+    drwav_int32* pDecodedInterleavedPCMFrames = malloc(wav.totalPCMFrameCount * wav.channels * sizeof(drwav_int32));
+    size_t numberOfSamplesActuallyDecoded = drwav_read_pcm_frames_s32(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);
+
+    ...
+
+    drwav_uninit(&wav);
+    ```
+
+If you just want to quickly open and read the audio data in a single operation you can do something like this:
+
+    ```c
+    unsigned int channels;
+    unsigned int sampleRate;
+    drwav_uint64 totalPCMFrameCount;
+    float* pSampleData = drwav_open_file_and_read_pcm_frames_f32("my_song.wav", &channels, &sampleRate, &totalPCMFrameCount, NULL);
+    if (pSampleData == NULL) {
+        // Error opening and reading WAV file.
+    }
+
+    ...
+
+    drwav_free(pSampleData, NULL);
+    ```
+
+The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in this case), but you can still output the
+audio data in its internal format (see notes below for supported formats):
+
+    ```c
+    size_t framesRead = drwav_read_pcm_frames(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);
+    ```
+
+You can also read the raw bytes of audio data, which could be useful if dr_wav does not have native support for a particular data format:
+
+    ```c
+    size_t bytesRead = drwav_read_raw(&wav, bytesToRead, pRawDataBuffer);
+    ```
+
+dr_wav can also be used to output WAV files. This does not currently support compressed formats. To use this, look at `drwav_init_write()`,
+`drwav_init_file_write()`, etc. Use `drwav_write_pcm_frames()` to write samples, or `drwav_write_raw()` to write raw data in the "data" chunk.
+
+    ```c
+    drwav_data_format format;
+    format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
+    format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
+    format.channels = 2;
+    format.sampleRate = 44100;
+    format.bitsPerSample = 16;
+    drwav_init_file_write(&wav, "data/recording.wav", &format, NULL);
+
+    ...
+
+    drwav_uint64 framesWritten = drwav_write_pcm_frames(pWav, frameCount, pSamples);
+    ```
+
+Note that writing to AIFF or RIFX is not supported.
+
+dr_wav has support for decoding from a number of different encapsulation formats. See below for details.
+
+
+Build Options
+=============
+#define these options before including this file.
+
+#define DR_WAV_NO_CONVERSION_API
+  Disables conversion APIs such as `drwav_read_pcm_frames_f32()` and `drwav_s16_to_f32()`.
+
+#define DR_WAV_NO_STDIO
+  Disables APIs that initialize a decoder from a file such as `drwav_init_file()`, `drwav_init_file_write()`, etc.
+
+#define DR_WAV_NO_WCHAR
+  Disables all functions ending with `_w`. Use this if your compiler does not provide wchar.h. Not required if DR_WAV_NO_STDIO is also defined.
+
+
+Supported Encapsulations
+========================
+- RIFF (Regular WAV)
+- RIFX (Big-Endian)
+- AIFF (Does not currently support ADPCM)
+- RF64
+- W64
+
+Note that AIFF and RIFX do not support write mode, nor do they support reading of metadata.
+
+
+Supported Encodings
+===================
+- Unsigned 8-bit PCM
+- Signed 12-bit PCM
+- Signed 16-bit PCM
+- Signed 24-bit PCM
+- Signed 32-bit PCM
+- IEEE 32-bit floating point
+- IEEE 64-bit floating point
+- A-law and u-law
+- Microsoft ADPCM
+- IMA ADPCM (DVI, format code 0x11)
+
+8-bit PCM encodings are always assumed to be unsigned. Signed 8-bit encoding can only be read with `drwav_read_raw()`.
+
+Note that ADPCM is not currently supported with AIFF. Contributions welcome.
+
+
+Notes
+=====
+- Samples are always interleaved.
+- The default read function does not do any data conversion. Use `drwav_read_pcm_frames_f32()`, `drwav_read_pcm_frames_s32()` and `drwav_read_pcm_frames_s16()`
+  to read and convert audio data to 32-bit floating point, signed 32-bit integer and signed 16-bit integer samples respectively.
+- dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format.
+*/
+
+#ifndef dr_wav_h
+#define dr_wav_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DRWAV_STRINGIFY(x)      #x
+#define DRWAV_XSTRINGIFY(x)     DRWAV_STRINGIFY(x)
+
+#define DRWAV_VERSION_MAJOR     0
+#define DRWAV_VERSION_MINOR     13
+#define DRWAV_VERSION_REVISION  13
+#define DRWAV_VERSION_STRING    DRWAV_XSTRINGIFY(DRWAV_VERSION_MAJOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_MINOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_REVISION)
+
+#include <stddef.h> /* For size_t. */
+
+/* Sized Types */
+typedef   signed char           drwav_int8;
+typedef unsigned char           drwav_uint8;
+typedef   signed short          drwav_int16;
+typedef unsigned short          drwav_uint16;
+typedef   signed int            drwav_int32;
+typedef unsigned int            drwav_uint32;
+#if defined(_MSC_VER) && !defined(__clang__)
+    typedef   signed __int64    drwav_int64;
+    typedef unsigned __int64    drwav_uint64;
+#else
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+        #pragma GCC diagnostic push
+        #pragma GCC diagnostic ignored "-Wlong-long"
+        #if defined(__clang__)
+            #pragma GCC diagnostic ignored "-Wc++11-long-long"
+        #endif
+    #endif
+    typedef   signed long long  drwav_int64;
+    typedef unsigned long long  drwav_uint64;
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+        #pragma GCC diagnostic pop
+    #endif
+#endif
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__)
+    typedef drwav_uint64        drwav_uintptr;
+#else
+    typedef drwav_uint32        drwav_uintptr;
+#endif
+typedef drwav_uint8             drwav_bool8;
+typedef drwav_uint32            drwav_bool32;
+#define DRWAV_TRUE              1
+#define DRWAV_FALSE             0
+/* End Sized Types */
+
+/* Decorations */
+#if !defined(DRWAV_API)
+    #if defined(DRWAV_DLL)
+        #if defined(_WIN32)
+            #define DRWAV_DLL_IMPORT  __declspec(dllimport)
+            #define DRWAV_DLL_EXPORT  __declspec(dllexport)
+            #define DRWAV_DLL_PRIVATE static
+        #else
+            #if defined(__GNUC__) && __GNUC__ >= 4
+                #define DRWAV_DLL_IMPORT  __attribute__((visibility("default")))
+                #define DRWAV_DLL_EXPORT  __attribute__((visibility("default")))
+                #define DRWAV_DLL_PRIVATE __attribute__((visibility("hidden")))
+            #else
+                #define DRWAV_DLL_IMPORT
+                #define DRWAV_DLL_EXPORT
+                #define DRWAV_DLL_PRIVATE static
+            #endif
+        #endif
+
+        #if defined(DR_WAV_IMPLEMENTATION) || defined(DRWAV_IMPLEMENTATION)
+            #define DRWAV_API  DRWAV_DLL_EXPORT
+        #else
+            #define DRWAV_API  DRWAV_DLL_IMPORT
+        #endif
+        #define DRWAV_PRIVATE DRWAV_DLL_PRIVATE
+    #else
+        #define DRWAV_API extern
+        #define DRWAV_PRIVATE static
+    #endif
+#endif
+/* End Decorations */
+
+/* Result Codes */
+typedef drwav_int32 drwav_result;
+#define DRWAV_SUCCESS                        0
+#define DRWAV_ERROR                         -1   /* A generic error. */
+#define DRWAV_INVALID_ARGS                  -2
+#define DRWAV_INVALID_OPERATION             -3
+#define DRWAV_OUT_OF_MEMORY                 -4
+#define DRWAV_OUT_OF_RANGE                  -5
+#define DRWAV_ACCESS_DENIED                 -6
+#define DRWAV_DOES_NOT_EXIST                -7
+#define DRWAV_ALREADY_EXISTS                -8
+#define DRWAV_TOO_MANY_OPEN_FILES           -9
+#define DRWAV_INVALID_FILE                  -10
+#define DRWAV_TOO_BIG                       -11
+#define DRWAV_PATH_TOO_LONG                 -12
+#define DRWAV_NAME_TOO_LONG                 -13
+#define DRWAV_NOT_DIRECTORY                 -14
+#define DRWAV_IS_DIRECTORY                  -15
+#define DRWAV_DIRECTORY_NOT_EMPTY           -16
+#define DRWAV_END_OF_FILE                   -17
+#define DRWAV_NO_SPACE                      -18
+#define DRWAV_BUSY                          -19
+#define DRWAV_IO_ERROR                      -20
+#define DRWAV_INTERRUPT                     -21
+#define DRWAV_UNAVAILABLE                   -22
+#define DRWAV_ALREADY_IN_USE                -23
+#define DRWAV_BAD_ADDRESS                   -24
+#define DRWAV_BAD_SEEK                      -25
+#define DRWAV_BAD_PIPE                      -26
+#define DRWAV_DEADLOCK                      -27
+#define DRWAV_TOO_MANY_LINKS                -28
+#define DRWAV_NOT_IMPLEMENTED               -29
+#define DRWAV_NO_MESSAGE                    -30
+#define DRWAV_BAD_MESSAGE                   -31
+#define DRWAV_NO_DATA_AVAILABLE             -32
+#define DRWAV_INVALID_DATA                  -33
+#define DRWAV_TIMEOUT                       -34
+#define DRWAV_NO_NETWORK                    -35
+#define DRWAV_NOT_UNIQUE                    -36
+#define DRWAV_NOT_SOCKET                    -37
+#define DRWAV_NO_ADDRESS                    -38
+#define DRWAV_BAD_PROTOCOL                  -39
+#define DRWAV_PROTOCOL_UNAVAILABLE          -40
+#define DRWAV_PROTOCOL_NOT_SUPPORTED        -41
+#define DRWAV_PROTOCOL_FAMILY_NOT_SUPPORTED -42
+#define DRWAV_ADDRESS_FAMILY_NOT_SUPPORTED  -43
+#define DRWAV_SOCKET_NOT_SUPPORTED          -44
+#define DRWAV_CONNECTION_RESET              -45
+#define DRWAV_ALREADY_CONNECTED             -46
+#define DRWAV_NOT_CONNECTED                 -47
+#define DRWAV_CONNECTION_REFUSED            -48
+#define DRWAV_NO_HOST                       -49
+#define DRWAV_IN_PROGRESS                   -50
+#define DRWAV_CANCELLED                     -51
+#define DRWAV_MEMORY_ALREADY_MAPPED         -52
+#define DRWAV_AT_END                        -53
+/* End Result Codes */
+
+/* Common data formats. */
+#define DR_WAVE_FORMAT_PCM          0x1
+#define DR_WAVE_FORMAT_ADPCM        0x2
+#define DR_WAVE_FORMAT_IEEE_FLOAT   0x3
+#define DR_WAVE_FORMAT_ALAW         0x6
+#define DR_WAVE_FORMAT_MULAW        0x7
+#define DR_WAVE_FORMAT_DVI_ADPCM    0x11
+#define DR_WAVE_FORMAT_EXTENSIBLE   0xFFFE
+
+/* Flags to pass into drwav_init_ex(), etc. */
+#define DRWAV_SEQUENTIAL            0x00000001
+#define DRWAV_WITH_METADATA         0x00000002
+
+DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_uint32* pRevision);
+DRWAV_API const char* drwav_version_string(void);
+
+/* Allocation Callbacks */
+typedef struct
+{
+    void* pUserData;
+    void* (* onMalloc)(size_t sz, void* pUserData);
+    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
+    void  (* onFree)(void* p, void* pUserData);
+} drwav_allocation_callbacks;
+/* End Allocation Callbacks */
+
+typedef enum
+{
+    drwav_seek_origin_start,
+    drwav_seek_origin_current
+} drwav_seek_origin;
+
+typedef enum
+{
+    drwav_container_riff,
+    drwav_container_rifx,
+    drwav_container_w64,
+    drwav_container_rf64,
+    drwav_container_aiff
+} drwav_container;
+
+typedef struct
+{
+    union
+    {
+        drwav_uint8 fourcc[4];
+        drwav_uint8 guid[16];
+    } id;
+
+    /* The size in bytes of the chunk. */
+    drwav_uint64 sizeInBytes;
+
+    /*
+    RIFF = 2 byte alignment.
+    W64  = 8 byte alignment.
+    */
+    unsigned int paddingSize;
+} drwav_chunk_header;
+
+typedef struct
+{
+    /*
+    The format tag exactly as specified in the wave file's "fmt" chunk. This can be used by applications
+    that require support for data formats not natively supported by dr_wav.
+    */
+    drwav_uint16 formatTag;
+
+    /* The number of channels making up the audio data. When this is set to 1 it is mono, 2 is stereo, etc. */
+    drwav_uint16 channels;
+
+    /* The sample rate. Usually set to something like 44100. */
+    drwav_uint32 sampleRate;
+
+    /* Average bytes per second. You probably don't need this, but it's left here for informational purposes. */
+    drwav_uint32 avgBytesPerSec;
+
+    /* Block align. This is equal to the number of channels * bytes per sample. */
+    drwav_uint16 blockAlign;
+
+    /* Bits per sample. */
+    drwav_uint16 bitsPerSample;
+
+    /* The size of the extended data. Only used internally for validation, but left here for informational purposes. */
+    drwav_uint16 extendedSize;
+
+    /*
+    The number of valid bits per sample. When <formatTag> is equal to WAVE_FORMAT_EXTENSIBLE, <bitsPerSample>
+    is always rounded up to the nearest multiple of 8. This variable contains information about exactly how
+    many bits are valid per sample. Mainly used for informational purposes.
+    */
+    drwav_uint16 validBitsPerSample;
+
+    /* The channel mask. Not used at the moment. */
+    drwav_uint32 channelMask;
+
+    /* The sub-format, exactly as specified by the wave file. */
+    drwav_uint8 subFormat[16];
+} drwav_fmt;
+
+DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT);
+
+
+/*
+Callback for when data is read. Return value is the number of bytes actually read.
+
+pUserData   [in]  The user data that was passed to drwav_init() and family.
+pBufferOut  [out] The output buffer.
+bytesToRead [in]  The number of bytes to read.
+
+Returns the number of bytes actually read.
+
+A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
+either the entire bytesToRead is filled or you have reached the end of the stream.
+*/
+typedef size_t (* drwav_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
+
+/*
+Callback for when data is written. Returns value is the number of bytes actually written.
+
+pUserData    [in]  The user data that was passed to drwav_init_write() and family.
+pData        [out] A pointer to the data to write.
+bytesToWrite [in]  The number of bytes to write.
+
+Returns the number of bytes actually written.
+
+If the return value differs from bytesToWrite, it indicates an error.
+*/
+typedef size_t (* drwav_write_proc)(void* pUserData, const void* pData, size_t bytesToWrite);
+
+/*
+Callback for when data needs to be seeked.
+
+pUserData [in] The user data that was passed to drwav_init() and family.
+offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
+origin    [in] The origin of the seek - the current position or the start of the stream.
+
+Returns whether or not the seek was successful.
+
+Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be either drwav_seek_origin_start or
+drwav_seek_origin_current.
+*/
+typedef drwav_bool32 (* drwav_seek_proc)(void* pUserData, int offset, drwav_seek_origin origin);
+
+/*
+Callback for when drwav_init_ex() finds a chunk.
+
+pChunkUserData    [in] The user data that was passed to the pChunkUserData parameter of drwav_init_ex() and family.
+onRead            [in] A pointer to the function to call when reading.
+onSeek            [in] A pointer to the function to call when seeking.
+pReadSeekUserData [in] The user data that was passed to the pReadSeekUserData parameter of drwav_init_ex() and family.
+pChunkHeader      [in] A pointer to an object containing basic header information about the chunk. Use this to identify the chunk.
+container         [in] Whether or not the WAV file is a RIFF or Wave64 container. If you're unsure of the difference, assume RIFF.
+pFMT              [in] A pointer to the object containing the contents of the "fmt" chunk.
+
+Returns the number of bytes read + seeked.
+
+To read data from the chunk, call onRead(), passing in pReadSeekUserData as the first parameter. Do the same for seeking with onSeek(). The return value must
+be the total number of bytes you have read _plus_ seeked.
+
+Use the `container` argument to discriminate the fields in `pChunkHeader->id`. If the container is `drwav_container_riff` or `drwav_container_rf64` you should
+use `id.fourcc`, otherwise you should use `id.guid`.
+
+The `pFMT` parameter can be used to determine the data format of the wave file. Use `drwav_fmt_get_format()` to get the sample format, which will be one of the
+`DR_WAVE_FORMAT_*` identifiers.
+
+The read pointer will be sitting on the first byte after the chunk's header. You must not attempt to read beyond the boundary of the chunk.
+*/
+typedef drwav_uint64 (* drwav_chunk_proc)(void* pChunkUserData, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_chunk_header* pChunkHeader, drwav_container container, const drwav_fmt* pFMT);
+
+
+/* Structure for internal use. Only used for loaders opened with drwav_init_memory(). */
+typedef struct
+{
+    const drwav_uint8* data;
+    size_t dataSize;
+    size_t currentReadPos;
+} drwav__memory_stream;
+
+/* Structure for internal use. Only used for writers opened with drwav_init_memory_write(). */
+typedef struct
+{
+    void** ppData;
+    size_t* pDataSize;
+    size_t dataSize;
+    size_t dataCapacity;
+    size_t currentWritePos;
+} drwav__memory_stream_write;
+
+typedef struct
+{
+    drwav_container container;  /* RIFF, W64. */
+    drwav_uint32 format;        /* DR_WAVE_FORMAT_* */
+    drwav_uint32 channels;
+    drwav_uint32 sampleRate;
+    drwav_uint32 bitsPerSample;
+} drwav_data_format;
+
+typedef enum
+{
+    drwav_metadata_type_none                        = 0,
+
+    /*
+    Unknown simply means a chunk that drwav does not handle specifically. You can still ask to
+    receive these chunks as metadata objects. It is then up to you to interpret the chunk's data.
+    You can also write unknown metadata to a wav file. Be careful writing unknown chunks if you
+    have also edited the audio data. The unknown chunks could represent offsets/sizes that no
+    longer correctly correspond to the audio data.
+    */
+    drwav_metadata_type_unknown                     = 1 << 0,
+
+    /* Only 1 of each of these metadata items are allowed in a wav file. */
+    drwav_metadata_type_smpl                        = 1 << 1,
+    drwav_metadata_type_inst                        = 1 << 2,
+    drwav_metadata_type_cue                         = 1 << 3,
+    drwav_metadata_type_acid                        = 1 << 4,
+    drwav_metadata_type_bext                        = 1 << 5,
+
+    /*
+    Wav files often have a LIST chunk. This is a chunk that contains a set of subchunks. For this
+    higher-level metadata API, we don't make a distinction between a regular chunk and a LIST
+    subchunk. Instead, they are all just 'metadata' items.
+
+    There can be multiple of these metadata items in a wav file.
+    */
+    drwav_metadata_type_list_label                  = 1 << 6,
+    drwav_metadata_type_list_note                   = 1 << 7,
+    drwav_metadata_type_list_labelled_cue_region    = 1 << 8,
+
+    drwav_metadata_type_list_info_software          = 1 << 9,
+    drwav_metadata_type_list_info_copyright         = 1 << 10,
+    drwav_metadata_type_list_info_title             = 1 << 11,
+    drwav_metadata_type_list_info_artist            = 1 << 12,
+    drwav_metadata_type_list_info_comment           = 1 << 13,
+    drwav_metadata_type_list_info_date              = 1 << 14,
+    drwav_metadata_type_list_info_genre             = 1 << 15,
+    drwav_metadata_type_list_info_album             = 1 << 16,
+    drwav_metadata_type_list_info_tracknumber       = 1 << 17,
+
+    /* Other type constants for convenience. */
+    drwav_metadata_type_list_all_info_strings       = drwav_metadata_type_list_info_software
+                                                    | drwav_metadata_type_list_info_copyright
+                                                    | drwav_metadata_type_list_info_title
+                                                    | drwav_metadata_type_list_info_artist
+                                                    | drwav_metadata_type_list_info_comment
+                                                    | drwav_metadata_type_list_info_date
+                                                    | drwav_metadata_type_list_info_genre
+                                                    | drwav_metadata_type_list_info_album
+                                                    | drwav_metadata_type_list_info_tracknumber,
+
+    drwav_metadata_type_list_all_adtl               = drwav_metadata_type_list_label
+                                                    | drwav_metadata_type_list_note
+                                                    | drwav_metadata_type_list_labelled_cue_region,
+
+    drwav_metadata_type_all                         = -2,   /*0xFFFFFFFF & ~drwav_metadata_type_unknown,*/
+    drwav_metadata_type_all_including_unknown       = -1    /*0xFFFFFFFF,*/
+} drwav_metadata_type;
+
+/*
+Sampler Metadata
+
+The sampler chunk contains information about how a sound should be played in the context of a whole
+audio production, and when used in a sampler. See https://en.wikipedia.org/wiki/Sample-based_synthesis.
+*/
+typedef enum
+{
+    drwav_smpl_loop_type_forward  = 0,
+    drwav_smpl_loop_type_pingpong = 1,
+    drwav_smpl_loop_type_backward = 2
+} drwav_smpl_loop_type;
+
+typedef struct
+{
+    /* The ID of the associated cue point, see drwav_cue and drwav_cue_point. As with all cue point IDs, this can correspond to a label chunk to give this loop a name, see drwav_list_label_or_note. */
+    drwav_uint32 cuePointId;
+
+    /* See drwav_smpl_loop_type. */
+    drwav_uint32 type;
+
+    /* The byte offset of the first sample to be played in the loop. */
+    drwav_uint32 firstSampleByteOffset;
+
+    /* The byte offset into the audio data of the last sample to be played in the loop. */
+    drwav_uint32 lastSampleByteOffset;
+
+    /* A value to represent that playback should occur at a point between samples. This value ranges from 0 to UINT32_MAX. Where a value of 0 means no fraction, and a value of (UINT32_MAX / 2) would mean half a sample. */
+    drwav_uint32 sampleFraction;
+
+    /* Number of times to play the loop. 0 means loop infinitely. */
+    drwav_uint32 playCount;
+} drwav_smpl_loop;
+
+typedef struct
+{
+    /* IDs for a particular MIDI manufacturer. 0 if not used. */
+    drwav_uint32 manufacturerId;
+    drwav_uint32 productId;
+
+    /* The period of 1 sample in nanoseconds. */
+    drwav_uint32 samplePeriodNanoseconds;
+
+    /* The MIDI root note of this file. 0 to 127. */
+    drwav_uint32 midiUnityNote;
+
+    /* The fraction of a semitone up from the given MIDI note. This is a value from 0 to UINT32_MAX, where 0 means no change and (UINT32_MAX / 2) is half a semitone (AKA 50 cents). */
+    drwav_uint32 midiPitchFraction;
+
+    /* Data relating to SMPTE standards which are used for syncing audio and video. 0 if not used. */
+    drwav_uint32 smpteFormat;
+    drwav_uint32 smpteOffset;
+
+    /* drwav_smpl_loop loops. */
+    drwav_uint32 sampleLoopCount;
+
+    /* Optional sampler-specific data. */
+    drwav_uint32 samplerSpecificDataSizeInBytes;
+
+    drwav_smpl_loop* pLoops;
+    drwav_uint8* pSamplerSpecificData;
+} drwav_smpl;
+
+/*
+Instrument Metadata
+
+The inst metadata contains data about how a sound should be played as part of an instrument. This
+commonly read by samplers. See https://en.wikipedia.org/wiki/Sample-based_synthesis.
+*/
+typedef struct
+{
+    drwav_int8 midiUnityNote;   /* The root note of the audio as a MIDI note number. 0 to 127. */
+    drwav_int8 fineTuneCents;   /* -50 to +50 */
+    drwav_int8 gainDecibels;    /* -64 to +64 */
+    drwav_int8 lowNote;         /* 0 to 127 */
+    drwav_int8 highNote;        /* 0 to 127 */
+    drwav_int8 lowVelocity;     /* 1 to 127 */
+    drwav_int8 highVelocity;    /* 1 to 127 */
+} drwav_inst;
+
+/*
+Cue Metadata
+
+Cue points are markers at specific points in the audio. They often come with an associated piece of
+drwav_list_label_or_note metadata which contains the text for the marker.
+*/
+typedef struct
+{
+    /* Unique identification value. */
+    drwav_uint32 id;
+
+    /* Set to 0. This is only relevant if there is a 'playlist' chunk - which is not supported by dr_wav. */
+    drwav_uint32 playOrderPosition;
+
+    /* Should always be "data". This represents the fourcc value of the chunk that this cue point corresponds to. dr_wav only supports a single data chunk so this should always be "data". */
+    drwav_uint8 dataChunkId[4];
+
+    /* Set to 0. This is only relevant if there is a wave list chunk. dr_wav, like lots of readers/writers, do not support this. */
+    drwav_uint32 chunkStart;
+
+    /* Set to 0 for uncompressed formats. Else the last byte in compressed wave data where decompression can begin to find the value of the corresponding sample value. */
+    drwav_uint32 blockStart;
+
+    /* For uncompressed formats this is the byte offset of the cue point into the audio data. For compressed formats this is relative to the block specified with blockStart. */
+    drwav_uint32 sampleByteOffset;
+} drwav_cue_point;
+
+typedef struct
+{
+    drwav_uint32 cuePointCount;
+    drwav_cue_point *pCuePoints;
+} drwav_cue;
+
+/*
+Acid Metadata
+
+This chunk contains some information about the time signature and the tempo of the audio.
+*/
+typedef enum
+{
+    drwav_acid_flag_one_shot      = 1,  /* If this is not set, then it is a loop instead of a one-shot. */
+    drwav_acid_flag_root_note_set = 2,
+    drwav_acid_flag_stretch       = 4,
+    drwav_acid_flag_disk_based    = 8,
+    drwav_acid_flag_acidizer      = 16  /* Not sure what this means. */
+} drwav_acid_flag;
+
+typedef struct
+{
+    /* A bit-field, see drwav_acid_flag. */
+    drwav_uint32 flags;
+
+    /* Valid if flags contains drwav_acid_flag_root_note_set. It represents the MIDI root note the file - a value from 0 to 127. */
+    drwav_uint16 midiUnityNote;
+
+    /* Reserved values that should probably be ignored. reserved1 seems to often be 128 and reserved2 is 0. */
+    drwav_uint16 reserved1;
+    float reserved2;
+
+    /* Number of beats. */
+    drwav_uint32 numBeats;
+
+    /* The time signature of the audio. */
+    drwav_uint16 meterDenominator;
+    drwav_uint16 meterNumerator;
+
+    /* Beats per minute of the track. Setting a value of 0 suggests that there is no tempo. */
+    float tempo;
+} drwav_acid;
+
+/*
+Cue Label or Note metadata
+
+These are 2 different types of metadata, but they have the exact same format. Labels tend to be the
+more common and represent a short name for a cue point. Notes might be used to represent a longer
+comment.
+*/
+typedef struct
+{
+    /* The ID of a cue point that this label or note corresponds to. */
+    drwav_uint32 cuePointId;
+
+    /* Size of the string not including any null terminator. */
+    drwav_uint32 stringLength;
+
+    /* The string. The *init_with_metadata functions null terminate this for convenience. */
+    char* pString;
+} drwav_list_label_or_note;
+
+/*
+BEXT metadata, also known as Broadcast Wave Format (BWF)
+
+This metadata adds some extra description to an audio file. You must check the version field to
+determine if the UMID or the loudness fields are valid.
+*/
+typedef struct
+{
+    /*
+    These top 3 fields, and the umid field are actually defined in the standard as a statically
+    sized buffers. In order to reduce the size of this struct (and therefore the union in the
+    metadata struct), we instead store these as pointers.
+    */
+    char* pDescription;                 /* Can be NULL or a null-terminated string, must be <= 256 characters. */
+    char* pOriginatorName;              /* Can be NULL or a null-terminated string, must be <= 32 characters. */
+    char* pOriginatorReference;         /* Can be NULL or a null-terminated string, must be <= 32 characters. */
+    char  pOriginationDate[10];         /* ASCII "yyyy:mm:dd". */
+    char  pOriginationTime[8];          /* ASCII "hh:mm:ss". */
+    drwav_uint64 timeReference;         /* First sample count since midnight. */
+    drwav_uint16 version;               /* Version of the BWF, check this to see if the fields below are valid. */
+
+    /*
+    Unrestricted ASCII characters containing a collection of strings terminated by CR/LF. Each
+    string shall contain a description of a coding process applied to the audio data.
+    */
+    char* pCodingHistory;
+    drwav_uint32 codingHistorySize;
+
+    /* Fields below this point are only valid if the version is 1 or above. */
+    drwav_uint8* pUMID;                  /* Exactly 64 bytes of SMPTE UMID */
+
+    /* Fields below this point are only valid if the version is 2 or above. */
+    drwav_uint16 loudnessValue;         /* Integrated Loudness Value of the file in LUFS (multiplied by 100). */
+    drwav_uint16 loudnessRange;         /* Loudness Range of the file in LU (multiplied by 100). */
+    drwav_uint16 maxTruePeakLevel;      /* Maximum True Peak Level of the file expressed as dBTP (multiplied by 100). */
+    drwav_uint16 maxMomentaryLoudness;  /* Highest value of the Momentary Loudness Level of the file in LUFS (multiplied by 100). */
+    drwav_uint16 maxShortTermLoudness;  /* Highest value of the Short-Term Loudness Level of the file in LUFS (multiplied by 100). */
+} drwav_bext;
+
+/*
+Info Text Metadata
+
+There a many different types of information text that can be saved in this format. This is where
+things like the album name, the artists, the year it was produced, etc are saved. See
+drwav_metadata_type for the full list of types that dr_wav supports.
+*/
+typedef struct
+{
+    /* Size of the string not including any null terminator. */
+    drwav_uint32 stringLength;
+
+    /* The string. The *init_with_metadata functions null terminate this for convenience. */
+    char* pString;
+} drwav_list_info_text;
+
+/*
+Labelled Cue Region Metadata
+
+The labelled cue region metadata is used to associate some region of audio with text. The region
+starts at a cue point, and extends for the given number of samples.
+*/
+typedef struct
+{
+    /* The ID of a cue point that this object corresponds to. */
+    drwav_uint32 cuePointId;
+
+    /* The number of samples from the cue point forwards that should be considered this region */
+    drwav_uint32 sampleLength;
+
+    /* Four characters used to say what the purpose of this region is. */
+    drwav_uint8 purposeId[4];
+
+    /* Unsure of the exact meanings of these. It appears to be acceptable to set them all to 0. */
+    drwav_uint16 country;
+    drwav_uint16 language;
+    drwav_uint16 dialect;
+    drwav_uint16 codePage;
+
+    /* Size of the string not including any null terminator. */
+    drwav_uint32 stringLength;
+
+    /* The string. The *init_with_metadata functions null terminate this for convenience. */
+    char* pString;
+} drwav_list_labelled_cue_region;
+
+/*
+Unknown Metadata
+
+This chunk just represents a type of chunk that dr_wav does not understand.
+
+Unknown metadata has a location attached to it. This is because wav files can have a LIST chunk
+that contains subchunks. These LIST chunks can be one of two types. An adtl list, or an INFO
+list. This enum is used to specify the location of a chunk that dr_wav currently doesn't support.
+*/
+typedef enum
+{
+    drwav_metadata_location_invalid,
+    drwav_metadata_location_top_level,
+    drwav_metadata_location_inside_info_list,
+    drwav_metadata_location_inside_adtl_list
+} drwav_metadata_location;
+
+typedef struct
+{
+    drwav_uint8 id[4];
+    drwav_metadata_location chunkLocation;
+    drwav_uint32 dataSizeInBytes;
+    drwav_uint8* pData;
+} drwav_unknown_metadata;
+
+/*
+Metadata is saved as a union of all the supported types.
+*/
+typedef struct
+{
+    /* Determines which item in the union is valid. */
+    drwav_metadata_type type;
+
+    union
+    {
+        drwav_cue cue;
+        drwav_smpl smpl;
+        drwav_acid acid;
+        drwav_inst inst;
+        drwav_bext bext;
+        drwav_list_label_or_note labelOrNote;   /* List label or list note. */
+        drwav_list_labelled_cue_region labelledCueRegion;
+        drwav_list_info_text infoText;          /* Any of the list info types. */
+        drwav_unknown_metadata unknown;
+    } data;
+} drwav_metadata;
+
+typedef struct
+{
+    /* A pointer to the function to call when more data is needed. */
+    drwav_read_proc onRead;
+
+    /* A pointer to the function to call when data needs to be written. Only used when the drwav object is opened in write mode. */
+    drwav_write_proc onWrite;
+
+    /* A pointer to the function to call when the wav file needs to be seeked. */
+    drwav_seek_proc onSeek;
+
+    /* The user data to pass to callbacks. */
+    void* pUserData;
+
+    /* Allocation callbacks. */
+    drwav_allocation_callbacks allocationCallbacks;
+
+
+    /* Whether or not the WAV file is formatted as a standard RIFF file or W64. */
+    drwav_container container;
+
+
+    /* Structure containing format information exactly as specified by the wav file. */
+    drwav_fmt fmt;
+
+    /* The sample rate. Will be set to something like 44100. */
+    drwav_uint32 sampleRate;
+
+    /* The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. */
+    drwav_uint16 channels;
+
+    /* The bits per sample. Will be set to something like 16, 24, etc. */
+    drwav_uint16 bitsPerSample;
+
+    /* Equal to fmt.formatTag, or the value specified by fmt.subFormat if fmt.formatTag is equal to 65534 (WAVE_FORMAT_EXTENSIBLE). */
+    drwav_uint16 translatedFormatTag;
+
+    /* The total number of PCM frames making up the audio data. */
+    drwav_uint64 totalPCMFrameCount;
+
+
+    /* The size in bytes of the data chunk. */
+    drwav_uint64 dataChunkDataSize;
+
+    /* The position in the stream of the first data byte of the data chunk. This is used for seeking. */
+    drwav_uint64 dataChunkDataPos;
+
+    /* The number of bytes remaining in the data chunk. */
+    drwav_uint64 bytesRemaining;
+
+    /* The current read position in PCM frames. */
+    drwav_uint64 readCursorInPCMFrames;
+
+
+    /*
+    Only used in sequential write mode. Keeps track of the desired size of the "data" chunk at the point of initialization time. Always
+    set to 0 for non-sequential writes and when the drwav object is opened in read mode. Used for validation.
+    */
+    drwav_uint64 dataChunkDataSizeTargetWrite;
+
+    /* Keeps track of whether or not the wav writer was initialized in sequential mode. */
+    drwav_bool32 isSequentialWrite;
+
+
+    /* A array of metadata. This is valid after the *init_with_metadata call returns. It will be valid until drwav_uninit() is called. You can take ownership of this data with drwav_take_ownership_of_metadata(). */
+    drwav_metadata* pMetadata;
+    drwav_uint32 metadataCount;
+
+
+    /* A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_init_memory(). */
+    drwav__memory_stream memoryStream;
+    drwav__memory_stream_write memoryStreamWrite;
+
+
+    /* Microsoft ADPCM specific data. */
+    struct
+    {
+        drwav_uint32 bytesRemainingInBlock;
+        drwav_uint16 predictor[2];
+        drwav_int32  delta[2];
+        drwav_int32  cachedFrames[4];  /* Samples are stored in this cache during decoding. */
+        drwav_uint32 cachedFrameCount;
+        drwav_int32  prevFrames[2][2]; /* The previous 2 samples for each channel (2 channels at most). */
+    } msadpcm;
+
+    /* IMA ADPCM specific data. */
+    struct
+    {
+        drwav_uint32 bytesRemainingInBlock;
+        drwav_int32  predictor[2];
+        drwav_int32  stepIndex[2];
+        drwav_int32  cachedFrames[16]; /* Samples are stored in this cache during decoding. */
+        drwav_uint32 cachedFrameCount;
+    } ima;
+
+    /* AIFF specific data. */
+    struct
+    {
+        drwav_bool8 isLE;   /* Will be set to true if the audio data is little-endian encoded. */
+        drwav_bool8 isUnsigned; /* Only used for 8-bit samples. When set to true, will be treated as unsigned. */
+    } aiff;
+} drwav;
+
+
+/*
+Initializes a pre-allocated drwav object for reading.
+
+pWav                         [out]          A pointer to the drwav object being initialized.
+onRead                       [in]           The function to call when data needs to be read from the client.
+onSeek                       [in]           The function to call when the read position of the client data needs to move.
+onChunk                      [in, optional] The function to call when a chunk is enumerated at initialized time.
+pUserData, pReadSeekUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
+pChunkUserData               [in, optional] A pointer to application defined data that will be passed to onChunk.
+flags                        [in, optional] A set of flags for controlling how things are loaded.
+
+Returns true if successful; false otherwise.
+
+Close the loader with drwav_uninit().
+
+This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory()
+to open the stream from a file or from a block of memory respectively.
+
+Possible values for flags:
+  DRWAV_SEQUENTIAL: Never perform a backwards seek while loading. This disables the chunk callback and will cause this function
+                    to return as soon as the data chunk is found. Any chunks after the data chunk will be ignored.
+
+drwav_init() is equivalent to "drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0);".
+
+The onChunk callback is not called for the WAVE or FMT chunks. The contents of the FMT chunk can be read from pWav->fmt
+after the function returns.
+
+See also: drwav_init_file(), drwav_init_memory(), drwav_uninit()
+*/
+DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_with_metadata(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Initializes a pre-allocated drwav object for writing.
+
+onWrite               [in]           The function to call when data needs to be written.
+onSeek                [in]           The function to call when the write position needs to move.
+pUserData             [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
+metadata, numMetadata [in, optional] An array of metadata objects that should be written to the file. The array is not edited. You are responsible for this metadata memory and it must maintain valid until drwav_uninit() is called.
+
+Returns true if successful; false otherwise.
+
+Close the writer with drwav_uninit().
+
+This is the lowest level function for initializing a WAV file. You can also use drwav_init_file_write() and drwav_init_memory_write()
+to open the stream from a file or from a block of memory respectively.
+
+If the total sample count is known, you can use drwav_init_write_sequential(). This avoids the need for dr_wav to perform
+a post-processing step for storing the total sample count and the size of the data chunk which requires a backwards seek.
+
+See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit()
+*/
+DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_write_with_metadata(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks, drwav_metadata* pMetadata, drwav_uint32 metadataCount);
+
+/*
+Utility function to determine the target size of the entire data to be written (including all headers and chunks).
+
+Returns the target size in bytes.
+
+The metadata argument can be NULL meaning no metadata exists.
+
+Useful if the application needs to know the size to allocate.
+
+Only writing to the RIFF chunk and one data chunk is currently supported.
+
+See also: drwav_init_write(), drwav_init_file_write(), drwav_init_memory_write()
+*/
+DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalFrameCount, drwav_metadata* pMetadata, drwav_uint32 metadataCount);
+
+/*
+Take ownership of the metadata objects that were allocated via one of the init_with_metadata() function calls. The init_with_metdata functions perform a single heap allocation for this metadata.
+
+Useful if you want the data to persist beyond the lifetime of the drwav object.
+
+You must free the data returned from this function using drwav_free().
+*/
+DRWAV_API drwav_metadata* drwav_take_ownership_of_metadata(drwav* pWav);
+
+/*
+Uninitializes the given drwav object.
+
+Use this only for objects initialized with drwav_init*() functions (drwav_init(), drwav_init_ex(), drwav_init_write(), drwav_init_write_sequential()).
+*/
+DRWAV_API drwav_result drwav_uninit(drwav* pWav);
+
+
+/*
+Reads raw audio data.
+
+This is the lowest level function for reading audio data. It simply reads the given number of
+bytes of the raw internal sample data.
+
+Consider using drwav_read_pcm_frames_s16(), drwav_read_pcm_frames_s32() or drwav_read_pcm_frames_f32() for
+reading sample data in a consistent format.
+
+pBufferOut can be NULL in which case a seek will be performed.
+
+Returns the number of bytes actually read.
+*/
+DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut);
+
+/*
+Reads up to the specified number of PCM frames from the WAV file.
+
+The output data will be in the file's internal format, converted to native-endian byte order. Use
+drwav_read_pcm_frames_s16/f32/s32() to read data in a specific format.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached or
+you have requested more PCM frames than can possibly fit in the output buffer.
+
+This function will only work when sample data is of a fixed size and uncompressed. If you are
+using a compressed format consider using drwav_read_raw() or drwav_read_pcm_frames_s16/s32/f32().
+
+pBufferOut can be NULL in which case a seek will be performed.
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
+
+/*
+Seeks to the given PCM frame.
+
+Returns true if successful; false otherwise.
+*/
+DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex);
+
+/*
+Retrieves the current read position in pcm frames.
+*/
+DRWAV_API drwav_result drwav_get_cursor_in_pcm_frames(drwav* pWav, drwav_uint64* pCursor);
+
+/*
+Retrieves the length of the file.
+*/
+DRWAV_API drwav_result drwav_get_length_in_pcm_frames(drwav* pWav, drwav_uint64* pLength);
+
+
+/*
+Writes raw audio data.
+
+Returns the number of bytes actually written. If this differs from bytesToWrite, it indicates an error.
+*/
+DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData);
+
+/*
+Writes PCM frames.
+
+Returns the number of PCM frames written.
+
+Input samples need to be in native-endian byte order. On big-endian architectures the input data will be converted to
+little-endian. Use drwav_write_raw() to write raw audio data without performing any conversion.
+*/
+DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
+
+/* Conversion Utilities */
+#ifndef DR_WAV_NO_CONVERSION_API
+
+/*
+Reads a chunk of audio data and converts it to signed 16-bit PCM samples.
+
+pBufferOut can be NULL in which case a seek will be performed.
+
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 24-bit PCM samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 32-bit PCM samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 32-bit floating point samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 64-bit floating point samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount);
+
+/* Low-level function for converting A-law samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting u-law samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+
+/*
+Reads a chunk of audio data and converts it to IEEE 32-bit floating point samples.
+
+pBufferOut can be NULL in which case a seek will be performed.
+
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 16-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 24-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 32-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 64-bit floating point samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount);
+
+/* Low-level function for converting A-law samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting u-law samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+
+/*
+Reads a chunk of audio data and converts it to signed 32-bit PCM samples.
+
+pBufferOut can be NULL in which case a seek will be performed.
+
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 16-bit PCM samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 24-bit PCM samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 32-bit floating point samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 64-bit floating point samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount);
+
+/* Low-level function for converting A-law samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting u-law samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+#endif  /* DR_WAV_NO_CONVERSION_API */
+
+
+/* High-Level Convenience Helpers */
+
+#ifndef DR_WAV_NO_STDIO
+/*
+Helper for initializing a wave file for reading using stdio.
+
+This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
+DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_with_metadata(drwav* pWav, const char* filename, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_with_metadata_w(drwav* pWav, const wchar_t* filename, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+
+/*
+Helper for initializing a wave file for writing using stdio.
+
+This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
+DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+#endif  /* DR_WAV_NO_STDIO */
+
+/*
+Helper for initializing a loader from a pre-allocated memory buffer.
+
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
+the lifetime of the drwav object.
+
+The buffer should contain the contents of the entire wave file, not just the sample data.
+*/
+DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_memory_with_metadata(drwav* pWav, const void* data, size_t dataSize, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Helper for initializing a writer which outputs data to a memory buffer.
+
+dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
+
+The buffer will remain allocated even after drwav_uninit() is called. The buffer should not be considered valid
+until after drwav_uninit() has been called.
+*/
+DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+
+#ifndef DR_WAV_NO_CONVERSION_API
+/*
+Opens and reads an entire wav file in a single operation.
+
+The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
+*/
+DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+#ifndef DR_WAV_NO_STDIO
+/*
+Opens and decodes an entire wav file in a single operation.
+
+The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
+*/
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+#endif
+/*
+Opens and decodes an entire wav file from a block of memory in a single operation.
+
+The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
+*/
+DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API float* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/* Frees data that was allocated internally by dr_wav. */
+DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/* Converts bytes from a wav stream to a sized type of native endian. */
+DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data);
+DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data);
+DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data);
+DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data);
+DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data);
+DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data);
+DRWAV_API float drwav_bytes_to_f32(const drwav_uint8* data);
+
+/* Compares a GUID for the purpose of checking the type of a Wave64 chunk. */
+DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16]);
+
+/* Compares a four-character-code for the purpose of checking the type of a RIFF chunk. */
+DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* dr_wav_h */
+
+
+/************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************
+
+ IMPLEMENTATION
+
+ ************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************/
+#if defined(DR_WAV_IMPLEMENTATION) || defined(DRWAV_IMPLEMENTATION)
+#ifndef dr_wav_c
+#define dr_wav_c
+
+#ifdef __MRC__
+/* MrC currently doesn't compile dr_wav correctly with any optimizations enabled. */
+#pragma options opt off
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h> /* For INT_MAX */
+
+#ifndef DR_WAV_NO_STDIO
+#include <stdio.h>
+#ifndef DR_WAV_NO_WCHAR
+#include <wchar.h>
+#endif
+#endif
+
+/* Standard library stuff. */
+#ifndef DRWAV_ASSERT
+#include <assert.h>
+#define DRWAV_ASSERT(expression)           assert(expression)
+#endif
+#ifndef DRWAV_MALLOC
+#define DRWAV_MALLOC(sz)                   malloc((sz))
+#endif
+#ifndef DRWAV_REALLOC
+#define DRWAV_REALLOC(p, sz)               realloc((p), (sz))
+#endif
+#ifndef DRWAV_FREE
+#define DRWAV_FREE(p)                      free((p))
+#endif
+#ifndef DRWAV_COPY_MEMORY
+#define DRWAV_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
+#endif
+#ifndef DRWAV_ZERO_MEMORY
+#define DRWAV_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
+#endif
+#ifndef DRWAV_ZERO_OBJECT
+#define DRWAV_ZERO_OBJECT(p)               DRWAV_ZERO_MEMORY((p), sizeof(*p))
+#endif
+
+#define drwav_countof(x)                   (sizeof(x) / sizeof(x[0]))
+#define drwav_align(x, a)                  ((((x) + (a) - 1) / (a)) * (a))
+#define drwav_min(a, b)                    (((a) < (b)) ? (a) : (b))
+#define drwav_max(a, b)                    (((a) > (b)) ? (a) : (b))
+#define drwav_clamp(x, lo, hi)             (drwav_max((lo), drwav_min((hi), (x))))
+#define drwav_offset_ptr(p, offset)        (((drwav_uint8*)(p)) + (offset))
+
+#define DRWAV_MAX_SIMD_VECTOR_SIZE         32
+
+/* Architecture Detection */
+#if defined(__x86_64__) || defined(_M_X64)
+    #define DRWAV_X64
+#elif defined(__i386) || defined(_M_IX86)
+    #define DRWAV_X86
+#elif defined(__arm__) || defined(_M_ARM)
+    #define DRWAV_ARM
+#endif
+/* End Architecture Detection */
+
+/* Inline */
+#ifdef _MSC_VER
+    #define DRWAV_INLINE __forceinline
+#elif defined(__GNUC__)
+    /*
+    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
+    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
+    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
+    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
+    I am using "__inline__" only when we're compiling in strict ANSI mode.
+    */
+    #if defined(__STRICT_ANSI__)
+        #define DRWAV_GNUC_INLINE_HINT __inline__
+    #else
+        #define DRWAV_GNUC_INLINE_HINT inline
+    #endif
+
+    #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__)
+        #define DRWAV_INLINE DRWAV_GNUC_INLINE_HINT __attribute__((always_inline))
+    #else
+        #define DRWAV_INLINE DRWAV_GNUC_INLINE_HINT
+    #endif
+#elif defined(__WATCOMC__)
+    #define DRWAV_INLINE __inline
+#else
+    #define DRWAV_INLINE
+#endif
+/* End Inline */
+
+/* SIZE_MAX */
+#if defined(SIZE_MAX)
+    #define DRWAV_SIZE_MAX  SIZE_MAX
+#else
+    #if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
+        #define DRWAV_SIZE_MAX  ((drwav_uint64)0xFFFFFFFFFFFFFFFF)
+    #else
+        #define DRWAV_SIZE_MAX  0xFFFFFFFF
+    #endif
+#endif
+/* End SIZE_MAX */
+
+/* Weird bit manipulation is for C89 compatibility (no direct support for 64-bit integers). */
+#define DRWAV_INT64_MIN ((drwav_int64) ((drwav_uint64)0x80000000 << 32))
+#define DRWAV_INT64_MAX ((drwav_int64)(((drwav_uint64)0x7FFFFFFF << 32) | 0xFFFFFFFF))
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    #define DRWAV_HAS_BYTESWAP16_INTRINSIC
+    #define DRWAV_HAS_BYTESWAP32_INTRINSIC
+    #define DRWAV_HAS_BYTESWAP64_INTRINSIC
+#elif defined(__clang__)
+    #if defined(__has_builtin)
+        #if __has_builtin(__builtin_bswap16)
+            #define DRWAV_HAS_BYTESWAP16_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap32)
+            #define DRWAV_HAS_BYTESWAP32_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap64)
+            #define DRWAV_HAS_BYTESWAP64_INTRINSIC
+        #endif
+    #endif
+#elif defined(__GNUC__)
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+        #define DRWAV_HAS_BYTESWAP32_INTRINSIC
+        #define DRWAV_HAS_BYTESWAP64_INTRINSIC
+    #endif
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+        #define DRWAV_HAS_BYTESWAP16_INTRINSIC
+    #endif
+#endif
+
+DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_uint32* pRevision)
+{
+    if (pMajor) {
+        *pMajor = DRWAV_VERSION_MAJOR;
+    }
+
+    if (pMinor) {
+        *pMinor = DRWAV_VERSION_MINOR;
+    }
+
+    if (pRevision) {
+        *pRevision = DRWAV_VERSION_REVISION;
+    }
+}
+
+DRWAV_API const char* drwav_version_string(void)
+{
+    return DRWAV_VERSION_STRING;
+}
+
+/*
+These limits are used for basic validation when initializing the decoder. If you exceed these limits, first of all: what on Earth are
+you doing?! (Let me know, I'd be curious!) Second, you can adjust these by #define-ing them before the dr_wav implementation.
+*/
+#ifndef DRWAV_MAX_SAMPLE_RATE
+#define DRWAV_MAX_SAMPLE_RATE       384000
+#endif
+#ifndef DRWAV_MAX_CHANNELS
+#define DRWAV_MAX_CHANNELS          256
+#endif
+#ifndef DRWAV_MAX_BITS_PER_SAMPLE
+#define DRWAV_MAX_BITS_PER_SAMPLE   64
+#endif
+
+static const drwav_uint8 drwavGUID_W64_RIFF[16] = {0x72,0x69,0x66,0x66, 0x2E,0x91, 0xCF,0x11, 0xA5,0xD6, 0x28,0xDB,0x04,0xC1,0x00,0x00};    /* 66666972-912E-11CF-A5D6-28DB04C10000 */
+static const drwav_uint8 drwavGUID_W64_WAVE[16] = {0x77,0x61,0x76,0x65, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 65766177-ACF3-11D3-8CD1-00C04F8EDB8A */
+/*static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};*/    /* 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 74636166-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 61746164-ACF3-11D3-8CD1-00C04F8EDB8A */
+/*static const drwav_uint8 drwavGUID_W64_SMPL[16] = {0x73,0x6D,0x70,0x6C, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};*/    /* 6C706D73-ACF3-11D3-8CD1-00C04F8EDB8A */
+
+
+static DRWAV_INLINE int drwav__is_little_endian(void)
+{
+#if defined(DRWAV_X86) || defined(DRWAV_X64)
+    return DRWAV_TRUE;
+#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
+    return DRWAV_TRUE;
+#else
+    int n = 1;
+    return (*(char*)&n) == 1;
+#endif
+}
+
+
+static DRWAV_INLINE void drwav_bytes_to_guid(const drwav_uint8* data, drwav_uint8* guid)
+{
+    int i;
+    for (i = 0; i < 16; ++i) {
+        guid[i] = data[i];
+    }
+}
+
+
+static DRWAV_INLINE drwav_uint16 drwav__bswap16(drwav_uint16 n)
+{
+#ifdef DRWAV_HAS_BYTESWAP16_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ushort(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap16(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF00) >> 8) |
+           ((n & 0x00FF) << 8);
+#endif
+}
+
+static DRWAV_INLINE drwav_uint32 drwav__bswap32(drwav_uint32 n)
+{
+#ifdef DRWAV_HAS_BYTESWAP32_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ulong(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        #if defined(DRWAV_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRWAV_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
+            /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
+            drwav_uint32 r;
+            __asm__ __volatile__ (
+            #if defined(DRWAV_64BIT)
+                "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
+            #else
+                "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n)
+            #endif
+            );
+            return r;
+        #else
+            return __builtin_bswap32(n);
+        #endif
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF000000) >> 24) |
+           ((n & 0x00FF0000) >>  8) |
+           ((n & 0x0000FF00) <<  8) |
+           ((n & 0x000000FF) << 24);
+#endif
+}
+
+static DRWAV_INLINE drwav_uint64 drwav__bswap64(drwav_uint64 n)
+{
+#ifdef DRWAV_HAS_BYTESWAP64_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_uint64(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap64(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
+    return ((n & ((drwav_uint64)0xFF000000 << 32)) >> 56) |
+           ((n & ((drwav_uint64)0x00FF0000 << 32)) >> 40) |
+           ((n & ((drwav_uint64)0x0000FF00 << 32)) >> 24) |
+           ((n & ((drwav_uint64)0x000000FF << 32)) >>  8) |
+           ((n & ((drwav_uint64)0xFF000000      )) <<  8) |
+           ((n & ((drwav_uint64)0x00FF0000      )) << 24) |
+           ((n & ((drwav_uint64)0x0000FF00      )) << 40) |
+           ((n & ((drwav_uint64)0x000000FF      )) << 56);
+#endif
+}
+
+
+static DRWAV_INLINE drwav_int16 drwav__bswap_s16(drwav_int16 n)
+{
+    return (drwav_int16)drwav__bswap16((drwav_uint16)n);
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_s16(drwav_int16* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_s16(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE void drwav__bswap_s24(drwav_uint8* p)
+{
+    drwav_uint8 t;
+    t = p[0];
+    p[0] = p[2];
+    p[2] = t;
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_s24(drwav_uint8* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        drwav_uint8* pSample = pSamples + (iSample*3);
+        drwav__bswap_s24(pSample);
+    }
+}
+
+
+static DRWAV_INLINE drwav_int32 drwav__bswap_s32(drwav_int32 n)
+{
+    return (drwav_int32)drwav__bswap32((drwav_uint32)n);
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_s32(drwav_int32* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_s32(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE drwav_int64 drwav__bswap_s64(drwav_int64 n)
+{
+    return (drwav_int64)drwav__bswap64((drwav_uint64)n);
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_s64(drwav_int64* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_s64(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE float drwav__bswap_f32(float n)
+{
+    union {
+        drwav_uint32 i;
+        float f;
+    } x;
+    x.f = n;
+    x.i = drwav__bswap32(x.i);
+
+    return x.f;
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_f32(float* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_f32(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE void drwav__bswap_samples(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
+{
+    switch (bytesPerSample)
+    {
+        case 1:
+        {
+            /* No-op. */
+        } break;
+        case 2:
+        {
+            drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount);
+        } break;
+        case 3:
+        {
+            drwav__bswap_samples_s24((drwav_uint8*)pSamples, sampleCount);
+        } break;
+        case 4:
+        {
+            drwav__bswap_samples_s32((drwav_int32*)pSamples, sampleCount);
+        } break;
+        case 8:
+        {
+            drwav__bswap_samples_s64((drwav_int64*)pSamples, sampleCount);
+        } break;
+        default:
+        {
+            /* Unsupported format. */
+            DRWAV_ASSERT(DRWAV_FALSE);
+        } break;
+    }
+}
+
+
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_bool32 drwav_is_container_be(drwav_container container)
+{
+    if (container == drwav_container_rifx || container == drwav_container_aiff) {
+        return DRWAV_TRUE;
+    } else {
+        return DRWAV_FALSE;
+    }
+}
+
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint16 drwav_bytes_to_u16_le(const drwav_uint8* data)
+{
+    return ((drwav_uint16)data[0] << 0) | ((drwav_uint16)data[1] << 8);
+}
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint16 drwav_bytes_to_u16_be(const drwav_uint8* data)
+{
+    return ((drwav_uint16)data[1] << 0) | ((drwav_uint16)data[0] << 8);
+}
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint16 drwav_bytes_to_u16_ex(const drwav_uint8* data, drwav_container container)
+{
+    if (drwav_is_container_be(container)) {
+        return drwav_bytes_to_u16_be(data);
+    } else {
+        return drwav_bytes_to_u16_le(data);
+    }
+}
+
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint32 drwav_bytes_to_u32_le(const drwav_uint8* data)
+{
+    return ((drwav_uint32)data[0] << 0) | ((drwav_uint32)data[1] << 8) | ((drwav_uint32)data[2] << 16) | ((drwav_uint32)data[3] << 24);
+}
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint32 drwav_bytes_to_u32_be(const drwav_uint8* data)
+{
+    return ((drwav_uint32)data[3] << 0) | ((drwav_uint32)data[2] << 8) | ((drwav_uint32)data[1] << 16) | ((drwav_uint32)data[0] << 24);
+}
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint32 drwav_bytes_to_u32_ex(const drwav_uint8* data, drwav_container container)
+{
+    if (drwav_is_container_be(container)) {
+        return drwav_bytes_to_u32_be(data);
+    } else {
+        return drwav_bytes_to_u32_le(data);
+    }
+}
+
+
+
+DRWAV_PRIVATE drwav_int64 drwav_aiff_extented_to_s64(const drwav_uint8* data)
+{
+    drwav_uint32 exponent = ((drwav_uint32)data[0] << 8) | data[1];
+    drwav_uint64 hi = ((drwav_uint64)data[2] << 24) | ((drwav_uint64)data[3] << 16) | ((drwav_uint64)data[4] <<  8) | ((drwav_uint64)data[5] <<  0);
+    drwav_uint64 lo = ((drwav_uint64)data[6] << 24) | ((drwav_uint64)data[7] << 16) | ((drwav_uint64)data[8] <<  8) | ((drwav_uint64)data[9] <<  0);
+    drwav_uint64 significand = (hi << 32) | lo;
+    int sign = exponent >> 15;
+
+    /* Remove sign bit. */
+    exponent &= 0x7FFF;
+
+    /* Special cases. */
+    if (exponent == 0 && significand == 0) {
+        return 0;
+    } else if (exponent == 0x7FFF) {
+        return sign ? DRWAV_INT64_MIN : DRWAV_INT64_MAX;    /* Infinite. */
+    }
+
+    exponent -= 16383;
+
+    if (exponent > 63) {
+        return sign ? DRWAV_INT64_MIN : DRWAV_INT64_MAX;    /* Too big for a 64-bit integer. */
+    } else if (exponent < 1) {
+        return 0;  /* Number is less than 1, so rounds down to 0. */
+    }
+
+    significand >>= (63 - exponent);
+
+    if (sign) {
+        return -(drwav_int64)significand;
+    } else {
+        return  (drwav_int64)significand;
+    }
+}
+
+
+DRWAV_PRIVATE void* drwav__malloc_default(size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRWAV_MALLOC(sz);
+}
+
+DRWAV_PRIVATE void* drwav__realloc_default(void* p, size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRWAV_REALLOC(p, sz);
+}
+
+DRWAV_PRIVATE void drwav__free_default(void* p, void* pUserData)
+{
+    (void)pUserData;
+    DRWAV_FREE(p);
+}
+
+
+DRWAV_PRIVATE void* drwav__malloc_from_callbacks(size_t sz, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onMalloc != NULL) {
+        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try using realloc(). */
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
+    }
+
+    return NULL;
+}
+
+DRWAV_PRIVATE void* drwav__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try emulating realloc() in terms of malloc()/free(). */
+    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
+        void* p2;
+
+        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
+        if (p2 == NULL) {
+            return NULL;
+        }
+
+        if (p != NULL) {
+            DRWAV_COPY_MEMORY(p2, p, szOld);
+            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+        }
+
+        return p2;
+    }
+
+    return NULL;
+}
+
+DRWAV_PRIVATE void drwav__free_from_callbacks(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (p == NULL || pAllocationCallbacks == NULL) {
+        return;
+    }
+
+    if (pAllocationCallbacks->onFree != NULL) {
+        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+    }
+}
+
+
+DRWAV_PRIVATE drwav_allocation_callbacks drwav_copy_allocation_callbacks_or_defaults(const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        /* Copy. */
+        return *pAllocationCallbacks;
+    } else {
+        /* Defaults. */
+        drwav_allocation_callbacks allocationCallbacks;
+        allocationCallbacks.pUserData = NULL;
+        allocationCallbacks.onMalloc  = drwav__malloc_default;
+        allocationCallbacks.onRealloc = drwav__realloc_default;
+        allocationCallbacks.onFree    = drwav__free_default;
+        return allocationCallbacks;
+    }
+}
+
+
+static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 formatTag)
+{
+    return
+        formatTag == DR_WAVE_FORMAT_ADPCM ||
+        formatTag == DR_WAVE_FORMAT_DVI_ADPCM;
+}
+
+DRWAV_PRIVATE unsigned int drwav__chunk_padding_size_riff(drwav_uint64 chunkSize)
+{
+    return (unsigned int)(chunkSize % 2);
+}
+
+DRWAV_PRIVATE unsigned int drwav__chunk_padding_size_w64(drwav_uint64 chunkSize)
+{
+    return (unsigned int)(chunkSize % 8);
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
+DRWAV_PRIVATE drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
+
+DRWAV_PRIVATE drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_chunk_header* pHeaderOut)
+{
+    if (container == drwav_container_riff || container == drwav_container_rifx || container == drwav_container_rf64 || container == drwav_container_aiff) {
+        drwav_uint8 sizeInBytes[4];
+
+        if (onRead(pUserData, pHeaderOut->id.fourcc, 4) != 4) {
+            return DRWAV_AT_END;
+        }
+
+        if (onRead(pUserData, sizeInBytes, 4) != 4) {
+            return DRWAV_INVALID_FILE;
+        }
+
+        pHeaderOut->sizeInBytes = drwav_bytes_to_u32_ex(sizeInBytes, container);
+        pHeaderOut->paddingSize = drwav__chunk_padding_size_riff(pHeaderOut->sizeInBytes);
+
+        *pRunningBytesReadOut += 8;
+    } else if (container == drwav_container_w64) {
+        drwav_uint8 sizeInBytes[8];
+
+        if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) {
+            return DRWAV_AT_END;
+        }
+
+        if (onRead(pUserData, sizeInBytes, 8) != 8) {
+            return DRWAV_INVALID_FILE;
+        }
+
+        pHeaderOut->sizeInBytes = drwav_bytes_to_u64(sizeInBytes) - 24;    /* <-- Subtract 24 because w64 includes the size of the header. */
+        pHeaderOut->paddingSize = drwav__chunk_padding_size_w64(pHeaderOut->sizeInBytes);
+        *pRunningBytesReadOut += 24;
+    } else {
+        return DRWAV_INVALID_FILE;
+    }
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_PRIVATE drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
+{
+    drwav_uint64 bytesRemainingToSeek = offset;
+    while (bytesRemainingToSeek > 0) {
+        if (bytesRemainingToSeek > 0x7FFFFFFF) {
+            if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
+                return DRWAV_FALSE;
+            }
+            bytesRemainingToSeek -= 0x7FFFFFFF;
+        } else {
+            if (!onSeek(pUserData, (int)bytesRemainingToSeek, drwav_seek_origin_current)) {
+                return DRWAV_FALSE;
+            }
+            bytesRemainingToSeek = 0;
+        }
+    }
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_PRIVATE drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
+{
+    if (offset <= 0x7FFFFFFF) {
+        return onSeek(pUserData, (int)offset, drwav_seek_origin_start);
+    }
+
+    /* Larger than 32-bit seek. */
+    if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_start)) {
+        return DRWAV_FALSE;
+    }
+    offset -= 0x7FFFFFFF;
+
+    for (;;) {
+        if (offset <= 0x7FFFFFFF) {
+            return onSeek(pUserData, (int)offset, drwav_seek_origin_current);
+        }
+
+        if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
+            return DRWAV_FALSE;
+        }
+        offset -= 0x7FFFFFFF;
+    }
+
+    /* Should never get here. */
+    /*return DRWAV_TRUE; */
+}
+
+
+
+DRWAV_PRIVATE size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor)
+{
+    size_t bytesRead;
+
+    DRWAV_ASSERT(onRead != NULL);
+    DRWAV_ASSERT(pCursor != NULL);
+
+    bytesRead = onRead(pUserData, pBufferOut, bytesToRead);
+    *pCursor += bytesRead;
+    return bytesRead;
+}
+
+#if 0
+DRWAV_PRIVATE drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserData, int offset, drwav_seek_origin origin, drwav_uint64* pCursor)
+{
+    DRWAV_ASSERT(onSeek != NULL);
+    DRWAV_ASSERT(pCursor != NULL);
+
+    if (!onSeek(pUserData, offset, origin)) {
+        return DRWAV_FALSE;
+    }
+
+    if (origin == drwav_seek_origin_start) {
+        *pCursor = offset;
+    } else {
+        *pCursor += offset;
+    }
+
+    return DRWAV_TRUE;
+}
+#endif
+
+
+#define DRWAV_SMPL_BYTES                    36
+#define DRWAV_SMPL_LOOP_BYTES               24
+#define DRWAV_INST_BYTES                    7
+#define DRWAV_ACID_BYTES                    24
+#define DRWAV_CUE_BYTES                     4
+#define DRWAV_BEXT_BYTES                    602
+#define DRWAV_BEXT_DESCRIPTION_BYTES        256
+#define DRWAV_BEXT_ORIGINATOR_NAME_BYTES    32
+#define DRWAV_BEXT_ORIGINATOR_REF_BYTES     32
+#define DRWAV_BEXT_RESERVED_BYTES           180
+#define DRWAV_BEXT_UMID_BYTES               64
+#define DRWAV_CUE_POINT_BYTES               24
+#define DRWAV_LIST_LABEL_OR_NOTE_BYTES      4
+#define DRWAV_LIST_LABELLED_TEXT_BYTES      20
+
+#define DRWAV_METADATA_ALIGNMENT            8
+
+typedef enum
+{
+    drwav__metadata_parser_stage_count,
+    drwav__metadata_parser_stage_read
+} drwav__metadata_parser_stage;
+
+typedef struct
+{
+    drwav_read_proc onRead;
+    drwav_seek_proc onSeek;
+    void *pReadSeekUserData;
+    drwav__metadata_parser_stage stage;
+    drwav_metadata *pMetadata;
+    drwav_uint32 metadataCount;
+    drwav_uint8 *pData;
+    drwav_uint8 *pDataCursor;
+    drwav_uint64 metadataCursor;
+    drwav_uint64 extraCapacity;
+} drwav__metadata_parser;
+
+DRWAV_PRIVATE size_t drwav__metadata_memory_capacity(drwav__metadata_parser* pParser)
+{
+    drwav_uint64 cap = sizeof(drwav_metadata) * (drwav_uint64)pParser->metadataCount + pParser->extraCapacity;
+    if (cap > DRWAV_SIZE_MAX) {
+        return 0;   /* Too big. */
+    }
+
+    return (size_t)cap; /* Safe cast thanks to the check above. */
+}
+
+DRWAV_PRIVATE drwav_uint8* drwav__metadata_get_memory(drwav__metadata_parser* pParser, size_t size, size_t align)
+{
+    drwav_uint8* pResult;
+
+    if (align) {
+        drwav_uintptr modulo = (drwav_uintptr)pParser->pDataCursor % align;
+        if (modulo != 0) {
+            pParser->pDataCursor += align - modulo;
+        }
+    }
+    
+    pResult = pParser->pDataCursor;
+
+    /*
+    Getting to the point where this function is called means there should always be memory
+    available. Out of memory checks should have been done at an earlier stage.
+    */
+    DRWAV_ASSERT((pResult + size) <= (pParser->pData + drwav__metadata_memory_capacity(pParser)));
+
+    pParser->pDataCursor += size;
+    return pResult;
+}
+
+DRWAV_PRIVATE void drwav__metadata_request_extra_memory_for_stage_2(drwav__metadata_parser* pParser, size_t bytes, size_t align)
+{
+    size_t extra = bytes + (align ? (align - 1) : 0);
+    pParser->extraCapacity += extra;
+}
+
+DRWAV_PRIVATE drwav_result drwav__metadata_alloc(drwav__metadata_parser* pParser, drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pParser->extraCapacity != 0 || pParser->metadataCount != 0) {
+        pAllocationCallbacks->onFree(pParser->pData, pAllocationCallbacks->pUserData);
+
+        pParser->pData = (drwav_uint8*)pAllocationCallbacks->onMalloc(drwav__metadata_memory_capacity(pParser), pAllocationCallbacks->pUserData);
+        pParser->pDataCursor = pParser->pData;
+
+        if (pParser->pData == NULL) {
+            return DRWAV_OUT_OF_MEMORY;
+        }
+
+        /*
+        We don't need to worry about specifying an alignment here because malloc always returns something
+        of suitable alignment. This also means pParser->pMetadata is all that we need to store in order
+        for us to free when we are done.
+        */
+        pParser->pMetadata = (drwav_metadata*)drwav__metadata_get_memory(pParser, sizeof(drwav_metadata) * pParser->metadataCount, 1);
+        pParser->metadataCursor = 0;
+    }
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_PRIVATE size_t drwav__metadata_parser_read(drwav__metadata_parser* pParser, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor)
+{
+    if (pCursor != NULL) {
+        return drwav__on_read(pParser->onRead, pParser->pReadSeekUserData, pBufferOut, bytesToRead, pCursor);
+    } else {
+        return pParser->onRead(pParser->pReadSeekUserData, pBufferOut, bytesToRead);
+    }
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__read_smpl_to_metadata_obj(drwav__metadata_parser* pParser, const drwav_chunk_header* pChunkHeader, drwav_metadata* pMetadata)
+{
+    drwav_uint8 smplHeaderData[DRWAV_SMPL_BYTES];
+    drwav_uint64 totalBytesRead = 0;
+    size_t bytesJustRead;
+
+    if (pMetadata == NULL) {
+        return 0;
+    }
+
+    bytesJustRead = drwav__metadata_parser_read(pParser, smplHeaderData, sizeof(smplHeaderData), &totalBytesRead);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
+    DRWAV_ASSERT(pChunkHeader != NULL);
+
+    if (pMetadata != NULL && bytesJustRead == sizeof(smplHeaderData)) {
+        drwav_uint32 iSampleLoop;
+
+        pMetadata->type                                     = drwav_metadata_type_smpl;
+        pMetadata->data.smpl.manufacturerId                 = drwav_bytes_to_u32(smplHeaderData + 0);
+        pMetadata->data.smpl.productId                      = drwav_bytes_to_u32(smplHeaderData + 4);
+        pMetadata->data.smpl.samplePeriodNanoseconds        = drwav_bytes_to_u32(smplHeaderData + 8);
+        pMetadata->data.smpl.midiUnityNote                  = drwav_bytes_to_u32(smplHeaderData + 12);
+        pMetadata->data.smpl.midiPitchFraction              = drwav_bytes_to_u32(smplHeaderData + 16);
+        pMetadata->data.smpl.smpteFormat                    = drwav_bytes_to_u32(smplHeaderData + 20);
+        pMetadata->data.smpl.smpteOffset                    = drwav_bytes_to_u32(smplHeaderData + 24);
+        pMetadata->data.smpl.sampleLoopCount                = drwav_bytes_to_u32(smplHeaderData + 28);
+        pMetadata->data.smpl.samplerSpecificDataSizeInBytes = drwav_bytes_to_u32(smplHeaderData + 32);
+
+        /*
+        The loop count needs to be validated against the size of the chunk for safety so we don't
+        attempt to read over the boundary of the chunk.
+        */
+        if (pMetadata->data.smpl.sampleLoopCount == (pChunkHeader->sizeInBytes - DRWAV_SMPL_BYTES) / DRWAV_SMPL_LOOP_BYTES) {
+            pMetadata->data.smpl.pLoops = (drwav_smpl_loop*)drwav__metadata_get_memory(pParser, sizeof(drwav_smpl_loop) * pMetadata->data.smpl.sampleLoopCount, DRWAV_METADATA_ALIGNMENT);
+
+            for (iSampleLoop = 0; iSampleLoop < pMetadata->data.smpl.sampleLoopCount; ++iSampleLoop) {
+                drwav_uint8 smplLoopData[DRWAV_SMPL_LOOP_BYTES];
+                bytesJustRead = drwav__metadata_parser_read(pParser, smplLoopData, sizeof(smplLoopData), &totalBytesRead);
+
+                if (bytesJustRead == sizeof(smplLoopData)) {
+                    pMetadata->data.smpl.pLoops[iSampleLoop].cuePointId            = drwav_bytes_to_u32(smplLoopData + 0);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].type                  = drwav_bytes_to_u32(smplLoopData + 4);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].firstSampleByteOffset = drwav_bytes_to_u32(smplLoopData + 8);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].lastSampleByteOffset  = drwav_bytes_to_u32(smplLoopData + 12);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].sampleFraction        = drwav_bytes_to_u32(smplLoopData + 16);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].playCount             = drwav_bytes_to_u32(smplLoopData + 20);
+                } else {
+                    break;
+                }
+            }
+
+            if (pMetadata->data.smpl.samplerSpecificDataSizeInBytes > 0) {
+                pMetadata->data.smpl.pSamplerSpecificData = drwav__metadata_get_memory(pParser, pMetadata->data.smpl.samplerSpecificDataSizeInBytes, 1);
+                DRWAV_ASSERT(pMetadata->data.smpl.pSamplerSpecificData != NULL);
+
+                drwav__metadata_parser_read(pParser, pMetadata->data.smpl.pSamplerSpecificData, pMetadata->data.smpl.samplerSpecificDataSizeInBytes, &totalBytesRead);
+            }
+        }
+    }
+
+    return totalBytesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__read_cue_to_metadata_obj(drwav__metadata_parser* pParser, const drwav_chunk_header* pChunkHeader, drwav_metadata* pMetadata)
+{
+    drwav_uint8 cueHeaderSectionData[DRWAV_CUE_BYTES];
+    drwav_uint64 totalBytesRead = 0;
+    size_t bytesJustRead;
+
+    if (pMetadata == NULL) {
+        return 0;
+    }
+
+    bytesJustRead = drwav__metadata_parser_read(pParser, cueHeaderSectionData, sizeof(cueHeaderSectionData), &totalBytesRead);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
+
+    if (bytesJustRead == sizeof(cueHeaderSectionData)) {
+        pMetadata->type                   = drwav_metadata_type_cue;
+        pMetadata->data.cue.cuePointCount = drwav_bytes_to_u32(cueHeaderSectionData);
+
+        /*
+        We need to validate the cue point count against the size of the chunk so we don't read
+        beyond the chunk.
+        */
+        if (pMetadata->data.cue.cuePointCount == (pChunkHeader->sizeInBytes - DRWAV_CUE_BYTES) / DRWAV_CUE_POINT_BYTES) {
+            pMetadata->data.cue.pCuePoints    = (drwav_cue_point*)drwav__metadata_get_memory(pParser, sizeof(drwav_cue_point) * pMetadata->data.cue.cuePointCount, DRWAV_METADATA_ALIGNMENT);
+            DRWAV_ASSERT(pMetadata->data.cue.pCuePoints != NULL);
+
+            if (pMetadata->data.cue.cuePointCount > 0) {
+                drwav_uint32 iCuePoint;
+
+                for (iCuePoint = 0; iCuePoint < pMetadata->data.cue.cuePointCount; ++iCuePoint) {
+                    drwav_uint8 cuePointData[DRWAV_CUE_POINT_BYTES];
+                    bytesJustRead = drwav__metadata_parser_read(pParser, cuePointData, sizeof(cuePointData), &totalBytesRead);
+
+                    if (bytesJustRead == sizeof(cuePointData)) {
+                        pMetadata->data.cue.pCuePoints[iCuePoint].id                = drwav_bytes_to_u32(cuePointData + 0);
+                        pMetadata->data.cue.pCuePoints[iCuePoint].playOrderPosition = drwav_bytes_to_u32(cuePointData + 4);
+                        pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId[0]    = cuePointData[8];
+                        pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId[1]    = cuePointData[9];
+                        pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId[2]    = cuePointData[10];
+                        pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId[3]    = cuePointData[11];
+                        pMetadata->data.cue.pCuePoints[iCuePoint].chunkStart        = drwav_bytes_to_u32(cuePointData + 12);
+                        pMetadata->data.cue.pCuePoints[iCuePoint].blockStart        = drwav_bytes_to_u32(cuePointData + 16);
+                        pMetadata->data.cue.pCuePoints[iCuePoint].sampleByteOffset  = drwav_bytes_to_u32(cuePointData + 20);
+                    } else {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    return totalBytesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__read_inst_to_metadata_obj(drwav__metadata_parser* pParser, drwav_metadata* pMetadata)
+{
+    drwav_uint8 instData[DRWAV_INST_BYTES];
+    drwav_uint64 bytesRead;
+
+    if (pMetadata == NULL) {
+        return 0;
+    }
+
+    bytesRead = drwav__metadata_parser_read(pParser, instData, sizeof(instData), NULL);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
+
+    if (bytesRead == sizeof(instData)) {
+        pMetadata->type                    = drwav_metadata_type_inst;
+        pMetadata->data.inst.midiUnityNote = (drwav_int8)instData[0];
+        pMetadata->data.inst.fineTuneCents = (drwav_int8)instData[1];
+        pMetadata->data.inst.gainDecibels  = (drwav_int8)instData[2];
+        pMetadata->data.inst.lowNote       = (drwav_int8)instData[3];
+        pMetadata->data.inst.highNote      = (drwav_int8)instData[4];
+        pMetadata->data.inst.lowVelocity   = (drwav_int8)instData[5];
+        pMetadata->data.inst.highVelocity  = (drwav_int8)instData[6];
+    }
+
+    return bytesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__read_acid_to_metadata_obj(drwav__metadata_parser* pParser, drwav_metadata* pMetadata)
+{
+    drwav_uint8 acidData[DRWAV_ACID_BYTES];
+    drwav_uint64 bytesRead;
+
+    if (pMetadata == NULL) {
+        return 0;
+    }
+
+    bytesRead = drwav__metadata_parser_read(pParser, acidData, sizeof(acidData), NULL);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
+
+    if (bytesRead == sizeof(acidData)) {
+        pMetadata->type                       = drwav_metadata_type_acid;
+        pMetadata->data.acid.flags            = drwav_bytes_to_u32(acidData + 0);
+        pMetadata->data.acid.midiUnityNote    = drwav_bytes_to_u16(acidData + 4);
+        pMetadata->data.acid.reserved1        = drwav_bytes_to_u16(acidData + 6);
+        pMetadata->data.acid.reserved2        = drwav_bytes_to_f32(acidData + 8);
+        pMetadata->data.acid.numBeats         = drwav_bytes_to_u32(acidData + 12);
+        pMetadata->data.acid.meterDenominator = drwav_bytes_to_u16(acidData + 16);
+        pMetadata->data.acid.meterNumerator   = drwav_bytes_to_u16(acidData + 18);
+        pMetadata->data.acid.tempo            = drwav_bytes_to_f32(acidData + 20);
+    }
+
+    return bytesRead;
+}
+
+DRWAV_PRIVATE size_t drwav__strlen(const char* str)
+{
+    size_t result = 0;
+
+    while (*str++) {
+        result += 1;
+    }
+
+    return result;
+}
+
+DRWAV_PRIVATE size_t drwav__strlen_clamped(const char* str, size_t maxToRead)
+{
+    size_t result = 0;
+
+    while (*str++ && result < maxToRead) {
+        result += 1;
+    }
+
+    return result;
+}
+
+DRWAV_PRIVATE char* drwav__metadata_copy_string(drwav__metadata_parser* pParser, const char* str, size_t maxToRead)
+{
+    size_t len = drwav__strlen_clamped(str, maxToRead);
+
+    if (len) {
+        char* result = (char*)drwav__metadata_get_memory(pParser, len + 1, 1);
+        DRWAV_ASSERT(result != NULL);
+
+        DRWAV_COPY_MEMORY(result, str, len);
+        result[len] = '\0';
+
+        return result;
+    } else {
+        return NULL;
+    }
+}
+
+typedef struct
+{
+    const void* pBuffer;
+    size_t sizeInBytes;
+    size_t cursor;
+} drwav_buffer_reader;
+
+DRWAV_PRIVATE drwav_result drwav_buffer_reader_init(const void* pBuffer, size_t sizeInBytes, drwav_buffer_reader* pReader)
+{
+    DRWAV_ASSERT(pBuffer != NULL);
+    DRWAV_ASSERT(pReader != NULL);
+
+    DRWAV_ZERO_OBJECT(pReader);
+
+    pReader->pBuffer     = pBuffer;
+    pReader->sizeInBytes = sizeInBytes;
+    pReader->cursor      = 0;
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_PRIVATE const void* drwav_buffer_reader_ptr(const drwav_buffer_reader* pReader)
+{
+    DRWAV_ASSERT(pReader != NULL);
+
+    return drwav_offset_ptr(pReader->pBuffer, pReader->cursor);
+}
+
+DRWAV_PRIVATE drwav_result drwav_buffer_reader_seek(drwav_buffer_reader* pReader, size_t bytesToSeek)
+{
+    DRWAV_ASSERT(pReader != NULL);
+
+    if (pReader->cursor + bytesToSeek > pReader->sizeInBytes) {
+        return DRWAV_BAD_SEEK;  /* Seeking too far forward. */
+    }
+
+    pReader->cursor += bytesToSeek;
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_PRIVATE drwav_result drwav_buffer_reader_read(drwav_buffer_reader* pReader, void* pDst, size_t bytesToRead, size_t* pBytesRead)
+{
+    drwav_result result = DRWAV_SUCCESS;
+    size_t bytesRemaining;
+
+    DRWAV_ASSERT(pReader != NULL);
+    
+    if (pBytesRead != NULL) {
+        *pBytesRead = 0;
+    }
+
+    bytesRemaining = (pReader->sizeInBytes - pReader->cursor);
+    if (bytesToRead > bytesRemaining) {
+        bytesToRead = bytesRemaining;
+    }
+
+    if (pDst == NULL) {
+        /* Seek. */
+        result = drwav_buffer_reader_seek(pReader, bytesToRead);
+    } else {
+        /* Read. */
+        DRWAV_COPY_MEMORY(pDst, drwav_buffer_reader_ptr(pReader), bytesToRead);
+        pReader->cursor += bytesToRead;
+    }
+
+    DRWAV_ASSERT(pReader->cursor <= pReader->sizeInBytes);
+
+    if (result == DRWAV_SUCCESS) {
+        if (pBytesRead != NULL) {
+            *pBytesRead = bytesToRead;
+        }
+    }
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_PRIVATE drwav_result drwav_buffer_reader_read_u16(drwav_buffer_reader* pReader, drwav_uint16* pDst)
+{
+    drwav_result result;
+    size_t bytesRead;
+    drwav_uint8 data[2];
+
+    DRWAV_ASSERT(pReader != NULL);
+    DRWAV_ASSERT(pDst != NULL);
+
+    *pDst = 0;  /* Safety. */
+
+    result = drwav_buffer_reader_read(pReader, data, sizeof(*pDst), &bytesRead);
+    if (result != DRWAV_SUCCESS || bytesRead != sizeof(*pDst)) {
+        return result;
+    }
+
+    *pDst = drwav_bytes_to_u16(data);
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_PRIVATE drwav_result drwav_buffer_reader_read_u32(drwav_buffer_reader* pReader, drwav_uint32* pDst)
+{
+    drwav_result result;
+    size_t bytesRead;
+    drwav_uint8 data[4];
+
+    DRWAV_ASSERT(pReader != NULL);
+    DRWAV_ASSERT(pDst != NULL);
+
+    *pDst = 0;  /* Safety. */
+
+    result = drwav_buffer_reader_read(pReader, data, sizeof(*pDst), &bytesRead);
+    if (result != DRWAV_SUCCESS || bytesRead != sizeof(*pDst)) {
+        return result;
+    }
+
+    *pDst = drwav_bytes_to_u32(data);
+
+    return DRWAV_SUCCESS;
+}
+
+
+
+DRWAV_PRIVATE drwav_uint64 drwav__read_bext_to_metadata_obj(drwav__metadata_parser* pParser, drwav_metadata* pMetadata, drwav_uint64 chunkSize)
+{
+    drwav_uint8 bextData[DRWAV_BEXT_BYTES];
+    size_t bytesRead = drwav__metadata_parser_read(pParser, bextData, sizeof(bextData), NULL);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
+    
+    if (bytesRead == sizeof(bextData)) {
+        drwav_buffer_reader reader;
+        drwav_uint32 timeReferenceLow;
+        drwav_uint32 timeReferenceHigh;
+        size_t extraBytes;
+
+        pMetadata->type = drwav_metadata_type_bext;
+
+        if (drwav_buffer_reader_init(bextData, bytesRead, &reader) == DRWAV_SUCCESS) {
+            pMetadata->data.bext.pDescription = drwav__metadata_copy_string(pParser, (const char*)drwav_buffer_reader_ptr(&reader), DRWAV_BEXT_DESCRIPTION_BYTES);
+            drwav_buffer_reader_seek(&reader, DRWAV_BEXT_DESCRIPTION_BYTES);
+
+            pMetadata->data.bext.pOriginatorName = drwav__metadata_copy_string(pParser, (const char*)drwav_buffer_reader_ptr(&reader), DRWAV_BEXT_ORIGINATOR_NAME_BYTES);
+            drwav_buffer_reader_seek(&reader, DRWAV_BEXT_ORIGINATOR_NAME_BYTES);
+
+            pMetadata->data.bext.pOriginatorReference = drwav__metadata_copy_string(pParser, (const char*)drwav_buffer_reader_ptr(&reader), DRWAV_BEXT_ORIGINATOR_REF_BYTES);
+            drwav_buffer_reader_seek(&reader, DRWAV_BEXT_ORIGINATOR_REF_BYTES);
+
+            drwav_buffer_reader_read(&reader, pMetadata->data.bext.pOriginationDate, sizeof(pMetadata->data.bext.pOriginationDate), NULL);
+            drwav_buffer_reader_read(&reader, pMetadata->data.bext.pOriginationTime, sizeof(pMetadata->data.bext.pOriginationTime), NULL);
+
+            drwav_buffer_reader_read_u32(&reader, &timeReferenceLow);
+            drwav_buffer_reader_read_u32(&reader, &timeReferenceHigh);
+            pMetadata->data.bext.timeReference = ((drwav_uint64)timeReferenceHigh << 32) + timeReferenceLow;
+
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.version);
+
+            pMetadata->data.bext.pUMID = drwav__metadata_get_memory(pParser, DRWAV_BEXT_UMID_BYTES, 1);
+            drwav_buffer_reader_read(&reader, pMetadata->data.bext.pUMID, DRWAV_BEXT_UMID_BYTES, NULL);
+
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.loudnessValue);
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.loudnessRange);
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.maxTruePeakLevel);
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.maxMomentaryLoudness);
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.maxShortTermLoudness);
+
+            DRWAV_ASSERT((drwav_offset_ptr(drwav_buffer_reader_ptr(&reader), DRWAV_BEXT_RESERVED_BYTES)) == (bextData + DRWAV_BEXT_BYTES));
+
+            extraBytes = (size_t)(chunkSize - DRWAV_BEXT_BYTES);
+            if (extraBytes > 0) {
+                pMetadata->data.bext.pCodingHistory = (char*)drwav__metadata_get_memory(pParser, extraBytes + 1, 1);
+                DRWAV_ASSERT(pMetadata->data.bext.pCodingHistory != NULL);
+
+                bytesRead += drwav__metadata_parser_read(pParser, pMetadata->data.bext.pCodingHistory, extraBytes, NULL);
+                pMetadata->data.bext.codingHistorySize = (drwav_uint32)drwav__strlen(pMetadata->data.bext.pCodingHistory);
+            } else {
+                pMetadata->data.bext.pCodingHistory    = NULL;
+                pMetadata->data.bext.codingHistorySize = 0;
+            }
+        }
+    }
+
+    return bytesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__read_list_label_or_note_to_metadata_obj(drwav__metadata_parser* pParser, drwav_metadata* pMetadata, drwav_uint64 chunkSize, drwav_metadata_type type)
+{
+    drwav_uint8 cueIDBuffer[DRWAV_LIST_LABEL_OR_NOTE_BYTES];
+    drwav_uint64 totalBytesRead = 0;
+    size_t bytesJustRead = drwav__metadata_parser_read(pParser, cueIDBuffer, sizeof(cueIDBuffer), &totalBytesRead);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);    
+
+    if (bytesJustRead == sizeof(cueIDBuffer)) {
+        drwav_uint32 sizeIncludingNullTerminator;
+
+        pMetadata->type = type;
+        pMetadata->data.labelOrNote.cuePointId = drwav_bytes_to_u32(cueIDBuffer);
+
+        sizeIncludingNullTerminator = (drwav_uint32)chunkSize - DRWAV_LIST_LABEL_OR_NOTE_BYTES;
+        if (sizeIncludingNullTerminator > 0) {
+            pMetadata->data.labelOrNote.stringLength = sizeIncludingNullTerminator - 1;
+            pMetadata->data.labelOrNote.pString      = (char*)drwav__metadata_get_memory(pParser, sizeIncludingNullTerminator, 1);
+            DRWAV_ASSERT(pMetadata->data.labelOrNote.pString != NULL);
+
+            drwav__metadata_parser_read(pParser, pMetadata->data.labelOrNote.pString, sizeIncludingNullTerminator, &totalBytesRead);
+        } else {
+            pMetadata->data.labelOrNote.stringLength = 0;
+            pMetadata->data.labelOrNote.pString      = NULL;
+        }
+    }
+
+    return totalBytesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__read_list_labelled_cue_region_to_metadata_obj(drwav__metadata_parser* pParser, drwav_metadata* pMetadata, drwav_uint64 chunkSize)
+{
+    drwav_uint8 buffer[DRWAV_LIST_LABELLED_TEXT_BYTES];
+    drwav_uint64 totalBytesRead = 0;
+    size_t bytesJustRead = drwav__metadata_parser_read(pParser, buffer, sizeof(buffer), &totalBytesRead);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
+
+    if (bytesJustRead == sizeof(buffer)) {
+        drwav_uint32 sizeIncludingNullTerminator;
+
+        pMetadata->type                                = drwav_metadata_type_list_labelled_cue_region;
+        pMetadata->data.labelledCueRegion.cuePointId   = drwav_bytes_to_u32(buffer + 0);
+        pMetadata->data.labelledCueRegion.sampleLength = drwav_bytes_to_u32(buffer + 4);
+        pMetadata->data.labelledCueRegion.purposeId[0] = buffer[8];
+        pMetadata->data.labelledCueRegion.purposeId[1] = buffer[9];
+        pMetadata->data.labelledCueRegion.purposeId[2] = buffer[10];
+        pMetadata->data.labelledCueRegion.purposeId[3] = buffer[11];
+        pMetadata->data.labelledCueRegion.country      = drwav_bytes_to_u16(buffer + 12);
+        pMetadata->data.labelledCueRegion.language     = drwav_bytes_to_u16(buffer + 14);
+        pMetadata->data.labelledCueRegion.dialect      = drwav_bytes_to_u16(buffer + 16);
+        pMetadata->data.labelledCueRegion.codePage     = drwav_bytes_to_u16(buffer + 18);
+
+        sizeIncludingNullTerminator = (drwav_uint32)chunkSize - DRWAV_LIST_LABELLED_TEXT_BYTES;
+        if (sizeIncludingNullTerminator > 0) {
+            pMetadata->data.labelledCueRegion.stringLength = sizeIncludingNullTerminator - 1;
+            pMetadata->data.labelledCueRegion.pString      = (char*)drwav__metadata_get_memory(pParser, sizeIncludingNullTerminator, 1);
+            DRWAV_ASSERT(pMetadata->data.labelledCueRegion.pString != NULL);
+
+            drwav__metadata_parser_read(pParser, pMetadata->data.labelledCueRegion.pString, sizeIncludingNullTerminator, &totalBytesRead);
+        } else {
+            pMetadata->data.labelledCueRegion.stringLength = 0;
+            pMetadata->data.labelledCueRegion.pString      = NULL;
+        }
+    }
+
+    return totalBytesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__metadata_process_info_text_chunk(drwav__metadata_parser* pParser, drwav_uint64 chunkSize, drwav_metadata_type type)
+{
+    drwav_uint64 bytesRead = 0;
+    drwav_uint32 stringSizeWithNullTerminator = (drwav_uint32)chunkSize;
+
+    if (pParser->stage == drwav__metadata_parser_stage_count) {
+        pParser->metadataCount += 1;
+        drwav__metadata_request_extra_memory_for_stage_2(pParser, stringSizeWithNullTerminator, 1);
+    } else {
+        drwav_metadata* pMetadata = &pParser->pMetadata[pParser->metadataCursor];
+        pMetadata->type = type;
+        if (stringSizeWithNullTerminator > 0) {
+            pMetadata->data.infoText.stringLength = stringSizeWithNullTerminator - 1;
+            pMetadata->data.infoText.pString = (char*)drwav__metadata_get_memory(pParser, stringSizeWithNullTerminator, 1);
+            DRWAV_ASSERT(pMetadata->data.infoText.pString != NULL);
+
+            bytesRead = drwav__metadata_parser_read(pParser, pMetadata->data.infoText.pString, (size_t)stringSizeWithNullTerminator, NULL);
+            if (bytesRead == chunkSize) {
+                pParser->metadataCursor += 1;
+            } else {
+                /* Failed to parse. */
+            }
+        } else {
+            pMetadata->data.infoText.stringLength = 0;
+            pMetadata->data.infoText.pString      = NULL;
+            pParser->metadataCursor += 1;
+        }
+    }
+
+    return bytesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__metadata_process_unknown_chunk(drwav__metadata_parser* pParser, const drwav_uint8* pChunkId, drwav_uint64 chunkSize, drwav_metadata_location location)
+{
+    drwav_uint64 bytesRead = 0;
+
+    if (location == drwav_metadata_location_invalid) {
+        return 0;
+    }
+
+    if (drwav_fourcc_equal(pChunkId, "data") || drwav_fourcc_equal(pChunkId, "fmt ") || drwav_fourcc_equal(pChunkId, "fact")) {
+        return 0;
+    }
+
+    if (pParser->stage == drwav__metadata_parser_stage_count) {
+        pParser->metadataCount += 1;
+        drwav__metadata_request_extra_memory_for_stage_2(pParser, (size_t)chunkSize, 1);
+    } else {
+        drwav_metadata* pMetadata = &pParser->pMetadata[pParser->metadataCursor];
+        pMetadata->type                         = drwav_metadata_type_unknown;
+        pMetadata->data.unknown.chunkLocation   = location;
+        pMetadata->data.unknown.id[0]           = pChunkId[0];
+        pMetadata->data.unknown.id[1]           = pChunkId[1];
+        pMetadata->data.unknown.id[2]           = pChunkId[2];
+        pMetadata->data.unknown.id[3]           = pChunkId[3];
+        pMetadata->data.unknown.dataSizeInBytes = (drwav_uint32)chunkSize;
+        pMetadata->data.unknown.pData           = (drwav_uint8 *)drwav__metadata_get_memory(pParser, (size_t)chunkSize, 1);
+        DRWAV_ASSERT(pMetadata->data.unknown.pData != NULL);
+
+        bytesRead = drwav__metadata_parser_read(pParser, pMetadata->data.unknown.pData, pMetadata->data.unknown.dataSizeInBytes, NULL);
+        if (bytesRead == pMetadata->data.unknown.dataSizeInBytes) {
+            pParser->metadataCursor += 1;
+        } else {
+            /* Failed to read. */
+        }
+    }
+
+    return bytesRead;
+}
+
+DRWAV_PRIVATE drwav_bool32 drwav__chunk_matches(drwav_metadata_type allowedMetadataTypes, const drwav_uint8* pChunkID, drwav_metadata_type type, const char* pID)
+{
+    return (allowedMetadataTypes & type) && drwav_fourcc_equal(pChunkID, pID);
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__metadata_process_chunk(drwav__metadata_parser* pParser, const drwav_chunk_header* pChunkHeader, drwav_metadata_type allowedMetadataTypes)
+{
+    const drwav_uint8 *pChunkID = pChunkHeader->id.fourcc;
+    drwav_uint64 bytesRead = 0;
+
+    if (drwav__chunk_matches(allowedMetadataTypes, pChunkID, drwav_metadata_type_smpl, "smpl")) {
+        if (pChunkHeader->sizeInBytes >= DRWAV_SMPL_BYTES) {
+            if (pParser->stage == drwav__metadata_parser_stage_count) {
+                drwav_uint8 buffer[4];
+                size_t bytesJustRead;
+
+                if (!pParser->onSeek(pParser->pReadSeekUserData, 28, drwav_seek_origin_current)) {
+                    return bytesRead;
+                }
+                bytesRead += 28;
+
+                bytesJustRead = drwav__metadata_parser_read(pParser, buffer, sizeof(buffer), &bytesRead);
+                if (bytesJustRead == sizeof(buffer)) {
+                    drwav_uint32 loopCount = drwav_bytes_to_u32(buffer);
+                    drwav_uint64 calculatedLoopCount;
+
+                    /* The loop count must be validated against the size of the chunk. */
+                    calculatedLoopCount = (pChunkHeader->sizeInBytes - DRWAV_SMPL_BYTES) / DRWAV_SMPL_LOOP_BYTES;
+                    if (calculatedLoopCount == loopCount) {
+                        bytesJustRead = drwav__metadata_parser_read(pParser, buffer, sizeof(buffer), &bytesRead);
+                        if (bytesJustRead == sizeof(buffer)) {
+                            drwav_uint32 samplerSpecificDataSizeInBytes = drwav_bytes_to_u32(buffer);
+
+                            pParser->metadataCount += 1;
+                            drwav__metadata_request_extra_memory_for_stage_2(pParser, sizeof(drwav_smpl_loop) * loopCount, DRWAV_METADATA_ALIGNMENT);
+                            drwav__metadata_request_extra_memory_for_stage_2(pParser, samplerSpecificDataSizeInBytes, 1);
+                        }
+                    } else {
+                        /* Loop count in header does not match the size of the chunk. */
+                    }                    
+                }
+            } else {
+                bytesRead = drwav__read_smpl_to_metadata_obj(pParser, pChunkHeader, &pParser->pMetadata[pParser->metadataCursor]);
+                if (bytesRead == pChunkHeader->sizeInBytes) {
+                    pParser->metadataCursor += 1;
+                } else {
+                    /* Failed to parse. */
+                }
+            }
+        } else {
+            /* Incorrectly formed chunk. */
+        }
+    } else if (drwav__chunk_matches(allowedMetadataTypes, pChunkID, drwav_metadata_type_inst, "inst")) {
+        if (pChunkHeader->sizeInBytes == DRWAV_INST_BYTES) {
+            if (pParser->stage == drwav__metadata_parser_stage_count) {
+                pParser->metadataCount += 1;
+            } else {
+                bytesRead = drwav__read_inst_to_metadata_obj(pParser, &pParser->pMetadata[pParser->metadataCursor]);
+                if (bytesRead == pChunkHeader->sizeInBytes) {
+                    pParser->metadataCursor += 1;
+                } else {
+                    /* Failed to parse. */
+                }
+            }
+        } else {
+            /* Incorrectly formed chunk. */
+        }
+    } else if (drwav__chunk_matches(allowedMetadataTypes, pChunkID, drwav_metadata_type_acid, "acid")) {
+        if (pChunkHeader->sizeInBytes == DRWAV_ACID_BYTES) {
+            if (pParser->stage == drwav__metadata_parser_stage_count) {
+                pParser->metadataCount += 1;
+            } else {
+                bytesRead = drwav__read_acid_to_metadata_obj(pParser, &pParser->pMetadata[pParser->metadataCursor]);
+                if (bytesRead == pChunkHeader->sizeInBytes) {
+                    pParser->metadataCursor += 1;
+                } else {
+                    /* Failed to parse. */
+                }
+            }
+        } else {
+            /* Incorrectly formed chunk. */
+        }
+    } else if (drwav__chunk_matches(allowedMetadataTypes, pChunkID, drwav_metadata_type_cue, "cue ")) {
+        if (pChunkHeader->sizeInBytes >= DRWAV_CUE_BYTES) {
+            if (pParser->stage == drwav__metadata_parser_stage_count) {
+                size_t cueCount;
+
+                pParser->metadataCount += 1;
+                cueCount = (size_t)(pChunkHeader->sizeInBytes - DRWAV_CUE_BYTES) / DRWAV_CUE_POINT_BYTES;
+                drwav__metadata_request_extra_memory_for_stage_2(pParser, sizeof(drwav_cue_point) * cueCount, DRWAV_METADATA_ALIGNMENT);
+            } else {
+                bytesRead = drwav__read_cue_to_metadata_obj(pParser, pChunkHeader, &pParser->pMetadata[pParser->metadataCursor]);
+                if (bytesRead == pChunkHeader->sizeInBytes) {
+                    pParser->metadataCursor += 1;
+                } else {
+                    /* Failed to parse. */
+                }
+            }
+        } else {
+            /* Incorrectly formed chunk. */
+        }
+    } else if (drwav__chunk_matches(allowedMetadataTypes, pChunkID, drwav_metadata_type_bext, "bext")) {
+        if (pChunkHeader->sizeInBytes >= DRWAV_BEXT_BYTES) {
+            if (pParser->stage == drwav__metadata_parser_stage_count) {
+                /* The description field is the largest one in a bext chunk, so that is the max size of this temporary buffer. */
+                char buffer[DRWAV_BEXT_DESCRIPTION_BYTES + 1];
+                size_t allocSizeNeeded = DRWAV_BEXT_UMID_BYTES; /* We know we will need SMPTE umid size. */
+                size_t bytesJustRead;
+
+                buffer[DRWAV_BEXT_DESCRIPTION_BYTES] = '\0';
+                bytesJustRead = drwav__metadata_parser_read(pParser, buffer, DRWAV_BEXT_DESCRIPTION_BYTES, &bytesRead);
+                if (bytesJustRead != DRWAV_BEXT_DESCRIPTION_BYTES) {
+                    return bytesRead;
+                }
+                allocSizeNeeded += drwav__strlen(buffer) + 1;
+
+                buffer[DRWAV_BEXT_ORIGINATOR_NAME_BYTES] = '\0';
+                bytesJustRead = drwav__metadata_parser_read(pParser, buffer, DRWAV_BEXT_ORIGINATOR_NAME_BYTES, &bytesRead);
+                if (bytesJustRead != DRWAV_BEXT_ORIGINATOR_NAME_BYTES) {
+                    return bytesRead;
+                }
+                allocSizeNeeded += drwav__strlen(buffer) + 1;
+
+                buffer[DRWAV_BEXT_ORIGINATOR_REF_BYTES] = '\0';
+                bytesJustRead = drwav__metadata_parser_read(pParser, buffer, DRWAV_BEXT_ORIGINATOR_REF_BYTES, &bytesRead);
+                if (bytesJustRead != DRWAV_BEXT_ORIGINATOR_REF_BYTES) {
+                    return bytesRead;
+                }
+                allocSizeNeeded += drwav__strlen(buffer) + 1;
+                allocSizeNeeded += (size_t)pChunkHeader->sizeInBytes - DRWAV_BEXT_BYTES; /* Coding history. */
+
+                drwav__metadata_request_extra_memory_for_stage_2(pParser, allocSizeNeeded, 1);
+
+                pParser->metadataCount += 1;
+            } else {
+                bytesRead = drwav__read_bext_to_metadata_obj(pParser, &pParser->pMetadata[pParser->metadataCursor], pChunkHeader->sizeInBytes);
+                if (bytesRead == pChunkHeader->sizeInBytes) {
+                    pParser->metadataCursor += 1;
+                } else {
+                    /* Failed to parse. */
+                }
+            }
+        } else {
+            /* Incorrectly formed chunk. */
+        }
+    } else if (drwav_fourcc_equal(pChunkID, "LIST") || drwav_fourcc_equal(pChunkID, "list")) {
+        drwav_metadata_location listType = drwav_metadata_location_invalid;
+        while (bytesRead < pChunkHeader->sizeInBytes) {
+            drwav_uint8 subchunkId[4];
+            drwav_uint8 subchunkSizeBuffer[4];
+            drwav_uint64 subchunkDataSize;
+            drwav_uint64 subchunkBytesRead = 0;
+            drwav_uint64 bytesJustRead = drwav__metadata_parser_read(pParser, subchunkId, sizeof(subchunkId), &bytesRead);
+            if (bytesJustRead != sizeof(subchunkId)) {
+                break;
+            }
+
+            /*
+            The first thing in a list chunk should be "adtl" or "INFO".
+
+              - adtl means this list is a Associated Data List Chunk and will contain labels, notes
+                or labelled cue regions.
+              - INFO means this list is an Info List Chunk containing info text chunks such as IPRD
+                which would specifies the album of this wav file.
+
+            No data follows the adtl or INFO id so we just make note of what type this list is and
+            continue.
+            */
+            if (drwav_fourcc_equal(subchunkId, "adtl")) {
+                listType = drwav_metadata_location_inside_adtl_list;
+                continue;
+            } else if (drwav_fourcc_equal(subchunkId, "INFO")) {
+                listType = drwav_metadata_location_inside_info_list;
+                continue;
+            }
+
+            bytesJustRead = drwav__metadata_parser_read(pParser, subchunkSizeBuffer, sizeof(subchunkSizeBuffer), &bytesRead);
+            if (bytesJustRead != sizeof(subchunkSizeBuffer)) {
+                break;
+            }
+            subchunkDataSize = drwav_bytes_to_u32(subchunkSizeBuffer);
+
+            if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_label, "labl") || drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_note, "note")) {
+                if (subchunkDataSize >= DRWAV_LIST_LABEL_OR_NOTE_BYTES) {
+                    drwav_uint64 stringSizeWithNullTerm = subchunkDataSize - DRWAV_LIST_LABEL_OR_NOTE_BYTES;
+                    if (pParser->stage == drwav__metadata_parser_stage_count) {
+                        pParser->metadataCount += 1;
+                        drwav__metadata_request_extra_memory_for_stage_2(pParser, (size_t)stringSizeWithNullTerm, 1);
+                    } else {
+                        subchunkBytesRead = drwav__read_list_label_or_note_to_metadata_obj(pParser, &pParser->pMetadata[pParser->metadataCursor], subchunkDataSize, drwav_fourcc_equal(subchunkId, "labl") ? drwav_metadata_type_list_label : drwav_metadata_type_list_note);
+                        if (subchunkBytesRead == subchunkDataSize) {
+                            pParser->metadataCursor += 1;
+                        } else {
+                            /* Failed to parse. */
+                        }
+                    }
+                } else {
+                    /* Incorrectly formed chunk. */
+                }
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_labelled_cue_region, "ltxt")) {
+                if (subchunkDataSize >= DRWAV_LIST_LABELLED_TEXT_BYTES) {
+                    drwav_uint64 stringSizeWithNullTerminator = subchunkDataSize - DRWAV_LIST_LABELLED_TEXT_BYTES;
+                    if (pParser->stage == drwav__metadata_parser_stage_count) {
+                        pParser->metadataCount += 1;
+                        drwav__metadata_request_extra_memory_for_stage_2(pParser, (size_t)stringSizeWithNullTerminator, 1);
+                    } else {
+                        subchunkBytesRead = drwav__read_list_labelled_cue_region_to_metadata_obj(pParser, &pParser->pMetadata[pParser->metadataCursor], subchunkDataSize);
+                        if (subchunkBytesRead == subchunkDataSize) {
+                            pParser->metadataCursor += 1;
+                        } else {
+                            /* Failed to parse. */
+                        }
+                    }
+                } else {
+                    /* Incorrectly formed chunk. */
+                }
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_software, "ISFT")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_software);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_copyright, "ICOP")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_copyright);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_title, "INAM")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_title);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_artist, "IART")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_artist);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_comment, "ICMT")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_comment);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_date, "ICRD")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_date);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_genre, "IGNR")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_genre);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_album, "IPRD")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_album);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_tracknumber, "ITRK")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_tracknumber);
+            } else if ((allowedMetadataTypes & drwav_metadata_type_unknown) != 0) {
+                subchunkBytesRead = drwav__metadata_process_unknown_chunk(pParser, subchunkId, subchunkDataSize, listType);
+            }
+
+            bytesRead += subchunkBytesRead;
+            DRWAV_ASSERT(subchunkBytesRead <= subchunkDataSize);
+
+            if (subchunkBytesRead < subchunkDataSize) {
+                drwav_uint64 bytesToSeek = subchunkDataSize - subchunkBytesRead;
+
+                if (!pParser->onSeek(pParser->pReadSeekUserData, (int)bytesToSeek, drwav_seek_origin_current)) {
+                    break;
+                }
+                bytesRead += bytesToSeek;
+            }
+
+            if ((subchunkDataSize % 2) == 1) {
+                if (!pParser->onSeek(pParser->pReadSeekUserData, 1, drwav_seek_origin_current)) {
+                    break;
+                }
+                bytesRead += 1;
+            }
+        }
+    } else if ((allowedMetadataTypes & drwav_metadata_type_unknown) != 0) {
+        bytesRead = drwav__metadata_process_unknown_chunk(pParser, pChunkID, pChunkHeader->sizeInBytes, drwav_metadata_location_top_level);
+    }
+
+    return bytesRead;
+}
+
+
+DRWAV_PRIVATE drwav_uint32 drwav_get_bytes_per_pcm_frame(drwav* pWav)
+{
+    drwav_uint32 bytesPerFrame;
+
+    /*
+    The bytes per frame is a bit ambiguous. It can be either be based on the bits per sample, or the block align. The way I'm doing it here
+    is that if the bits per sample is a multiple of 8, use floor(bitsPerSample*channels/8), otherwise fall back to the block align.
+    */
+    if ((pWav->bitsPerSample & 0x7) == 0) {
+        /* Bits per sample is a multiple of 8. */
+        bytesPerFrame = (pWav->bitsPerSample * pWav->fmt.channels) >> 3;
+    } else {
+        bytesPerFrame = pWav->fmt.blockAlign;
+    }
+
+    /* Validation for known formats. a-law and mu-law should be 1 byte per channel. If it's not, it's not decodable. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW || pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        if (bytesPerFrame != pWav->fmt.channels) {
+            return 0;   /* Invalid file. */
+        }
+    }
+
+    return bytesPerFrame;
+}
+
+DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT)
+{
+    if (pFMT == NULL) {
+        return 0;
+    }
+
+    if (pFMT->formatTag != DR_WAVE_FORMAT_EXTENSIBLE) {
+        return pFMT->formatTag;
+    } else {
+        return drwav_bytes_to_u16(pFMT->subFormat);    /* Only the first two bytes are required. */
+    }
+}
+
+DRWAV_PRIVATE drwav_bool32 drwav_preinit(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pWav == NULL || onRead == NULL || onSeek == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
+    pWav->onRead    = onRead;
+    pWav->onSeek    = onSeek;
+    pWav->pUserData = pReadSeekUserData;
+    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
+
+    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
+        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
+    }
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_PRIVATE drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags)
+{
+    /* This function assumes drwav_preinit() has been called beforehand. */
+    drwav_result result;
+    drwav_uint64 cursor;    /* <-- Keeps track of the byte position so we can seek to specific locations. */
+    drwav_bool32 sequential;
+    drwav_uint8 riff[4];
+    drwav_fmt fmt;
+    unsigned short translatedFormatTag;
+    drwav_uint64 dataChunkSize = 0;             /* <-- Important! Don't explicitly set this to 0 anywhere else. Calculation of the size of the data chunk is performed in different paths depending on the container. */
+    drwav_uint64 sampleCountFromFactChunk = 0;  /* Same as dataChunkSize - make sure this is the only place this is initialized to 0. */
+    drwav_uint64 metadataStartPos;
+    drwav__metadata_parser metadataParser;
+    drwav_bool8 isProcessingMetadata = DRWAV_FALSE;
+    drwav_bool8 foundChunk_fmt  = DRWAV_FALSE;
+    drwav_bool8 foundChunk_data = DRWAV_FALSE;
+    drwav_bool8 isAIFCFormType = DRWAV_FALSE;   /* Only used with AIFF. */
+    drwav_uint64 aiffFrameCount = 0;
+
+    cursor = 0;
+    sequential = (flags & DRWAV_SEQUENTIAL) != 0;
+    DRWAV_ZERO_OBJECT(&fmt);
+
+    /* The first 4 bytes should be the RIFF identifier. */
+    if (drwav__on_read(pWav->onRead, pWav->pUserData, riff, sizeof(riff), &cursor) != sizeof(riff)) {
+        return DRWAV_FALSE;
+    }
+
+    /*
+    The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for
+    w64 it will start with "riff".
+    */
+    if (drwav_fourcc_equal(riff, "RIFF")) {
+        pWav->container = drwav_container_riff;
+    } else if (drwav_fourcc_equal(riff, "RIFX")) {
+        pWav->container = drwav_container_rifx;
+    } else if (drwav_fourcc_equal(riff, "riff")) {
+        int i;
+        drwav_uint8 riff2[12];
+
+        pWav->container = drwav_container_w64;
+
+        /* Check the rest of the GUID for validity. */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, riff2, sizeof(riff2), &cursor) != sizeof(riff2)) {
+            return DRWAV_FALSE;
+        }
+
+        for (i = 0; i < 12; ++i) {
+            if (riff2[i] != drwavGUID_W64_RIFF[i+4]) {
+                return DRWAV_FALSE;
+            }
+        }
+    } else if (drwav_fourcc_equal(riff, "RF64")) {
+        pWav->container = drwav_container_rf64;
+    } else if (drwav_fourcc_equal(riff, "FORM")) {
+        pWav->container = drwav_container_aiff;
+    } else {
+        return DRWAV_FALSE;   /* Unknown or unsupported container. */
+    }
+
+
+    if (pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx || pWav->container == drwav_container_rf64) {
+        drwav_uint8 chunkSizeBytes[4];
+        drwav_uint8 wave[4];
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        if (pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx) {
+            if (drwav_bytes_to_u32_ex(chunkSizeBytes, pWav->container) < 36) {
+                return DRWAV_FALSE;    /* Chunk size should always be at least 36 bytes. */
+            }
+        } else if (pWav->container == drwav_container_rf64) {
+            if (drwav_bytes_to_u32_le(chunkSizeBytes) != 0xFFFFFFFF) {
+                return DRWAV_FALSE;    /* Chunk size should always be set to -1/0xFFFFFFFF for RF64. The actual size is retrieved later. */
+            }
+        } else {
+            return DRWAV_FALSE; /* Should never hit this. */
+        }
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav_fourcc_equal(wave, "WAVE")) {
+            return DRWAV_FALSE;    /* Expecting "WAVE". */
+        }
+    } else if (pWav->container == drwav_container_w64) {
+        drwav_uint8 chunkSizeBytes[8];
+        drwav_uint8 wave[16];
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav_bytes_to_u64(chunkSizeBytes) < 80) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav_guid_equal(wave, drwavGUID_W64_WAVE)) {
+            return DRWAV_FALSE;
+        }
+    } else if (pWav->container == drwav_container_aiff) {
+        drwav_uint8 chunkSizeBytes[4];
+        drwav_uint8 aiff[4];
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav_bytes_to_u32_be(chunkSizeBytes) < 18) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, aiff, sizeof(aiff), &cursor) != sizeof(aiff)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav_fourcc_equal(aiff, "AIFF")) {
+            isAIFCFormType = DRWAV_FALSE;
+        } else if (drwav_fourcc_equal(aiff, "AIFC")) {
+            isAIFCFormType = DRWAV_TRUE;
+        } else {
+            return DRWAV_FALSE; /* Expecting "AIFF" or "AIFC". */
+        }
+    } else {
+        return DRWAV_FALSE;
+    }
+
+
+    /* For RF64, the "ds64" chunk must come next, before the "fmt " chunk. */
+    if (pWav->container == drwav_container_rf64) {
+        drwav_uint8 sizeBytes[8];
+        drwav_uint64 bytesRemainingInChunk;
+        drwav_chunk_header header;
+        result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header);
+        if (result != DRWAV_SUCCESS) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav_fourcc_equal(header.id.fourcc, "ds64")) {
+            return DRWAV_FALSE; /* Expecting "ds64". */
+        }
+
+        bytesRemainingInChunk = header.sizeInBytes + header.paddingSize;
+
+        /* We don't care about the size of the RIFF chunk - skip it. */
+        if (!drwav__seek_forward(pWav->onSeek, 8, pWav->pUserData)) {
+            return DRWAV_FALSE;
+        }
+        bytesRemainingInChunk -= 8;
+        cursor += 8;
+
+
+        /* Next 8 bytes is the size of the "data" chunk. */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, sizeBytes, sizeof(sizeBytes), &cursor) != sizeof(sizeBytes)) {
+            return DRWAV_FALSE;
+        }
+        bytesRemainingInChunk -= 8;
+        dataChunkSize = drwav_bytes_to_u64(sizeBytes);
+
+
+        /* Next 8 bytes is the same count which we would usually derived from the FACT chunk if it was available. */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, sizeBytes, sizeof(sizeBytes), &cursor) != sizeof(sizeBytes)) {
+            return DRWAV_FALSE;
+        }
+        bytesRemainingInChunk -= 8;
+        sampleCountFromFactChunk = drwav_bytes_to_u64(sizeBytes);
+
+
+        /* Skip over everything else. */
+        if (!drwav__seek_forward(pWav->onSeek, bytesRemainingInChunk, pWav->pUserData)) {
+            return DRWAV_FALSE;
+        }
+        cursor += bytesRemainingInChunk;
+    }
+
+
+    metadataStartPos = cursor;
+
+    /*
+    Whether or not we are processing metadata controls how we load. We can load more efficiently when
+    metadata is not being processed, but we also cannot process metadata for Wave64 because I have not
+    been able to test it. If someone is able to test this and provide a patch I'm happy to enable it.
+
+    Seqential mode cannot support metadata because it involves seeking backwards.
+    */
+    isProcessingMetadata = !sequential && ((flags & DRWAV_WITH_METADATA) != 0);
+
+    /* Don't allow processing of metadata with untested containers. */
+    if (pWav->container != drwav_container_riff && pWav->container != drwav_container_rf64) {
+        isProcessingMetadata = DRWAV_FALSE;
+    }
+
+    DRWAV_ZERO_MEMORY(&metadataParser, sizeof(metadataParser));
+    if (isProcessingMetadata) {
+        metadataParser.onRead = pWav->onRead;
+        metadataParser.onSeek = pWav->onSeek;
+        metadataParser.pReadSeekUserData = pWav->pUserData;
+        metadataParser.stage  = drwav__metadata_parser_stage_count;
+    }
+
+
+    /*
+    From here on out, chunks might be in any order. In order to robustly handle metadata we'll need
+    to loop through every chunk and handle them as we find them. In sequential mode we need to get
+    out of the loop as soon as we find the data chunk because we won't be able to seek back.
+    */
+    for (;;) {  /* For each chunk... */
+        drwav_chunk_header header;
+        drwav_uint64 chunkSize;
+
+        result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header);
+        if (result != DRWAV_SUCCESS) {
+            break;
+        }
+
+        chunkSize = header.sizeInBytes;
+
+
+        /*
+        Always tell the caller about this chunk. We cannot do this in sequential mode because the
+        callback is allowed to read from the file, in which case we'll need to rewind.
+        */
+        if (!sequential && onChunk != NULL) {
+            drwav_uint64 callbackBytesRead = onChunk(pChunkUserData, pWav->onRead, pWav->onSeek, pWav->pUserData, &header, pWav->container, &fmt);
+
+            /*
+            dr_wav may need to read the contents of the chunk, so we now need to seek back to the position before
+            we called the callback.
+            */
+            if (callbackBytesRead > 0) {
+                if (drwav__seek_from_start(pWav->onSeek, cursor, pWav->pUserData) == DRWAV_FALSE) {
+                    return DRWAV_FALSE;
+                }
+            }
+        }
+
+
+        /* Explicitly handle known chunks first. */
+
+        /* "fmt " */
+        if (((pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx || pWav->container == drwav_container_rf64) && drwav_fourcc_equal(header.id.fourcc, "fmt ")) ||
+            ((pWav->container == drwav_container_w64) && drwav_guid_equal(header.id.guid, drwavGUID_W64_FMT))) {
+            drwav_uint8 fmtData[16];
+
+            foundChunk_fmt = DRWAV_TRUE;
+
+            if (pWav->onRead(pWav->pUserData, fmtData, sizeof(fmtData)) != sizeof(fmtData)) {
+                return DRWAV_FALSE;
+            }
+            cursor += sizeof(fmtData);
+
+            fmt.formatTag      = drwav_bytes_to_u16_ex(fmtData + 0,  pWav->container);
+            fmt.channels       = drwav_bytes_to_u16_ex(fmtData + 2,  pWav->container);
+            fmt.sampleRate     = drwav_bytes_to_u32_ex(fmtData + 4,  pWav->container);
+            fmt.avgBytesPerSec = drwav_bytes_to_u32_ex(fmtData + 8,  pWav->container);
+            fmt.blockAlign     = drwav_bytes_to_u16_ex(fmtData + 12, pWav->container);
+            fmt.bitsPerSample  = drwav_bytes_to_u16_ex(fmtData + 14, pWav->container);
+
+            fmt.extendedSize       = 0;
+            fmt.validBitsPerSample = 0;
+            fmt.channelMask        = 0;
+            DRWAV_ZERO_MEMORY(fmt.subFormat, sizeof(fmt.subFormat));
+
+            if (header.sizeInBytes > 16) {
+                drwav_uint8 fmt_cbSize[2];
+                int bytesReadSoFar = 0;
+
+                if (pWav->onRead(pWav->pUserData, fmt_cbSize, sizeof(fmt_cbSize)) != sizeof(fmt_cbSize)) {
+                    return DRWAV_FALSE;    /* Expecting more data. */
+                }
+                cursor += sizeof(fmt_cbSize);
+
+                bytesReadSoFar = 18;
+
+                fmt.extendedSize = drwav_bytes_to_u16_ex(fmt_cbSize, pWav->container);
+                if (fmt.extendedSize > 0) {
+                    /* Simple validation. */
+                    if (fmt.formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+                        if (fmt.extendedSize != 22) {
+                            return DRWAV_FALSE;
+                        }
+                    }
+
+                    if (fmt.formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+                        drwav_uint8 fmtext[22];
+
+                        if (pWav->onRead(pWav->pUserData, fmtext, fmt.extendedSize) != fmt.extendedSize) {
+                            return DRWAV_FALSE;    /* Expecting more data. */
+                        }
+
+                        fmt.validBitsPerSample = drwav_bytes_to_u16_ex(fmtext + 0, pWav->container);
+                        fmt.channelMask        = drwav_bytes_to_u32_ex(fmtext + 2, pWav->container);
+                        drwav_bytes_to_guid(fmtext + 6, fmt.subFormat);
+                    } else {
+                        if (pWav->onSeek(pWav->pUserData, fmt.extendedSize, drwav_seek_origin_current) == DRWAV_FALSE) {
+                            return DRWAV_FALSE;
+                        }
+                    }
+                    cursor += fmt.extendedSize;
+
+                    bytesReadSoFar += fmt.extendedSize;
+                }
+
+                /* Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size. */
+                if (pWav->onSeek(pWav->pUserData, (int)(header.sizeInBytes - bytesReadSoFar), drwav_seek_origin_current) == DRWAV_FALSE) {
+                    return DRWAV_FALSE;
+                }
+                cursor += (header.sizeInBytes - bytesReadSoFar);
+            }
+
+            if (header.paddingSize > 0) {
+                if (drwav__seek_forward(pWav->onSeek, header.paddingSize, pWav->pUserData) == DRWAV_FALSE) {
+                    break;
+                }
+                cursor += header.paddingSize;
+            }
+
+            /* Go to the next chunk. Don't include this chunk in metadata. */
+            continue;
+        }
+
+        /* "data" */
+        if (((pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx || pWav->container == drwav_container_rf64) && drwav_fourcc_equal(header.id.fourcc, "data")) ||
+            ((pWav->container == drwav_container_w64) && drwav_guid_equal(header.id.guid, drwavGUID_W64_DATA))) {
+            foundChunk_data = DRWAV_TRUE;
+            
+            pWav->dataChunkDataPos  = cursor;
+
+            if (pWav->container != drwav_container_rf64) {  /* The data chunk size for RF64 will always be set to 0xFFFFFFFF here. It was set to it's true value earlier. */
+                dataChunkSize = chunkSize;
+            }
+
+            /* If we're running in sequential mode, or we're not reading metadata, we have enough now that we can get out of the loop. */
+            if (sequential || !isProcessingMetadata) {
+                break;      /* No need to keep reading beyond the data chunk. */
+            } else {
+                chunkSize += header.paddingSize;    /* <-- Make sure we seek past the padding. */
+                if (drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData) == DRWAV_FALSE) {
+                    break;
+                }
+                cursor += chunkSize;
+
+                continue;   /* There may be some more metadata to read. */
+            }
+        }
+
+        /* "fact". This is optional. Can use this to get the sample count which is useful for compressed formats. For RF64 we retrieved the sample count from the ds64 chunk earlier. */
+        if (((pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx || pWav->container == drwav_container_rf64) && drwav_fourcc_equal(header.id.fourcc, "fact")) ||
+            ((pWav->container == drwav_container_w64) && drwav_guid_equal(header.id.guid, drwavGUID_W64_FACT))) {
+            if (pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx) {
+                drwav_uint8 sampleCount[4];
+                if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCount, 4, &cursor) != 4) {
+                    return DRWAV_FALSE;
+                }
+
+                chunkSize -= 4;
+
+                /*
+                The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this
+                for Microsoft ADPCM formats.
+                */
+                if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+                    sampleCountFromFactChunk = drwav_bytes_to_u32_ex(sampleCount, pWav->container);
+                } else {
+                    sampleCountFromFactChunk = 0;
+                }
+            } else if (pWav->container == drwav_container_w64) {
+                if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCountFromFactChunk, 8, &cursor) != 8) {
+                    return DRWAV_FALSE;
+                }
+
+                chunkSize -= 8;
+            } else if (pWav->container == drwav_container_rf64) {
+                /* We retrieved the sample count from the ds64 chunk earlier so no need to do that here. */
+            }
+
+            /* Seek to the next chunk in preparation for the next iteration. */
+            chunkSize += header.paddingSize;    /* <-- Make sure we seek past the padding. */
+            if (drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData) == DRWAV_FALSE) {
+                break;
+            }
+            cursor += chunkSize;
+
+            continue;
+        }
+
+
+        /* "COMM". AIFF/AIFC only. */
+        if (pWav->container == drwav_container_aiff && drwav_fourcc_equal(header.id.fourcc, "COMM")) {
+            drwav_uint8 commData[24];
+            drwav_uint32 commDataBytesToRead;
+            drwav_uint16 channels;
+            drwav_uint32 frameCount;
+            drwav_uint16 sampleSizeInBits;
+            drwav_int64  sampleRate;
+            drwav_uint16 compressionFormat;
+
+            foundChunk_fmt = DRWAV_TRUE;
+
+            if (isAIFCFormType) {
+                commDataBytesToRead = 24;
+                if (header.sizeInBytes < commDataBytesToRead) {
+                    return DRWAV_FALSE; /* Invalid COMM chunk. */
+                }
+            } else {
+                commDataBytesToRead = 18;
+                if (header.sizeInBytes != commDataBytesToRead) {
+                    return DRWAV_FALSE; /* INVALID COMM chunk. */
+                }
+            }
+
+            if (drwav__on_read(pWav->onRead, pWav->pUserData, commData, commDataBytesToRead, &cursor) != commDataBytesToRead) {
+                return DRWAV_FALSE;
+            }
+
+            
+            channels         = drwav_bytes_to_u16_ex     (commData + 0, pWav->container);
+            frameCount       = drwav_bytes_to_u32_ex     (commData + 2, pWav->container);
+            sampleSizeInBits = drwav_bytes_to_u16_ex     (commData + 6, pWav->container);
+            sampleRate       = drwav_aiff_extented_to_s64(commData + 8);
+
+            if (sampleRate < 0 || sampleRate > 0xFFFFFFFF) {
+                return DRWAV_FALSE; /* Invalid sample rate. */
+            }
+
+            if (isAIFCFormType) {
+                const drwav_uint8* type = commData + 18;
+
+                if (drwav_fourcc_equal(type, "NONE")) {
+                    compressionFormat = DR_WAVE_FORMAT_PCM; /* PCM, big-endian. */
+                } else if (drwav_fourcc_equal(type, "raw ")) {
+                    compressionFormat = DR_WAVE_FORMAT_PCM;
+
+                    /* In my testing, it looks like when the "raw " compression type is used, 8-bit samples should be considered unsigned. */
+                    if (sampleSizeInBits == 8) {
+                        pWav->aiff.isUnsigned = DRWAV_TRUE;
+                    }
+                } else if (drwav_fourcc_equal(type, "sowt")) {
+                    compressionFormat = DR_WAVE_FORMAT_PCM; /* PCM, little-endian. */
+                    pWav->aiff.isLE = DRWAV_TRUE;
+                } else if (drwav_fourcc_equal(type, "fl32") || drwav_fourcc_equal(type, "fl64") || drwav_fourcc_equal(type, "FL32") || drwav_fourcc_equal(type, "FL64")) {
+                    compressionFormat = DR_WAVE_FORMAT_IEEE_FLOAT;
+                } else if (drwav_fourcc_equal(type, "alaw") || drwav_fourcc_equal(type, "ALAW")) {
+                    compressionFormat = DR_WAVE_FORMAT_ALAW;
+                } else if (drwav_fourcc_equal(type, "ulaw") || drwav_fourcc_equal(type, "ULAW")) {
+                    compressionFormat = DR_WAVE_FORMAT_MULAW;
+                } else if (drwav_fourcc_equal(type, "ima4")) {
+                    compressionFormat = DR_WAVE_FORMAT_DVI_ADPCM;
+                    sampleSizeInBits = 4;
+
+                    /*
+                    I haven't been able to figure out how to get correct decoding for IMA ADPCM. Until this is figured out
+                    we'll need to abort when we encounter such an encoding. Advice welcome!
+                    */
+                    return DRWAV_FALSE;
+                } else {
+                    return DRWAV_FALSE; /* Unknown or unsupported compression format. Need to abort. */
+                }
+            } else {
+                compressionFormat = DR_WAVE_FORMAT_PCM; /* It's a standard AIFF form which is always compressed. */
+            }
+
+            /* With AIFF we want to use the explicitly defined frame count rather than deriving it from the size of the chunk. */
+            aiffFrameCount = frameCount;
+
+            /* We should now have enough information to fill out our fmt structure. */
+            fmt.formatTag      = compressionFormat;
+            fmt.channels       = channels;
+            fmt.sampleRate     = (drwav_uint32)sampleRate;
+            fmt.bitsPerSample  = sampleSizeInBits;
+            fmt.blockAlign     = (drwav_uint16)(fmt.channels * fmt.bitsPerSample / 8);
+            fmt.avgBytesPerSec = fmt.blockAlign * fmt.sampleRate;
+
+            if (fmt.blockAlign == 0 && compressionFormat == DR_WAVE_FORMAT_DVI_ADPCM) {
+                fmt.blockAlign = 34 * fmt.channels;
+            }
+
+            /*
+            Weird one. I've seen some alaw and ulaw encoded files that for some reason set the bits per sample to 16 when
+            it should be 8. To get this working I need to explicitly check for this and change it.
+            */
+            if (compressionFormat == DR_WAVE_FORMAT_ALAW || compressionFormat == DR_WAVE_FORMAT_MULAW) {
+                if (fmt.bitsPerSample > 8) {
+                    fmt.bitsPerSample = 8;
+                    fmt.blockAlign = fmt.channels;
+                }
+            }
+
+            /* In AIFF, samples are padded to 8 byte boundaries. We need to round up our bits per sample here. */
+            fmt.bitsPerSample += (fmt.bitsPerSample & 7);
+            
+
+            /* If the form type is AIFC there will be some additional data in the chunk. We need to seek past it. */
+            if (isAIFCFormType) {
+                if (drwav__seek_forward(pWav->onSeek, (chunkSize - commDataBytesToRead), pWav->pUserData) == DRWAV_FALSE) {
+                    return DRWAV_FALSE;
+                }
+                cursor += (chunkSize - commDataBytesToRead);
+            }
+
+            /* Don't fall through or else we'll end up treating this chunk as metadata which is incorrect. */
+            continue;
+        }
+
+
+        /* "SSND". AIFF/AIFC only. This is the AIFF equivalent of the "data" chunk. */
+        if (pWav->container == drwav_container_aiff && drwav_fourcc_equal(header.id.fourcc, "SSND")) {
+            drwav_uint8 offsetAndBlockSizeData[8];
+            drwav_uint32 offset;
+
+            foundChunk_data = DRWAV_TRUE;
+
+            if (drwav__on_read(pWav->onRead, pWav->pUserData, offsetAndBlockSizeData, sizeof(offsetAndBlockSizeData), &cursor) != sizeof(offsetAndBlockSizeData)) {
+                return DRWAV_FALSE;
+            }
+
+            /* We need to seek forward by the offset. */
+            offset = drwav_bytes_to_u32_ex(offsetAndBlockSizeData + 0, pWav->container);
+            if (drwav__seek_forward(pWav->onSeek, offset, pWav->pUserData) == DRWAV_FALSE) {
+                return DRWAV_FALSE;
+            }
+            cursor += offset;
+
+            pWav->dataChunkDataPos = cursor;
+            dataChunkSize = chunkSize;
+
+            /* If we're running in sequential mode, or we're not reading metadata, we have enough now that we can get out of the loop. */
+            if (sequential || !isProcessingMetadata) {
+                break;      /* No need to keep reading beyond the data chunk. */
+            } else {
+                if (drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData) == DRWAV_FALSE) {
+                    break;
+                }
+                cursor += chunkSize;
+
+                continue;   /* There may be some more metadata to read. */
+            }
+        }
+
+
+
+        /* Getting here means it's not a chunk that we care about internally, but might need to be handled as metadata by the caller. */
+        if (isProcessingMetadata) {
+            drwav_uint64 metadataBytesRead;
+            
+            metadataBytesRead = drwav__metadata_process_chunk(&metadataParser, &header, drwav_metadata_type_all_including_unknown);
+            DRWAV_ASSERT(metadataBytesRead <= header.sizeInBytes);
+
+            /* Go back to the start of the chunk so we can normalize the position of the cursor. */
+            if (drwav__seek_from_start(pWav->onSeek, cursor, pWav->pUserData) == DRWAV_FALSE) {
+                break;  /* Failed to seek. Can't reliable read the remaining chunks. Get out. */
+            }
+        }
+
+
+        /* Make sure we skip past the content of this chunk before we go to the next one. */
+        chunkSize += header.paddingSize;    /* <-- Make sure we seek past the padding. */
+        if (drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData) == DRWAV_FALSE) {
+            break;
+        }
+        cursor += chunkSize;
+    }
+
+    /* There's some mandatory chunks that must exist. If they were not found in the iteration above we must abort. */
+    if (!foundChunk_fmt || !foundChunk_data) {
+        return DRWAV_FALSE;
+    }
+
+    /* Basic validation. */
+    if ((fmt.sampleRate    == 0 || fmt.sampleRate    > DRWAV_MAX_SAMPLE_RATE    ) ||
+        (fmt.channels      == 0 || fmt.channels      > DRWAV_MAX_CHANNELS       ) ||
+        (fmt.bitsPerSample == 0 || fmt.bitsPerSample > DRWAV_MAX_BITS_PER_SAMPLE) ||
+        fmt.blockAlign == 0) {
+        return DRWAV_FALSE; /* Probably an invalid WAV file. */
+    }
+
+    /* Translate the internal format. */
+    translatedFormatTag = fmt.formatTag;
+    if (translatedFormatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+        translatedFormatTag = drwav_bytes_to_u16_ex(fmt.subFormat + 0, pWav->container);
+    }
+
+    /* We may have moved passed the data chunk. If so we need to move back. If running in sequential mode we can assume we are already sitting on the data chunk. */
+    if (!sequential) {
+        if (!drwav__seek_from_start(pWav->onSeek, pWav->dataChunkDataPos, pWav->pUserData)) {
+            return DRWAV_FALSE;
+        }
+        cursor = pWav->dataChunkDataPos;
+    }
+
+
+    /*
+    At this point we should have done the initial parsing of each of our chunks, but we now need to
+    do a second pass to extract the actual contents of the metadata (the first pass just calculated
+    the length of the memory allocation).
+
+    We only do this if we've actually got metadata to parse.
+    */
+    if (isProcessingMetadata && metadataParser.metadataCount > 0) {
+        if (drwav__seek_from_start(pWav->onSeek, metadataStartPos, pWav->pUserData) == DRWAV_FALSE) {
+            return DRWAV_FALSE;
+        }
+
+        result = drwav__metadata_alloc(&metadataParser, &pWav->allocationCallbacks);
+        if (result != DRWAV_SUCCESS) {
+            return DRWAV_FALSE;
+        }
+
+        metadataParser.stage = drwav__metadata_parser_stage_read;
+
+        for (;;) {
+            drwav_chunk_header header;
+            drwav_uint64 metadataBytesRead;
+
+            result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header);
+            if (result != DRWAV_SUCCESS) {
+                break;
+            }
+
+            metadataBytesRead = drwav__metadata_process_chunk(&metadataParser, &header, drwav_metadata_type_all_including_unknown);
+
+            /* Move to the end of the chunk so we can keep iterating. */
+            if (drwav__seek_forward(pWav->onSeek, (header.sizeInBytes + header.paddingSize) - metadataBytesRead, pWav->pUserData) == DRWAV_FALSE) {
+                drwav_free(metadataParser.pMetadata, &pWav->allocationCallbacks);
+                return DRWAV_FALSE;
+            }
+        }
+
+        /* Getting here means we're finished parsing the metadata. */
+        pWav->pMetadata     = metadataParser.pMetadata;
+        pWav->metadataCount = metadataParser.metadataCount;
+    }
+
+
+    /* At this point we should be sitting on the first byte of the raw audio data. */
+
+    /*
+    I've seen a WAV file in the wild where a RIFF-ecapsulated file has the size of it's "RIFF" and
+    "data" chunks set to 0xFFFFFFFF when the file is definitely not that big. In this case we're
+    going to have to calculate the size by reading and discarding bytes, and then seeking back. We
+    cannot do this in sequential mode. We just assume that the rest of the file is audio data.
+    */
+    if (dataChunkSize == 0xFFFFFFFF && (pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx) && pWav->isSequentialWrite == DRWAV_FALSE) {
+        dataChunkSize = 0;
+
+        for (;;) {
+            drwav_uint8 temp[4096];
+            size_t bytesRead = pWav->onRead(pWav->pUserData, temp, sizeof(temp));
+            dataChunkSize += bytesRead;
+
+            if (bytesRead < sizeof(temp)) {
+                break;
+            }
+        }
+    }
+
+    if (drwav__seek_from_start(pWav->onSeek, pWav->dataChunkDataPos, pWav->pUserData) == DRWAV_FALSE) {
+        drwav_free(pWav->pMetadata, &pWav->allocationCallbacks);
+        return DRWAV_FALSE;
+    }
+
+
+    pWav->fmt                 = fmt;
+    pWav->sampleRate          = fmt.sampleRate;
+    pWav->channels            = fmt.channels;
+    pWav->bitsPerSample       = fmt.bitsPerSample;
+    pWav->bytesRemaining      = dataChunkSize;
+    pWav->translatedFormatTag = translatedFormatTag;
+    pWav->dataChunkDataSize   = dataChunkSize;
+
+    if (sampleCountFromFactChunk != 0) {
+        pWav->totalPCMFrameCount = sampleCountFromFactChunk;
+    } else if (aiffFrameCount != 0) {
+        pWav->totalPCMFrameCount = aiffFrameCount;
+    } else {
+        drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+        if (bytesPerFrame == 0) {
+            drwav_free(pWav->pMetadata, &pWav->allocationCallbacks);
+            return DRWAV_FALSE; /* Invalid file. */
+        }
+
+        pWav->totalPCMFrameCount = dataChunkSize / bytesPerFrame;
+
+        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+            drwav_uint64 totalBlockHeaderSizeInBytes;
+            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+
+            /* Make sure any trailing partial block is accounted for. */
+            if ((blockCount * fmt.blockAlign) < dataChunkSize) {
+                blockCount += 1;
+            }
+
+            /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. */
+            totalBlockHeaderSizeInBytes = blockCount * (6*fmt.channels);
+            pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels;
+        }
+        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+            drwav_uint64 totalBlockHeaderSizeInBytes;
+            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+
+            /* Make sure any trailing partial block is accounted for. */
+            if ((blockCount * fmt.blockAlign) < dataChunkSize) {
+                blockCount += 1;
+            }
+
+            /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. */
+            totalBlockHeaderSizeInBytes = blockCount * (4*fmt.channels);
+            pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels;
+
+            /* The header includes a decoded sample for each channel which acts as the initial predictor sample. */
+            pWav->totalPCMFrameCount += blockCount;
+        }
+    }
+
+    /* Some formats only support a certain number of channels. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        if (pWav->channels > 2) {
+            drwav_free(pWav->pMetadata, &pWav->allocationCallbacks);
+            return DRWAV_FALSE;
+        }
+    }
+
+    /* The number of bytes per frame must be known. If not, it's an invalid file and not decodable. */
+    if (drwav_get_bytes_per_pcm_frame(pWav) == 0) {
+        drwav_free(pWav->pMetadata, &pWav->allocationCallbacks);
+        return DRWAV_FALSE;
+    }
+
+#ifdef DR_WAV_LIBSNDFILE_COMPAT
+    /*
+    I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website),
+    it appears the total sample count libsndfile uses for MS-ADPCM is incorrect. It would seem they are computing the total sample count
+    from the number of blocks, however this results in the inclusion of extra silent samples at the end of the last block. The correct
+    way to know the total sample count is to inspect the "fact" chunk, which should always be present for compressed formats, and should
+    always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my
+    correctness tests against libsndfile, and is disabled by default.
+    */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2)) / fmt.channels;  /* x2 because two samples per byte. */
+    }
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels)) / fmt.channels;
+    }
+#endif
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (!drwav_preinit(pWav, onRead, onSeek, pReadSeekUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
+}
+
+DRWAV_API drwav_bool32 drwav_init_with_metadata(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (!drwav_preinit(pWav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init__internal(pWav, NULL, NULL, flags | DRWAV_WITH_METADATA);
+}
+
+DRWAV_API drwav_metadata* drwav_take_ownership_of_metadata(drwav* pWav)
+{
+    drwav_metadata *result = pWav->pMetadata;
+
+    pWav->pMetadata     = NULL;
+    pWav->metadataCount = 0;
+
+    return result;
+}
+
+
+DRWAV_PRIVATE size_t drwav__write(drwav* pWav, const void* pData, size_t dataSize)
+{
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    /* Generic write. Assumes no byte reordering required. */
+    return pWav->onWrite(pWav->pUserData, pData, dataSize);
+}
+
+DRWAV_PRIVATE size_t drwav__write_byte(drwav* pWav, drwav_uint8 byte)
+{
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    return pWav->onWrite(pWav->pUserData, &byte, 1);
+}
+
+DRWAV_PRIVATE size_t drwav__write_u16ne_to_le(drwav* pWav, drwav_uint16 value)
+{
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    if (!drwav__is_little_endian()) {
+        value = drwav__bswap16(value);
+    }
+
+    return drwav__write(pWav, &value, 2);
+}
+
+DRWAV_PRIVATE size_t drwav__write_u32ne_to_le(drwav* pWav, drwav_uint32 value)
+{
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    if (!drwav__is_little_endian()) {
+        value = drwav__bswap32(value);
+    }
+
+    return drwav__write(pWav, &value, 4);
+}
+
+DRWAV_PRIVATE size_t drwav__write_u64ne_to_le(drwav* pWav, drwav_uint64 value)
+{
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    if (!drwav__is_little_endian()) {
+        value = drwav__bswap64(value);
+    }
+
+    return drwav__write(pWav, &value, 8);
+}
+
+DRWAV_PRIVATE size_t drwav__write_f32ne_to_le(drwav* pWav, float value)
+{
+    union {
+       drwav_uint32 u32;
+       float f32;
+    } u;
+
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    u.f32 = value;
+
+    if (!drwav__is_little_endian()) {
+        u.u32 = drwav__bswap32(u.u32);
+    }
+
+    return drwav__write(pWav, &u.u32, 4);
+}
+
+DRWAV_PRIVATE size_t drwav__write_or_count(drwav* pWav, const void* pData, size_t dataSize)
+{
+    if (pWav == NULL) {
+        return dataSize;
+    }
+
+    return drwav__write(pWav, pData, dataSize);
+}
+
+DRWAV_PRIVATE size_t drwav__write_or_count_byte(drwav* pWav, drwav_uint8 byte)
+{
+    if (pWav == NULL) {
+        return 1;
+    }
+
+    return drwav__write_byte(pWav, byte);
+}
+
+DRWAV_PRIVATE size_t drwav__write_or_count_u16ne_to_le(drwav* pWav, drwav_uint16 value)
+{
+    if (pWav == NULL) {
+        return 2;
+    }
+
+    return drwav__write_u16ne_to_le(pWav, value);
+}
+
+DRWAV_PRIVATE size_t drwav__write_or_count_u32ne_to_le(drwav* pWav, drwav_uint32 value)
+{
+    if (pWav == NULL) {
+        return 4;
+    }
+
+    return drwav__write_u32ne_to_le(pWav, value);
+}
+
+#if 0   /* Unused for now. */
+DRWAV_PRIVATE size_t drwav__write_or_count_u64ne_to_le(drwav* pWav, drwav_uint64 value)
+{
+    if (pWav == NULL) {
+        return 8;
+    }
+
+    return drwav__write_u64ne_to_le(pWav, value);
+}
+#endif
+
+DRWAV_PRIVATE size_t drwav__write_or_count_f32ne_to_le(drwav* pWav, float value)
+{
+    if (pWav == NULL) {
+        return 4;
+    }
+
+    return drwav__write_f32ne_to_le(pWav, value);
+}
+
+DRWAV_PRIVATE size_t drwav__write_or_count_string_to_fixed_size_buf(drwav* pWav, char* str, size_t bufFixedSize)
+{
+    size_t len;
+
+    if (pWav == NULL) {
+        return bufFixedSize;
+    }
+
+    len = drwav__strlen_clamped(str, bufFixedSize);
+    drwav__write_or_count(pWav, str, len);
+
+    if (len < bufFixedSize) {
+        size_t i;
+        for (i = 0; i < bufFixedSize - len; ++i) {
+            drwav__write_byte(pWav, 0);
+        }
+    }
+
+    return bufFixedSize;
+}
+
+
+/* pWav can be NULL meaning just count the bytes that would be written. */
+DRWAV_PRIVATE size_t drwav__write_or_count_metadata(drwav* pWav, drwav_metadata* pMetadatas, drwav_uint32 metadataCount)
+{
+    size_t bytesWritten = 0;
+    drwav_bool32 hasListAdtl = DRWAV_FALSE;
+    drwav_bool32 hasListInfo = DRWAV_FALSE;
+    drwav_uint32 iMetadata;
+
+    if (pMetadatas == NULL || metadataCount == 0) {
+        return 0;
+    }
+
+    for (iMetadata = 0; iMetadata < metadataCount; ++iMetadata) {
+        drwav_metadata* pMetadata = &pMetadatas[iMetadata];
+        drwav_uint32 chunkSize = 0;
+
+        if ((pMetadata->type & drwav_metadata_type_list_all_info_strings) || (pMetadata->type == drwav_metadata_type_unknown && pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_info_list)) {
+            hasListInfo = DRWAV_TRUE;
+        }
+
+        if ((pMetadata->type & drwav_metadata_type_list_all_adtl) || (pMetadata->type == drwav_metadata_type_unknown && pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_adtl_list)) {
+            hasListAdtl = DRWAV_TRUE;
+        }
+
+        switch (pMetadata->type) {
+            case drwav_metadata_type_smpl:
+            {
+                drwav_uint32 iLoop;
+
+                chunkSize = DRWAV_SMPL_BYTES + DRWAV_SMPL_LOOP_BYTES * pMetadata->data.smpl.sampleLoopCount + pMetadata->data.smpl.samplerSpecificDataSizeInBytes;
+
+                bytesWritten += drwav__write_or_count(pWav, "smpl", 4);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.manufacturerId);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.productId);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.samplePeriodNanoseconds);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.midiUnityNote);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.midiPitchFraction);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.smpteFormat);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.smpteOffset);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.sampleLoopCount);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.samplerSpecificDataSizeInBytes);
+
+                for (iLoop = 0; iLoop < pMetadata->data.smpl.sampleLoopCount; ++iLoop) {
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].cuePointId);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].type);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].firstSampleByteOffset);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].lastSampleByteOffset);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].sampleFraction);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].playCount);
+                }
+
+                if (pMetadata->data.smpl.samplerSpecificDataSizeInBytes > 0) {
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.smpl.pSamplerSpecificData, pMetadata->data.smpl.samplerSpecificDataSizeInBytes);
+                }
+            } break;
+
+            case drwav_metadata_type_inst:
+            {
+                chunkSize = DRWAV_INST_BYTES;
+
+                bytesWritten += drwav__write_or_count(pWav, "inst", 4);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.midiUnityNote, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.fineTuneCents, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.gainDecibels, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.lowNote, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.highNote, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.lowVelocity, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.highVelocity, 1);
+            } break;
+
+            case drwav_metadata_type_cue:
+            {
+                drwav_uint32 iCuePoint;
+
+                chunkSize = DRWAV_CUE_BYTES + DRWAV_CUE_POINT_BYTES * pMetadata->data.cue.cuePointCount;
+
+                bytesWritten += drwav__write_or_count(pWav, "cue ", 4);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.cuePointCount);
+                for (iCuePoint = 0; iCuePoint < pMetadata->data.cue.cuePointCount; ++iCuePoint) {
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].id);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].playOrderPosition);
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId, 4);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].chunkStart);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].blockStart);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].sampleByteOffset);
+                }
+            } break;
+
+            case drwav_metadata_type_acid:
+            {
+                chunkSize = DRWAV_ACID_BYTES;
+
+                bytesWritten += drwav__write_or_count(pWav, "acid", 4);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.acid.flags);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.acid.midiUnityNote);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.acid.reserved1);
+                bytesWritten += drwav__write_or_count_f32ne_to_le(pWav, pMetadata->data.acid.reserved2);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.acid.numBeats);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.acid.meterDenominator);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.acid.meterNumerator);
+                bytesWritten += drwav__write_or_count_f32ne_to_le(pWav, pMetadata->data.acid.tempo);
+            } break;
+
+            case drwav_metadata_type_bext:
+            {
+                char reservedBuf[DRWAV_BEXT_RESERVED_BYTES];
+                drwav_uint32 timeReferenceLow;
+                drwav_uint32 timeReferenceHigh;
+
+                chunkSize = DRWAV_BEXT_BYTES + pMetadata->data.bext.codingHistorySize;
+
+                bytesWritten += drwav__write_or_count(pWav, "bext", 4);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+
+                bytesWritten += drwav__write_or_count_string_to_fixed_size_buf(pWav, pMetadata->data.bext.pDescription, DRWAV_BEXT_DESCRIPTION_BYTES);
+                bytesWritten += drwav__write_or_count_string_to_fixed_size_buf(pWav, pMetadata->data.bext.pOriginatorName, DRWAV_BEXT_ORIGINATOR_NAME_BYTES);
+                bytesWritten += drwav__write_or_count_string_to_fixed_size_buf(pWav, pMetadata->data.bext.pOriginatorReference, DRWAV_BEXT_ORIGINATOR_REF_BYTES);
+                bytesWritten += drwav__write_or_count(pWav, pMetadata->data.bext.pOriginationDate, sizeof(pMetadata->data.bext.pOriginationDate));
+                bytesWritten += drwav__write_or_count(pWav, pMetadata->data.bext.pOriginationTime, sizeof(pMetadata->data.bext.pOriginationTime));
+
+                timeReferenceLow  = (drwav_uint32)(pMetadata->data.bext.timeReference & 0xFFFFFFFF);
+                timeReferenceHigh = (drwav_uint32)(pMetadata->data.bext.timeReference >> 32);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, timeReferenceLow);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, timeReferenceHigh);
+
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.version);
+                bytesWritten += drwav__write_or_count(pWav, pMetadata->data.bext.pUMID, DRWAV_BEXT_UMID_BYTES);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.loudnessValue);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.loudnessRange);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.maxTruePeakLevel);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.maxMomentaryLoudness);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.maxShortTermLoudness);
+
+                DRWAV_ZERO_MEMORY(reservedBuf, sizeof(reservedBuf));
+                bytesWritten += drwav__write_or_count(pWav, reservedBuf, sizeof(reservedBuf));
+
+                if (pMetadata->data.bext.codingHistorySize > 0) {
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.bext.pCodingHistory, pMetadata->data.bext.codingHistorySize);
+                }
+            } break;
+
+            case drwav_metadata_type_unknown:
+            {
+                if (pMetadata->data.unknown.chunkLocation == drwav_metadata_location_top_level) {
+                    chunkSize = pMetadata->data.unknown.dataSizeInBytes;
+
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.id, 4);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.pData, pMetadata->data.unknown.dataSizeInBytes);
+                }
+            } break;
+
+            default: break;
+        }
+        if ((chunkSize % 2) != 0) {
+            bytesWritten += drwav__write_or_count_byte(pWav, 0);
+        }
+    }
+
+    if (hasListInfo) {
+        drwav_uint32 chunkSize = 4; /* Start with 4 bytes for "INFO". */
+        for (iMetadata = 0; iMetadata < metadataCount; ++iMetadata) {
+            drwav_metadata* pMetadata = &pMetadatas[iMetadata];
+
+            if ((pMetadata->type & drwav_metadata_type_list_all_info_strings)) {
+                chunkSize += 8; /* For id and string size. */
+                chunkSize += pMetadata->data.infoText.stringLength + 1; /* Include null terminator. */
+            } else if (pMetadata->type == drwav_metadata_type_unknown && pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_info_list) {
+                chunkSize += 8; /* For id string size. */
+                chunkSize += pMetadata->data.unknown.dataSizeInBytes;
+            }
+
+            if ((chunkSize % 2) != 0) {
+                chunkSize += 1;
+            }
+        }
+
+        bytesWritten += drwav__write_or_count(pWav, "LIST", 4);
+        bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+        bytesWritten += drwav__write_or_count(pWav, "INFO", 4);
+
+        for (iMetadata = 0; iMetadata < metadataCount; ++iMetadata) {
+            drwav_metadata* pMetadata = &pMetadatas[iMetadata];
+            drwav_uint32 subchunkSize = 0;
+
+            if (pMetadata->type & drwav_metadata_type_list_all_info_strings) {
+                const char* pID = NULL;
+
+                switch (pMetadata->type) {
+                    case drwav_metadata_type_list_info_software:    pID = "ISFT"; break;
+                    case drwav_metadata_type_list_info_copyright:   pID = "ICOP"; break;
+                    case drwav_metadata_type_list_info_title:       pID = "INAM"; break;
+                    case drwav_metadata_type_list_info_artist:      pID = "IART"; break;
+                    case drwav_metadata_type_list_info_comment:     pID = "ICMT"; break;
+                    case drwav_metadata_type_list_info_date:        pID = "ICRD"; break;
+                    case drwav_metadata_type_list_info_genre:       pID = "IGNR"; break;
+                    case drwav_metadata_type_list_info_album:       pID = "IPRD"; break;
+                    case drwav_metadata_type_list_info_tracknumber: pID = "ITRK"; break;
+                    default: break;
+                }
+
+                DRWAV_ASSERT(pID != NULL);
+
+                if (pMetadata->data.infoText.stringLength) {
+                    subchunkSize = pMetadata->data.infoText.stringLength + 1;
+                    bytesWritten += drwav__write_or_count(pWav, pID, 4);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, subchunkSize);
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.infoText.pString, pMetadata->data.infoText.stringLength);
+                    bytesWritten += drwav__write_or_count_byte(pWav, '\0');
+                }
+            } else if (pMetadata->type == drwav_metadata_type_unknown && pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_info_list) {
+                if (pMetadata->data.unknown.dataSizeInBytes) {
+                    subchunkSize = pMetadata->data.unknown.dataSizeInBytes;
+
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.id, 4);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.unknown.dataSizeInBytes);
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.pData, subchunkSize);
+                }
+            }
+
+            if ((subchunkSize % 2) != 0) {
+                bytesWritten += drwav__write_or_count_byte(pWav, 0);
+            }
+        }
+    }
+
+    if (hasListAdtl) {
+        drwav_uint32 chunkSize = 4; /* start with 4 bytes for "adtl" */
+
+        for (iMetadata = 0; iMetadata < metadataCount; ++iMetadata) {
+            drwav_metadata* pMetadata = &pMetadatas[iMetadata];
+
+            switch (pMetadata->type)
+            {
+                case drwav_metadata_type_list_label:
+                case drwav_metadata_type_list_note:
+                {
+                    chunkSize += 8; /* for id and chunk size */
+                    chunkSize += DRWAV_LIST_LABEL_OR_NOTE_BYTES;
+
+                    if (pMetadata->data.labelOrNote.stringLength > 0) {
+                        chunkSize += pMetadata->data.labelOrNote.stringLength + 1;
+                    }    
+                } break;
+
+                case drwav_metadata_type_list_labelled_cue_region:
+                {
+                    chunkSize += 8; /* for id and chunk size */
+                    chunkSize += DRWAV_LIST_LABELLED_TEXT_BYTES;
+
+                    if (pMetadata->data.labelledCueRegion.stringLength > 0) {
+                        chunkSize += pMetadata->data.labelledCueRegion.stringLength + 1;
+                    }
+                } break;
+
+                case drwav_metadata_type_unknown:
+                {
+                    if (pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_adtl_list) {
+                        chunkSize += 8; /* for id and chunk size */
+                        chunkSize += pMetadata->data.unknown.dataSizeInBytes;
+                    }
+                } break;
+
+                default: break;
+            }
+
+            if ((chunkSize % 2) != 0) {
+                chunkSize += 1;
+            }
+        }
+
+        bytesWritten += drwav__write_or_count(pWav, "LIST", 4);
+        bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+        bytesWritten += drwav__write_or_count(pWav, "adtl", 4);
+
+        for (iMetadata = 0; iMetadata < metadataCount; ++iMetadata) {
+            drwav_metadata* pMetadata = &pMetadatas[iMetadata];
+            drwav_uint32 subchunkSize = 0;
+
+            switch (pMetadata->type)
+            {
+                case drwav_metadata_type_list_label:
+                case drwav_metadata_type_list_note:
+                {
+                    if (pMetadata->data.labelOrNote.stringLength > 0) {
+                        const char *pID = NULL;
+
+                        if (pMetadata->type == drwav_metadata_type_list_label) {
+                            pID = "labl";
+                        }
+                        else if (pMetadata->type == drwav_metadata_type_list_note) {
+                            pID = "note";
+                        }
+
+                        DRWAV_ASSERT(pID != NULL);
+                        DRWAV_ASSERT(pMetadata->data.labelOrNote.pString != NULL);
+
+                        subchunkSize = DRWAV_LIST_LABEL_OR_NOTE_BYTES;
+
+                        bytesWritten += drwav__write_or_count(pWav, pID, 4);
+                        subchunkSize += pMetadata->data.labelOrNote.stringLength + 1;
+                        bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, subchunkSize);
+
+                        bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.labelOrNote.cuePointId);
+                        bytesWritten += drwav__write_or_count(pWav, pMetadata->data.labelOrNote.pString, pMetadata->data.labelOrNote.stringLength);
+                        bytesWritten += drwav__write_or_count_byte(pWav, '\0');
+                    }
+                } break;
+
+                case drwav_metadata_type_list_labelled_cue_region:
+                {
+                    subchunkSize = DRWAV_LIST_LABELLED_TEXT_BYTES;
+
+                    bytesWritten += drwav__write_or_count(pWav, "ltxt", 4);
+                    if (pMetadata->data.labelledCueRegion.stringLength > 0) {
+                        subchunkSize += pMetadata->data.labelledCueRegion.stringLength + 1;
+                    }
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, subchunkSize);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.labelledCueRegion.cuePointId);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.labelledCueRegion.sampleLength);
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.labelledCueRegion.purposeId, 4);
+                    bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.labelledCueRegion.country);
+                    bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.labelledCueRegion.language);
+                    bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.labelledCueRegion.dialect);
+                    bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.labelledCueRegion.codePage);
+
+                    if (pMetadata->data.labelledCueRegion.stringLength > 0) {
+                        DRWAV_ASSERT(pMetadata->data.labelledCueRegion.pString != NULL);
+
+                        bytesWritten += drwav__write_or_count(pWav, pMetadata->data.labelledCueRegion.pString, pMetadata->data.labelledCueRegion.stringLength);
+                        bytesWritten += drwav__write_or_count_byte(pWav, '\0');
+                    }
+                } break;
+
+                case drwav_metadata_type_unknown:
+                {
+                    if (pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_adtl_list) {
+                        subchunkSize = pMetadata->data.unknown.dataSizeInBytes;
+
+                        DRWAV_ASSERT(pMetadata->data.unknown.pData != NULL);
+                        bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.id, 4);
+                        bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, subchunkSize);
+                        bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.pData, subchunkSize);
+                    }
+                } break;
+
+                default: break;
+            }
+
+            if ((subchunkSize % 2) != 0) {
+                bytesWritten += drwav__write_or_count_byte(pWav, 0);
+            }
+        }
+    }
+
+    DRWAV_ASSERT((bytesWritten % 2) == 0);
+
+    return bytesWritten;
+}
+
+DRWAV_PRIVATE drwav_uint32 drwav__riff_chunk_size_riff(drwav_uint64 dataChunkSize, drwav_metadata* pMetadata, drwav_uint32 metadataCount)
+{
+    drwav_uint64 chunkSize = 4 + 24 + (drwav_uint64)drwav__write_or_count_metadata(NULL, pMetadata, metadataCount) + 8 + dataChunkSize + drwav__chunk_padding_size_riff(dataChunkSize); /* 4 = "WAVE". 24 = "fmt " chunk. 8 = "data" + u32 data size. */
+    if (chunkSize > 0xFFFFFFFFUL) {
+        chunkSize = 0xFFFFFFFFUL;
+    }
+
+    return (drwav_uint32)chunkSize; /* Safe cast due to the clamp above. */
+}
+
+DRWAV_PRIVATE drwav_uint32 drwav__data_chunk_size_riff(drwav_uint64 dataChunkSize)
+{
+    if (dataChunkSize <= 0xFFFFFFFFUL) {
+        return (drwav_uint32)dataChunkSize;
+    } else {
+        return 0xFFFFFFFFUL;
+    }
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__riff_chunk_size_w64(drwav_uint64 dataChunkSize)
+{
+    drwav_uint64 dataSubchunkPaddingSize = drwav__chunk_padding_size_w64(dataChunkSize);
+
+    return 80 + 24 + dataChunkSize + dataSubchunkPaddingSize;   /* +24 because W64 includes the size of the GUID and size fields. */
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__data_chunk_size_w64(drwav_uint64 dataChunkSize)
+{
+    return 24 + dataChunkSize;        /* +24 because W64 includes the size of the GUID and size fields. */
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__riff_chunk_size_rf64(drwav_uint64 dataChunkSize, drwav_metadata *metadata, drwav_uint32 numMetadata)
+{
+    drwav_uint64 chunkSize = 4 + 36 + 24 + (drwav_uint64)drwav__write_or_count_metadata(NULL, metadata, numMetadata) + 8 + dataChunkSize + drwav__chunk_padding_size_riff(dataChunkSize); /* 4 = "WAVE". 36 = "ds64" chunk. 24 = "fmt " chunk. 8 = "data" + u32 data size. */
+    if (chunkSize > 0xFFFFFFFFUL) {
+        chunkSize = 0xFFFFFFFFUL;
+    }
+
+    return chunkSize;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__data_chunk_size_rf64(drwav_uint64 dataChunkSize)
+{
+    return dataChunkSize;
+}
+
+
+
+DRWAV_PRIVATE drwav_bool32 drwav_preinit_write(drwav* pWav, const drwav_data_format* pFormat, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pWav == NULL || onWrite == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    if (!isSequential && onSeek == NULL) {
+        return DRWAV_FALSE; /* <-- onSeek is required when in non-sequential mode. */
+    }
+
+    /* Not currently supporting compressed formats. Will need to add support for the "fact" chunk before we enable this. */
+    if (pFormat->format == DR_WAVE_FORMAT_EXTENSIBLE) {
+        return DRWAV_FALSE;
+    }
+    if (pFormat->format == DR_WAVE_FORMAT_ADPCM || pFormat->format == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return DRWAV_FALSE;
+    }
+
+    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
+    pWav->onWrite   = onWrite;
+    pWav->onSeek    = onSeek;
+    pWav->pUserData = pUserData;
+    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
+
+    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
+        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
+    }
+
+    pWav->fmt.formatTag = (drwav_uint16)pFormat->format;
+    pWav->fmt.channels = (drwav_uint16)pFormat->channels;
+    pWav->fmt.sampleRate = pFormat->sampleRate;
+    pWav->fmt.avgBytesPerSec = (drwav_uint32)((pFormat->bitsPerSample * pFormat->sampleRate * pFormat->channels) / 8);
+    pWav->fmt.blockAlign = (drwav_uint16)((pFormat->channels * pFormat->bitsPerSample) / 8);
+    pWav->fmt.bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
+    pWav->fmt.extendedSize = 0;
+    pWav->isSequentialWrite = isSequential;
+
+    return DRWAV_TRUE;
+}
+
+
+DRWAV_PRIVATE drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
+{
+    /* The function assumes drwav_preinit_write() was called beforehand. */
+
+    size_t runningPos = 0;
+    drwav_uint64 initialDataChunkSize = 0;
+    drwav_uint64 chunkSizeFMT;
+
+    /*
+    The initial values for the "RIFF" and "data" chunks depends on whether or not we are initializing in sequential mode or not. In
+    sequential mode we set this to its final values straight away since they can be calculated from the total sample count. In non-
+    sequential mode we initialize it all to zero and fill it out in drwav_uninit() using a backwards seek.
+    */
+    if (pWav->isSequentialWrite) {
+        initialDataChunkSize = (totalSampleCount * pWav->fmt.bitsPerSample) / 8;
+
+        /*
+        The RIFF container has a limit on the number of samples. drwav is not allowing this. There's no practical limits for Wave64
+        so for the sake of simplicity I'm not doing any validation for that.
+        */
+        if (pFormat->container == drwav_container_riff) {
+            if (initialDataChunkSize > (0xFFFFFFFFUL - 36)) {
+                return DRWAV_FALSE; /* Not enough room to store every sample. */
+            }
+        }
+    }
+
+    pWav->dataChunkDataSizeTargetWrite = initialDataChunkSize;
+
+
+    /* "RIFF" chunk. */
+    if (pFormat->container == drwav_container_riff) {
+        drwav_uint32 chunkSizeRIFF = 28 + (drwav_uint32)initialDataChunkSize;   /* +28 = "WAVE" + [sizeof "fmt " chunk] */
+        runningPos += drwav__write(pWav, "RIFF", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, chunkSizeRIFF);
+        runningPos += drwav__write(pWav, "WAVE", 4);
+    } else if (pFormat->container == drwav_container_w64) {
+        drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize;            /* +24 because W64 includes the size of the GUID and size fields. */
+        runningPos += drwav__write(pWav, drwavGUID_W64_RIFF, 16);
+        runningPos += drwav__write_u64ne_to_le(pWav, chunkSizeRIFF);
+        runningPos += drwav__write(pWav, drwavGUID_W64_WAVE, 16);
+    } else if (pFormat->container == drwav_container_rf64) {
+        runningPos += drwav__write(pWav, "RF64", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, 0xFFFFFFFF);               /* Always 0xFFFFFFFF for RF64. Set to a proper value in the "ds64" chunk. */
+        runningPos += drwav__write(pWav, "WAVE", 4);
+    } else {
+        return DRWAV_FALSE; /* Container not supported for writing. */
+    }
+
+
+    /* "ds64" chunk (RF64 only). */
+    if (pFormat->container == drwav_container_rf64) {
+        drwav_uint32 initialds64ChunkSize = 28;                                 /* 28 = [Size of RIFF (8 bytes)] + [Size of DATA (8 bytes)] + [Sample Count (8 bytes)] + [Table Length (4 bytes)]. Table length always set to 0. */
+        drwav_uint64 initialRiffChunkSize = 8 + initialds64ChunkSize + initialDataChunkSize;    /* +8 for the ds64 header. */
+
+        runningPos += drwav__write(pWav, "ds64", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, initialds64ChunkSize);     /* Size of ds64. */
+        runningPos += drwav__write_u64ne_to_le(pWav, initialRiffChunkSize);     /* Size of RIFF. Set to true value at the end. */
+        runningPos += drwav__write_u64ne_to_le(pWav, initialDataChunkSize);     /* Size of DATA. Set to true value at the end. */
+        runningPos += drwav__write_u64ne_to_le(pWav, totalSampleCount);         /* Sample count. */
+        runningPos += drwav__write_u32ne_to_le(pWav, 0);                        /* Table length. Always set to zero in our case since we're not doing any other chunks than "DATA". */
+    }
+
+
+    /* "fmt " chunk. */
+    if (pFormat->container == drwav_container_riff || pFormat->container == drwav_container_rf64) {
+        chunkSizeFMT = 16;
+        runningPos += drwav__write(pWav, "fmt ", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, (drwav_uint32)chunkSizeFMT);
+    } else if (pFormat->container == drwav_container_w64) {
+        chunkSizeFMT = 40;
+        runningPos += drwav__write(pWav, drwavGUID_W64_FMT, 16);
+        runningPos += drwav__write_u64ne_to_le(pWav, chunkSizeFMT);
+    }
+
+    runningPos += drwav__write_u16ne_to_le(pWav, pWav->fmt.formatTag);
+    runningPos += drwav__write_u16ne_to_le(pWav, pWav->fmt.channels);
+    runningPos += drwav__write_u32ne_to_le(pWav, pWav->fmt.sampleRate);
+    runningPos += drwav__write_u32ne_to_le(pWav, pWav->fmt.avgBytesPerSec);
+    runningPos += drwav__write_u16ne_to_le(pWav, pWav->fmt.blockAlign);
+    runningPos += drwav__write_u16ne_to_le(pWav, pWav->fmt.bitsPerSample);
+
+    /* TODO: is a 'fact' chunk required for DR_WAVE_FORMAT_IEEE_FLOAT? */
+
+    if (!pWav->isSequentialWrite && pWav->pMetadata != NULL && pWav->metadataCount > 0 && (pFormat->container == drwav_container_riff || pFormat->container == drwav_container_rf64)) {
+        runningPos += drwav__write_or_count_metadata(pWav, pWav->pMetadata, pWav->metadataCount);
+    }
+
+    pWav->dataChunkDataPos = runningPos;
+
+    /* "data" chunk. */
+    if (pFormat->container == drwav_container_riff) {
+        drwav_uint32 chunkSizeDATA = (drwav_uint32)initialDataChunkSize;
+        runningPos += drwav__write(pWav, "data", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, chunkSizeDATA);
+    } else if (pFormat->container == drwav_container_w64) {
+        drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize;     /* +24 because W64 includes the size of the GUID and size fields. */
+        runningPos += drwav__write(pWav, drwavGUID_W64_DATA, 16);
+        runningPos += drwav__write_u64ne_to_le(pWav, chunkSizeDATA);
+    } else if (pFormat->container == drwav_container_rf64) {
+        runningPos += drwav__write(pWav, "data", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, 0xFFFFFFFF);   /* Always set to 0xFFFFFFFF for RF64. The true size of the data chunk is specified in the ds64 chunk. */
+    }
+
+    /* Set some properties for the client's convenience. */
+    pWav->container = pFormat->container;
+    pWav->channels = (drwav_uint16)pFormat->channels;
+    pWav->sampleRate = pFormat->sampleRate;
+    pWav->bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
+    pWav->translatedFormatTag = (drwav_uint16)pFormat->format;
+    pWav->dataChunkDataPos = runningPos;
+
+    return DRWAV_TRUE;
+}
+
+
+DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (!drwav_preinit_write(pWav, pFormat, DRWAV_FALSE, onWrite, onSeek, pUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_write__internal(pWav, pFormat, 0);               /* DRWAV_FALSE = Not Sequential */
+}
+
+DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (!drwav_preinit_write(pWav, pFormat, DRWAV_TRUE, onWrite, NULL, pUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_write__internal(pWav, pFormat, totalSampleCount); /* DRWAV_TRUE = Sequential */
+}
+
+DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_write_sequential(pWav, pFormat, totalPCMFrameCount*pFormat->channels, onWrite, pUserData, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_write_with_metadata(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks, drwav_metadata* pMetadata, drwav_uint32 metadataCount)
+{
+    if (!drwav_preinit_write(pWav, pFormat, DRWAV_FALSE, onWrite, onSeek, pUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    pWav->pMetadata     = pMetadata;
+    pWav->metadataCount = metadataCount;
+
+    return drwav_init_write__internal(pWav, pFormat, 0);
+}
+
+
+DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalFrameCount, drwav_metadata* pMetadata, drwav_uint32 metadataCount)
+{
+    /* Casting totalFrameCount to drwav_int64 for VC6 compatibility. No issues in practice because nobody is going to exhaust the whole 63 bits. */
+    drwav_uint64 targetDataSizeBytes = (drwav_uint64)((drwav_int64)totalFrameCount * pFormat->channels * pFormat->bitsPerSample/8.0);
+    drwav_uint64 riffChunkSizeBytes;
+    drwav_uint64 fileSizeBytes = 0;
+
+    if (pFormat->container == drwav_container_riff) {
+        riffChunkSizeBytes = drwav__riff_chunk_size_riff(targetDataSizeBytes, pMetadata, metadataCount);
+        fileSizeBytes = (8 + riffChunkSizeBytes);   /* +8 because WAV doesn't include the size of the ChunkID and ChunkSize fields. */
+    } else if (pFormat->container == drwav_container_w64) {
+        riffChunkSizeBytes = drwav__riff_chunk_size_w64(targetDataSizeBytes);
+        fileSizeBytes = riffChunkSizeBytes;
+    } else if (pFormat->container == drwav_container_rf64) {
+        riffChunkSizeBytes = drwav__riff_chunk_size_rf64(targetDataSizeBytes, pMetadata, metadataCount);
+        fileSizeBytes = (8 + riffChunkSizeBytes);   /* +8 because WAV doesn't include the size of the ChunkID and ChunkSize fields. */
+    }
+
+    return fileSizeBytes;
+}
+
+
+#ifndef DR_WAV_NO_STDIO
+
+/* Errno */
+/* drwav_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
+#include <errno.h>
+DRWAV_PRIVATE drwav_result drwav_result_from_errno(int e)
+{
+    switch (e)
+    {
+        case 0: return DRWAV_SUCCESS;
+    #ifdef EPERM
+        case EPERM: return DRWAV_INVALID_OPERATION;
+    #endif
+    #ifdef ENOENT
+        case ENOENT: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef ESRCH
+        case ESRCH: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef EINTR
+        case EINTR: return DRWAV_INTERRUPT;
+    #endif
+    #ifdef EIO
+        case EIO: return DRWAV_IO_ERROR;
+    #endif
+    #ifdef ENXIO
+        case ENXIO: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef E2BIG
+        case E2BIG: return DRWAV_INVALID_ARGS;
+    #endif
+    #ifdef ENOEXEC
+        case ENOEXEC: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef EBADF
+        case EBADF: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ECHILD
+        case ECHILD: return DRWAV_ERROR;
+    #endif
+    #ifdef EAGAIN
+        case EAGAIN: return DRWAV_UNAVAILABLE;
+    #endif
+    #ifdef ENOMEM
+        case ENOMEM: return DRWAV_OUT_OF_MEMORY;
+    #endif
+    #ifdef EACCES
+        case EACCES: return DRWAV_ACCESS_DENIED;
+    #endif
+    #ifdef EFAULT
+        case EFAULT: return DRWAV_BAD_ADDRESS;
+    #endif
+    #ifdef ENOTBLK
+        case ENOTBLK: return DRWAV_ERROR;
+    #endif
+    #ifdef EBUSY
+        case EBUSY: return DRWAV_BUSY;
+    #endif
+    #ifdef EEXIST
+        case EEXIST: return DRWAV_ALREADY_EXISTS;
+    #endif
+    #ifdef EXDEV
+        case EXDEV: return DRWAV_ERROR;
+    #endif
+    #ifdef ENODEV
+        case ENODEV: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef ENOTDIR
+        case ENOTDIR: return DRWAV_NOT_DIRECTORY;
+    #endif
+    #ifdef EISDIR
+        case EISDIR: return DRWAV_IS_DIRECTORY;
+    #endif
+    #ifdef EINVAL
+        case EINVAL: return DRWAV_INVALID_ARGS;
+    #endif
+    #ifdef ENFILE
+        case ENFILE: return DRWAV_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef EMFILE
+        case EMFILE: return DRWAV_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef ENOTTY
+        case ENOTTY: return DRWAV_INVALID_OPERATION;
+    #endif
+    #ifdef ETXTBSY
+        case ETXTBSY: return DRWAV_BUSY;
+    #endif
+    #ifdef EFBIG
+        case EFBIG: return DRWAV_TOO_BIG;
+    #endif
+    #ifdef ENOSPC
+        case ENOSPC: return DRWAV_NO_SPACE;
+    #endif
+    #ifdef ESPIPE
+        case ESPIPE: return DRWAV_BAD_SEEK;
+    #endif
+    #ifdef EROFS
+        case EROFS: return DRWAV_ACCESS_DENIED;
+    #endif
+    #ifdef EMLINK
+        case EMLINK: return DRWAV_TOO_MANY_LINKS;
+    #endif
+    #ifdef EPIPE
+        case EPIPE: return DRWAV_BAD_PIPE;
+    #endif
+    #ifdef EDOM
+        case EDOM: return DRWAV_OUT_OF_RANGE;
+    #endif
+    #ifdef ERANGE
+        case ERANGE: return DRWAV_OUT_OF_RANGE;
+    #endif
+    #ifdef EDEADLK
+        case EDEADLK: return DRWAV_DEADLOCK;
+    #endif
+    #ifdef ENAMETOOLONG
+        case ENAMETOOLONG: return DRWAV_PATH_TOO_LONG;
+    #endif
+    #ifdef ENOLCK
+        case ENOLCK: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOSYS
+        case ENOSYS: return DRWAV_NOT_IMPLEMENTED;
+    #endif
+    #ifdef ENOTEMPTY
+        case ENOTEMPTY: return DRWAV_DIRECTORY_NOT_EMPTY;
+    #endif
+    #ifdef ELOOP
+        case ELOOP: return DRWAV_TOO_MANY_LINKS;
+    #endif
+    #ifdef ENOMSG
+        case ENOMSG: return DRWAV_NO_MESSAGE;
+    #endif
+    #ifdef EIDRM
+        case EIDRM: return DRWAV_ERROR;
+    #endif
+    #ifdef ECHRNG
+        case ECHRNG: return DRWAV_ERROR;
+    #endif
+    #ifdef EL2NSYNC
+        case EL2NSYNC: return DRWAV_ERROR;
+    #endif
+    #ifdef EL3HLT
+        case EL3HLT: return DRWAV_ERROR;
+    #endif
+    #ifdef EL3RST
+        case EL3RST: return DRWAV_ERROR;
+    #endif
+    #ifdef ELNRNG
+        case ELNRNG: return DRWAV_OUT_OF_RANGE;
+    #endif
+    #ifdef EUNATCH
+        case EUNATCH: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOCSI
+        case ENOCSI: return DRWAV_ERROR;
+    #endif
+    #ifdef EL2HLT
+        case EL2HLT: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADE
+        case EBADE: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADR
+        case EBADR: return DRWAV_ERROR;
+    #endif
+    #ifdef EXFULL
+        case EXFULL: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOANO
+        case ENOANO: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADRQC
+        case EBADRQC: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADSLT
+        case EBADSLT: return DRWAV_ERROR;
+    #endif
+    #ifdef EBFONT
+        case EBFONT: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ENOSTR
+        case ENOSTR: return DRWAV_ERROR;
+    #endif
+    #ifdef ENODATA
+        case ENODATA: return DRWAV_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ETIME
+        case ETIME: return DRWAV_TIMEOUT;
+    #endif
+    #ifdef ENOSR
+        case ENOSR: return DRWAV_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ENONET
+        case ENONET: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ENOPKG
+        case ENOPKG: return DRWAV_ERROR;
+    #endif
+    #ifdef EREMOTE
+        case EREMOTE: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOLINK
+        case ENOLINK: return DRWAV_ERROR;
+    #endif
+    #ifdef EADV
+        case EADV: return DRWAV_ERROR;
+    #endif
+    #ifdef ESRMNT
+        case ESRMNT: return DRWAV_ERROR;
+    #endif
+    #ifdef ECOMM
+        case ECOMM: return DRWAV_ERROR;
+    #endif
+    #ifdef EPROTO
+        case EPROTO: return DRWAV_ERROR;
+    #endif
+    #ifdef EMULTIHOP
+        case EMULTIHOP: return DRWAV_ERROR;
+    #endif
+    #ifdef EDOTDOT
+        case EDOTDOT: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADMSG
+        case EBADMSG: return DRWAV_BAD_MESSAGE;
+    #endif
+    #ifdef EOVERFLOW
+        case EOVERFLOW: return DRWAV_TOO_BIG;
+    #endif
+    #ifdef ENOTUNIQ
+        case ENOTUNIQ: return DRWAV_NOT_UNIQUE;
+    #endif
+    #ifdef EBADFD
+        case EBADFD: return DRWAV_ERROR;
+    #endif
+    #ifdef EREMCHG
+        case EREMCHG: return DRWAV_ERROR;
+    #endif
+    #ifdef ELIBACC
+        case ELIBACC: return DRWAV_ACCESS_DENIED;
+    #endif
+    #ifdef ELIBBAD
+        case ELIBBAD: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ELIBSCN
+        case ELIBSCN: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ELIBMAX
+        case ELIBMAX: return DRWAV_ERROR;
+    #endif
+    #ifdef ELIBEXEC
+        case ELIBEXEC: return DRWAV_ERROR;
+    #endif
+    #ifdef EILSEQ
+        case EILSEQ: return DRWAV_INVALID_DATA;
+    #endif
+    #ifdef ERESTART
+        case ERESTART: return DRWAV_ERROR;
+    #endif
+    #ifdef ESTRPIPE
+        case ESTRPIPE: return DRWAV_ERROR;
+    #endif
+    #ifdef EUSERS
+        case EUSERS: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOTSOCK
+        case ENOTSOCK: return DRWAV_NOT_SOCKET;
+    #endif
+    #ifdef EDESTADDRREQ
+        case EDESTADDRREQ: return DRWAV_NO_ADDRESS;
+    #endif
+    #ifdef EMSGSIZE
+        case EMSGSIZE: return DRWAV_TOO_BIG;
+    #endif
+    #ifdef EPROTOTYPE
+        case EPROTOTYPE: return DRWAV_BAD_PROTOCOL;
+    #endif
+    #ifdef ENOPROTOOPT
+        case ENOPROTOOPT: return DRWAV_PROTOCOL_UNAVAILABLE;
+    #endif
+    #ifdef EPROTONOSUPPORT
+        case EPROTONOSUPPORT: return DRWAV_PROTOCOL_NOT_SUPPORTED;
+    #endif
+    #ifdef ESOCKTNOSUPPORT
+        case ESOCKTNOSUPPORT: return DRWAV_SOCKET_NOT_SUPPORTED;
+    #endif
+    #ifdef EOPNOTSUPP
+        case EOPNOTSUPP: return DRWAV_INVALID_OPERATION;
+    #endif
+    #ifdef EPFNOSUPPORT
+        case EPFNOSUPPORT: return DRWAV_PROTOCOL_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EAFNOSUPPORT
+        case EAFNOSUPPORT: return DRWAV_ADDRESS_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EADDRINUSE
+        case EADDRINUSE: return DRWAV_ALREADY_IN_USE;
+    #endif
+    #ifdef EADDRNOTAVAIL
+        case EADDRNOTAVAIL: return DRWAV_ERROR;
+    #endif
+    #ifdef ENETDOWN
+        case ENETDOWN: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ENETUNREACH
+        case ENETUNREACH: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ENETRESET
+        case ENETRESET: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ECONNABORTED
+        case ECONNABORTED: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ECONNRESET
+        case ECONNRESET: return DRWAV_CONNECTION_RESET;
+    #endif
+    #ifdef ENOBUFS
+        case ENOBUFS: return DRWAV_NO_SPACE;
+    #endif
+    #ifdef EISCONN
+        case EISCONN: return DRWAV_ALREADY_CONNECTED;
+    #endif
+    #ifdef ENOTCONN
+        case ENOTCONN: return DRWAV_NOT_CONNECTED;
+    #endif
+    #ifdef ESHUTDOWN
+        case ESHUTDOWN: return DRWAV_ERROR;
+    #endif
+    #ifdef ETOOMANYREFS
+        case ETOOMANYREFS: return DRWAV_ERROR;
+    #endif
+    #ifdef ETIMEDOUT
+        case ETIMEDOUT: return DRWAV_TIMEOUT;
+    #endif
+    #ifdef ECONNREFUSED
+        case ECONNREFUSED: return DRWAV_CONNECTION_REFUSED;
+    #endif
+    #ifdef EHOSTDOWN
+        case EHOSTDOWN: return DRWAV_NO_HOST;
+    #endif
+    #ifdef EHOSTUNREACH
+        case EHOSTUNREACH: return DRWAV_NO_HOST;
+    #endif
+    #ifdef EALREADY
+        case EALREADY: return DRWAV_IN_PROGRESS;
+    #endif
+    #ifdef EINPROGRESS
+        case EINPROGRESS: return DRWAV_IN_PROGRESS;
+    #endif
+    #ifdef ESTALE
+        case ESTALE: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef EUCLEAN
+        case EUCLEAN: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOTNAM
+        case ENOTNAM: return DRWAV_ERROR;
+    #endif
+    #ifdef ENAVAIL
+        case ENAVAIL: return DRWAV_ERROR;
+    #endif
+    #ifdef EISNAM
+        case EISNAM: return DRWAV_ERROR;
+    #endif
+    #ifdef EREMOTEIO
+        case EREMOTEIO: return DRWAV_IO_ERROR;
+    #endif
+    #ifdef EDQUOT
+        case EDQUOT: return DRWAV_NO_SPACE;
+    #endif
+    #ifdef ENOMEDIUM
+        case ENOMEDIUM: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef EMEDIUMTYPE
+        case EMEDIUMTYPE: return DRWAV_ERROR;
+    #endif
+    #ifdef ECANCELED
+        case ECANCELED: return DRWAV_CANCELLED;
+    #endif
+    #ifdef ENOKEY
+        case ENOKEY: return DRWAV_ERROR;
+    #endif
+    #ifdef EKEYEXPIRED
+        case EKEYEXPIRED: return DRWAV_ERROR;
+    #endif
+    #ifdef EKEYREVOKED
+        case EKEYREVOKED: return DRWAV_ERROR;
+    #endif
+    #ifdef EKEYREJECTED
+        case EKEYREJECTED: return DRWAV_ERROR;
+    #endif
+    #ifdef EOWNERDEAD
+        case EOWNERDEAD: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOTRECOVERABLE
+        case ENOTRECOVERABLE: return DRWAV_ERROR;
+    #endif
+    #ifdef ERFKILL
+        case ERFKILL: return DRWAV_ERROR;
+    #endif
+    #ifdef EHWPOISON
+        case EHWPOISON: return DRWAV_ERROR;
+    #endif
+        default: return DRWAV_ERROR;
+    }
+}
+/* End Errno */
+
+/* fopen */
+DRWAV_PRIVATE drwav_result drwav_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
+{
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    errno_t err;
+#endif
+
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    err = fopen_s(ppFile, pFilePath, pOpenMode);
+    if (err != 0) {
+        return drwav_result_from_errno(err);
+    }
+#else
+#if defined(_WIN32) || defined(__APPLE__)
+    *ppFile = fopen(pFilePath, pOpenMode);
+#else
+    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
+        *ppFile = fopen64(pFilePath, pOpenMode);
+    #else
+        *ppFile = fopen(pFilePath, pOpenMode);
+    #endif
+#endif
+    if (*ppFile == NULL) {
+        drwav_result result = drwav_result_from_errno(errno);
+        if (result == DRWAV_SUCCESS) {
+            result = DRWAV_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
+        }
+
+        return result;
+    }
+#endif
+
+    return DRWAV_SUCCESS;
+}
+
+/*
+_wfopen() isn't always available in all compilation environments.
+
+    * Windows only.
+    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
+    * MinGW-64 (both 32- and 64-bit) seems to support it.
+    * MinGW wraps it in !defined(__STRICT_ANSI__).
+    * OpenWatcom wraps it in !defined(_NO_EXT_KEYS).
+
+This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
+fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
+*/
+#if defined(_WIN32)
+    #if defined(_MSC_VER) || defined(__MINGW64__) || (!defined(__STRICT_ANSI__) && !defined(_NO_EXT_KEYS))
+        #define DRWAV_HAS_WFOPEN
+    #endif
+#endif
+
+#ifndef DR_WAV_NO_WCHAR
+DRWAV_PRIVATE drwav_result drwav_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+#if defined(DRWAV_HAS_WFOPEN)
+    {
+        /* Use _wfopen() on Windows. */
+    #if defined(_MSC_VER) && _MSC_VER >= 1400
+        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
+        if (err != 0) {
+            return drwav_result_from_errno(err);
+        }
+    #else
+        *ppFile = _wfopen(pFilePath, pOpenMode);
+        if (*ppFile == NULL) {
+            return drwav_result_from_errno(errno);
+        }
+    #endif
+        (void)pAllocationCallbacks;
+    }
+#else
+	/*
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because
+	fopen() is locale specific. The only real way I can think of to do this is with wcsrtombs(). Note
+	that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler
+	error I'll look into improving compatibility.
+    */
+
+	/*
+	Some compilers don't support wchar_t or wcsrtombs() which we're using below. In this case we just
+	need to abort with an error. If you encounter a compiler lacking such support, add it to this list
+	and submit a bug report and it'll be added to the library upstream.
+	*/
+	#if defined(__DJGPP__)
+	{
+		/* Nothing to do here. This will fall through to the error check below. */
+	}
+	#else
+    {
+        mbstate_t mbs;
+        size_t lenMB;
+        const wchar_t* pFilePathTemp = pFilePath;
+        char* pFilePathMB = NULL;
+        char pOpenModeMB[32] = {0};
+
+        /* Get the length first. */
+        DRWAV_ZERO_OBJECT(&mbs);
+        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
+        if (lenMB == (size_t)-1) {
+            return drwav_result_from_errno(errno);
+        }
+
+        pFilePathMB = (char*)drwav__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
+        if (pFilePathMB == NULL) {
+            return DRWAV_OUT_OF_MEMORY;
+        }
+
+        pFilePathTemp = pFilePath;
+        DRWAV_ZERO_OBJECT(&mbs);
+        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
+
+        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
+        {
+            size_t i = 0;
+            for (;;) {
+                if (pOpenMode[i] == 0) {
+                    pOpenModeMB[i] = '\0';
+                    break;
+                }
+
+                pOpenModeMB[i] = (char)pOpenMode[i];
+                i += 1;
+            }
+        }
+
+        *ppFile = fopen(pFilePathMB, pOpenModeMB);
+
+        drwav__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
+    }
+	#endif
+
+    if (*ppFile == NULL) {
+        return DRWAV_ERROR;
+    }
+#endif
+
+    return DRWAV_SUCCESS;
+}
+#endif
+/* End fopen */
+
+
+DRWAV_PRIVATE size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead)
+{
+    return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData);
+}
+
+DRWAV_PRIVATE size_t drwav__on_write_stdio(void* pUserData, const void* pData, size_t bytesToWrite)
+{
+    return fwrite(pData, 1, bytesToWrite, (FILE*)pUserData);
+}
+
+DRWAV_PRIVATE drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek_origin origin)
+{
+    return fseek((FILE*)pUserData, offset, (origin == drwav_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+}
+
+DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_ex(pWav, filename, NULL, NULL, 0, pAllocationCallbacks);
+}
+
+
+DRWAV_PRIVATE drwav_bool32 drwav_init_file__internal_FILE(drwav* pWav, FILE* pFile, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav_bool32 result;
+
+    result = drwav_preinit(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+    
+    result = drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_fopen(&pFile, filename, "rb") != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
+}
+
+#ifndef DR_WAV_NO_WCHAR
+DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_ex_w(pWav, filename, NULL, NULL, 0, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_wfopen(&pFile, filename, L"rb", pAllocationCallbacks) != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
+}
+#endif
+
+DRWAV_API drwav_bool32 drwav_init_file_with_metadata(drwav* pWav, const char* filename, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_fopen(&pFile, filename, "rb") != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file__internal_FILE(pWav, pFile, NULL, NULL, flags | DRWAV_WITH_METADATA, pAllocationCallbacks);
+}
+
+#ifndef DR_WAV_NO_WCHAR
+DRWAV_API drwav_bool32 drwav_init_file_with_metadata_w(drwav* pWav, const wchar_t* filename, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_wfopen(&pFile, filename, L"rb", pAllocationCallbacks) != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file__internal_FILE(pWav, pFile, NULL, NULL, flags | DRWAV_WITH_METADATA, pAllocationCallbacks);
+}
+#endif
+
+
+DRWAV_PRIVATE drwav_bool32 drwav_init_file_write__internal_FILE(drwav* pWav, FILE* pFile, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav_bool32 result;
+
+    result = drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    result = drwav_init_write__internal(pWav, pFormat, totalSampleCount);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_PRIVATE drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_fopen(&pFile, filename, "wb") != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
+}
+
+#ifndef DR_WAV_NO_WCHAR
+DRWAV_PRIVATE drwav_bool32 drwav_init_file_write_w__internal(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_wfopen(&pFile, filename, L"wb", pAllocationCallbacks) != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
+}
+#endif
+
+DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_file_write_sequential(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
+}
+
+#ifndef DR_WAV_NO_WCHAR
+DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write_w__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write_w__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_file_write_sequential_w(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
+}
+#endif
+#endif  /* DR_WAV_NO_STDIO */
+
+
+DRWAV_PRIVATE size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead)
+{
+    drwav* pWav = (drwav*)pUserData;
+    size_t bytesRemaining;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(pWav->memoryStream.dataSize >= pWav->memoryStream.currentReadPos);
+
+    bytesRemaining = pWav->memoryStream.dataSize - pWav->memoryStream.currentReadPos;
+    if (bytesToRead > bytesRemaining) {
+        bytesToRead = bytesRemaining;
+    }
+
+    if (bytesToRead > 0) {
+        DRWAV_COPY_MEMORY(pBufferOut, pWav->memoryStream.data + pWav->memoryStream.currentReadPos, bytesToRead);
+        pWav->memoryStream.currentReadPos += bytesToRead;
+    }
+
+    return bytesToRead;
+}
+
+DRWAV_PRIVATE drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_seek_origin origin)
+{
+    drwav* pWav = (drwav*)pUserData;
+    DRWAV_ASSERT(pWav != NULL);
+
+    if (origin == drwav_seek_origin_current) {
+        if (offset > 0) {
+            if (pWav->memoryStream.currentReadPos + offset > pWav->memoryStream.dataSize) {
+                return DRWAV_FALSE; /* Trying to seek too far forward. */
+            }
+        } else {
+            if (pWav->memoryStream.currentReadPos < (size_t)-offset) {
+                return DRWAV_FALSE; /* Trying to seek too far backwards. */
+            }
+        }
+
+        /* This will never underflow thanks to the clamps above. */
+        pWav->memoryStream.currentReadPos += offset;
+    } else {
+        if ((drwav_uint32)offset <= pWav->memoryStream.dataSize) {
+            pWav->memoryStream.currentReadPos = offset;
+        } else {
+            return DRWAV_FALSE; /* Trying to seek too far forward. */
+        }
+    }
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_PRIVATE size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_t bytesToWrite)
+{
+    drwav* pWav = (drwav*)pUserData;
+    size_t bytesRemaining;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(pWav->memoryStreamWrite.dataCapacity >= pWav->memoryStreamWrite.currentWritePos);
+
+    bytesRemaining = pWav->memoryStreamWrite.dataCapacity - pWav->memoryStreamWrite.currentWritePos;
+    if (bytesRemaining < bytesToWrite) {
+        /* Need to reallocate. */
+        void* pNewData;
+        size_t newDataCapacity = (pWav->memoryStreamWrite.dataCapacity == 0) ? 256 : pWav->memoryStreamWrite.dataCapacity * 2;
+
+        /* If doubling wasn't enough, just make it the minimum required size to write the data. */
+        if ((newDataCapacity - pWav->memoryStreamWrite.currentWritePos) < bytesToWrite) {
+            newDataCapacity = pWav->memoryStreamWrite.currentWritePos + bytesToWrite;
+        }
+
+        pNewData = drwav__realloc_from_callbacks(*pWav->memoryStreamWrite.ppData, newDataCapacity, pWav->memoryStreamWrite.dataCapacity, &pWav->allocationCallbacks);
+        if (pNewData == NULL) {
+            return 0;
+        }
+
+        *pWav->memoryStreamWrite.ppData = pNewData;
+        pWav->memoryStreamWrite.dataCapacity = newDataCapacity;
+    }
+
+    DRWAV_COPY_MEMORY(((drwav_uint8*)(*pWav->memoryStreamWrite.ppData)) + pWav->memoryStreamWrite.currentWritePos, pDataIn, bytesToWrite);
+
+    pWav->memoryStreamWrite.currentWritePos += bytesToWrite;
+    if (pWav->memoryStreamWrite.dataSize < pWav->memoryStreamWrite.currentWritePos) {
+        pWav->memoryStreamWrite.dataSize = pWav->memoryStreamWrite.currentWritePos;
+    }
+
+    *pWav->memoryStreamWrite.pDataSize = pWav->memoryStreamWrite.dataSize;
+
+    return bytesToWrite;
+}
+
+DRWAV_PRIVATE drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drwav_seek_origin origin)
+{
+    drwav* pWav = (drwav*)pUserData;
+    DRWAV_ASSERT(pWav != NULL);
+
+    if (origin == drwav_seek_origin_current) {
+        if (offset > 0) {
+            if (pWav->memoryStreamWrite.currentWritePos + offset > pWav->memoryStreamWrite.dataSize) {
+                offset = (int)(pWav->memoryStreamWrite.dataSize - pWav->memoryStreamWrite.currentWritePos);  /* Trying to seek too far forward. */
+            }
+        } else {
+            if (pWav->memoryStreamWrite.currentWritePos < (size_t)-offset) {
+                offset = -(int)pWav->memoryStreamWrite.currentWritePos;  /* Trying to seek too far backwards. */
+            }
+        }
+
+        /* This will never underflow thanks to the clamps above. */
+        pWav->memoryStreamWrite.currentWritePos += offset;
+    } else {
+        if ((drwav_uint32)offset <= pWav->memoryStreamWrite.dataSize) {
+            pWav->memoryStreamWrite.currentWritePos = offset;
+        } else {
+            pWav->memoryStreamWrite.currentWritePos = pWav->memoryStreamWrite.dataSize;  /* Trying to seek too far forward. */
+        }
+    }
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_memory_ex(pWav, data, dataSize, NULL, NULL, 0, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (data == NULL || dataSize == 0) {
+        return DRWAV_FALSE;
+    }
+
+    if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, pWav, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    pWav->memoryStream.data = (const drwav_uint8*)data;
+    pWav->memoryStream.dataSize = dataSize;
+    pWav->memoryStream.currentReadPos = 0;
+
+    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_with_metadata(drwav* pWav, const void* data, size_t dataSize, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (data == NULL || dataSize == 0) {
+        return DRWAV_FALSE;
+    }
+
+    if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, pWav, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    pWav->memoryStream.data = (const drwav_uint8*)data;
+    pWav->memoryStream.dataSize = dataSize;
+    pWav->memoryStream.currentReadPos = 0;
+
+    return drwav_init__internal(pWav, NULL, NULL, flags | DRWAV_WITH_METADATA);
+}
+
+
+DRWAV_PRIVATE drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (ppData == NULL || pDataSize == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    *ppData = NULL; /* Important because we're using realloc()! */
+    *pDataSize = 0;
+
+    if (!drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, pWav, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    pWav->memoryStreamWrite.ppData = ppData;
+    pWav->memoryStreamWrite.pDataSize = pDataSize;
+    pWav->memoryStreamWrite.dataSize = 0;
+    pWav->memoryStreamWrite.dataCapacity = 0;
+    pWav->memoryStreamWrite.currentWritePos = 0;
+
+    return drwav_init_write__internal(pWav, pFormat, totalSampleCount);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_memory_write_sequential(pWav, ppData, pDataSize, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
+}
+
+
+
+DRWAV_API drwav_result drwav_uninit(drwav* pWav)
+{
+    drwav_result result = DRWAV_SUCCESS;
+
+    if (pWav == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+    /*
+    If the drwav object was opened in write mode we'll need to finalize a few things:
+      - Make sure the "data" chunk is aligned to 16-bits for RIFF containers, or 64 bits for W64 containers.
+      - Set the size of the "data" chunk.
+    */
+    if (pWav->onWrite != NULL) {
+        drwav_uint32 paddingSize = 0;
+
+        /* Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding. */
+        if (pWav->container == drwav_container_riff || pWav->container == drwav_container_rf64) {
+            paddingSize = drwav__chunk_padding_size_riff(pWav->dataChunkDataSize);
+        } else {
+            paddingSize = drwav__chunk_padding_size_w64(pWav->dataChunkDataSize);
+        }
+
+        if (paddingSize > 0) {
+            drwav_uint64 paddingData = 0;
+            drwav__write(pWav, &paddingData, paddingSize);  /* Byte order does not matter for this. */
+        }
+
+        /*
+        Chunk sizes. When using sequential mode, these will have been filled in at initialization time. We only need
+        to do this when using non-sequential mode.
+        */
+        if (pWav->onSeek && !pWav->isSequentialWrite) {
+            if (pWav->container == drwav_container_riff) {
+                /* The "RIFF" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, 4, drwav_seek_origin_start)) {
+                    drwav_uint32 riffChunkSize = drwav__riff_chunk_size_riff(pWav->dataChunkDataSize, pWav->pMetadata, pWav->metadataCount);
+                    drwav__write_u32ne_to_le(pWav, riffChunkSize);
+                }
+
+                /* The "data" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos - 4, drwav_seek_origin_start)) {
+                    drwav_uint32 dataChunkSize = drwav__data_chunk_size_riff(pWav->dataChunkDataSize);
+                    drwav__write_u32ne_to_le(pWav, dataChunkSize);
+                }
+            } else if (pWav->container == drwav_container_w64) {
+                /* The "RIFF" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, 16, drwav_seek_origin_start)) {
+                    drwav_uint64 riffChunkSize = drwav__riff_chunk_size_w64(pWav->dataChunkDataSize);
+                    drwav__write_u64ne_to_le(pWav, riffChunkSize);
+                }
+
+                /* The "data" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos - 8, drwav_seek_origin_start)) {
+                    drwav_uint64 dataChunkSize = drwav__data_chunk_size_w64(pWav->dataChunkDataSize);
+                    drwav__write_u64ne_to_le(pWav, dataChunkSize);
+                }
+            } else if (pWav->container == drwav_container_rf64) {
+                /* We only need to update the ds64 chunk. The "RIFF" and "data" chunks always have their sizes set to 0xFFFFFFFF for RF64. */
+                int ds64BodyPos = 12 + 8;
+
+                /* The "RIFF" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, ds64BodyPos + 0, drwav_seek_origin_start)) {
+                    drwav_uint64 riffChunkSize = drwav__riff_chunk_size_rf64(pWav->dataChunkDataSize, pWav->pMetadata, pWav->metadataCount);
+                    drwav__write_u64ne_to_le(pWav, riffChunkSize);
+                }
+
+                /* The "data" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, ds64BodyPos + 8, drwav_seek_origin_start)) {
+                    drwav_uint64 dataChunkSize = drwav__data_chunk_size_rf64(pWav->dataChunkDataSize);
+                    drwav__write_u64ne_to_le(pWav, dataChunkSize);
+                }
+            }
+        }
+
+        /* Validation for sequential mode. */
+        if (pWav->isSequentialWrite) {
+            if (pWav->dataChunkDataSize != pWav->dataChunkDataSizeTargetWrite) {
+                result = DRWAV_INVALID_FILE;
+            }
+        }
+    } else {
+        drwav_free(pWav->pMetadata, &pWav->allocationCallbacks);
+    }
+
+#ifndef DR_WAV_NO_STDIO
+    /*
+    If we opened the file with drwav_open_file() we will want to close the file handle. We can know whether or not drwav_open_file()
+    was used by looking at the onRead and onSeek callbacks.
+    */
+    if (pWav->onRead == drwav__on_read_stdio || pWav->onWrite == drwav__on_write_stdio) {
+        fclose((FILE*)pWav->pUserData);
+    }
+#endif
+
+    return result;
+}
+
+
+
+DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut)
+{
+    size_t bytesRead;
+    drwav_uint32 bytesPerFrame;
+
+    if (pWav == NULL || bytesToRead == 0) {
+        return 0;   /* Invalid args. */
+    }
+
+    if (bytesToRead > pWav->bytesRemaining) {
+        bytesToRead = (size_t)pWav->bytesRemaining;
+    }
+
+    if (bytesToRead == 0) {
+        return 0;   /* At end. */
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;   /* Could not determine the bytes per frame. */
+    }
+
+    if (pBufferOut != NULL) {
+        bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead);
+    } else {
+        /* We need to seek. If we fail, we need to read-and-discard to make sure we get a good byte count. */
+        bytesRead = 0;
+        while (bytesRead < bytesToRead) {
+            size_t bytesToSeek = (bytesToRead - bytesRead);
+            if (bytesToSeek > 0x7FFFFFFF) {
+                bytesToSeek = 0x7FFFFFFF;
+            }
+
+            if (pWav->onSeek(pWav->pUserData, (int)bytesToSeek, drwav_seek_origin_current) == DRWAV_FALSE) {
+                break;
+            }
+
+            bytesRead += bytesToSeek;
+        }
+
+        /* When we get here we may need to read-and-discard some data. */
+        while (bytesRead < bytesToRead) {
+            drwav_uint8 buffer[4096];
+            size_t bytesSeeked;
+            size_t bytesToSeek = (bytesToRead - bytesRead);
+            if (bytesToSeek > sizeof(buffer)) {
+                bytesToSeek = sizeof(buffer);
+            }
+
+            bytesSeeked = pWav->onRead(pWav->pUserData, buffer, bytesToSeek);
+            bytesRead += bytesSeeked;
+
+            if (bytesSeeked < bytesToSeek) {
+                break;  /* Reached the end. */
+            }
+        }
+    }
+
+    pWav->readCursorInPCMFrames += bytesRead / bytesPerFrame;
+
+    pWav->bytesRemaining -= bytesRead;
+    return bytesRead;
+}
+
+
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
+{
+    drwav_uint32 bytesPerFrame;
+    drwav_uint64 bytesToRead;   /* Intentionally uint64 instead of size_t so we can do a check that we're not reading too much on 32-bit builds. */
+    drwav_uint64 framesRemainingInFile;
+
+    if (pWav == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    /* Cannot use this function for compressed formats. */
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        return 0;
+    }
+
+    framesRemainingInFile = pWav->totalPCMFrameCount - pWav->readCursorInPCMFrames;
+    if (framesToRead > framesRemainingInFile) {
+        framesToRead = framesRemainingInFile;
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    bytesToRead = framesToRead * bytesPerFrame;
+    if (bytesToRead > DRWAV_SIZE_MAX) {
+        bytesToRead = (DRWAV_SIZE_MAX / bytesPerFrame) * bytesPerFrame; /* Round the number of bytes to read to a clean frame boundary. */
+    }
+
+    /*
+    Doing an explicit check here just to make it clear that we don't want to be attempt to read anything if there's no bytes to read. There
+    *could* be a time where it evaluates to 0 due to overflowing.
+    */
+    if (bytesToRead == 0) {
+        return 0;
+    }
+
+    return drwav_read_raw(pWav, (size_t)bytesToRead, pBufferOut) / bytesPerFrame;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
+
+    if (pBufferOut != NULL) {
+        drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+        if (bytesPerFrame == 0) {
+            return 0;   /* Could not get the bytes per frame which means bytes per sample cannot be determined and we don't know how to byte swap. */
+        }
+
+        drwav__bswap_samples(pBufferOut, framesRead*pWav->channels, bytesPerFrame/pWav->channels);
+    }
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
+{
+    drwav_uint64 framesRead = 0;
+
+    if (drwav_is_container_be(pWav->container)) {
+        /*
+        Special case for AIFF. AIFF is a big-endian encoded format, but it supports a format that is
+        PCM in little-endian encoding. In this case, we fall through this branch and treate it as
+        little-endian.
+        */
+        if (pWav->container != drwav_container_aiff || pWav->aiff.isLE == DRWAV_FALSE) {
+            if (drwav__is_little_endian()) {
+                framesRead = drwav_read_pcm_frames_be(pWav, framesToRead, pBufferOut);
+            } else {
+                framesRead = drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
+            }
+
+            goto post_process;
+        }
+    }
+
+    /* Getting here means the data should be considered little-endian. */
+    if (drwav__is_little_endian()) {
+        framesRead = drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
+    } else {
+        framesRead = drwav_read_pcm_frames_be(pWav, framesToRead, pBufferOut);
+    }
+
+    /*
+    Here is where we check if we need to do a signed/unsigned conversion for AIFF. The reason we need to do this
+    is because dr_wav always assumes an 8-bit sample is unsigned, whereas AIFF can have signed 8-bit formats.
+    */
+    post_process:
+    {
+        if (pWav->container == drwav_container_aiff && pWav->bitsPerSample == 8 && pWav->aiff.isUnsigned == DRWAV_FALSE) {
+            if (pBufferOut != NULL) {
+                drwav_uint64 iSample;
+
+                for (iSample = 0; iSample < framesRead * pWav->channels; iSample += 1) {
+                    ((drwav_uint8*)pBufferOut)[iSample] += 128;
+                }
+            }
+        }
+    }
+
+    return framesRead;
+}
+
+
+
+DRWAV_PRIVATE drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav)
+{
+    if (pWav->onWrite != NULL) {
+        return DRWAV_FALSE; /* No seeking in write mode. */
+    }
+
+    if (!pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos, drwav_seek_origin_start)) {
+        return DRWAV_FALSE;
+    }
+
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        /* Cached data needs to be cleared for compressed formats. */
+        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+            DRWAV_ZERO_OBJECT(&pWav->msadpcm);
+        } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+            DRWAV_ZERO_OBJECT(&pWav->ima);
+        } else {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* If this assertion is triggered it means I've implemented a new compressed format but forgot to add a branch for it here. */
+        }
+    }
+
+    pWav->readCursorInPCMFrames = 0;
+    pWav->bytesRemaining = pWav->dataChunkDataSize;
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex)
+{
+    /* Seeking should be compatible with wave files > 2GB. */
+
+    if (pWav == NULL || pWav->onSeek == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    /* No seeking in write mode. */
+    if (pWav->onWrite != NULL) {
+        return DRWAV_FALSE;
+    }
+
+    /* If there are no samples, just return DRWAV_TRUE without doing anything. */
+    if (pWav->totalPCMFrameCount == 0) {
+        return DRWAV_TRUE;
+    }
+
+    /* Make sure the sample is clamped. */
+    if (targetFrameIndex > pWav->totalPCMFrameCount) {
+        targetFrameIndex = pWav->totalPCMFrameCount;
+    }
+
+    /*
+    For compressed formats we just use a slow generic seek. If we are seeking forward we just seek forward. If we are going backwards we need
+    to seek back to the start.
+    */
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        /* TODO: This can be optimized. */
+
+        /*
+        If we're seeking forward it's simple - just keep reading samples until we hit the sample we're requesting. If we're seeking backwards,
+        we first need to seek back to the start and then just do the same thing as a forward seek.
+        */
+        if (targetFrameIndex < pWav->readCursorInPCMFrames) {
+            if (!drwav_seek_to_first_pcm_frame(pWav)) {
+                return DRWAV_FALSE;
+            }
+        }
+
+        if (targetFrameIndex > pWav->readCursorInPCMFrames) {
+            drwav_uint64 offsetInFrames = targetFrameIndex - pWav->readCursorInPCMFrames;
+
+            drwav_int16 devnull[2048];
+            while (offsetInFrames > 0) {
+                drwav_uint64 framesRead = 0;
+                drwav_uint64 framesToRead = offsetInFrames;
+                if (framesToRead > drwav_countof(devnull)/pWav->channels) {
+                    framesToRead = drwav_countof(devnull)/pWav->channels;
+                }
+
+                if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+                    framesRead = drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, devnull);
+                } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+                    framesRead = drwav_read_pcm_frames_s16__ima(pWav, framesToRead, devnull);
+                } else {
+                    DRWAV_ASSERT(DRWAV_FALSE);  /* If this assertion is triggered it means I've implemented a new compressed format but forgot to add a branch for it here. */
+                }
+
+                if (framesRead != framesToRead) {
+                    return DRWAV_FALSE;
+                }
+
+                offsetInFrames -= framesRead;
+            }
+        }
+    } else {
+        drwav_uint64 totalSizeInBytes;
+        drwav_uint64 currentBytePos;
+        drwav_uint64 targetBytePos;
+        drwav_uint64 offset;
+        drwav_uint32 bytesPerFrame;
+
+        bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+        if (bytesPerFrame == 0) {
+            return DRWAV_FALSE; /* Not able to calculate offset. */
+        }
+
+        totalSizeInBytes = pWav->totalPCMFrameCount * bytesPerFrame;
+        /*DRWAV_ASSERT(totalSizeInBytes >= pWav->bytesRemaining);*/
+
+        currentBytePos = totalSizeInBytes - pWav->bytesRemaining;
+        targetBytePos  = targetFrameIndex * bytesPerFrame;
+
+        if (currentBytePos < targetBytePos) {
+            /* Offset forwards. */
+            offset = (targetBytePos - currentBytePos);
+        } else {
+            /* Offset backwards. */
+            if (!drwav_seek_to_first_pcm_frame(pWav)) {
+                return DRWAV_FALSE;
+            }
+            offset = targetBytePos;
+        }
+
+        while (offset > 0) {
+            int offset32 = ((offset > INT_MAX) ? INT_MAX : (int)offset);
+            if (!pWav->onSeek(pWav->pUserData, offset32, drwav_seek_origin_current)) {
+                return DRWAV_FALSE;
+            }
+
+            pWav->readCursorInPCMFrames += offset32 / bytesPerFrame;
+            pWav->bytesRemaining        -= offset32;
+            offset                      -= offset32;
+        }
+    }
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_result drwav_get_cursor_in_pcm_frames(drwav* pWav, drwav_uint64* pCursor)
+{
+    if (pCursor == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+    *pCursor = 0;   /* Safety. */
+
+    if (pWav == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+    *pCursor = pWav->readCursorInPCMFrames;
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_API drwav_result drwav_get_length_in_pcm_frames(drwav* pWav, drwav_uint64* pLength)
+{
+    if (pLength == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+    *pLength = 0;   /* Safety. */
+
+    if (pWav == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+    *pLength = pWav->totalPCMFrameCount;
+
+    return DRWAV_SUCCESS;
+}
+
+
+DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData)
+{
+    size_t bytesWritten;
+
+    if (pWav == NULL || bytesToWrite == 0 || pData == NULL) {
+        return 0;
+    }
+
+    bytesWritten = pWav->onWrite(pWav->pUserData, pData, bytesToWrite);
+    pWav->dataChunkDataSize += bytesWritten;
+
+    return bytesWritten;
+}
+
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
+{
+    drwav_uint64 bytesToWrite;
+    drwav_uint64 bytesWritten;
+    const drwav_uint8* pRunningData;
+
+    if (pWav == NULL || framesToWrite == 0 || pData == NULL) {
+        return 0;
+    }
+
+    bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8);
+    if (bytesToWrite > DRWAV_SIZE_MAX) {
+        return 0;
+    }
+
+    bytesWritten = 0;
+    pRunningData = (const drwav_uint8*)pData;
+
+    while (bytesToWrite > 0) {
+        size_t bytesJustWritten;
+        drwav_uint64 bytesToWriteThisIteration;
+
+        bytesToWriteThisIteration = bytesToWrite;
+        DRWAV_ASSERT(bytesToWriteThisIteration <= DRWAV_SIZE_MAX);  /* <-- This is checked above. */
+
+        bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, pRunningData);
+        if (bytesJustWritten == 0) {
+            break;
+        }
+
+        bytesToWrite -= bytesJustWritten;
+        bytesWritten += bytesJustWritten;
+        pRunningData += bytesJustWritten;
+    }
+
+    return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels;
+}
+
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
+{
+    drwav_uint64 bytesToWrite;
+    drwav_uint64 bytesWritten;
+    drwav_uint32 bytesPerSample;
+    const drwav_uint8* pRunningData;
+
+    if (pWav == NULL || framesToWrite == 0 || pData == NULL) {
+        return 0;
+    }
+
+    bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8);
+    if (bytesToWrite > DRWAV_SIZE_MAX) {
+        return 0;
+    }
+
+    bytesWritten = 0;
+    pRunningData = (const drwav_uint8*)pData;
+
+    bytesPerSample = drwav_get_bytes_per_pcm_frame(pWav) / pWav->channels;
+    if (bytesPerSample == 0) {
+        return 0;   /* Cannot determine bytes per sample, or bytes per sample is less than one byte. */
+    }
+
+    while (bytesToWrite > 0) {
+        drwav_uint8 temp[4096];
+        drwav_uint32 sampleCount;
+        size_t bytesJustWritten;
+        drwav_uint64 bytesToWriteThisIteration;
+
+        bytesToWriteThisIteration = bytesToWrite;
+        DRWAV_ASSERT(bytesToWriteThisIteration <= DRWAV_SIZE_MAX);  /* <-- This is checked above. */
+
+        /*
+        WAV files are always little-endian. We need to byte swap on big-endian architectures. Since our input buffer is read-only we need
+        to use an intermediary buffer for the conversion.
+        */
+        sampleCount = sizeof(temp)/bytesPerSample;
+
+        if (bytesToWriteThisIteration > ((drwav_uint64)sampleCount)*bytesPerSample) {
+            bytesToWriteThisIteration = ((drwav_uint64)sampleCount)*bytesPerSample;
+        }
+
+        DRWAV_COPY_MEMORY(temp, pRunningData, (size_t)bytesToWriteThisIteration);
+        drwav__bswap_samples(temp, sampleCount, bytesPerSample);
+
+        bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, temp);
+        if (bytesJustWritten == 0) {
+            break;
+        }
+
+        bytesToWrite -= bytesJustWritten;
+        bytesWritten += bytesJustWritten;
+        pRunningData += bytesJustWritten;
+    }
+
+    return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels;
+}
+
+DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
+{
+    if (drwav__is_little_endian()) {
+        return drwav_write_pcm_frames_le(pWav, framesToWrite, pData);
+    } else {
+        return drwav_write_pcm_frames_be(pWav, framesToWrite, pData);
+    }
+}
+
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead = 0;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(framesToRead > 0);
+
+    /* TODO: Lots of room for optimization here. */
+
+    while (pWav->readCursorInPCMFrames < pWav->totalPCMFrameCount) {
+        DRWAV_ASSERT(framesToRead > 0); /* This loop iteration will never get hit with framesToRead == 0 because it's asserted at the top, and we check for 0 inside the loop just below. */
+
+        /* If there are no cached frames we need to load a new block. */
+        if (pWav->msadpcm.cachedFrameCount == 0 && pWav->msadpcm.bytesRemainingInBlock == 0) {
+            if (pWav->channels == 1) {
+                /* Mono. */
+                drwav_uint8 header[7];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->msadpcm.predictor[0]     = header[0];
+                pWav->msadpcm.delta[0]         = drwav_bytes_to_s16(header + 1);
+                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav_bytes_to_s16(header + 3);
+                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav_bytes_to_s16(header + 5);
+                pWav->msadpcm.cachedFrames[2]  = pWav->msadpcm.prevFrames[0][0];
+                pWav->msadpcm.cachedFrames[3]  = pWav->msadpcm.prevFrames[0][1];
+                pWav->msadpcm.cachedFrameCount = 2;
+            } else {
+                /* Stereo. */
+                drwav_uint8 header[14];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->msadpcm.predictor[0] = header[0];
+                pWav->msadpcm.predictor[1] = header[1];
+                pWav->msadpcm.delta[0] = drwav_bytes_to_s16(header + 2);
+                pWav->msadpcm.delta[1] = drwav_bytes_to_s16(header + 4);
+                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav_bytes_to_s16(header + 6);
+                pWav->msadpcm.prevFrames[1][1] = (drwav_int32)drwav_bytes_to_s16(header + 8);
+                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav_bytes_to_s16(header + 10);
+                pWav->msadpcm.prevFrames[1][0] = (drwav_int32)drwav_bytes_to_s16(header + 12);
+
+                pWav->msadpcm.cachedFrames[0] = pWav->msadpcm.prevFrames[0][0];
+                pWav->msadpcm.cachedFrames[1] = pWav->msadpcm.prevFrames[1][0];
+                pWav->msadpcm.cachedFrames[2] = pWav->msadpcm.prevFrames[0][1];
+                pWav->msadpcm.cachedFrames[3] = pWav->msadpcm.prevFrames[1][1];
+                pWav->msadpcm.cachedFrameCount = 2;
+            }
+        }
+
+        /* Output anything that's cached. */
+        while (framesToRead > 0 && pWav->msadpcm.cachedFrameCount > 0 && pWav->readCursorInPCMFrames < pWav->totalPCMFrameCount) {
+            if (pBufferOut != NULL) {
+                drwav_uint32 iSample = 0;
+                for (iSample = 0; iSample < pWav->channels; iSample += 1) {
+                    pBufferOut[iSample] = (drwav_int16)pWav->msadpcm.cachedFrames[(drwav_countof(pWav->msadpcm.cachedFrames) - (pWav->msadpcm.cachedFrameCount*pWav->channels)) + iSample];
+                }
+
+                pBufferOut += pWav->channels;
+            }
+
+            framesToRead    -= 1;
+            totalFramesRead += 1;
+            pWav->readCursorInPCMFrames += 1;
+            pWav->msadpcm.cachedFrameCount -= 1;
+        }
+
+        if (framesToRead == 0) {
+            break;
+        }
+
+
+        /*
+        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
+        loop iteration which will trigger the loading of a new block.
+        */
+        if (pWav->msadpcm.cachedFrameCount == 0) {
+            if (pWav->msadpcm.bytesRemainingInBlock == 0) {
+                continue;
+            } else {
+                static drwav_int32 adaptationTable[] = {
+                    230, 230, 230, 230, 307, 409, 512, 614,
+                    768, 614, 512, 409, 307, 230, 230, 230
+                };
+                static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460,  392 };
+                static drwav_int32 coeff2Table[] = { 0,  -256, 0, 64,  0,  -208, -232 };
+
+                drwav_uint8 nibbles;
+                drwav_int32 nibble0;
+                drwav_int32 nibble1;
+
+                if (pWav->onRead(pWav->pUserData, &nibbles, 1) != 1) {
+                    return totalFramesRead;
+                }
+                pWav->msadpcm.bytesRemainingInBlock -= 1;
+
+                /* TODO: Optimize away these if statements. */
+                nibble0 = ((nibbles & 0xF0) >> 4); if ((nibbles & 0x80)) { nibble0 |= 0xFFFFFFF0UL; }
+                nibble1 = ((nibbles & 0x0F) >> 0); if ((nibbles & 0x08)) { nibble1 |= 0xFFFFFFF0UL; }
+
+                if (pWav->channels == 1) {
+                    /* Mono. */
+                    drwav_int32 newSample0;
+                    drwav_int32 newSample1;
+
+                    newSample0  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
+                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
+                    pWav->msadpcm.prevFrames[0][1] = newSample0;
+
+
+                    newSample1  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample1 += nibble1 * pWav->msadpcm.delta[0];
+                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
+                    pWav->msadpcm.prevFrames[0][1] = newSample1;
+
+
+                    pWav->msadpcm.cachedFrames[2] = newSample0;
+                    pWav->msadpcm.cachedFrames[3] = newSample1;
+                    pWav->msadpcm.cachedFrameCount = 2;
+                } else {
+                    /* Stereo. */
+                    drwav_int32 newSample0;
+                    drwav_int32 newSample1;
+
+                    /* Left. */
+                    newSample0  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
+                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
+                    pWav->msadpcm.prevFrames[0][1] = newSample0;
+
+
+                    /* Right. */
+                    newSample1  = ((pWav->msadpcm.prevFrames[1][1] * coeff1Table[pWav->msadpcm.predictor[1]]) + (pWav->msadpcm.prevFrames[1][0] * coeff2Table[pWav->msadpcm.predictor[1]])) >> 8;
+                    newSample1 += nibble1 * pWav->msadpcm.delta[1];
+                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
+
+                    pWav->msadpcm.delta[1] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[1]) >> 8;
+                    if (pWav->msadpcm.delta[1] < 16) {
+                        pWav->msadpcm.delta[1] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[1][0] = pWav->msadpcm.prevFrames[1][1];
+                    pWav->msadpcm.prevFrames[1][1] = newSample1;
+
+                    pWav->msadpcm.cachedFrames[2] = newSample0;
+                    pWav->msadpcm.cachedFrames[3] = newSample1;
+                    pWav->msadpcm.cachedFrameCount = 1;
+                }
+            }
+        }
+    }
+
+    return totalFramesRead;
+}
+
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead = 0;
+    drwav_uint32 iChannel;
+
+    static drwav_int32 indexTable[16] = {
+        -1, -1, -1, -1, 2, 4, 6, 8,
+        -1, -1, -1, -1, 2, 4, 6, 8
+    };
+
+    static drwav_int32 stepTable[89] = {
+        7,     8,     9,     10,    11,    12,    13,    14,    16,    17,
+        19,    21,    23,    25,    28,    31,    34,    37,    41,    45,
+        50,    55,    60,    66,    73,    80,    88,    97,    107,   118,
+        130,   143,   157,   173,   190,   209,   230,   253,   279,   307,
+        337,   371,   408,   449,   494,   544,   598,   658,   724,   796,
+        876,   963,   1060,  1166,  1282,  1411,  1552,  1707,  1878,  2066,
+        2272,  2499,  2749,  3024,  3327,  3660,  4026,  4428,  4871,  5358,
+        5894,  6484,  7132,  7845,  8630,  9493,  10442, 11487, 12635, 13899,
+        15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
+    };
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(framesToRead > 0);
+
+    /* TODO: Lots of room for optimization here. */
+
+    while (pWav->readCursorInPCMFrames < pWav->totalPCMFrameCount) {
+        DRWAV_ASSERT(framesToRead > 0); /* This loop iteration will never get hit with framesToRead == 0 because it's asserted at the top, and we check for 0 inside the loop just below. */
+
+        /* If there are no cached samples we need to load a new block. */
+        if (pWav->ima.cachedFrameCount == 0 && pWav->ima.bytesRemainingInBlock == 0) {
+            if (pWav->channels == 1) {
+                /* Mono. */
+                drwav_uint8 header[4];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                if (header[2] >= drwav_countof(stepTable)) {
+                    pWav->onSeek(pWav->pUserData, pWav->ima.bytesRemainingInBlock, drwav_seek_origin_current);
+                    pWav->ima.bytesRemainingInBlock = 0;
+                    return totalFramesRead; /* Invalid data. */
+                }
+
+                pWav->ima.predictor[0] = (drwav_int16)drwav_bytes_to_u16(header + 0);
+                pWav->ima.stepIndex[0] = drwav_clamp(header[2], 0, (drwav_int32)drwav_countof(stepTable)-1);    /* Clamp not necessary because we checked above, but adding here to silence a static analysis warning. */
+                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[0];
+                pWav->ima.cachedFrameCount = 1;
+            } else {
+                /* Stereo. */
+                drwav_uint8 header[8];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                if (header[2] >= drwav_countof(stepTable) || header[6] >= drwav_countof(stepTable)) {
+                    pWav->onSeek(pWav->pUserData, pWav->ima.bytesRemainingInBlock, drwav_seek_origin_current);
+                    pWav->ima.bytesRemainingInBlock = 0;
+                    return totalFramesRead; /* Invalid data. */
+                }
+
+                pWav->ima.predictor[0] = drwav_bytes_to_s16(header + 0);
+                pWav->ima.stepIndex[0] = drwav_clamp(header[2], 0, (drwav_int32)drwav_countof(stepTable)-1);    /* Clamp not necessary because we checked above, but adding here to silence a static analysis warning. */
+                pWav->ima.predictor[1] = drwav_bytes_to_s16(header + 4);
+                pWav->ima.stepIndex[1] = drwav_clamp(header[6], 0, (drwav_int32)drwav_countof(stepTable)-1);    /* Clamp not necessary because we checked above, but adding here to silence a static analysis warning. */
+
+                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 2] = pWav->ima.predictor[0];
+                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[1];
+                pWav->ima.cachedFrameCount = 1;
+            }
+        }
+
+        /* Output anything that's cached. */
+        while (framesToRead > 0 && pWav->ima.cachedFrameCount > 0 && pWav->readCursorInPCMFrames < pWav->totalPCMFrameCount) {
+            if (pBufferOut != NULL) {
+                drwav_uint32 iSample;
+                for (iSample = 0; iSample < pWav->channels; iSample += 1) {
+                    pBufferOut[iSample] = (drwav_int16)pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + iSample];
+                }
+                pBufferOut += pWav->channels;
+            }
+
+            framesToRead    -= 1;
+            totalFramesRead += 1;
+            pWav->readCursorInPCMFrames += 1;
+            pWav->ima.cachedFrameCount -= 1;
+        }
+
+        if (framesToRead == 0) {
+            break;
+        }
+
+        /*
+        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
+        loop iteration which will trigger the loading of a new block.
+        */
+        if (pWav->ima.cachedFrameCount == 0) {
+            if (pWav->ima.bytesRemainingInBlock == 0) {
+                continue;
+            } else {
+                /*
+                From what I can tell with stereo streams, it looks like every 4 bytes (8 samples) is for one channel. So it goes 4 bytes for the
+                left channel, 4 bytes for the right channel.
+                */
+                pWav->ima.cachedFrameCount = 8;
+                for (iChannel = 0; iChannel < pWav->channels; ++iChannel) {
+                    drwav_uint32 iByte;
+                    drwav_uint8 nibbles[4];
+                    if (pWav->onRead(pWav->pUserData, &nibbles, 4) != 4) {
+                        pWav->ima.cachedFrameCount = 0;
+                        return totalFramesRead;
+                    }
+                    pWav->ima.bytesRemainingInBlock -= 4;
+
+                    for (iByte = 0; iByte < 4; ++iByte) {
+                        drwav_uint8 nibble0 = ((nibbles[iByte] & 0x0F) >> 0);
+                        drwav_uint8 nibble1 = ((nibbles[iByte] & 0xF0) >> 4);
+
+                        drwav_int32 step      = stepTable[pWav->ima.stepIndex[iChannel]];
+                        drwav_int32 predictor = pWav->ima.predictor[iChannel];
+
+                        drwav_int32      diff  = step >> 3;
+                        if (nibble0 & 1) diff += step >> 2;
+                        if (nibble0 & 2) diff += step >> 1;
+                        if (nibble0 & 4) diff += step;
+                        if (nibble0 & 8) diff  = -diff;
+
+                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
+                        pWav->ima.predictor[iChannel] = predictor;
+                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble0], 0, (drwav_int32)drwav_countof(stepTable)-1);
+                        pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+0)*pWav->channels + iChannel] = predictor;
+
+
+                        step      = stepTable[pWav->ima.stepIndex[iChannel]];
+                        predictor = pWav->ima.predictor[iChannel];
+
+                                         diff  = step >> 3;
+                        if (nibble1 & 1) diff += step >> 2;
+                        if (nibble1 & 2) diff += step >> 1;
+                        if (nibble1 & 4) diff += step;
+                        if (nibble1 & 8) diff  = -diff;
+
+                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
+                        pWav->ima.predictor[iChannel] = predictor;
+                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble1], 0, (drwav_int32)drwav_countof(stepTable)-1);
+                        pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+1)*pWav->channels + iChannel] = predictor;
+                    }
+                }
+            }
+        }
+    }
+
+    return totalFramesRead;
+}
+
+
+#ifndef DR_WAV_NO_CONVERSION_API
+static unsigned short g_drwavAlawTable[256] = {
+    0xEA80, 0xEB80, 0xE880, 0xE980, 0xEE80, 0xEF80, 0xEC80, 0xED80, 0xE280, 0xE380, 0xE080, 0xE180, 0xE680, 0xE780, 0xE480, 0xE580,
+    0xF540, 0xF5C0, 0xF440, 0xF4C0, 0xF740, 0xF7C0, 0xF640, 0xF6C0, 0xF140, 0xF1C0, 0xF040, 0xF0C0, 0xF340, 0xF3C0, 0xF240, 0xF2C0,
+    0xAA00, 0xAE00, 0xA200, 0xA600, 0xBA00, 0xBE00, 0xB200, 0xB600, 0x8A00, 0x8E00, 0x8200, 0x8600, 0x9A00, 0x9E00, 0x9200, 0x9600,
+    0xD500, 0xD700, 0xD100, 0xD300, 0xDD00, 0xDF00, 0xD900, 0xDB00, 0xC500, 0xC700, 0xC100, 0xC300, 0xCD00, 0xCF00, 0xC900, 0xCB00,
+    0xFEA8, 0xFEB8, 0xFE88, 0xFE98, 0xFEE8, 0xFEF8, 0xFEC8, 0xFED8, 0xFE28, 0xFE38, 0xFE08, 0xFE18, 0xFE68, 0xFE78, 0xFE48, 0xFE58,
+    0xFFA8, 0xFFB8, 0xFF88, 0xFF98, 0xFFE8, 0xFFF8, 0xFFC8, 0xFFD8, 0xFF28, 0xFF38, 0xFF08, 0xFF18, 0xFF68, 0xFF78, 0xFF48, 0xFF58,
+    0xFAA0, 0xFAE0, 0xFA20, 0xFA60, 0xFBA0, 0xFBE0, 0xFB20, 0xFB60, 0xF8A0, 0xF8E0, 0xF820, 0xF860, 0xF9A0, 0xF9E0, 0xF920, 0xF960,
+    0xFD50, 0xFD70, 0xFD10, 0xFD30, 0xFDD0, 0xFDF0, 0xFD90, 0xFDB0, 0xFC50, 0xFC70, 0xFC10, 0xFC30, 0xFCD0, 0xFCF0, 0xFC90, 0xFCB0,
+    0x1580, 0x1480, 0x1780, 0x1680, 0x1180, 0x1080, 0x1380, 0x1280, 0x1D80, 0x1C80, 0x1F80, 0x1E80, 0x1980, 0x1880, 0x1B80, 0x1A80,
+    0x0AC0, 0x0A40, 0x0BC0, 0x0B40, 0x08C0, 0x0840, 0x09C0, 0x0940, 0x0EC0, 0x0E40, 0x0FC0, 0x0F40, 0x0CC0, 0x0C40, 0x0DC0, 0x0D40,
+    0x5600, 0x5200, 0x5E00, 0x5A00, 0x4600, 0x4200, 0x4E00, 0x4A00, 0x7600, 0x7200, 0x7E00, 0x7A00, 0x6600, 0x6200, 0x6E00, 0x6A00,
+    0x2B00, 0x2900, 0x2F00, 0x2D00, 0x2300, 0x2100, 0x2700, 0x2500, 0x3B00, 0x3900, 0x3F00, 0x3D00, 0x3300, 0x3100, 0x3700, 0x3500,
+    0x0158, 0x0148, 0x0178, 0x0168, 0x0118, 0x0108, 0x0138, 0x0128, 0x01D8, 0x01C8, 0x01F8, 0x01E8, 0x0198, 0x0188, 0x01B8, 0x01A8,
+    0x0058, 0x0048, 0x0078, 0x0068, 0x0018, 0x0008, 0x0038, 0x0028, 0x00D8, 0x00C8, 0x00F8, 0x00E8, 0x0098, 0x0088, 0x00B8, 0x00A8,
+    0x0560, 0x0520, 0x05E0, 0x05A0, 0x0460, 0x0420, 0x04E0, 0x04A0, 0x0760, 0x0720, 0x07E0, 0x07A0, 0x0660, 0x0620, 0x06E0, 0x06A0,
+    0x02B0, 0x0290, 0x02F0, 0x02D0, 0x0230, 0x0210, 0x0270, 0x0250, 0x03B0, 0x0390, 0x03F0, 0x03D0, 0x0330, 0x0310, 0x0370, 0x0350
+};
+
+static unsigned short g_drwavMulawTable[256] = {
+    0x8284, 0x8684, 0x8A84, 0x8E84, 0x9284, 0x9684, 0x9A84, 0x9E84, 0xA284, 0xA684, 0xAA84, 0xAE84, 0xB284, 0xB684, 0xBA84, 0xBE84,
+    0xC184, 0xC384, 0xC584, 0xC784, 0xC984, 0xCB84, 0xCD84, 0xCF84, 0xD184, 0xD384, 0xD584, 0xD784, 0xD984, 0xDB84, 0xDD84, 0xDF84,
+    0xE104, 0xE204, 0xE304, 0xE404, 0xE504, 0xE604, 0xE704, 0xE804, 0xE904, 0xEA04, 0xEB04, 0xEC04, 0xED04, 0xEE04, 0xEF04, 0xF004,
+    0xF0C4, 0xF144, 0xF1C4, 0xF244, 0xF2C4, 0xF344, 0xF3C4, 0xF444, 0xF4C4, 0xF544, 0xF5C4, 0xF644, 0xF6C4, 0xF744, 0xF7C4, 0xF844,
+    0xF8A4, 0xF8E4, 0xF924, 0xF964, 0xF9A4, 0xF9E4, 0xFA24, 0xFA64, 0xFAA4, 0xFAE4, 0xFB24, 0xFB64, 0xFBA4, 0xFBE4, 0xFC24, 0xFC64,
+    0xFC94, 0xFCB4, 0xFCD4, 0xFCF4, 0xFD14, 0xFD34, 0xFD54, 0xFD74, 0xFD94, 0xFDB4, 0xFDD4, 0xFDF4, 0xFE14, 0xFE34, 0xFE54, 0xFE74,
+    0xFE8C, 0xFE9C, 0xFEAC, 0xFEBC, 0xFECC, 0xFEDC, 0xFEEC, 0xFEFC, 0xFF0C, 0xFF1C, 0xFF2C, 0xFF3C, 0xFF4C, 0xFF5C, 0xFF6C, 0xFF7C,
+    0xFF88, 0xFF90, 0xFF98, 0xFFA0, 0xFFA8, 0xFFB0, 0xFFB8, 0xFFC0, 0xFFC8, 0xFFD0, 0xFFD8, 0xFFE0, 0xFFE8, 0xFFF0, 0xFFF8, 0x0000,
+    0x7D7C, 0x797C, 0x757C, 0x717C, 0x6D7C, 0x697C, 0x657C, 0x617C, 0x5D7C, 0x597C, 0x557C, 0x517C, 0x4D7C, 0x497C, 0x457C, 0x417C,
+    0x3E7C, 0x3C7C, 0x3A7C, 0x387C, 0x367C, 0x347C, 0x327C, 0x307C, 0x2E7C, 0x2C7C, 0x2A7C, 0x287C, 0x267C, 0x247C, 0x227C, 0x207C,
+    0x1EFC, 0x1DFC, 0x1CFC, 0x1BFC, 0x1AFC, 0x19FC, 0x18FC, 0x17FC, 0x16FC, 0x15FC, 0x14FC, 0x13FC, 0x12FC, 0x11FC, 0x10FC, 0x0FFC,
+    0x0F3C, 0x0EBC, 0x0E3C, 0x0DBC, 0x0D3C, 0x0CBC, 0x0C3C, 0x0BBC, 0x0B3C, 0x0ABC, 0x0A3C, 0x09BC, 0x093C, 0x08BC, 0x083C, 0x07BC,
+    0x075C, 0x071C, 0x06DC, 0x069C, 0x065C, 0x061C, 0x05DC, 0x059C, 0x055C, 0x051C, 0x04DC, 0x049C, 0x045C, 0x041C, 0x03DC, 0x039C,
+    0x036C, 0x034C, 0x032C, 0x030C, 0x02EC, 0x02CC, 0x02AC, 0x028C, 0x026C, 0x024C, 0x022C, 0x020C, 0x01EC, 0x01CC, 0x01AC, 0x018C,
+    0x0174, 0x0164, 0x0154, 0x0144, 0x0134, 0x0124, 0x0114, 0x0104, 0x00F4, 0x00E4, 0x00D4, 0x00C4, 0x00B4, 0x00A4, 0x0094, 0x0084,
+    0x0078, 0x0070, 0x0068, 0x0060, 0x0058, 0x0050, 0x0048, 0x0040, 0x0038, 0x0030, 0x0028, 0x0020, 0x0018, 0x0010, 0x0008, 0x0000
+};
+
+static DRWAV_INLINE drwav_int16 drwav__alaw_to_s16(drwav_uint8 sampleIn)
+{
+    return (short)g_drwavAlawTable[sampleIn];
+}
+
+static DRWAV_INLINE drwav_int16 drwav__mulaw_to_s16(drwav_uint8 sampleIn)
+{
+    return (short)g_drwavMulawTable[sampleIn];
+}
+
+
+
+DRWAV_PRIVATE void drwav__pcm_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    size_t i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
+    if (bytesPerSample == 1) {
+        drwav_u8_to_s16(pOut, pIn, totalSampleCount);
+        return;
+    }
+
+
+    /* Slightly more optimal implementation for common formats. */
+    if (bytesPerSample == 2) {
+        for (i = 0; i < totalSampleCount; ++i) {
+           *pOut++ = ((const drwav_int16*)pIn)[i];
+        }
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_s16(pOut, pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        drwav_s32_to_s16(pOut, (const drwav_int32*)pIn, totalSampleCount);
+        return;
+    }
+
+
+    /* Anything more than 64 bits per sample is not supported. */
+    if (bytesPerSample > 8) {
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+
+
+    /* Generic, slow converter. */
+    for (i = 0; i < totalSampleCount; ++i) {
+        drwav_uint64 sample = 0;
+        unsigned int shift  = (8 - bytesPerSample) * 8;
+
+        unsigned int j;
+        for (j = 0; j < bytesPerSample; j += 1) {
+            DRWAV_ASSERT(j < 8);
+            sample |= (drwav_uint64)(pIn[j]) << shift;
+            shift  += 8;
+        }
+
+        pIn += j;
+        *pOut++ = (drwav_int16)((drwav_int64)sample >> 48);
+    }
+}
+
+DRWAV_PRIVATE void drwav__ieee_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        drwav_f32_to_s16(pOut, (const float*)pIn, totalSampleCount);
+        return;
+    } else if (bytesPerSample == 8) {
+        drwav_f64_to_s16(pOut, (const double*)pIn, totalSampleCount);
+        return;
+    } else {
+        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    /* Fast path. */
+    if ((pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 16) || pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav__pcm_to_s16(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav__ieee_to_s16(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);    /* Safe cast. */
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_alaw_to_s16(pBufferOut, sampleData, (size_t)samplesRead);
+
+        /*
+        For some reason libsndfile seems to be returning samples of the opposite sign for a-law, but only
+        with AIFF files. For WAV files it seems to be the same as dr_wav. This is resulting in dr_wav's
+        automated tests failing. I'm not sure which is correct, but will assume dr_wav. If we're enforcing
+        libsndfile compatibility we'll swap the signs here.
+        */
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_mulaw_to_s16(pBufferOut, sampleData, (size_t)samplesRead);
+
+        /*
+        Just like with alaw, for some reason the signs between libsndfile and dr_wav are opposite. We just need to
+        swap the sign if we're compiling with libsndfile compatiblity so our automated tests don't fail.
+        */
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    if (pWav == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (framesToRead * pWav->channels * sizeof(drwav_int16) > DRWAV_SIZE_MAX) {
+        framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int16) / pWav->channels;
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        return drwav_read_pcm_frames_s16__pcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        return drwav_read_pcm_frames_s16__ieee(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        return drwav_read_pcm_frames_s16__alaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        return drwav_read_pcm_frames_s16__mulaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        return drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_pcm_frames_s16__ima(pWav, framesToRead, pBufferOut);
+    }
+
+    return 0;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_FALSE) {
+        drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_TRUE) {
+        drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+
+DRWAV_API void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        int x = pIn[i];
+        r = x << 8;
+        r = r - 32768;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        int x = ((int)(((unsigned int)(((const drwav_uint8*)pIn)[i*3+0]) << 8) | ((unsigned int)(((const drwav_uint8*)pIn)[i*3+1]) << 16) | ((unsigned int)(((const drwav_uint8*)pIn)[i*3+2])) << 24)) >> 8;
+        r = x >> 8;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        int x = pIn[i];
+        r = x >> 16;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        float x = pIn[i];
+        float c;
+        c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
+        c = c + 1;
+        r = (int)(c * 32767.5f);
+        r = r - 32768;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        double x = pIn[i];
+        double c;
+        c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
+        c = c + 1;
+        r = (int)(c * 32767.5);
+        r = r - 32768;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        pOut[i] = drwav__alaw_to_s16(pIn[i]);
+    }
+}
+
+DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        pOut[i] = drwav__mulaw_to_s16(pIn[i]);
+    }
+}
+
+
+DRWAV_PRIVATE void drwav__pcm_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
+{
+    unsigned int i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
+    if (bytesPerSample == 1) {
+        drwav_u8_to_f32(pOut, pIn, sampleCount);
+        return;
+    }
+
+    /* Slightly more optimal implementation for common formats. */
+    if (bytesPerSample == 2) {
+        drwav_s16_to_f32(pOut, (const drwav_int16*)pIn, sampleCount);
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_f32(pOut, pIn, sampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        drwav_s32_to_f32(pOut, (const drwav_int32*)pIn, sampleCount);
+        return;
+    }
+
+
+    /* Anything more than 64 bits per sample is not supported. */
+    if (bytesPerSample > 8) {
+        DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut));
+        return;
+    }
+
+
+    /* Generic, slow converter. */
+    for (i = 0; i < sampleCount; ++i) {
+        drwav_uint64 sample = 0;
+        unsigned int shift  = (8 - bytesPerSample) * 8;
+
+        unsigned int j;
+        for (j = 0; j < bytesPerSample; j += 1) {
+            DRWAV_ASSERT(j < 8);
+            sample |= (drwav_uint64)(pIn[j]) << shift;
+            shift  += 8;
+        }
+
+        pIn += j;
+        *pOut++ = (float)((drwav_int64)sample / 9223372036854775807.0);
+    }
+}
+
+DRWAV_PRIVATE void drwav__ieee_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        unsigned int i;
+        for (i = 0; i < sampleCount; ++i) {
+            *pOut++ = ((const float*)pIn)[i];
+        }
+        return;
+    } else if (bytesPerSample == 8) {
+        drwav_f64_to_f32(pOut, (const double*)pIn, sampleCount);
+        return;
+    } else {
+        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
+        DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut));
+        return;
+    }
+}
+
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_f32__pcm(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_f32__msadpcm_ima(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
+    drwav_uint64 totalFramesRead;
+    drwav_int16 samples16[2048];
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels);
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToReadThisIteration, samples16);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_f32__ieee(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    /* Fast path. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bitsPerSample == 32) {
+        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_f32__alaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_alaw_to_f32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_f32__mulaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_mulaw_to_f32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    if (pWav == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (framesToRead * pWav->channels * sizeof(float) > DRWAV_SIZE_MAX) {
+        framesToRead = DRWAV_SIZE_MAX / sizeof(float) / pWav->channels;
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        return drwav_read_pcm_frames_f32__pcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_pcm_frames_f32__msadpcm_ima(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        return drwav_read_pcm_frames_f32__ieee(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        return drwav_read_pcm_frames_f32__alaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        return drwav_read_pcm_frames_f32__mulaw(pWav, framesToRead, pBufferOut);
+    }
+
+    return 0;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_FALSE) {
+        drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_TRUE) {
+        drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+
+DRWAV_API void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+#ifdef DR_WAV_LIBSNDFILE_COMPAT
+    /*
+    It appears libsndfile uses slightly different logic for the u8 -> f32 conversion to dr_wav, which in my opinion is incorrect. It appears
+    libsndfile performs the conversion something like "f32 = (u8 / 256) * 2 - 1", however I think it should be "f32 = (u8 / 255) * 2 - 1" (note
+    the divisor of 256 vs 255). I use libsndfile as a benchmark for testing, so I'm therefore leaving this block here just for my automated
+    correctness testing. This is disabled by default.
+    */
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (pIn[i] / 256.0f) * 2 - 1;
+    }
+#else
+    for (i = 0; i < sampleCount; ++i) {
+        float x = pIn[i];
+        x = x * 0.00784313725490196078f;    /* 0..255 to 0..2 */
+        x = x - 1;                          /* 0..2 to -1..1 */
+
+        *pOut++ = x;
+    }
+#endif
+}
+
+DRWAV_API void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = pIn[i] * 0.000030517578125f;
+    }
+}
+
+DRWAV_API void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        double x;
+        drwav_uint32 a = ((drwav_uint32)(pIn[i*3+0]) <<  8);
+        drwav_uint32 b = ((drwav_uint32)(pIn[i*3+1]) << 16);
+        drwav_uint32 c = ((drwav_uint32)(pIn[i*3+2]) << 24);
+
+        x = (double)((drwav_int32)(a | b | c) >> 8);
+        *pOut++ = (float)(x * 0.00000011920928955078125);
+    }
+}
+
+DRWAV_API void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount)
+{
+    size_t i;
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (float)(pIn[i] / 2147483648.0);
+    }
+}
+
+DRWAV_API void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (float)pIn[i];
+    }
+}
+
+DRWAV_API void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = drwav__alaw_to_s16(pIn[i]) / 32768.0f;
+    }
+}
+
+DRWAV_API void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = drwav__mulaw_to_s16(pIn[i]) / 32768.0f;
+    }
+}
+
+
+
+DRWAV_PRIVATE void drwav__pcm_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    unsigned int i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
+    if (bytesPerSample == 1) {
+        drwav_u8_to_s32(pOut, pIn, totalSampleCount);
+        return;
+    }
+
+    /* Slightly more optimal implementation for common formats. */
+    if (bytesPerSample == 2) {
+        drwav_s16_to_s32(pOut, (const drwav_int16*)pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_s32(pOut, pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        for (i = 0; i < totalSampleCount; ++i) {
+           *pOut++ = ((const drwav_int32*)pIn)[i];
+        }
+        return;
+    }
+
+
+    /* Anything more than 64 bits per sample is not supported. */
+    if (bytesPerSample > 8) {
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+
+
+    /* Generic, slow converter. */
+    for (i = 0; i < totalSampleCount; ++i) {
+        drwav_uint64 sample = 0;
+        unsigned int shift  = (8 - bytesPerSample) * 8;
+
+        unsigned int j;
+        for (j = 0; j < bytesPerSample; j += 1) {
+            DRWAV_ASSERT(j < 8);
+            sample |= (drwav_uint64)(pIn[j]) << shift;
+            shift  += 8;
+        }
+
+        pIn += j;
+        *pOut++ = (drwav_int32)((drwav_int64)sample >> 32);
+    }
+}
+
+DRWAV_PRIVATE void drwav__ieee_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        drwav_f32_to_s32(pOut, (const float*)pIn, totalSampleCount);
+        return;
+    } else if (bytesPerSample == 8) {
+        drwav_f64_to_s32(pOut, (const double*)pIn, totalSampleCount);
+        return;
+    } else {
+        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+}
+
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s32__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    /* Fast path. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 32) {
+        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s32__msadpcm_ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
+    drwav_uint64 totalFramesRead = 0;
+    drwav_int16 samples16[2048];
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels);
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToReadThisIteration, samples16);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s32__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s32__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_alaw_to_s32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s32__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from out intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_mulaw_to_s32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
+
+        pBufferOut      += samplesRead;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    if (pWav == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (framesToRead * pWav->channels * sizeof(drwav_int32) > DRWAV_SIZE_MAX) {
+        framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int32) / pWav->channels;
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        return drwav_read_pcm_frames_s32__pcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_pcm_frames_s32__msadpcm_ima(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        return drwav_read_pcm_frames_s32__ieee(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        return drwav_read_pcm_frames_s32__alaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        return drwav_read_pcm_frames_s32__mulaw(pWav, framesToRead, pBufferOut);
+    }
+
+    return 0;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_FALSE) {
+        drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_TRUE) {
+        drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+
+DRWAV_API void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = ((int)pIn[i] - 128) << 24;
+    }
+}
+
+DRWAV_API void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = pIn[i] << 16;
+    }
+}
+
+DRWAV_API void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        unsigned int s0 = pIn[i*3 + 0];
+        unsigned int s1 = pIn[i*3 + 1];
+        unsigned int s2 = pIn[i*3 + 2];
+
+        drwav_int32 sample32 = (drwav_int32)((s0 << 8) | (s1 << 16) | (s2 << 24));
+        *pOut++ = sample32;
+    }
+}
+
+DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
+    }
+}
+
+DRWAV_API void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
+    }
+}
+
+DRWAV_API void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = ((drwav_int32)drwav__alaw_to_s16(pIn[i])) << 16;
+    }
+}
+
+DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i= 0; i < sampleCount; ++i) {
+        *pOut++ = ((drwav_int32)drwav__mulaw_to_s16(pIn[i])) << 16;
+    }
+}
+
+
+
+DRWAV_PRIVATE drwav_int16* drwav__read_pcm_frames_and_close_s16(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+{
+    drwav_uint64 sampleDataSize;
+    drwav_int16* pSampleData;
+    drwav_uint64 framesRead;
+
+    DRWAV_ASSERT(pWav != NULL);
+
+    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int16);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
+        drwav_uninit(pWav);
+        return NULL;    /* File's too big. */
+    }
+
+    pSampleData = (drwav_int16*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    /* Failed to allocate memory. */
+    }
+
+    framesRead = drwav_read_pcm_frames_s16(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
+    if (framesRead != pWav->totalPCMFrameCount) {
+        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
+        drwav_uninit(pWav);
+        return NULL;    /* There was an error reading the samples. */
+    }
+
+    drwav_uninit(pWav);
+
+    if (sampleRate) {
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalFrameCount) {
+        *totalFrameCount = pWav->totalPCMFrameCount;
+    }
+
+    return pSampleData;
+}
+
+DRWAV_PRIVATE float* drwav__read_pcm_frames_and_close_f32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+{
+    drwav_uint64 sampleDataSize;
+    float* pSampleData;
+    drwav_uint64 framesRead;
+
+    DRWAV_ASSERT(pWav != NULL);
+
+    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(float);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
+        drwav_uninit(pWav);
+        return NULL;    /* File's too big. */
+    }
+
+    pSampleData = (float*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    /* Failed to allocate memory. */
+    }
+
+    framesRead = drwav_read_pcm_frames_f32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
+    if (framesRead != pWav->totalPCMFrameCount) {
+        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
+        drwav_uninit(pWav);
+        return NULL;    /* There was an error reading the samples. */
+    }
+
+    drwav_uninit(pWav);
+
+    if (sampleRate) {
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalFrameCount) {
+        *totalFrameCount = pWav->totalPCMFrameCount;
+    }
+
+    return pSampleData;
+}
+
+DRWAV_PRIVATE drwav_int32* drwav__read_pcm_frames_and_close_s32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+{
+    drwav_uint64 sampleDataSize;
+    drwav_int32* pSampleData;
+    drwav_uint64 framesRead;
+
+    DRWAV_ASSERT(pWav != NULL);
+
+    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int32);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
+        drwav_uninit(pWav);
+        return NULL;    /* File's too big. */
+    }
+
+    pSampleData = (drwav_int32*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    /* Failed to allocate memory. */
+    }
+
+    framesRead = drwav_read_pcm_frames_s32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
+    if (framesRead != pWav->totalPCMFrameCount) {
+        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
+        drwav_uninit(pWav);
+        return NULL;    /* There was an error reading the samples. */
+    }
+
+    drwav_uninit(pWav);
+
+    if (sampleRate) {
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalFrameCount) {
+        *totalFrameCount = pWav->totalPCMFrameCount;
+    }
+
+    return pSampleData;
+}
+
+
+
+DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+#ifndef DR_WAV_NO_STDIO
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+
+#ifndef DR_WAV_NO_WCHAR
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+#endif /* DR_WAV_NO_WCHAR */
+#endif /* DR_WAV_NO_STDIO */
+
+DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API float* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+#endif  /* DR_WAV_NO_CONVERSION_API */
+
+
+DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        drwav__free_from_callbacks(p, pAllocationCallbacks);
+    } else {
+        drwav__free_default(p, NULL);
+    }
+}
+
+DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data)
+{
+    return ((drwav_uint16)data[0] << 0) | ((drwav_uint16)data[1] << 8);
+}
+
+DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data)
+{
+    return (drwav_int16)drwav_bytes_to_u16(data);
+}
+
+DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data)
+{
+    return drwav_bytes_to_u32_le(data);
+}
+
+DRWAV_API float drwav_bytes_to_f32(const drwav_uint8* data)
+{
+    union {
+        drwav_uint32 u32;
+        float f32;
+    } value;
+
+    value.u32 = drwav_bytes_to_u32(data);
+    return value.f32;
+}
+
+DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data)
+{
+    return (drwav_int32)drwav_bytes_to_u32(data);
+}
+
+DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data)
+{
+    return
+        ((drwav_uint64)data[0] <<  0) | ((drwav_uint64)data[1] <<  8) | ((drwav_uint64)data[2] << 16) | ((drwav_uint64)data[3] << 24) |
+        ((drwav_uint64)data[4] << 32) | ((drwav_uint64)data[5] << 40) | ((drwav_uint64)data[6] << 48) | ((drwav_uint64)data[7] << 56);
+}
+
+DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data)
+{
+    return (drwav_int64)drwav_bytes_to_u64(data);
+}
+
+
+DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
+{
+    int i;
+    for (i = 0; i < 16; i += 1) {
+        if (a[i] != b[i]) {
+            return DRWAV_FALSE;
+        }
+    }
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b)
+{
+    return
+        a[0] == b[0] &&
+        a[1] == b[1] &&
+        a[2] == b[2] &&
+        a[3] == b[3];
+}
+
+#ifdef __MRC__
+/* Undo the pragma at the beginning of this file. */
+#pragma options opt reset
+#endif
+
+#endif  /* dr_wav_c */
+#endif  /* DR_WAV_IMPLEMENTATION */
+
+/*
+REVISION HISTORY
+================
+v0.13.13 - 2023-11-02
+  - Fix a warning when compiling with Clang.
+
+v0.13.12 - 2023-08-07
+  - Fix a possible crash in drwav_read_pcm_frames().
+
+v0.13.11 - 2023-07-07
+  - AIFF compatibility improvements.
+
+v0.13.10 - 2023-05-29
+  - Fix a bug where drwav_init_with_metadata() does not decode any frames after initializtion.
+
+v0.13.9 - 2023-05-22
+  - Add support for AIFF decoding (writing and metadata not supported).
+  - Add support for RIFX decoding (writing and metadata not supported).
+  - Fix a bug where metadata is not processed if it's located before the "fmt " chunk.
+  - Add a workaround for a type of malformed WAV file where the size of the "RIFF" and "data" chunks
+    are incorrectly set to 0xFFFFFFFF.
+
+v0.13.8 - 2023-03-25
+  - Fix a possible null pointer dereference.
+  - Fix a crash when loading files with badly formed metadata.
+
+v0.13.7 - 2022-09-17
+  - Fix compilation with DJGPP.
+  - Add support for disabling wchar_t with DR_WAV_NO_WCHAR.
+
+v0.13.6 - 2022-04-10
+  - Fix compilation error on older versions of GCC.
+  - Remove some dependencies on the standard library.
+
+v0.13.5 - 2022-01-26
+  - Fix an error when seeking to the end of the file.
+
+v0.13.4 - 2021-12-08
+  - Fix some static analysis warnings.
+
+v0.13.3 - 2021-11-24
+  - Fix an incorrect assertion when trying to endian swap 1-byte sample formats. This is now a no-op
+    rather than a failed assertion.
+  - Fix a bug with parsing of the bext chunk.
+  - Fix some static analysis warnings.
+
+v0.13.2 - 2021-10-02
+  - Fix a possible buffer overflow when reading from compressed formats.
+
+v0.13.1 - 2021-07-31
+  - Fix platform detection for ARM64.
+
+v0.13.0 - 2021-07-01
+  - Improve support for reading and writing metadata. Use the `_with_metadata()` APIs to initialize
+    a WAV decoder and store the metadata within the `drwav` object. Use the `pMetadata` and
+    `metadataCount` members of the `drwav` object to read the data. The old way of handling metadata
+    via a callback is still usable and valid.
+  - API CHANGE: drwav_target_write_size_bytes() now takes extra parameters for calculating the
+    required write size when writing metadata.
+  - Add drwav_get_cursor_in_pcm_frames()
+  - Add drwav_get_length_in_pcm_frames()
+  - Fix a bug where drwav_read_raw() can call the read callback with a byte count of zero.
+
+v0.12.20 - 2021-06-11
+  - Fix some undefined behavior.
+
+v0.12.19 - 2021-02-21
+  - Fix a warning due to referencing _MSC_VER when it is undefined.
+  - Minor improvements to the management of some internal state concerning the data chunk cursor.
+
+v0.12.18 - 2021-01-31
+  - Clean up some static analysis warnings.
+
+v0.12.17 - 2021-01-17
+  - Minor fix to sample code in documentation.
+  - Correctly qualify a private API as private rather than public.
+  - Code cleanup.
+
+v0.12.16 - 2020-12-02
+  - Fix a bug when trying to read more bytes than can fit in a size_t.
+
+v0.12.15 - 2020-11-21
+  - Fix compilation with OpenWatcom.
+
+v0.12.14 - 2020-11-13
+  - Minor code clean up.
+
+v0.12.13 - 2020-11-01
+  - Improve compiler support for older versions of GCC.
+
+v0.12.12 - 2020-09-28
+  - Add support for RF64.
+  - Fix a bug in writing mode where the size of the RIFF chunk incorrectly includes the header section.
+
+v0.12.11 - 2020-09-08
+  - Fix a compilation error on older compilers.
+
+v0.12.10 - 2020-08-24
+  - Fix a bug when seeking with ADPCM formats.
+
+v0.12.9 - 2020-08-02
+  - Simplify sized types.
+
+v0.12.8 - 2020-07-25
+  - Fix a compilation warning.
+
+v0.12.7 - 2020-07-15
+  - Fix some bugs on big-endian architectures.
+  - Fix an error in s24 to f32 conversion.
+
+v0.12.6 - 2020-06-23
+  - Change drwav_read_*() to allow NULL to be passed in as the output buffer which is equivalent to a forward seek.
+  - Fix a buffer overflow when trying to decode invalid IMA-ADPCM files.
+  - Add include guard for the implementation section.
+
+v0.12.5 - 2020-05-27
+  - Minor documentation fix.
+
+v0.12.4 - 2020-05-16
+  - Replace assert() with DRWAV_ASSERT().
+  - Add compile-time and run-time version querying.
+    - DRWAV_VERSION_MINOR
+    - DRWAV_VERSION_MAJOR
+    - DRWAV_VERSION_REVISION
+    - DRWAV_VERSION_STRING
+    - drwav_version()
+    - drwav_version_string()
+
+v0.12.3 - 2020-04-30
+  - Fix compilation errors with VC6.
+
+v0.12.2 - 2020-04-21
+  - Fix a bug where drwav_init_file() does not close the file handle after attempting to load an erroneous file.
+
+v0.12.1 - 2020-04-13
+  - Fix some pedantic warnings.
+
+v0.12.0 - 2020-04-04
+  - API CHANGE: Add container and format parameters to the chunk callback.
+  - Minor documentation updates.
+
+v0.11.5 - 2020-03-07
+  - Fix compilation error with Visual Studio .NET 2003.
+
+v0.11.4 - 2020-01-29
+  - Fix some static analysis warnings.
+  - Fix a bug when reading f32 samples from an A-law encoded stream.
+
+v0.11.3 - 2020-01-12
+  - Minor changes to some f32 format conversion routines.
+  - Minor bug fix for ADPCM conversion when end of file is reached.
+
+v0.11.2 - 2019-12-02
+  - Fix a possible crash when using custom memory allocators without a custom realloc() implementation.
+  - Fix an integer overflow bug.
+  - Fix a null pointer dereference bug.
+  - Add limits to sample rate, channels and bits per sample to tighten up some validation.
+
+v0.11.1 - 2019-10-07
+  - Internal code clean up.
+
+v0.11.0 - 2019-10-06
+  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
+    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
+    - drwav_init()
+    - drwav_init_ex()
+    - drwav_init_file()
+    - drwav_init_file_ex()
+    - drwav_init_file_w()
+    - drwav_init_file_w_ex()
+    - drwav_init_memory()
+    - drwav_init_memory_ex()
+    - drwav_init_write()
+    - drwav_init_write_sequential()
+    - drwav_init_write_sequential_pcm_frames()
+    - drwav_init_file_write()
+    - drwav_init_file_write_sequential()
+    - drwav_init_file_write_sequential_pcm_frames()
+    - drwav_init_file_write_w()
+    - drwav_init_file_write_sequential_w()
+    - drwav_init_file_write_sequential_pcm_frames_w()
+    - drwav_init_memory_write()
+    - drwav_init_memory_write_sequential()
+    - drwav_init_memory_write_sequential_pcm_frames()
+    - drwav_open_and_read_pcm_frames_s16()
+    - drwav_open_and_read_pcm_frames_f32()
+    - drwav_open_and_read_pcm_frames_s32()
+    - drwav_open_file_and_read_pcm_frames_s16()
+    - drwav_open_file_and_read_pcm_frames_f32()
+    - drwav_open_file_and_read_pcm_frames_s32()
+    - drwav_open_file_and_read_pcm_frames_s16_w()
+    - drwav_open_file_and_read_pcm_frames_f32_w()
+    - drwav_open_file_and_read_pcm_frames_s32_w()
+    - drwav_open_memory_and_read_pcm_frames_s16()
+    - drwav_open_memory_and_read_pcm_frames_f32()
+    - drwav_open_memory_and_read_pcm_frames_s32()
+    Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use
+    DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE.
+  - Add support for reading and writing PCM frames in an explicit endianness. New APIs:
+    - drwav_read_pcm_frames_le()
+    - drwav_read_pcm_frames_be()
+    - drwav_read_pcm_frames_s16le()
+    - drwav_read_pcm_frames_s16be()
+    - drwav_read_pcm_frames_f32le()
+    - drwav_read_pcm_frames_f32be()
+    - drwav_read_pcm_frames_s32le()
+    - drwav_read_pcm_frames_s32be()
+    - drwav_write_pcm_frames_le()
+    - drwav_write_pcm_frames_be()
+  - Remove deprecated APIs.
+  - API CHANGE: The following APIs now return native-endian data. Previously they returned little-endian data.
+    - drwav_read_pcm_frames()
+    - drwav_read_pcm_frames_s16()
+    - drwav_read_pcm_frames_s32()
+    - drwav_read_pcm_frames_f32()
+    - drwav_open_and_read_pcm_frames_s16()
+    - drwav_open_and_read_pcm_frames_s32()
+    - drwav_open_and_read_pcm_frames_f32()
+    - drwav_open_file_and_read_pcm_frames_s16()
+    - drwav_open_file_and_read_pcm_frames_s32()
+    - drwav_open_file_and_read_pcm_frames_f32()
+    - drwav_open_file_and_read_pcm_frames_s16_w()
+    - drwav_open_file_and_read_pcm_frames_s32_w()
+    - drwav_open_file_and_read_pcm_frames_f32_w()
+    - drwav_open_memory_and_read_pcm_frames_s16()
+    - drwav_open_memory_and_read_pcm_frames_s32()
+    - drwav_open_memory_and_read_pcm_frames_f32()
+
+v0.10.1 - 2019-08-31
+  - Correctly handle partial trailing ADPCM blocks.
+
+v0.10.0 - 2019-08-04
+  - Remove deprecated APIs.
+  - Add wchar_t variants for file loading APIs:
+      drwav_init_file_w()
+      drwav_init_file_ex_w()
+      drwav_init_file_write_w()
+      drwav_init_file_write_sequential_w()
+  - Add drwav_target_write_size_bytes() which calculates the total size in bytes of a WAV file given a format and sample count.
+  - Add APIs for specifying the PCM frame count instead of the sample count when opening in sequential write mode:
+      drwav_init_write_sequential_pcm_frames()
+      drwav_init_file_write_sequential_pcm_frames()
+      drwav_init_file_write_sequential_pcm_frames_w()
+      drwav_init_memory_write_sequential_pcm_frames()
+  - Deprecate drwav_open*() and drwav_close():
+      drwav_open()
+      drwav_open_ex()
+      drwav_open_write()
+      drwav_open_write_sequential()
+      drwav_open_file()
+      drwav_open_file_ex()
+      drwav_open_file_write()
+      drwav_open_file_write_sequential()
+      drwav_open_memory()
+      drwav_open_memory_ex()
+      drwav_open_memory_write()
+      drwav_open_memory_write_sequential()
+      drwav_close()
+  - Minor documentation updates.
+
+v0.9.2 - 2019-05-21
+  - Fix warnings.
+
+v0.9.1 - 2019-05-05
+  - Add support for C89.
+  - Change license to choice of public domain or MIT-0.
+
+v0.9.0 - 2018-12-16
+  - API CHANGE: Add new reading APIs for reading by PCM frames instead of samples. Old APIs have been deprecated and
+    will be removed in v0.10.0. Deprecated APIs and their replacements:
+      drwav_read()                     -> drwav_read_pcm_frames()
+      drwav_read_s16()                 -> drwav_read_pcm_frames_s16()
+      drwav_read_f32()                 -> drwav_read_pcm_frames_f32()
+      drwav_read_s32()                 -> drwav_read_pcm_frames_s32()
+      drwav_seek_to_sample()           -> drwav_seek_to_pcm_frame()
+      drwav_write()                    -> drwav_write_pcm_frames()
+      drwav_open_and_read_s16()        -> drwav_open_and_read_pcm_frames_s16()
+      drwav_open_and_read_f32()        -> drwav_open_and_read_pcm_frames_f32()
+      drwav_open_and_read_s32()        -> drwav_open_and_read_pcm_frames_s32()
+      drwav_open_file_and_read_s16()   -> drwav_open_file_and_read_pcm_frames_s16()
+      drwav_open_file_and_read_f32()   -> drwav_open_file_and_read_pcm_frames_f32()
+      drwav_open_file_and_read_s32()   -> drwav_open_file_and_read_pcm_frames_s32()
+      drwav_open_memory_and_read_s16() -> drwav_open_memory_and_read_pcm_frames_s16()
+      drwav_open_memory_and_read_f32() -> drwav_open_memory_and_read_pcm_frames_f32()
+      drwav_open_memory_and_read_s32() -> drwav_open_memory_and_read_pcm_frames_s32()
+      drwav::totalSampleCount          -> drwav::totalPCMFrameCount
+  - API CHANGE: Rename drwav_open_and_read_file_*() to drwav_open_file_and_read_*().
+  - API CHANGE: Rename drwav_open_and_read_memory_*() to drwav_open_memory_and_read_*().
+  - Add built-in support for smpl chunks.
+  - Add support for firing a callback for each chunk in the file at initialization time.
+    - This is enabled through the drwav_init_ex(), etc. family of APIs.
+  - Handle invalid FMT chunks more robustly.
+
+v0.8.5 - 2018-09-11
+  - Const correctness.
+  - Fix a potential stack overflow.
+
+v0.8.4 - 2018-08-07
+  - Improve 64-bit detection.
+
+v0.8.3 - 2018-08-05
+  - Fix C++ build on older versions of GCC.
+
+v0.8.2 - 2018-08-02
+  - Fix some big-endian bugs.
+
+v0.8.1 - 2018-06-29
+  - Add support for sequential writing APIs.
+  - Disable seeking in write mode.
+  - Fix bugs with Wave64.
+  - Fix typos.
+
+v0.8 - 2018-04-27
+  - Bug fix.
+  - Start using major.minor.revision versioning.
+
+v0.7f - 2018-02-05
+  - Restrict ADPCM formats to a maximum of 2 channels.
+
+v0.7e - 2018-02-02
+  - Fix a crash.
+
+v0.7d - 2018-02-01
+  - Fix a crash.
+
+v0.7c - 2018-02-01
+  - Set drwav.bytesPerSample to 0 for all compressed formats.
+  - Fix a crash when reading 16-bit floating point WAV files. In this case dr_wav will output silence for
+    all format conversion reading APIs (*_s16, *_s32, *_f32 APIs).
+  - Fix some divide-by-zero errors.
+
+v0.7b - 2018-01-22
+  - Fix errors with seeking of compressed formats.
+  - Fix compilation error when DR_WAV_NO_CONVERSION_API
+
+v0.7a - 2017-11-17
+  - Fix some GCC warnings.
+
+v0.7 - 2017-11-04
+  - Add writing APIs.
+
+v0.6 - 2017-08-16
+  - API CHANGE: Rename dr_* types to drwav_*.
+  - Add support for custom implementations of malloc(), realloc(), etc.
+  - Add support for Microsoft ADPCM.
+  - Add support for IMA ADPCM (DVI, format code 0x11).
+  - Optimizations to drwav_read_s16().
+  - Bug fixes.
+
+v0.5g - 2017-07-16
+  - Change underlying type for booleans to unsigned.
+
+v0.5f - 2017-04-04
+  - Fix a minor bug with drwav_open_and_read_s16() and family.
+
+v0.5e - 2016-12-29
+  - Added support for reading samples as signed 16-bit integers. Use the _s16() family of APIs for this.
+  - Minor fixes to documentation.
+
+v0.5d - 2016-12-28
+  - Use drwav_int* and drwav_uint* sized types to improve compiler support.
+
+v0.5c - 2016-11-11
+  - Properly handle JUNK chunks that come before the FMT chunk.
+
+v0.5b - 2016-10-23
+  - A minor change to drwav_bool8 and drwav_bool32 types.
+
+v0.5a - 2016-10-11
+  - Fixed a bug with drwav_open_and_read() and family due to incorrect argument ordering.
+  - Improve A-law and mu-law efficiency.
+
+v0.5 - 2016-09-29
+  - API CHANGE. Swap the order of "channels" and "sampleRate" parameters in drwav_open_and_read*(). Rationale for this is to
+    keep it consistent with dr_audio and dr_flac.
+
+v0.4b - 2016-09-18
+  - Fixed a typo in documentation.
+
+v0.4a - 2016-09-18
+  - Fixed a typo.
+  - Change date format to ISO 8601 (YYYY-MM-DD)
+
+v0.4 - 2016-07-13
+  - API CHANGE. Make onSeek consistent with dr_flac.
+  - API CHANGE. Rename drwav_seek() to drwav_seek_to_sample() for clarity and consistency with dr_flac.
+  - Added support for Sony Wave64.
+
+v0.3a - 2016-05-28
+  - API CHANGE. Return drwav_bool32 instead of int in onSeek callback.
+  - Fixed a memory leak.
+
+v0.3 - 2016-05-22
+  - Lots of API changes for consistency.
+
+v0.2a - 2016-05-16
+  - Fixed Linux/GCC build.
+
+v0.2 - 2016-05-11
+  - Added support for reading data as signed 32-bit PCM for consistency with dr_flac.
+
+v0.1a - 2016-05-07
+  - Fixed a bug in drwav_open_file() where the file handle would not be closed if the loader failed to initialize.
+
+v0.1 - 2016-05-04
+  - Initial versioned release.
+*/
+
+/*
+This software is available as a choice of the following licenses. Choose
+whichever you prefer.
+
+===============================================================================
+ALTERNATIVE 1 - Public Domain (www.unlicense.org)
+===============================================================================
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
+
+===============================================================================
+ALTERNATIVE 2 - MIT No Attribution
+===============================================================================
+Copyright 2023 David Reid
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
diff --git a/shared/external/jar_mod.h b/shared/external/jar_mod.h
new file mode 100644
index 0000000..eacd3b7
--- /dev/null
+++ b/shared/external/jar_mod.h
@@ -0,0 +1,1596 @@
+// jar_mod.h - v0.01 - public domain C0 - Joshua Reisenauer
+//
+// HISTORY:
+//
+//   v0.01  2016-03-12  Setup
+//
+//
+// USAGE:
+//
+// In ONE source file, put:
+//
+//    #define JAR_MOD_IMPLEMENTATION
+//    #include "jar_mod.h"
+//
+// Other source files should just include jar_mod.h
+//
+// SAMPLE CODE:
+// jar_mod_context_t modctx;
+// short samplebuff[4096];
+// bool bufferFull = false;
+// int intro_load(void)
+// {
+//     jar_mod_init(&modctx);
+//     jar_mod_load_file(&modctx, "file.mod");
+//     return 1;
+// }
+// int intro_unload(void)
+// {
+//     jar_mod_unload(&modctx);
+//     return 1;
+// }
+// int intro_tick(long counter)
+// {
+//     if(!bufferFull)
+//     {
+//         jar_mod_fillbuffer(&modctx, samplebuff, 4096, 0);
+//         bufferFull=true;
+//     }
+//     if(IsKeyDown(KEY_ENTER))
+//         return 1;
+//     return 0;
+// }
+//
+//
+// LISCENSE:
+//
+// Written by: Jean-François DEL NERO (http://hxc2001.com/) <Email : jeanfrancoisdelnero <> free.fr>
+// Adapted to jar_mod by: Joshua Adam Reisenauer <kd7tck@gmail.com>
+// This program is free software. It comes without any warranty, to the
+// extent permitted by applicable law. You can redistribute it and/or
+// modify it under the terms of the Do What The Fuck You Want To Public
+// License, Version 2, as published by Sam Hocevar. See
+// http://sam.zoy.org/wtfpl/COPYING for more details.
+///////////////////////////////////////////////////////////////////////////////////
+// HxCMOD Core API:
+// -------------------------------------------
+// int  jar_mod_init(jar_mod_context_t * modctx)
+//
+// - Initialize the jar_mod_context_t buffer. Must be called before doing anything else.
+//   Return 1 if success. 0 in case of error.
+// -------------------------------------------
+// mulong jar_mod_load_file(jar_mod_context_t * modctx, const char* filename)
+//
+// - "Load" a MOD from file, context must already be initialized.
+//   Return size of file in bytes.
+// -------------------------------------------
+// void jar_mod_fillbuffer( jar_mod_context_t * modctx, short * outbuffer, unsigned long nbsample, jar_mod_tracker_buffer_state * trkbuf )
+//
+// - Generate and return the next samples chunk to outbuffer.
+//   nbsample specify the number of stereo 16bits samples you want.
+//   The output format is by default signed 48000Hz 16-bit Stereo PCM samples, otherwise it is changed with jar_mod_setcfg().
+//   The output buffer size in bytes must be equal to ( nbsample * 2 * channels ).
+//   The optional trkbuf parameter can be used to get detailed status of the player. Put NULL/0 is unused.
+// -------------------------------------------
+// void jar_mod_unload( jar_mod_context_t * modctx )
+// - "Unload" / clear the player status.
+// -------------------------------------------
+///////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef INCLUDE_JAR_MOD_H
+#define INCLUDE_JAR_MOD_H
+
+// Allow custom memory allocators
+#ifndef JARMOD_MALLOC
+    #define JARMOD_MALLOC(sz)    malloc(sz)
+#endif
+#ifndef JARMOD_FREE
+    #define JARMOD_FREE(p)       free(p)
+#endif
+
+
+// Basic type
+typedef unsigned char muchar;
+typedef unsigned short muint;
+typedef short mint;
+typedef unsigned long mulong;
+
+#define NUMMAXCHANNELS 32
+#define MAXNOTES 12*12
+#define DEFAULT_SAMPLE_RATE 48000
+//
+// MOD file structures
+//
+
+#pragma pack(1)
+
+typedef struct {
+    muchar  name[22];
+    muint   length;
+    muchar  finetune;
+    muchar  volume;
+    muint   reppnt;
+    muint   replen;
+} sample;
+
+typedef struct {
+    muchar  sampperiod;
+    muchar  period;
+    muchar  sampeffect;
+    muchar  effect;
+} note;
+
+typedef struct {
+    muchar  title[20];
+    sample  samples[31];
+    muchar  length; // length of tablepos
+    muchar  protracker;
+    muchar  patterntable[128];
+    muchar  signature[4];
+    muchar  speed;
+} module;
+
+#pragma pack()
+
+//
+// HxCMod Internal structures
+//
+typedef struct {
+    char*   sampdata;
+    muint   sampnum;
+    muint   length;
+    muint   reppnt;
+    muint   replen;
+    mulong  samppos;
+    muint   period;
+    muchar  volume;
+    mulong  ticks;
+    muchar  effect;
+    muchar  parameffect;
+    muint   effect_code;
+    mint    decalperiod;
+    mint    portaspeed;
+    mint    portaperiod;
+    mint    vibraperiod;
+    mint    Arpperiods[3];
+    muchar  ArpIndex;
+    mint    oldk;
+    muchar  volumeslide;
+    muchar  vibraparam;
+    muchar  vibrapointeur;
+    muchar  finetune;
+    muchar  cut_param;
+    muint   patternloopcnt;
+    muint   patternloopstartpoint;
+} channel;
+
+typedef struct {
+    module  song;
+    char*   sampledata[31];
+    note*   patterndata[128];
+
+    mulong  playrate;
+    muint   tablepos;
+    muint   patternpos;
+    muint   patterndelay;
+    muint   jump_loop_effect;
+    muchar  bpm;
+    mulong  patternticks;
+    mulong  patterntickse;
+    mulong  patternticksaim;
+    mulong  sampleticksconst;
+    mulong  samplenb;
+    channel channels[NUMMAXCHANNELS];
+    muint   number_of_channels;
+    muint   fullperiod[MAXNOTES * 8];
+    muint   mod_loaded;
+    mint    last_r_sample;
+    mint    last_l_sample;
+    mint    stereo;
+    mint    stereo_separation;
+    mint    bits;
+    mint    filter;
+    
+    muchar *modfile; // the raw mod file
+    mulong  modfilesize;
+    muint   loopcount;
+} jar_mod_context_t;
+
+//
+// Player states structures
+//
+typedef struct track_state_
+{
+    unsigned char instrument_number;
+    unsigned short cur_period;
+    unsigned char  cur_volume;
+    unsigned short cur_effect;
+    unsigned short cur_parameffect;
+}track_state;
+
+typedef struct tracker_state_
+{
+    int number_of_tracks;
+    int bpm;
+    int speed;
+    int cur_pattern;
+    int cur_pattern_pos;
+    int cur_pattern_table_pos;
+    unsigned int buf_index;
+    track_state tracks[32];
+}tracker_state;
+
+typedef struct tracker_state_instrument_
+{
+    char name[22];
+    int  active;
+}tracker_state_instrument;
+
+typedef struct jar_mod_tracker_buffer_state_
+{
+    int  nb_max_of_state;
+    int  nb_of_state;
+    int  cur_rd_index;
+    int  sample_step;
+    char name[64];
+    tracker_state_instrument instruments[31];
+    tracker_state * track_state_buf;
+}jar_mod_tracker_buffer_state;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool   jar_mod_init(jar_mod_context_t * modctx);
+bool   jar_mod_setcfg(jar_mod_context_t * modctx, int samplerate, int bits, int stereo, int stereo_separation, int filter);
+void   jar_mod_fillbuffer(jar_mod_context_t * modctx, short * outbuffer, unsigned long nbsample, jar_mod_tracker_buffer_state * trkbuf);
+void   jar_mod_unload(jar_mod_context_t * modctx);
+mulong jar_mod_load_file(jar_mod_context_t * modctx, const char* filename);
+mulong jar_mod_current_samples(jar_mod_context_t * modctx);
+mulong jar_mod_max_samples(jar_mod_context_t * modctx);
+void   jar_mod_seek_start(jar_mod_context_t * ctx);
+
+#ifdef __cplusplus
+}
+#endif
+//--------------------------------------------------------------------
+
+
+
+//-------------------------------------------------------------------------------
+#ifdef JAR_MOD_IMPLEMENTATION
+
+#include <stdio.h>
+#include <stdlib.h>
+//#include <stdbool.h>
+
+// Effects list
+#define EFFECT_ARPEGGIO              0x0 // Supported
+#define EFFECT_PORTAMENTO_UP         0x1 // Supported
+#define EFFECT_PORTAMENTO_DOWN       0x2 // Supported
+#define EFFECT_TONE_PORTAMENTO       0x3 // Supported
+#define EFFECT_VIBRATO               0x4 // Supported
+#define EFFECT_VOLSLIDE_TONEPORTA    0x5 // Supported
+#define EFFECT_VOLSLIDE_VIBRATO      0x6 // Supported
+#define EFFECT_VOLSLIDE_TREMOLO      0x7 // - TO BE DONE -
+#define EFFECT_SET_PANNING           0x8 // - TO BE DONE -
+#define EFFECT_SET_OFFSET            0x9 // Supported
+#define EFFECT_VOLUME_SLIDE          0xA // Supported
+#define EFFECT_JUMP_POSITION         0xB // Supported
+#define EFFECT_SET_VOLUME            0xC // Supported
+#define EFFECT_PATTERN_BREAK         0xD // Supported
+
+#define EFFECT_EXTENDED              0xE
+#define EFFECT_E_FINE_PORTA_UP       0x1 // Supported
+#define EFFECT_E_FINE_PORTA_DOWN     0x2 // Supported
+#define EFFECT_E_GLISSANDO_CTRL      0x3 // - TO BE DONE -
+#define EFFECT_E_VIBRATO_WAVEFORM    0x4 // - TO BE DONE -
+#define EFFECT_E_SET_FINETUNE        0x5 // - TO BE DONE -
+#define EFFECT_E_PATTERN_LOOP        0x6 // Supported
+#define EFFECT_E_TREMOLO_WAVEFORM    0x7 // - TO BE DONE -
+#define EFFECT_E_SET_PANNING_2       0x8 // - TO BE DONE -
+#define EFFECT_E_RETRIGGER_NOTE      0x9 // - TO BE DONE -
+#define EFFECT_E_FINE_VOLSLIDE_UP    0xA // Supported
+#define EFFECT_E_FINE_VOLSLIDE_DOWN  0xB // Supported
+#define EFFECT_E_NOTE_CUT            0xC // Supported
+#define EFFECT_E_NOTE_DELAY          0xD // - TO BE DONE -
+#define EFFECT_E_PATTERN_DELAY       0xE // Supported
+#define EFFECT_E_INVERT_LOOP         0xF // - TO BE DONE -
+#define EFFECT_SET_SPEED             0xF0 // Supported
+#define EFFECT_SET_TEMPO             0xF2 // Supported
+
+#define PERIOD_TABLE_LENGTH  MAXNOTES
+#define FULL_PERIOD_TABLE_LENGTH  ( PERIOD_TABLE_LENGTH * 8 )
+
+static const short periodtable[]=
+{
+    27392, 25856, 24384, 23040, 21696, 20480, 19328, 18240, 17216, 16256, 15360, 14496,
+    13696, 12928, 12192, 11520, 10848, 10240,  9664,  9120,  8606,  8128,  7680,  7248,
+     6848,  6464,  6096,  5760,  5424,  5120,  4832,  4560,  4304,  4064,  3840,  3624,
+     3424,  3232,  3048,  2880,  2712,  2560,  2416,  2280,  2152,  2032,  1920,  1812,
+     1712,  1616,  1524,  1440,  1356,  1280,  1208,  1140,  1076,  1016,   960,   906,
+      856,   808,   762,   720,   678,   640,   604,   570,   538,   508,   480,   453,
+      428,   404,   381,   360,   339,   320,   302,   285,   269,   254,   240,   226,
+      214,   202,   190,   180,   170,   160,   151,   143,   135,   127,   120,   113,
+      107,   101,    95,    90,    85,    80,    75,    71,    67,    63,    60,    56,
+       53,    50,    47,    45,    42,    40,    37,    35,    33,    31,    30,    28,
+       27,    25,    24,    22,    21,    20,    19,    18,    17,    16,    15,    14,
+       13,    13,    12,    11,    11,    10,     9,     9,     8,     8,     7,     7
+};
+
+static const short sintable[]={
+      0,  24,  49,  74,  97, 120, 141,161,
+    180, 197, 212, 224, 235, 244, 250,253,
+    255, 253, 250, 244, 235, 224, 212,197,
+    180, 161, 141, 120,  97,  74,  49, 24
+};
+
+typedef struct modtype_
+{
+    unsigned char signature[5];
+    int numberofchannels;
+}modtype;
+
+modtype modlist[]=
+{
+    { "M!K!",4},
+    { "M.K.",4},
+    { "FLT4",4},
+    { "FLT8",8},
+    { "4CHN",4},
+    { "6CHN",6},
+    { "8CHN",8},
+    { "10CH",10},
+    { "12CH",12},
+    { "14CH",14},
+    { "16CH",16},
+    { "18CH",18},
+    { "20CH",20},
+    { "22CH",22},
+    { "24CH",24},
+    { "26CH",26},
+    { "28CH",28},
+    { "30CH",30},
+    { "32CH",32},
+    { "",0}
+};
+
+///////////////////////////////////////////////////////////////////////////////////
+
+static void memcopy( void * dest, void *source, unsigned long size )
+{
+    unsigned long i;
+    unsigned char * d,*s;
+
+    d=(unsigned char*)dest;
+    s=(unsigned char*)source;
+    for(i=0;i<size;i++)
+    {
+        d[i]=s[i];
+    }
+}
+
+static void memclear( void * dest, unsigned char value, unsigned long size )
+{
+    unsigned long i;
+    unsigned char * d;
+
+    d=(unsigned char*)dest;
+    for(i=0;i<size;i++)
+    {
+        d[i]=value;
+    }
+}
+
+static int memcompare( unsigned char * buf1, unsigned char * buf2, unsigned int size )
+{
+    unsigned int i;
+
+    i = 0;
+
+    while(i<size)
+    {
+        if(buf1[i] != buf2[i])
+        {
+            return 0;
+        }
+        i++;
+    }
+
+    return 1;
+}
+
+static int getnote( jar_mod_context_t * mod, unsigned short period, int finetune )
+{
+    int i;
+
+    for(i = 0; i < FULL_PERIOD_TABLE_LENGTH; i++)
+    {
+        if(period >= mod->fullperiod[i])
+        {
+            return i;
+        }
+    }
+
+    return MAXNOTES;
+}
+
+static void worknote( note * nptr, channel * cptr, char t, jar_mod_context_t * mod )
+{
+    muint sample, period, effect, operiod;
+    muint curnote, arpnote;
+
+    sample = (nptr->sampperiod & 0xF0) | (nptr->sampeffect >> 4);
+    period = ((nptr->sampperiod & 0xF) << 8) | nptr->period;
+    effect = ((nptr->sampeffect & 0xF) << 8) | nptr->effect;
+
+    operiod = cptr->period;
+
+    if ( period || sample )
+    {
+        if( sample && sample < 32 )
+        {
+            cptr->sampnum = sample - 1;
+        }
+
+        if( period || sample )
+        {
+            cptr->sampdata = (char *) mod->sampledata[cptr->sampnum];
+            cptr->length = mod->song.samples[cptr->sampnum].length;
+            cptr->reppnt = mod->song.samples[cptr->sampnum].reppnt;
+            cptr->replen = mod->song.samples[cptr->sampnum].replen;
+
+            cptr->finetune = (mod->song.samples[cptr->sampnum].finetune)&0xF;
+
+            if(effect>>8!=4 && effect>>8!=6)
+            {
+                cptr->vibraperiod=0;
+                cptr->vibrapointeur=0;
+            }
+        }
+
+        if( (sample != 0) && ( (effect>>8) != EFFECT_VOLSLIDE_TONEPORTA ) )
+        {
+            cptr->volume = mod->song.samples[cptr->sampnum].volume;
+            cptr->volumeslide = 0;
+        }
+
+        if( ( (effect>>8) != EFFECT_TONE_PORTAMENTO && (effect>>8)!=EFFECT_VOLSLIDE_TONEPORTA) )
+        {
+            if (period!=0)
+                cptr->samppos = 0;
+        }
+
+        cptr->decalperiod = 0;
+        if( period )
+        {
+            if(cptr->finetune)
+            {
+                if( cptr->finetune <= 7 )
+                {
+                    period = mod->fullperiod[getnote(mod,period,0) + cptr->finetune];
+                }
+                else
+                {
+                    period = mod->fullperiod[getnote(mod,period,0) - (16 - (cptr->finetune)) ];
+                }
+            }
+
+            cptr->period = period;
+        }
+
+    }
+
+    cptr->effect = 0;
+    cptr->parameffect = 0;
+    cptr->effect_code = effect;
+
+    switch (effect >> 8)
+    {
+        case EFFECT_ARPEGGIO:
+            /*
+            [0]: Arpeggio
+            Where [0][x][y] means "play note, note+x semitones, note+y
+            semitones, then return to original note". The fluctuations are
+            carried out evenly spaced in one pattern division. They are usually
+            used to simulate chords, but this doesn't work too well. They are
+            also used to produce heavy vibrato. A major chord is when x=4, y=7.
+            A minor chord is when x=3, y=7.
+            */
+
+            if(effect&0xff)
+            {
+                cptr->effect = EFFECT_ARPEGGIO;
+                cptr->parameffect = effect&0xff;
+
+                cptr->ArpIndex = 0;
+
+                curnote = getnote(mod,cptr->period,cptr->finetune);
+
+                cptr->Arpperiods[0] = cptr->period;
+
+                arpnote = curnote + (((cptr->parameffect>>4)&0xF)*8);
+                if( arpnote >= FULL_PERIOD_TABLE_LENGTH )
+                    arpnote = FULL_PERIOD_TABLE_LENGTH - 1;
+
+                cptr->Arpperiods[1] = mod->fullperiod[arpnote];
+
+                arpnote = curnote + (((cptr->parameffect)&0xF)*8);
+                if( arpnote >= FULL_PERIOD_TABLE_LENGTH )
+                    arpnote = FULL_PERIOD_TABLE_LENGTH - 1;
+
+                cptr->Arpperiods[2] = mod->fullperiod[arpnote];
+            }
+        break;
+
+        case EFFECT_PORTAMENTO_UP:
+            /*
+            [1]: Slide up
+            Where [1][x][y] means "smoothly decrease the period of current
+            sample by x*16+y after each tick in the division". The
+            ticks/division are set with the 'set speed' effect (see below). If
+            the period of the note being played is z, then the final period
+            will be z - (x*16 + y)*(ticks - 1). As the slide rate depends on
+            the speed, changing the speed will change the slide. You cannot
+            slide beyond the note B3 (period 113).
+            */
+
+            cptr->effect = EFFECT_PORTAMENTO_UP;
+            cptr->parameffect = effect&0xff;
+        break;
+
+        case EFFECT_PORTAMENTO_DOWN:
+            /*
+            [2]: Slide down
+            Where [2][x][y] means "smoothly increase the period of current
+            sample by x*16+y after each tick in the division". Similar to [1],
+            but lowers the pitch. You cannot slide beyond the note C1 (period
+            856).
+            */
+
+            cptr->effect = EFFECT_PORTAMENTO_DOWN;
+            cptr->parameffect = effect&0xff;
+        break;
+
+        case EFFECT_TONE_PORTAMENTO:
+            /*
+            [3]: Slide to note
+            Where [3][x][y] means "smoothly change the period of current sample
+            by x*16+y after each tick in the division, never sliding beyond
+            current period". The period-length in this channel's division is a
+            parameter to this effect, and hence is not played. Sliding to a
+            note is similar to effects [1] and [2], but the slide will not go
+            beyond the given period, and the direction is implied by that
+            period. If x and y are both 0, then the old slide will continue.
+            */
+
+            cptr->effect = EFFECT_TONE_PORTAMENTO;
+            if( (effect&0xff) != 0 )
+            {
+                cptr->portaspeed = (short)(effect&0xff);
+            }
+
+            if(period!=0)
+            {
+                cptr->portaperiod = period;
+                cptr->period = operiod;
+            }
+        break;
+
+        case EFFECT_VIBRATO:
+            /*
+            [4]: Vibrato
+            Where [4][x][y] means "oscillate the sample pitch using a
+            particular waveform with amplitude y/16 semitones, such that (x *
+            ticks)/64 cycles occur in the division". The waveform is set using
+            effect [14][4]. By placing vibrato effects on consecutive
+            divisions, the vibrato effect can be maintained. If either x or y
+            are 0, then the old vibrato values will be used.
+            */
+
+            cptr->effect = EFFECT_VIBRATO;
+            if( ( effect & 0x0F ) != 0 ) // Depth continue or change ?
+                cptr->vibraparam = (cptr->vibraparam & 0xF0) | ( effect & 0x0F );
+            if( ( effect & 0xF0 ) != 0 ) // Speed continue or change ?
+                cptr->vibraparam = (cptr->vibraparam & 0x0F) | ( effect & 0xF0 );
+
+        break;
+
+        case EFFECT_VOLSLIDE_TONEPORTA:
+            /*
+            [5]: Continue 'Slide to note', but also do Volume slide
+            Where [5][x][y] means "either slide the volume up x*(ticks - 1) or
+            slide the volume down y*(ticks - 1), at the same time as continuing
+            the last 'Slide to note'". It is illegal for both x and y to be
+            non-zero. You cannot slide outside the volume range 0..64. The
+            period-length in this channel's division is a parameter to this
+            effect, and hence is not played.
+            */
+
+            if( period != 0 )
+            {
+                cptr->portaperiod = period;
+                cptr->period = operiod;
+            }
+
+            cptr->effect = EFFECT_VOLSLIDE_TONEPORTA;
+            if( ( effect & 0xFF ) != 0 )
+                cptr->volumeslide = ( effect & 0xFF );
+
+        break;
+
+        case EFFECT_VOLSLIDE_VIBRATO:
+            /*
+            [6]: Continue 'Vibrato', but also do Volume slide
+            Where [6][x][y] means "either slide the volume up x*(ticks - 1) or
+            slide the volume down y*(ticks - 1), at the same time as continuing
+            the last 'Vibrato'". It is illegal for both x and y to be non-zero.
+            You cannot slide outside the volume range 0..64.
+            */
+
+            cptr->effect = EFFECT_VOLSLIDE_VIBRATO;
+            if( (effect & 0xFF) != 0 )
+                cptr->volumeslide = (effect & 0xFF);
+        break;
+
+        case EFFECT_SET_OFFSET:
+            /*
+            [9]: Set sample offset
+            Where [9][x][y] means "play the sample from offset x*4096 + y*256".
+            The offset is measured in words. If no sample is given, yet one is
+            still playing on this channel, it should be retriggered to the new
+            offset using the current volume.
+            */
+
+            cptr->samppos = ((effect>>4) * 4096) + ((effect&0xF)*256);
+
+        break;
+
+        case EFFECT_VOLUME_SLIDE:
+            /*
+            [10]: Volume slide
+            Where [10][x][y] means "either slide the volume up x*(ticks - 1) or
+            slide the volume down y*(ticks - 1)". If both x and y are non-zero,
+            then the y value is ignored (assumed to be 0). You cannot slide
+            outside the volume range 0..64.
+            */
+
+            cptr->effect = EFFECT_VOLUME_SLIDE;
+            cptr->volumeslide = (effect & 0xFF);
+        break;
+
+        case EFFECT_JUMP_POSITION:
+            /*
+            [11]: Position Jump
+            Where [11][x][y] means "stop the pattern after this division, and
+            continue the song at song-position x*16+y". This shifts the
+            'pattern-cursor' in the pattern table (see above). Legal values for
+            x*16+y are from 0 to 127.
+            */
+
+            mod->tablepos = (effect & 0xFF);
+            if(mod->tablepos >= mod->song.length)
+            {
+                mod->tablepos = 0;
+            }
+            mod->patternpos = 0;
+            mod->jump_loop_effect = 1;
+
+        break;
+
+        case EFFECT_SET_VOLUME:
+            /*
+            [12]: Set volume
+            Where [12][x][y] means "set current sample's volume to x*16+y".
+            Legal volumes are 0..64.
+            */
+
+            cptr->volume = (effect & 0xFF);
+        break;
+
+        case EFFECT_PATTERN_BREAK:
+            /*
+            [13]: Pattern Break
+            Where [13][x][y] means "stop the pattern after this division, and
+            continue the song at the next pattern at division x*10+y" (the 10
+            is not a typo). Legal divisions are from 0 to 63 (note Protracker
+            exception above).
+            */
+
+            mod->patternpos = ( ((effect>>4)&0xF)*10 + (effect&0xF) ) * mod->number_of_channels;
+            mod->jump_loop_effect = 1;
+            mod->tablepos++;
+            if(mod->tablepos >= mod->song.length)
+            {
+                mod->tablepos = 0;
+            }
+
+        break;
+
+        case EFFECT_EXTENDED:
+            switch( (effect>>4) & 0xF )
+            {
+                case EFFECT_E_FINE_PORTA_UP:
+                    /*
+                    [14][1]: Fineslide up
+                    Where [14][1][x] means "decrement the period of the current sample
+                    by x". The incrementing takes place at the beginning of the
+                    division, and hence there is no actual sliding. You cannot slide
+                    beyond the note B3 (period 113).
+                    */
+
+                    cptr->period -= (effect & 0xF);
+                    if( cptr->period < 113 )
+                        cptr->period = 113;
+                break;
+
+                case EFFECT_E_FINE_PORTA_DOWN:
+                    /*
+                    [14][2]: Fineslide down
+                    Where [14][2][x] means "increment the period of the current sample
+                    by x". Similar to [14][1] but shifts the pitch down. You cannot
+                    slide beyond the note C1 (period 856).
+                    */
+
+                    cptr->period += (effect & 0xF);
+                    if( cptr->period > 856 )
+                        cptr->period = 856;
+                break;
+
+                case EFFECT_E_FINE_VOLSLIDE_UP:
+                    /*
+                    [14][10]: Fine volume slide up
+                    Where [14][10][x] means "increment the volume of the current sample
+                    by x". The incrementing takes place at the beginning of the
+                    division, and hence there is no sliding. You cannot slide beyond
+                    volume 64.
+                    */
+
+                    cptr->volume += (effect & 0xF);
+                    if( cptr->volume>64 )
+                        cptr->volume = 64;
+                break;
+
+                case EFFECT_E_FINE_VOLSLIDE_DOWN:
+                    /*
+                    [14][11]: Fine volume slide down
+                    Where [14][11][x] means "decrement the volume of the current sample
+                    by x". Similar to [14][10] but lowers volume. You cannot slide
+                    beyond volume 0.
+                    */
+
+                    cptr->volume -= (effect & 0xF);
+                    if( cptr->volume > 200 )
+                        cptr->volume = 0;
+                break;
+
+                case EFFECT_E_PATTERN_LOOP:
+                    /*
+                    [14][6]: Loop pattern
+                    Where [14][6][x] means "set the start of a loop to this division if
+                    x is 0, otherwise after this division, jump back to the start of a
+                    loop and play it another x times before continuing". If the start
+                    of the loop was not set, it will default to the start of the
+                    current pattern. Hence 'loop pattern' cannot be performed across
+                    multiple patterns. Note that loops do not support nesting, and you
+                    may generate an infinite loop if you try to nest 'loop pattern's.
+                    */
+
+                    if( effect & 0xF )
+                    {
+                        if( cptr->patternloopcnt )
+                        {
+                            cptr->patternloopcnt--;
+                            if( cptr->patternloopcnt )
+                            {
+                                mod->patternpos = cptr->patternloopstartpoint;
+                                mod->jump_loop_effect = 1;
+                            }
+                            else
+                            {
+                                cptr->patternloopstartpoint = mod->patternpos ;
+                            }
+                        }
+                        else
+                        {
+                            cptr->patternloopcnt = (effect & 0xF);
+                            mod->patternpos = cptr->patternloopstartpoint;
+                            mod->jump_loop_effect = 1;
+                        }
+                    }
+                    else // Start point
+                    {
+                        cptr->patternloopstartpoint = mod->patternpos;
+                    }
+
+                break;
+
+                case EFFECT_E_PATTERN_DELAY:
+                    /*
+                    [14][14]: Delay pattern
+                    Where [14][14][x] means "after this division there will be a delay
+                    equivalent to the time taken to play x divisions after which the
+                    pattern will be resumed". The delay only relates to the
+                    interpreting of new divisions, and all effects and previous notes
+                    continue during delay.
+                    */
+
+                    mod->patterndelay = (effect & 0xF);
+                break;
+
+                case EFFECT_E_NOTE_CUT:
+                    /*
+                    [14][12]: Cut sample
+                    Where [14][12][x] means "after the current sample has been played
+                    for x ticks in this division, its volume will be set to 0". This
+                    implies that if x is 0, then you will not hear any of the sample.
+                    If you wish to insert "silence" in a pattern, it is better to use a
+                    "silence"-sample (see above) due to the lack of proper support for
+                    this effect.
+                    */
+                    cptr->effect = EFFECT_E_NOTE_CUT;
+                    cptr->cut_param = (effect & 0xF);
+                    if(!cptr->cut_param)
+                        cptr->volume = 0;
+                break;
+
+                default:
+
+                break;
+            }
+        break;
+
+        case 0xF:
+            /*
+            [15]: Set speed
+            Where [15][x][y] means "set speed to x*16+y". Though it is nowhere
+            near that simple. Let z = x*16+y. Depending on what values z takes,
+            different units of speed are set, there being two: ticks/division
+            and beats/minute (though this one is only a label and not strictly
+            true). If z=0, then what should technically happen is that the
+            module stops, but in practice it is treated as if z=1, because
+            there is already a method for stopping the module (running out of
+            patterns). If z<=32, then it means "set ticks/division to z"
+            otherwise it means "set beats/minute to z" (convention says that
+            this should read "If z<32.." but there are some composers out there
+            that defy conventions). Default values are 6 ticks/division, and
+            125 beats/minute (4 divisions = 1 beat). The beats/minute tag is
+            only meaningful for 6 ticks/division. To get a more accurate view
+            of how things work, use the following formula:
+                                     24 * beats/minute
+                  divisions/minute = -----------------
+                                      ticks/division
+            Hence divisions/minute range from 24.75 to 6120, eg. to get a value
+            of 2000 divisions/minute use 3 ticks/division and 250 beats/minute.
+            If multiple "set speed" effects are performed in a single division,
+            the ones on higher-numbered channels take precedence over the ones
+            on lower-numbered channels. This effect has a large number of
+            different implementations, but the one described here has the
+            widest usage.
+            */
+
+            if( (effect&0xFF) < 0x21 )
+            {
+                if( effect&0xFF )
+                {
+                    mod->song.speed = effect&0xFF;
+                    mod->patternticksaim = (long)mod->song.speed * ((mod->playrate * 5 ) / (((long)2 * (long)mod->bpm)));
+                }
+            }
+
+            if( (effect&0xFF) >= 0x21 )
+            {
+                ///  HZ = 2 * BPM / 5
+                mod->bpm = effect&0xFF;
+                mod->patternticksaim = (long)mod->song.speed * ((mod->playrate * 5 ) / (((long)2 * (long)mod->bpm)));
+            }
+
+        break;
+
+        default:
+        // Unsupported effect
+        break;
+
+    }
+
+}
+
+static void workeffect( note * nptr, channel * cptr )
+{
+    switch(cptr->effect)
+    {
+        case EFFECT_ARPEGGIO:
+
+            if( cptr->parameffect )
+            {
+                cptr->decalperiod = cptr->period - cptr->Arpperiods[cptr->ArpIndex];
+
+                cptr->ArpIndex++;
+                if( cptr->ArpIndex>2 )
+                    cptr->ArpIndex = 0;
+            }
+        break;
+
+        case EFFECT_PORTAMENTO_UP:
+
+            if(cptr->period)
+            {
+                cptr->period -= cptr->parameffect;
+
+                if( cptr->period < 113 || cptr->period > 20000 )
+                    cptr->period = 113;
+            }
+
+        break;
+
+        case EFFECT_PORTAMENTO_DOWN:
+
+            if(cptr->period)
+            {
+                cptr->period += cptr->parameffect;
+
+                if( cptr->period > 20000 )
+                    cptr->period = 20000;
+            }
+
+        break;
+
+        case EFFECT_VOLSLIDE_TONEPORTA:
+        case EFFECT_TONE_PORTAMENTO:
+
+            if( cptr->period && ( cptr->period != cptr->portaperiod ) && cptr->portaperiod )
+            {
+                if( cptr->period > cptr->portaperiod )
+                {
+                    if( cptr->period - cptr->portaperiod >= cptr->portaspeed )
+                    {
+                        cptr->period -= cptr->portaspeed;
+                    }
+                    else
+                    {
+                        cptr->period = cptr->portaperiod;
+                    }
+                }
+                else
+                {
+                    if( cptr->portaperiod - cptr->period >= cptr->portaspeed )
+                    {
+                        cptr->period += cptr->portaspeed;
+                    }
+                    else
+                    {
+                        cptr->period = cptr->portaperiod;
+                    }
+                }
+
+                if( cptr->period == cptr->portaperiod )
+                {
+                    // If the slide is over, don't let it to be retriggered.
+                    cptr->portaperiod = 0;
+                }
+            }
+
+            if( cptr->effect == EFFECT_VOLSLIDE_TONEPORTA )
+            {
+                if( cptr->volumeslide > 0x0F )
+                {
+                    cptr->volume = cptr->volume + (cptr->volumeslide>>4);
+
+                    if(cptr->volume>63)
+                        cptr->volume = 63;
+                }
+                else
+                {
+                    cptr->volume = cptr->volume - (cptr->volumeslide);
+
+                    if(cptr->volume>63)
+                        cptr->volume=0;
+                }
+            }
+        break;
+
+        case EFFECT_VOLSLIDE_VIBRATO:
+        case EFFECT_VIBRATO:
+
+            cptr->vibraperiod = ( (cptr->vibraparam&0xF) * sintable[cptr->vibrapointeur&0x1F] )>>7;
+
+            if( cptr->vibrapointeur > 31 )
+                cptr->vibraperiod = -cptr->vibraperiod;
+
+            cptr->vibrapointeur = (cptr->vibrapointeur+(((cptr->vibraparam>>4))&0xf)) & 0x3F;
+
+            if( cptr->effect == EFFECT_VOLSLIDE_VIBRATO )
+            {
+                if( cptr->volumeslide > 0xF )
+                {
+                    cptr->volume = cptr->volume+(cptr->volumeslide>>4);
+
+                    if( cptr->volume > 64 )
+                        cptr->volume = 64;
+                }
+                else
+                {
+                    cptr->volume = cptr->volume - cptr->volumeslide;
+
+                    if( cptr->volume > 64 )
+                        cptr->volume = 0;
+                }
+            }
+
+        break;
+
+        case EFFECT_VOLUME_SLIDE:
+
+            if( cptr->volumeslide > 0xF )
+            {
+                cptr->volume += (cptr->volumeslide>>4);
+
+                if( cptr->volume > 64 )
+                    cptr->volume = 64;
+            }
+            else
+            {
+                cptr->volume -= (cptr->volumeslide&0xf);
+
+                if( cptr->volume > 64 )
+                    cptr->volume = 0;
+            }
+        break;
+
+        case EFFECT_E_NOTE_CUT:
+            if(cptr->cut_param)
+                cptr->cut_param--;
+
+            if(!cptr->cut_param)
+                cptr->volume = 0;
+        break;
+
+        default:
+        break;
+
+    }
+
+}
+
+///////////////////////////////////////////////////////////////////////////////////
+bool jar_mod_init(jar_mod_context_t * modctx)
+{
+    muint i,j;
+
+    if( modctx )
+    {
+        memclear(modctx, 0, sizeof(jar_mod_context_t));
+        modctx->playrate = DEFAULT_SAMPLE_RATE;
+        modctx->stereo = 1;
+        modctx->stereo_separation = 1;
+        modctx->bits = 16;
+        modctx->filter = 1;
+
+        for(i=0; i < PERIOD_TABLE_LENGTH - 1; i++)
+        {
+            for(j=0; j < 8; j++)
+            {
+                modctx->fullperiod[(i*8) + j] = periodtable[i] - ((( periodtable[i] - periodtable[i+1] ) / 8) * j);
+            }
+        }
+
+        return 1;
+    }
+
+    return 0;
+}
+
+bool jar_mod_setcfg(jar_mod_context_t * modctx, int samplerate, int bits, int stereo, int stereo_separation, int filter)
+{
+    if( modctx )
+    {
+        modctx->playrate = samplerate;
+
+        if( stereo )
+            modctx->stereo = 1;
+        else
+            modctx->stereo = 0;
+            
+        if(stereo_separation < 4)
+        {
+            modctx->stereo_separation = stereo_separation;
+        }
+
+        if( bits == 8 || bits == 16 )
+            modctx->bits = bits;
+        else
+            modctx->bits = 16;
+
+        if( filter )
+            modctx->filter = 1;
+        else
+            modctx->filter = 0;
+
+        return 1;
+    }
+
+    return 0;
+}
+
+// make certain that mod_data stays in memory while playing
+static bool jar_mod_load( jar_mod_context_t * modctx, void * mod_data, int mod_data_size )
+{
+    muint i, max;
+    unsigned short t;
+    sample *sptr;
+    unsigned char * modmemory,* endmodmemory;
+
+    modmemory = (unsigned char *)mod_data;
+    endmodmemory = modmemory + mod_data_size;
+    
+    
+
+    if(modmemory)
+    {
+        if( modctx )
+        {
+            memcopy(&(modctx->song.title),modmemory,1084);
+
+            i = 0;
+            modctx->number_of_channels = 0;
+            while(modlist[i].numberofchannels)
+            {
+                if(memcompare(modctx->song.signature,modlist[i].signature,4))
+                {
+                    modctx->number_of_channels = modlist[i].numberofchannels;
+                }
+
+                i++;
+            }
+
+            if( !modctx->number_of_channels )
+            {
+                // 15 Samples modules support
+                // Shift the whole datas to make it look likes a standard 4 channels mod.
+                memcopy(&(modctx->song.signature), "M.K.", 4);
+                memcopy(&(modctx->song.length), &(modctx->song.samples[15]), 130);
+                memclear(&(modctx->song.samples[15]), 0, 480);
+                modmemory += 600;
+                modctx->number_of_channels = 4;
+            }
+            else
+            {
+                modmemory += 1084;
+            }
+
+            if( modmemory >= endmodmemory )
+                return 0; // End passed ? - Probably a bad file !
+
+            // Patterns loading
+            for (i = max = 0; i < 128; i++)
+            {
+                while (max <= modctx->song.patterntable[i])
+                {
+                    modctx->patterndata[max] = (note*)modmemory;
+                    modmemory += (256*modctx->number_of_channels);
+                    max++;
+
+                    if( modmemory >= endmodmemory )
+                        return 0; // End passed ? - Probably a bad file !
+                }
+            }
+
+            for (i = 0; i < 31; i++)
+                modctx->sampledata[i]=0;
+
+            // Samples loading
+            for (i = 0, sptr = modctx->song.samples; i <31; i++, sptr++)
+            {
+                t= (sptr->length &0xFF00)>>8 | (sptr->length &0xFF)<<8;
+                sptr->length = t*2;
+
+                t= (sptr->reppnt &0xFF00)>>8 | (sptr->reppnt &0xFF)<<8;
+                sptr->reppnt = t*2;
+
+                t= (sptr->replen &0xFF00)>>8 | (sptr->replen &0xFF)<<8;
+                sptr->replen = t*2;
+
+
+                if (sptr->length == 0) continue;
+
+                modctx->sampledata[i] = (char*)modmemory;
+                modmemory += sptr->length;
+
+                if (sptr->replen + sptr->reppnt > sptr->length)
+                    sptr->replen = sptr->length - sptr->reppnt;
+
+                if( modmemory > endmodmemory )
+                    return 0; // End passed ? - Probably a bad file !
+            }
+
+            // States init
+
+            modctx->tablepos = 0;
+            modctx->patternpos = 0;
+            modctx->song.speed = 6;
+            modctx->bpm = 125;
+            modctx->samplenb = 0;
+
+            modctx->patternticks = (((long)modctx->song.speed * modctx->playrate * 5)/ (2 * modctx->bpm)) + 1;
+            modctx->patternticksaim = ((long)modctx->song.speed * modctx->playrate * 5) / (2 * modctx->bpm);
+
+            modctx->sampleticksconst = 3546894UL / modctx->playrate; //8448*428/playrate;
+
+            for(i=0; i < modctx->number_of_channels; i++)
+            {
+                modctx->channels[i].volume = 0;
+                modctx->channels[i].period = 0;
+            }
+
+            modctx->mod_loaded = 1;
+
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+void jar_mod_fillbuffer( jar_mod_context_t * modctx, short * outbuffer, unsigned long nbsample, jar_mod_tracker_buffer_state * trkbuf )
+{
+    unsigned long i, j;
+    unsigned long k;
+    unsigned char c;
+    unsigned int state_remaining_steps;
+    int l,r;
+    int ll,lr;
+    int tl,tr;
+    short finalperiod;
+    note    *nptr;
+    channel *cptr;
+
+    if( modctx && outbuffer )
+    {
+        if(modctx->mod_loaded)
+        {
+            state_remaining_steps = 0;
+
+            if( trkbuf )
+            {
+                trkbuf->cur_rd_index = 0;
+
+                memcopy(trkbuf->name,modctx->song.title,sizeof(modctx->song.title));
+
+                for(i=0;i<31;i++)
+                {
+                    memcopy(trkbuf->instruments[i].name,modctx->song.samples[i].name,sizeof(trkbuf->instruments[i].name));
+                }
+            }
+
+            ll = modctx->last_l_sample;
+            lr = modctx->last_r_sample;
+
+            for (i = 0; i < nbsample; i++)
+            {
+                //---------------------------------------
+                if( modctx->patternticks++ > modctx->patternticksaim )
+                {
+                    if( !modctx->patterndelay )
+                    {
+                        nptr = modctx->patterndata[modctx->song.patterntable[modctx->tablepos]];
+                        nptr = nptr + modctx->patternpos;
+                        cptr = modctx->channels;
+
+                        modctx->patternticks = 0;
+                        modctx->patterntickse = 0;
+
+                        for(c=0;c<modctx->number_of_channels;c++)
+                        {
+                            worknote((note*)(nptr+c), (channel*)(cptr+c),(char)(c+1),modctx);
+                        }
+
+                        if( !modctx->jump_loop_effect )
+                            modctx->patternpos += modctx->number_of_channels;
+                        else
+                            modctx->jump_loop_effect = 0;
+
+                        if( modctx->patternpos == 64*modctx->number_of_channels )
+                        {
+                            modctx->tablepos++;
+                            modctx->patternpos = 0;
+                            if(modctx->tablepos >= modctx->song.length)
+                            {
+                                modctx->tablepos = 0;
+                                modctx->loopcount++; // count next loop
+                            }
+                        }
+                    }
+                    else
+                    {
+                        modctx->patterndelay--;
+                        modctx->patternticks = 0;
+                        modctx->patterntickse = 0;
+                    }
+
+                }
+
+                if( modctx->patterntickse++ > (modctx->patternticksaim/modctx->song.speed) )
+                {
+                    nptr = modctx->patterndata[modctx->song.patterntable[modctx->tablepos]];
+                    nptr = nptr + modctx->patternpos;
+                    cptr = modctx->channels;
+
+                    for(c=0;c<modctx->number_of_channels;c++)
+                    {
+                        workeffect(nptr+c, cptr+c);
+                    }
+
+                    modctx->patterntickse = 0;
+                }
+
+                //---------------------------------------
+
+                if( trkbuf && !state_remaining_steps )
+                {
+                    if( trkbuf->nb_of_state < trkbuf->nb_max_of_state )
+                    {
+                        memclear(&trkbuf->track_state_buf[trkbuf->nb_of_state], 0, sizeof(tracker_state));
+                    }
+                }
+
+                l=0;
+                r=0;
+
+                for(j =0, cptr = modctx->channels; j < modctx->number_of_channels ; j++, cptr++)
+                {
+                    if( cptr->period != 0 )
+                    {
+                        finalperiod = cptr->period - cptr->decalperiod - cptr->vibraperiod;
+                        if( finalperiod )
+                        {
+                            cptr->samppos += ( (modctx->sampleticksconst<<10) / finalperiod );
+                        }
+
+                        cptr->ticks++;
+
+                        if( cptr->replen<=2 )
+                        {
+                            if( (cptr->samppos>>10) >= (cptr->length) )
+                            {
+                                cptr->length = 0;
+                                cptr->reppnt = 0;
+
+                                if( cptr->length )
+                                    cptr->samppos = cptr->samppos % (((unsigned long)cptr->length)<<10);
+                                else
+                                    cptr->samppos = 0;
+                            }
+                        }
+                        else
+                        {
+                            if( (cptr->samppos>>10) >= (unsigned long)(cptr->replen+cptr->reppnt) )
+                            {
+                                cptr->samppos = ((unsigned long)(cptr->reppnt)<<10) + (cptr->samppos % ((unsigned long)(cptr->replen+cptr->reppnt)<<10));
+                            }
+                        }
+
+                        k = cptr->samppos >> 10;
+
+                        if( cptr->sampdata!=0 && ( ((j&3)==1) || ((j&3)==2) ) )
+                        {
+                            r += ( cptr->sampdata[k] *  cptr->volume );
+                        }
+
+                        if( cptr->sampdata!=0 && ( ((j&3)==0) || ((j&3)==3) ) )
+                        {
+                            l += ( cptr->sampdata[k] *  cptr->volume );
+                        }
+
+                        if( trkbuf && !state_remaining_steps )
+                        {
+                            if( trkbuf->nb_of_state < trkbuf->nb_max_of_state )
+                            {
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].number_of_tracks = modctx->number_of_channels;
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].buf_index = i;
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].cur_pattern = modctx->song.patterntable[modctx->tablepos];
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].cur_pattern_pos = modctx->patternpos / modctx->number_of_channels;
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].cur_pattern_table_pos = modctx->tablepos;
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].bpm = modctx->bpm;
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].speed = modctx->song.speed;
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].tracks[j].cur_effect = cptr->effect_code;
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].tracks[j].cur_parameffect = cptr->parameffect;
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].tracks[j].cur_period = finalperiod;
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].tracks[j].cur_volume = cptr->volume;
+                                trkbuf->track_state_buf[trkbuf->nb_of_state].tracks[j].instrument_number = (unsigned char)cptr->sampnum;
+                            }
+                        }
+                    }
+                }
+
+                if( trkbuf && !state_remaining_steps )
+                {
+                    state_remaining_steps = trkbuf->sample_step;
+
+                    if(trkbuf->nb_of_state < trkbuf->nb_max_of_state)
+                        trkbuf->nb_of_state++;
+                }
+                else
+                {
+                    state_remaining_steps--;
+                }
+
+                tl = (short)l;
+                tr = (short)r;
+
+                if ( modctx->filter )
+                {
+                    // Filter
+                    l = (l+ll)>>1;
+                    r = (r+lr)>>1;
+                }
+
+                if ( modctx->stereo_separation == 1 )
+                {
+                    // Left & Right Stereo panning
+                    l = (l+(r>>1));
+                    r = (r+(l>>1));
+                }
+
+                // Level limitation
+                if( l > 32767 ) l = 32767;
+                if( l < -32768 ) l = -32768;
+                if( r > 32767 ) r = 32767;
+                if( r < -32768 ) r = -32768;
+
+                // Store the final sample.
+                outbuffer[(i*2)]   = l;
+                outbuffer[(i*2)+1] = r;
+
+                ll = tl;
+                lr = tr;
+
+            }
+
+            modctx->last_l_sample = ll;
+            modctx->last_r_sample = lr;
+
+            modctx->samplenb = modctx->samplenb+nbsample;
+        }
+        else
+        {
+            for (i = 0; i < nbsample; i++)
+            {
+                // Mod not loaded. Return blank buffer.
+                outbuffer[(i*2)]   = 0;
+                outbuffer[(i*2)+1] = 0;
+            }
+
+            if(trkbuf)
+            {
+                trkbuf->nb_of_state = 0;
+                trkbuf->cur_rd_index = 0;
+                trkbuf->name[0] = 0;
+                memclear(trkbuf->track_state_buf, 0, sizeof(tracker_state) * trkbuf->nb_max_of_state);
+                memclear(trkbuf->instruments, 0, sizeof(trkbuf->instruments));
+            }
+        }
+    }
+}
+
+//resets internals for mod context
+static bool jar_mod_reset( jar_mod_context_t * modctx)
+{
+    if(modctx)
+    {
+        memclear(&modctx->song, 0, sizeof(modctx->song));
+        memclear(&modctx->sampledata, 0, sizeof(modctx->sampledata));
+        memclear(&modctx->patterndata, 0, sizeof(modctx->patterndata));
+        modctx->tablepos = 0;
+        modctx->patternpos = 0;
+        modctx->patterndelay  = 0;
+        modctx->jump_loop_effect = 0;
+        modctx->bpm = 0;
+        modctx->patternticks = 0;
+        modctx->patterntickse = 0;
+        modctx->patternticksaim = 0;
+        modctx->sampleticksconst = 0;
+        modctx->samplenb = 0;
+        memclear(modctx->channels, 0, sizeof(modctx->channels));
+        modctx->number_of_channels = 0;
+        modctx->mod_loaded = 0;
+        modctx->last_r_sample = 0;
+        modctx->last_l_sample = 0;
+        
+        return jar_mod_init(modctx);
+    }
+    return 0;
+}
+
+void jar_mod_unload( jar_mod_context_t * modctx)
+{
+    if(modctx)
+    {
+        if(modctx->modfile)
+        {
+            JARMOD_FREE(modctx->modfile);
+            modctx->modfile = 0;
+            modctx->modfilesize = 0;
+            modctx->loopcount = 0;
+        }
+        jar_mod_reset(modctx);
+    }
+}
+
+mulong jar_mod_load_file(jar_mod_context_t * modctx, const char* filename)
+{
+    mulong fsize = 0;
+    if(modctx->modfile)
+    {
+        JARMOD_FREE(modctx->modfile);
+        modctx->modfile = 0;
+    }
+    
+    FILE *f = fopen(filename, "rb");
+    if(f)
+    {
+        fseek(f,0,SEEK_END);
+        fsize = ftell(f);
+        fseek(f,0,SEEK_SET);
+        
+        if(fsize && fsize < 32*1024*1024)
+        {
+            modctx->modfile = JARMOD_MALLOC(fsize);
+            modctx->modfilesize = fsize;
+            memset(modctx->modfile, 0, fsize);
+            fread(modctx->modfile, fsize, 1, f);
+            fclose(f);
+            
+            if(!jar_mod_load(modctx, (void*)modctx->modfile, fsize)) fsize = 0;
+        } else fsize = 0;
+    }
+    return fsize;
+}
+
+mulong jar_mod_current_samples(jar_mod_context_t * modctx)
+{
+    if(modctx)
+        return modctx->samplenb;
+    
+    return 0;
+}
+
+// Works, however it is very slow, this data should be cached to ensure it is run only once per file
+mulong jar_mod_max_samples(jar_mod_context_t * ctx)
+{
+    mint buff[2];
+    mulong len;
+    mulong lastcount = ctx->loopcount;
+    
+    while(ctx->loopcount <= lastcount)
+        jar_mod_fillbuffer(ctx, buff, 1, 0);
+    
+    len = ctx->samplenb;
+    jar_mod_seek_start(ctx);
+    
+    return len;
+}
+
+// move seek_val to sample index, 0 -> jar_mod_max_samples is the range
+void jar_mod_seek_start(jar_mod_context_t * ctx)
+{
+    if(ctx && ctx->modfile)
+    {
+        muchar* ftmp = ctx->modfile;
+        mulong stmp = ctx->modfilesize;
+        muint lcnt = ctx->loopcount;
+        
+        if(jar_mod_reset(ctx)){
+            jar_mod_load(ctx, ftmp, stmp);
+            ctx->modfile = ftmp;
+            ctx->modfilesize = stmp;
+            ctx->loopcount = lcnt;
+        }
+    }
+}
+
+#endif // end of JAR_MOD_IMPLEMENTATION
+//-------------------------------------------------------------------------------
+
+
+#endif //end of header file
\ No newline at end of file
diff --git a/shared/external/jar_xm.h b/shared/external/jar_xm.h
new file mode 100644
index 0000000..4a1bfbf
--- /dev/null
+++ b/shared/external/jar_xm.h
@@ -0,0 +1,2471 @@
+// jar_xm.h
+//
+// ORIGINAL LICENSE - FOR LIBXM:
+//
+// Author: Romain "Artefact2" Dalmaso <artefact2@gmail.com>
+// Contributor: Dan Spencer <dan@atomicpotato.net>
+// Repackaged into jar_xm.h By: Joshua Adam Reisenauer <kd7tck@gmail.com>
+// This program is free software. It comes without any warranty, to the
+// extent permitted by applicable law. You can redistribute it and/or
+// modify it under the terms of the Do What The Fuck You Want To Public
+// License, Version 2, as published by Sam Hocevar. See
+// http://sam.zoy.org/wtfpl/COPYING for more details.
+//
+// HISTORY:
+//   v0.1.0 2016-02-22  jar_xm.h - development by Joshua Reisenauer, MAR 2016
+//   v0.2.1 2021-03-07  m4ntr0n1c: Fix clipping noise for "bad" xm's (they will always clip), avoid clip noise and just put a ceiling)
+//   v0.2.2 2021-03-09  m4ntr0n1c: Add complete debug solution (raylib.h must be included)
+//   v0.2.3 2021-03-11  m4ntr0n1c: Fix tempo, bpm and volume on song stop / start / restart / loop
+//   v0.2.4 2021-03-17  m4ntr0n1c: Sanitize code for readability
+//   v0.2.5 2021-03-22  m4ntr0n1c: Minor adjustments
+//   v0.2.6 2021-04-01  m4ntr0n1c: Minor fixes and optimisation
+//   v0.3.0 2021-04-03  m4ntr0n1c: Addition of Stereo sample support, Linear Interpolation and Ramping now addressable options in code
+//   v0.3.1 2021-04-04  m4ntr0n1c: Volume effects column adjustments, sample offset handling adjustments
+//
+// USAGE:
+//
+// In ONE source file, put:
+//
+//    #define JAR_XM_IMPLEMENTATION
+//    #include "jar_xm.h"
+//
+// Other source files should just include jar_xm.h
+//
+// SAMPLE CODE:
+//
+// jar_xm_context_t *musicptr;
+// float musicBuffer[48000 / 60];
+// int intro_load(void)
+// {
+//     jar_xm_create_context_from_file(&musicptr, 48000, "Song.XM");
+//     return 1;
+// }
+// int intro_unload(void)
+// {
+//     jar_xm_free_context(musicptr);
+//     return 1;
+// }
+// int intro_tick(long counter)
+// {
+//     jar_xm_generate_samples(musicptr, musicBuffer, (48000 / 60) / 2);
+//     if(IsKeyDown(KEY_ENTER))
+//         return 1;
+//     return 0;
+// }
+//
+#ifndef INCLUDE_JAR_XM_H
+#define INCLUDE_JAR_XM_H
+
+#include <stdint.h>
+
+#define JAR_XM_DEBUG 0
+#define JAR_XM_DEFENSIVE 1
+//#define JAR_XM_RAYLIB 0 // set to 0 to disable the RayLib visualizer extension
+
+// Allow custom memory allocators
+#ifndef JARXM_MALLOC
+    #define JARXM_MALLOC(sz)    malloc(sz)
+#endif
+#ifndef JARXM_FREE
+    #define JARXM_FREE(p)       free(p)
+#endif
+
+//-------------------------------------------------------------------------------
+struct jar_xm_context_s;
+typedef struct jar_xm_context_s jar_xm_context_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//** Create a XM context.
+// * @param moddata the contents of the module
+// * @param rate play rate in Hz, recommended value of 48000
+// * @returns 0 on success
+// * @returns 1 if module data is not sane
+// * @returns 2 if memory allocation failed
+// * @returns 3 unable to open input file
+// * @returns 4 fseek() failed
+// * @returns 5 fread() failed
+// * @returns 6 unkown error
+// * @deprecated This function is unsafe!
+// * @see jar_xm_create_context_safe()
+int jar_xm_create_context_from_file(jar_xm_context_t** ctx, uint32_t rate, const char* filename);
+
+//** Create a XM context.
+// * @param moddata the contents of the module
+// * @param rate play rate in Hz, recommended value of 48000
+// * @returns 0 on success
+// * @returns 1 if module data is not sane
+// * @returns 2 if memory allocation failed
+// * @deprecated This function is unsafe!
+// * @see jar_xm_create_context_safe()
+int jar_xm_create_context(jar_xm_context_t** ctx, const char* moddata, uint32_t rate);
+
+//** Create a XM context.
+// * @param moddata the contents of the module
+// * @param moddata_length the length of the contents of the module, in bytes
+// * @param rate play rate in Hz, recommended value of 48000
+// * @returns 0 on success
+// * @returns 1 if module data is not sane
+// * @returns 2 if memory allocation failed
+int jar_xm_create_context_safe(jar_xm_context_t** ctx, const char* moddata, size_t moddata_length, uint32_t rate);
+
+//** Free a XM context created by jar_xm_create_context(). */
+void jar_xm_free_context(jar_xm_context_t* ctx);
+
+//** Play the module and put the sound samples in an output buffer.
+// * @param output buffer of 2*numsamples elements (A left and right value for each sample)
+// * @param numsamples number of samples to generate
+void jar_xm_generate_samples(jar_xm_context_t* ctx, float* output, size_t numsamples);
+
+//** Play the module, resample from float to 16 bit, and put the sound samples in an output buffer.
+// * @param output buffer of 2*numsamples elements (A left and right value for each sample)
+// * @param numsamples number of samples to generate
+void jar_xm_generate_samples_16bit(jar_xm_context_t* ctx, short* output, size_t numsamples) {
+    float* musicBuffer = JARXM_MALLOC((2*numsamples)*sizeof(float));
+    jar_xm_generate_samples(ctx, musicBuffer, numsamples);
+
+    if(output){
+        for(int x=0;x<2*numsamples;x++) output[x] = (musicBuffer[x] * 32767.0f); // scale sample to signed small int
+    }
+    JARXM_FREE(musicBuffer);
+}
+
+//** Play the module, resample from float to 8 bit, and put the sound samples in an output buffer.
+// * @param output buffer of 2*numsamples elements (A left and right value for each sample)
+// * @param numsamples number of samples to generate
+void jar_xm_generate_samples_8bit(jar_xm_context_t* ctx, char* output, size_t numsamples) {
+    float* musicBuffer = JARXM_MALLOC((2*numsamples)*sizeof(float));
+    jar_xm_generate_samples(ctx, musicBuffer, numsamples);
+
+    if(output){
+        for(int x=0;x<2*numsamples;x++) output[x] = (musicBuffer[x] * 127.0f); // scale sample to signed 8 bit
+    }
+    JARXM_FREE(musicBuffer);
+}
+
+//** Set the maximum number of times a module can loop. After the specified number of loops, calls to jar_xm_generate_samples will only generate silence. You can control the current number of loops with jar_xm_get_loop_count().
+// * @param loopcnt maximum number of loops. Use 0 to loop indefinitely.
+void jar_xm_set_max_loop_count(jar_xm_context_t* ctx, uint8_t loopcnt);
+
+//** Get the loop count of the currently playing module. This value is 0 when the module is still playing, 1 when the module has looped once, etc.
+uint8_t jar_xm_get_loop_count(jar_xm_context_t* ctx);
+
+//** Mute or unmute a channel.
+// * @note Channel numbers go from 1 to jar_xm_get_number_of_channels(...).
+// * @return whether the channel was muted.
+bool jar_xm_mute_channel(jar_xm_context_t* ctx, uint16_t, bool);
+
+//** Mute or unmute an instrument.
+// * @note Instrument numbers go from 1 to jar_xm_get_number_of_instruments(...).
+// * @return whether the instrument was muted.
+bool jar_xm_mute_instrument(jar_xm_context_t* ctx, uint16_t, bool);
+
+//** Get the module name as a NUL-terminated string.
+const char* jar_xm_get_module_name(jar_xm_context_t* ctx);
+
+//** Get the tracker name as a NUL-terminated string.
+const char* jar_xm_get_tracker_name(jar_xm_context_t* ctx);
+
+//** Get the number of channels.
+uint16_t jar_xm_get_number_of_channels(jar_xm_context_t* ctx);
+
+//** Get the module length (in patterns).
+uint16_t jar_xm_get_module_length(jar_xm_context_t*);
+
+//** Get the number of patterns.
+uint16_t jar_xm_get_number_of_patterns(jar_xm_context_t* ctx);
+
+//** Get the number of rows of a pattern.
+// * @note Pattern numbers go from 0 to jar_xm_get_number_of_patterns(...)-1.
+uint16_t jar_xm_get_number_of_rows(jar_xm_context_t* ctx, uint16_t);
+
+//** Get the number of instruments.
+uint16_t jar_xm_get_number_of_instruments(jar_xm_context_t* ctx);
+
+//** Get the number of samples of an instrument.
+// * @note Instrument numbers go from 1 to jar_xm_get_number_of_instruments(...).
+uint16_t jar_xm_get_number_of_samples(jar_xm_context_t* ctx, uint16_t);
+
+//** Get the current module speed.
+// * @param bpm will receive the current BPM
+// * @param tempo will receive the current tempo (ticks per line)
+void jar_xm_get_playing_speed(jar_xm_context_t* ctx, uint16_t* bpm, uint16_t* tempo);
+
+//** Get the current position in the module being played.
+// * @param pattern_index if not NULL, will receive the current pattern index in the POT (pattern order table)
+// * @param pattern if not NULL, will receive the current pattern number
+// * @param row if not NULL, will receive the current row
+// * @param samples if not NULL, will receive the total number of
+// * generated samples (divide by sample rate to get seconds of generated audio)
+void jar_xm_get_position(jar_xm_context_t* ctx, uint8_t* pattern_index, uint8_t* pattern, uint8_t* row, uint64_t* samples);
+
+//** Get the latest time (in number of generated samples) when a particular instrument was triggered in any channel.
+// * @note Instrument numbers go from 1 to jar_xm_get_number_of_instruments(...).
+uint64_t jar_xm_get_latest_trigger_of_instrument(jar_xm_context_t* ctx, uint16_t);
+
+//** Get the latest time (in number of generated samples) when a particular sample was triggered in any channel.
+// * @note Instrument numbers go from 1 to jar_xm_get_number_of_instruments(...).
+// * @note Sample numbers go from 0 to jar_xm_get_nubmer_of_samples(...,instr)-1.
+uint64_t jar_xm_get_latest_trigger_of_sample(jar_xm_context_t* ctx, uint16_t instr, uint16_t sample);
+
+//** Get the latest time (in number of generated samples) when any instrument was triggered in a given channel.
+// * @note Channel numbers go from 1 to jar_xm_get_number_of_channels(...).
+uint64_t jar_xm_get_latest_trigger_of_channel(jar_xm_context_t* ctx, uint16_t);
+
+//** Get the number of remaining samples. Divide by 2 to get the number of individual LR data samples.
+// * @note This is the remaining number of samples before the loop starts module again, or halts if on last pass.
+// * @note This function is very slow and should only be run once, if at all.
+uint64_t jar_xm_get_remaining_samples(jar_xm_context_t* ctx);
+
+#ifdef __cplusplus
+}
+#endif
+//-------------------------------------------------------------------------------
+
+#ifdef JAR_XM_IMPLEMENTATION
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
+#if JAR_XM_DEBUG            //JAR_XM_DEBUG defined as 0
+#include <stdio.h>
+#define DEBUG(fmt, ...) do {                                        \
+        fprintf(stderr, "%s(): " fmt "\n", __func__, __VA_ARGS__);    \
+        fflush(stderr);                                                \
+    } while(0)
+#else
+#define DEBUG(...)
+#endif
+
+#if jar_xm_BIG_ENDIAN
+#error "Big endian platforms are not yet supported, sorry"
+/* Make sure the compiler stops, even if #error is ignored */
+extern int __fail[-1];
+#endif
+
+/* ----- XM constants ----- */
+#define SAMPLE_NAME_LENGTH 22
+#define INSTRUMENT_NAME_LENGTH 22
+#define MODULE_NAME_LENGTH 20
+#define TRACKER_NAME_LENGTH 20
+#define PATTERN_ORDER_TABLE_LENGTH 256
+#define NUM_NOTES 96 // from 1 to 96, where 1 = C-0
+#define NUM_ENVELOPE_POINTS 12 // to be verified if 12 is the max
+#define MAX_NUM_ROWS 256
+
+#define jar_xm_SAMPLE_RAMPING_POINTS 8
+
+/* ----- Data types ----- */
+
+enum jar_xm_waveform_type_e {
+    jar_xm_SINE_WAVEFORM = 0,
+    jar_xm_RAMP_DOWN_WAVEFORM = 1,
+    jar_xm_SQUARE_WAVEFORM = 2,
+    jar_xm_RANDOM_WAVEFORM = 3,
+    jar_xm_RAMP_UP_WAVEFORM = 4,
+};
+typedef enum jar_xm_waveform_type_e jar_xm_waveform_type_t;
+
+enum jar_xm_loop_type_e {
+    jar_xm_NO_LOOP,
+    jar_xm_FORWARD_LOOP,
+    jar_xm_PING_PONG_LOOP,
+};
+typedef enum jar_xm_loop_type_e jar_xm_loop_type_t;
+
+enum jar_xm_frequency_type_e {
+    jar_xm_LINEAR_FREQUENCIES,
+    jar_xm_AMIGA_FREQUENCIES,
+};
+typedef enum jar_xm_frequency_type_e jar_xm_frequency_type_t;
+
+struct jar_xm_envelope_point_s {
+    uint16_t frame;
+    uint16_t value;
+};
+typedef struct jar_xm_envelope_point_s jar_xm_envelope_point_t;
+
+struct jar_xm_envelope_s {
+    jar_xm_envelope_point_t points[NUM_ENVELOPE_POINTS];
+    uint8_t num_points;
+    uint8_t sustain_point;
+    uint8_t loop_start_point;
+    uint8_t loop_end_point;
+    bool enabled;
+    bool sustain_enabled;
+    bool loop_enabled;
+};
+typedef struct jar_xm_envelope_s jar_xm_envelope_t;
+
+struct jar_xm_sample_s {
+    char name[SAMPLE_NAME_LENGTH + 1];
+    int8_t bits; /* Either 8 or 16 */
+    int8_t stereo;
+    uint32_t length;
+    uint32_t loop_start;
+    uint32_t loop_length;
+    uint32_t loop_end;
+    float volume;
+    int8_t finetune;
+    jar_xm_loop_type_t loop_type;
+    float panning;
+    int8_t relative_note;
+    uint64_t latest_trigger;
+
+    float* data;
+ };
+ typedef struct jar_xm_sample_s jar_xm_sample_t;
+
+ struct jar_xm_instrument_s {
+     char name[INSTRUMENT_NAME_LENGTH + 1];
+     uint16_t num_samples;
+     uint8_t sample_of_notes[NUM_NOTES];
+     jar_xm_envelope_t volume_envelope;
+     jar_xm_envelope_t panning_envelope;
+     jar_xm_waveform_type_t vibrato_type;
+     uint8_t vibrato_sweep;
+     uint8_t vibrato_depth;
+     uint8_t vibrato_rate;
+     uint16_t volume_fadeout;
+     uint64_t latest_trigger;
+     bool muted;
+
+     jar_xm_sample_t* samples;
+ };
+ typedef struct jar_xm_instrument_s jar_xm_instrument_t;
+
+ struct jar_xm_pattern_slot_s {
+     uint8_t note; /* 1-96, 97 = Key Off note */
+     uint8_t instrument; /* 1-128 */
+     uint8_t volume_column;
+     uint8_t effect_type;
+     uint8_t effect_param;
+ };
+ typedef struct jar_xm_pattern_slot_s jar_xm_pattern_slot_t;
+
+ struct jar_xm_pattern_s {
+     uint16_t num_rows;
+     jar_xm_pattern_slot_t* slots; /* Array of size num_rows * num_channels */
+ };
+ typedef struct jar_xm_pattern_s jar_xm_pattern_t;
+
+ struct jar_xm_module_s {
+     char name[MODULE_NAME_LENGTH + 1];
+     char trackername[TRACKER_NAME_LENGTH + 1];
+     uint16_t length;
+     uint16_t restart_position;
+     uint16_t num_channels;
+     uint16_t num_patterns;
+     uint16_t num_instruments;
+     uint16_t linear_interpolation;
+     uint16_t ramping;
+     jar_xm_frequency_type_t frequency_type;
+     uint8_t pattern_table[PATTERN_ORDER_TABLE_LENGTH];
+
+     jar_xm_pattern_t* patterns;
+     jar_xm_instrument_t* instruments; /* Instrument 1 has index 0, instrument 2 has index 1, etc. */
+ };
+ typedef struct jar_xm_module_s jar_xm_module_t;
+
+ struct jar_xm_channel_context_s {
+     float note;
+     float orig_note; /* The original note before effect modifications, as read in the pattern. */
+     jar_xm_instrument_t* instrument; /* Could be NULL */
+     jar_xm_sample_t* sample; /* Could be NULL */
+     jar_xm_pattern_slot_t* current;
+
+     float sample_position;
+     float period;
+     float frequency;
+     float step;
+     bool ping; /* For ping-pong samples: true is -->, false is <-- */
+
+     float volume; /* Ideally between 0 (muted) and 1 (loudest) */
+     float panning; /* Between 0 (left) and 1 (right); 0.5 is centered */
+
+     uint16_t autovibrato_ticks;
+
+     bool sustained;
+     float fadeout_volume;
+     float volume_envelope_volume;
+     float panning_envelope_panning;
+     uint16_t volume_envelope_frame_count;
+     uint16_t panning_envelope_frame_count;
+
+     float autovibrato_note_offset;
+
+     bool arp_in_progress;
+     uint8_t arp_note_offset;
+     uint8_t volume_slide_param;
+     uint8_t fine_volume_slide_param;
+     uint8_t global_volume_slide_param;
+     uint8_t panning_slide_param;
+     uint8_t portamento_up_param;
+     uint8_t portamento_down_param;
+     uint8_t fine_portamento_up_param;
+     uint8_t fine_portamento_down_param;
+     uint8_t extra_fine_portamento_up_param;
+     uint8_t extra_fine_portamento_down_param;
+     uint8_t tone_portamento_param;
+     float tone_portamento_target_period;
+     uint8_t multi_retrig_param;
+     uint8_t note_delay_param;
+     uint8_t pattern_loop_origin; /* Where to restart a E6y loop */
+     uint8_t pattern_loop_count; /* How many loop passes have been done */
+     bool vibrato_in_progress;
+     jar_xm_waveform_type_t vibrato_waveform;
+     bool vibrato_waveform_retrigger; /* True if a new note retriggers the waveform */
+     uint8_t vibrato_param;
+     uint16_t vibrato_ticks; /* Position in the waveform */
+     float vibrato_note_offset;
+     jar_xm_waveform_type_t tremolo_waveform;
+     bool tremolo_waveform_retrigger;
+     uint8_t tremolo_param;
+     uint8_t tremolo_ticks;
+     float tremolo_volume;
+     uint8_t tremor_param;
+     bool tremor_on;
+
+     uint64_t latest_trigger;
+     bool muted;
+
+     //* These values are updated at the end of each tick, to save a couple of float operations on every generated sample.
+     float target_panning;
+     float target_volume;
+
+     unsigned long frame_count;
+     float end_of_previous_sample_left[jar_xm_SAMPLE_RAMPING_POINTS];
+     float end_of_previous_sample_right[jar_xm_SAMPLE_RAMPING_POINTS];
+     float curr_left;
+     float curr_right;
+
+     float actual_panning;
+     float actual_volume;
+ };
+ typedef struct jar_xm_channel_context_s jar_xm_channel_context_t;
+
+ struct jar_xm_context_s {
+     void* allocated_memory;
+     jar_xm_module_t module;
+     uint32_t rate;
+
+     uint16_t default_tempo; // Number of ticks per row
+     uint16_t default_bpm;
+     float default_global_volume;
+
+     uint16_t tempo; // Number of ticks per row
+     uint16_t bpm;
+     float global_volume;
+
+     float volume_ramp; /* How much is a channel final volume allowed to change per sample; this is used to avoid abrubt volume changes which manifest as "clicks" in the generated sound. */
+     float panning_ramp; /* Same for panning. */
+
+     uint8_t current_table_index;
+     uint8_t current_row;
+     uint16_t current_tick; /* Can go below 255, with high tempo and a pattern delay */
+     float remaining_samples_in_tick;
+     uint64_t generated_samples;
+
+     bool position_jump;
+     bool pattern_break;
+     uint8_t jump_dest;
+     uint8_t jump_row;
+
+     uint16_t extra_ticks; /* Extra ticks to be played before going to the next row - Used for EEy effect */
+
+     uint8_t* row_loop_count; /* Array of size MAX_NUM_ROWS * module_length */
+     uint8_t loop_count;
+     uint8_t max_loop_count;
+
+     jar_xm_channel_context_t* channels;
+};
+
+#if JAR_XM_DEFENSIVE
+
+//** Check the module data for errors/inconsistencies.
+// * @returns 0 if everything looks OK. Module should be safe to load.
+int jar_xm_check_sanity_preload(const char*, size_t);
+
+//** Check a loaded module for errors/inconsistencies.
+// * @returns 0 if everything looks OK.
+int jar_xm_check_sanity_postload(jar_xm_context_t*);
+
+#endif
+
+//** Get the number of bytes needed to store the module data in a dynamically allocated blank context.
+// * Things that are dynamically allocated:
+// * - sample data
+// * - sample structures in instruments
+// * - pattern data
+// * - row loop count arrays
+// * - pattern structures in module
+// * - instrument structures in module
+// * - channel contexts
+// * - context structure itself
+// * @returns 0 if everything looks OK.
+size_t jar_xm_get_memory_needed_for_context(const char*, size_t);
+
+//** Populate the context from module data.
+// * @returns pointer to the memory pool
+char* jar_xm_load_module(jar_xm_context_t*, const char*, size_t, char*);
+
+int jar_xm_create_context(jar_xm_context_t** ctxp, const char* moddata, uint32_t rate) {
+    return jar_xm_create_context_safe(ctxp, moddata, SIZE_MAX, rate);
+}
+
+#define ALIGN(x, b) (((x) + ((b) - 1)) & ~((b) - 1))
+#define ALIGN_PTR(x, b) (void*)(((uintptr_t)(x) + ((b) - 1)) & ~((b) - 1))
+int jar_xm_create_context_safe(jar_xm_context_t** ctxp, const char* moddata, size_t moddata_length, uint32_t rate) {
+#if JAR_XM_DEFENSIVE
+    int ret;
+#endif
+    size_t bytes_needed;
+    char* mempool;
+    jar_xm_context_t* ctx;
+
+#if JAR_XM_DEFENSIVE
+    if((ret = jar_xm_check_sanity_preload(moddata, moddata_length))) {
+        DEBUG("jar_xm_check_sanity_preload() returned %i, module is not safe to load", ret);
+        return 1;
+    }
+#endif
+
+    bytes_needed = jar_xm_get_memory_needed_for_context(moddata, moddata_length);
+    mempool = JARXM_MALLOC(bytes_needed);
+    if(mempool == NULL && bytes_needed > 0) { /* JARXM_MALLOC() failed, trouble ahead */
+        DEBUG("call to JARXM_MALLOC() failed, returned %p", (void*)mempool);
+        return 2;
+    }
+
+    /* Initialize most of the fields to 0, 0.f, NULL or false depending on type */
+    memset(mempool, 0, bytes_needed);
+
+    ctx = (*ctxp = (jar_xm_context_t *)mempool);
+    ctx->allocated_memory = mempool; /* Keep original pointer for JARXM_FREE() */
+    mempool += sizeof(jar_xm_context_t);
+
+    ctx->rate = rate;
+    mempool = jar_xm_load_module(ctx, moddata, moddata_length, mempool);
+    mempool = ALIGN_PTR(mempool, 16);
+
+    ctx->channels = (jar_xm_channel_context_t*)mempool;
+    mempool += ctx->module.num_channels * sizeof(jar_xm_channel_context_t);
+    mempool = ALIGN_PTR(mempool, 16);
+
+    ctx->default_global_volume = 1.f;
+    ctx->global_volume = ctx->default_global_volume;
+
+    ctx->volume_ramp = (1.f / 128.f);
+    ctx->panning_ramp = (1.f / 128.f);
+
+    for(uint8_t i = 0; i < ctx->module.num_channels; ++i) {
+        jar_xm_channel_context_t *ch = ctx->channels + i;
+        ch->ping = true;
+        ch->vibrato_waveform = jar_xm_SINE_WAVEFORM;
+        ch->vibrato_waveform_retrigger = true;
+        ch->tremolo_waveform = jar_xm_SINE_WAVEFORM;
+        ch->tremolo_waveform_retrigger = true;
+        ch->volume = ch->volume_envelope_volume = ch->fadeout_volume = 1.0f;
+        ch->panning = ch->panning_envelope_panning = .5f;
+        ch->actual_volume = .0f;
+        ch->actual_panning = .5f;
+    }
+
+    mempool = ALIGN_PTR(mempool, 16);
+    ctx->row_loop_count = (uint8_t *)mempool;
+    mempool += MAX_NUM_ROWS * sizeof(uint8_t);
+
+#if JAR_XM_DEFENSIVE
+    if((ret = jar_xm_check_sanity_postload(ctx))) {   DEBUG("jar_xm_check_sanity_postload() returned %i, module is not safe to play", ret);
+        jar_xm_free_context(ctx);
+        return 1;
+    }
+#endif
+
+    return 0;
+}
+
+void jar_xm_free_context(jar_xm_context_t *ctx) {
+    if (ctx != NULL) {   JARXM_FREE(ctx->allocated_memory); }
+}
+
+void jar_xm_set_max_loop_count(jar_xm_context_t *ctx, uint8_t loopcnt) {
+    ctx->max_loop_count = loopcnt;
+}
+
+uint8_t jar_xm_get_loop_count(jar_xm_context_t *ctx) {
+    return ctx->loop_count;
+}
+
+bool jar_xm_mute_channel(jar_xm_context_t *ctx, uint16_t channel, bool mute) {
+    bool old = ctx->channels[channel - 1].muted;
+    ctx->channels[channel - 1].muted = mute;
+    return old;
+}
+
+bool jar_xm_mute_instrument(jar_xm_context_t *ctx, uint16_t instr, bool mute) {
+    bool old = ctx->module.instruments[instr - 1].muted;
+    ctx->module.instruments[instr - 1].muted = mute;
+    return old;
+}
+
+const char* jar_xm_get_module_name(jar_xm_context_t *ctx) {
+    return ctx->module.name;
+}
+
+const char* jar_xm_get_tracker_name(jar_xm_context_t *ctx) {
+    return ctx->module.trackername;
+}
+
+uint16_t jar_xm_get_number_of_channels(jar_xm_context_t *ctx) {
+    return ctx->module.num_channels;
+}
+
+uint16_t jar_xm_get_module_length(jar_xm_context_t *ctx) {
+    return ctx->module.length;
+}
+
+uint16_t jar_xm_get_number_of_patterns(jar_xm_context_t *ctx) {
+    return ctx->module.num_patterns;
+}
+
+uint16_t jar_xm_get_number_of_rows(jar_xm_context_t *ctx, uint16_t pattern) {
+    return ctx->module.patterns[pattern].num_rows;
+}
+
+uint16_t jar_xm_get_number_of_instruments(jar_xm_context_t *ctx) {
+    return ctx->module.num_instruments;
+}
+
+uint16_t jar_xm_get_number_of_samples(jar_xm_context_t *ctx, uint16_t instrument) {
+    return ctx->module.instruments[instrument - 1].num_samples;
+}
+
+void jar_xm_get_playing_speed(jar_xm_context_t *ctx, uint16_t *bpm, uint16_t *tempo) {
+    if(bpm) *bpm = ctx->bpm;
+    if(tempo) *tempo = ctx->tempo;
+}
+
+void jar_xm_get_position(jar_xm_context_t *ctx, uint8_t *pattern_index, uint8_t *pattern, uint8_t *row, uint64_t *samples) {
+    if(pattern_index) *pattern_index = ctx->current_table_index;
+    if(pattern) *pattern = ctx->module.pattern_table[ctx->current_table_index];
+    if(row) *row = ctx->current_row;
+    if(samples) *samples = ctx->generated_samples;
+}
+
+uint64_t jar_xm_get_latest_trigger_of_instrument(jar_xm_context_t *ctx, uint16_t instr) {
+    return ctx->module.instruments[instr - 1].latest_trigger;
+}
+
+uint64_t jar_xm_get_latest_trigger_of_sample(jar_xm_context_t *ctx, uint16_t instr, uint16_t sample) {
+    return ctx->module.instruments[instr - 1].samples[sample].latest_trigger;
+}
+
+uint64_t jar_xm_get_latest_trigger_of_channel(jar_xm_context_t *ctx, uint16_t chn) {
+    return ctx->channels[chn - 1].latest_trigger;
+}
+
+//* .xm files are little-endian. (XXX: Are they really?)
+
+//* Bound reader macros.
+//* If we attempt to read the buffer out-of-bounds, pretend that the buffer is infinitely padded with zeroes.
+#define READ_U8(offset) (((offset) < moddata_length) ? (*(uint8_t*)(moddata + (offset))) : 0)
+#define READ_U16(offset) ((uint16_t)READ_U8(offset) | ((uint16_t)READ_U8((offset) + 1) << 8))
+#define READ_U32(offset) ((uint32_t)READ_U16(offset) | ((uint32_t)READ_U16((offset) + 2) << 16))
+#define READ_MEMCPY(ptr, offset, length) memcpy_pad(ptr, length, moddata, moddata_length, offset)
+
+static void memcpy_pad(void *dst, size_t dst_len, const void *src, size_t src_len, size_t offset) {
+    uint8_t *dst_c = dst;
+    const uint8_t *src_c = src;
+
+    /* how many bytes can be copied without overrunning `src` */
+    size_t copy_bytes = (src_len >= offset) ? (src_len - offset) : 0;
+    copy_bytes = copy_bytes > dst_len ? dst_len : copy_bytes;
+
+    memcpy(dst_c, src_c + offset, copy_bytes);
+    /* padded bytes */
+    memset(dst_c + copy_bytes, 0, dst_len - copy_bytes);
+}
+
+#if JAR_XM_DEFENSIVE
+
+int jar_xm_check_sanity_preload(const char* module, size_t module_length) {
+    if(module_length < 60) { return 4; }
+    if(memcmp("Extended Module: ", module, 17) != 0) { return 1; }
+    if(module[37] != 0x1A) { return 2; }
+    if(module[59] != 0x01 || module[58] != 0x04) { return 3; }  /* Not XM 1.04 */
+    return 0;
+}
+
+int jar_xm_check_sanity_postload(jar_xm_context_t* ctx) {
+    /* Check the POT */
+    for(uint8_t i = 0; i < ctx->module.length; ++i) {
+        if(ctx->module.pattern_table[i] >= ctx->module.num_patterns) {
+            if(i+1 == ctx->module.length && ctx->module.length > 1) {
+                DEBUG("trimming invalid POT at pos %X", i);
+                --ctx->module.length;
+            } else {
+                DEBUG("module has invalid POT, pos %X references nonexistent pattern %X", i, ctx->module.pattern_table[i]);
+                return 1;
+            }
+        }
+    }
+
+    return 0;
+}
+
+#endif
+
+size_t jar_xm_get_memory_needed_for_context(const char* moddata, size_t moddata_length) {
+    size_t memory_needed = 0;
+    size_t offset = 60; /* 60 = Skip the first header */
+    uint16_t num_channels;
+    uint16_t num_patterns;
+    uint16_t num_instruments;
+
+    /* Read the module header */
+    num_channels = READ_U16(offset + 8);
+    num_patterns = READ_U16(offset + 10);
+    memory_needed += num_patterns * sizeof(jar_xm_pattern_t);
+    memory_needed  = ALIGN(memory_needed, 16);
+    num_instruments = READ_U16(offset + 12);
+    memory_needed += num_instruments * sizeof(jar_xm_instrument_t);
+    memory_needed  = ALIGN(memory_needed, 16);
+    memory_needed += MAX_NUM_ROWS * READ_U16(offset + 4) * sizeof(uint8_t); /* Module length */
+
+    offset += READ_U32(offset); /* Header size */
+
+    /* Read pattern headers */
+    for(uint16_t i = 0; i < num_patterns; ++i) {
+        uint16_t num_rows;
+        num_rows = READ_U16(offset + 5);
+        memory_needed += num_rows * num_channels * sizeof(jar_xm_pattern_slot_t);
+        offset += READ_U32(offset) + READ_U16(offset + 7); /* Pattern header length + packed pattern data size */
+    }
+    memory_needed  = ALIGN(memory_needed, 16);
+
+    /* Read instrument headers */
+    for(uint16_t i = 0; i < num_instruments; ++i) {
+        uint16_t num_samples;
+        uint32_t sample_header_size = 0;
+        uint32_t sample_size_aggregate = 0;
+        num_samples = READ_U16(offset + 27);
+        memory_needed += num_samples * sizeof(jar_xm_sample_t);
+        if(num_samples > 0) { sample_header_size = READ_U32(offset + 29); }
+
+        offset += READ_U32(offset);  /* Instrument header size */
+        for(uint16_t j = 0; j < num_samples; ++j) {
+            uint32_t sample_size;
+            uint8_t flags;
+            sample_size = READ_U32(offset);
+            flags = READ_U8(offset + 14);
+            sample_size_aggregate += sample_size;
+
+            if(flags & (1 << 4)) {  /* 16 bit sample */
+                memory_needed += sample_size * (sizeof(float) >> 1);
+            } else {  /* 8 bit sample */
+                memory_needed += sample_size * sizeof(float);
+            }
+            offset += sample_header_size;
+        }
+        offset += sample_size_aggregate;
+    }
+
+    memory_needed += num_channels * sizeof(jar_xm_channel_context_t);
+    memory_needed += sizeof(jar_xm_context_t);
+    return memory_needed;
+}
+
+char* jar_xm_load_module(jar_xm_context_t* ctx, const char* moddata, size_t moddata_length, char* mempool) {
+    size_t offset = 0;
+    jar_xm_module_t* mod = &(ctx->module);
+
+    /* Read XM header */
+    READ_MEMCPY(mod->name, offset + 17, MODULE_NAME_LENGTH);
+    READ_MEMCPY(mod->trackername, offset + 38, TRACKER_NAME_LENGTH);
+    offset += 60;
+
+    /* Read module header */
+    uint32_t header_size = READ_U32(offset);
+    mod->length = READ_U16(offset + 4);
+    mod->restart_position = READ_U16(offset + 6);
+    mod->num_channels = READ_U16(offset + 8);
+    mod->num_patterns = READ_U16(offset + 10);
+    mod->num_instruments = READ_U16(offset + 12);
+    mod->patterns = (jar_xm_pattern_t*)mempool;
+    mod->linear_interpolation = 1; // Linear interpolation can be set after loading
+    mod->ramping = 1; // ramping can be set after loading
+    mempool += mod->num_patterns * sizeof(jar_xm_pattern_t);
+    mempool = ALIGN_PTR(mempool, 16);
+    mod->instruments = (jar_xm_instrument_t*)mempool;
+    mempool += mod->num_instruments * sizeof(jar_xm_instrument_t);
+    mempool = ALIGN_PTR(mempool, 16);
+    uint16_t flags = READ_U32(offset + 14);
+    mod->frequency_type = (flags & (1 << 0)) ? jar_xm_LINEAR_FREQUENCIES : jar_xm_AMIGA_FREQUENCIES;
+    ctx->default_tempo = READ_U16(offset + 16);
+    ctx->default_bpm = READ_U16(offset + 18);
+    ctx->tempo =ctx->default_tempo;
+    ctx->bpm = ctx->default_bpm;
+
+    READ_MEMCPY(mod->pattern_table, offset + 20, PATTERN_ORDER_TABLE_LENGTH);
+    offset += header_size;
+
+    /* Read patterns */
+    for(uint16_t i = 0; i < mod->num_patterns; ++i) {
+        uint16_t packed_patterndata_size = READ_U16(offset + 7);
+        jar_xm_pattern_t* pat = mod->patterns + i;
+        pat->num_rows = READ_U16(offset + 5);
+        pat->slots = (jar_xm_pattern_slot_t*)mempool;
+        mempool += mod->num_channels * pat->num_rows * sizeof(jar_xm_pattern_slot_t);
+        offset += READ_U32(offset); /* Pattern header length */
+
+        if(packed_patterndata_size == 0) {    /* No pattern data is present */
+            memset(pat->slots, 0, sizeof(jar_xm_pattern_slot_t) * pat->num_rows * mod->num_channels);
+        } else {
+            /* This isn't your typical for loop */
+            for(uint16_t j = 0, k = 0; j < packed_patterndata_size; ++k) {
+                uint8_t note = READ_U8(offset + j);
+                jar_xm_pattern_slot_t* slot = pat->slots + k;
+                if(note & (1 << 7)) {
+                    /* MSB is set, this is a compressed packet */
+                    ++j;
+                    if(note & (1 << 0)) {    /* Note follows */
+                        slot->note = READ_U8(offset + j);
+                        ++j;
+                    } else {
+                        slot->note = 0;
+                    }
+                    if(note & (1 << 1)) {    /* Instrument follows */
+                        slot->instrument = READ_U8(offset + j);
+                        ++j;
+                    } else {
+                        slot->instrument = 0;
+                    }
+                    if(note & (1 << 2)) {    /* Volume column follows */
+                        slot->volume_column = READ_U8(offset + j);
+                        ++j;
+                    } else {
+                        slot->volume_column = 0;
+                    }
+                    if(note & (1 << 3)) {    /* Effect follows */
+                        slot->effect_type = READ_U8(offset + j);
+                        ++j;
+                    } else {
+                        slot->effect_type = 0;
+                    }
+                    if(note & (1 << 4)) {    /* Effect parameter follows */
+                        slot->effect_param = READ_U8(offset + j);
+                        ++j;
+                    } else {
+                        slot->effect_param = 0;
+                    }
+                } else {    /* Uncompressed packet */
+                    slot->note = note;
+                    slot->instrument = READ_U8(offset + j + 1);
+                    slot->volume_column = READ_U8(offset + j + 2);
+                    slot->effect_type = READ_U8(offset + j + 3);
+                    slot->effect_param = READ_U8(offset + j + 4);
+                    j += 5;
+                }
+            }
+        }
+
+        offset += packed_patterndata_size;
+    }
+    mempool = ALIGN_PTR(mempool, 16);
+
+    /* Read instruments */
+    for(uint16_t i = 0; i < ctx->module.num_instruments; ++i) {
+        uint32_t sample_header_size = 0;
+        jar_xm_instrument_t* instr = mod->instruments + i;
+
+        READ_MEMCPY(instr->name, offset + 4, INSTRUMENT_NAME_LENGTH);
+        instr->num_samples = READ_U16(offset + 27);
+
+        if(instr->num_samples > 0) {
+            /* Read extra header properties */
+            sample_header_size = READ_U32(offset + 29);
+            READ_MEMCPY(instr->sample_of_notes, offset + 33, NUM_NOTES);
+
+            instr->volume_envelope.num_points = READ_U8(offset + 225);
+            instr->panning_envelope.num_points = READ_U8(offset + 226);
+
+            for(uint8_t j = 0; j < instr->volume_envelope.num_points; ++j) {
+                instr->volume_envelope.points[j].frame = READ_U16(offset + 129 + 4 * j);
+                instr->volume_envelope.points[j].value = READ_U16(offset + 129 + 4 * j + 2);
+            }
+
+            for(uint8_t j = 0; j < instr->panning_envelope.num_points; ++j) {
+                instr->panning_envelope.points[j].frame = READ_U16(offset + 177 + 4 * j);
+                instr->panning_envelope.points[j].value = READ_U16(offset + 177 + 4 * j + 2);
+            }
+
+            instr->volume_envelope.sustain_point = READ_U8(offset + 227);
+            instr->volume_envelope.loop_start_point = READ_U8(offset + 228);
+            instr->volume_envelope.loop_end_point = READ_U8(offset + 229);
+            instr->panning_envelope.sustain_point = READ_U8(offset + 230);
+            instr->panning_envelope.loop_start_point = READ_U8(offset + 231);
+            instr->panning_envelope.loop_end_point = READ_U8(offset + 232);
+
+            uint8_t flags = READ_U8(offset + 233);
+            instr->volume_envelope.enabled = flags & (1 << 0);
+            instr->volume_envelope.sustain_enabled = flags & (1 << 1);
+            instr->volume_envelope.loop_enabled = flags & (1 << 2);
+
+            flags = READ_U8(offset + 234);
+            instr->panning_envelope.enabled = flags & (1 << 0);
+            instr->panning_envelope.sustain_enabled = flags & (1 << 1);
+            instr->panning_envelope.loop_enabled = flags & (1 << 2);
+            instr->vibrato_type = READ_U8(offset + 235);
+            if(instr->vibrato_type == 2) {
+                instr->vibrato_type = 1;
+            } else if(instr->vibrato_type == 1) {
+                instr->vibrato_type = 2;
+            }
+            instr->vibrato_sweep = READ_U8(offset + 236);
+            instr->vibrato_depth = READ_U8(offset + 237);
+            instr->vibrato_rate = READ_U8(offset + 238);
+            instr->volume_fadeout = READ_U16(offset + 239);
+            instr->samples = (jar_xm_sample_t*)mempool;
+            mempool += instr->num_samples * sizeof(jar_xm_sample_t);
+        } else {
+            instr->samples = NULL;
+        }
+
+        /* Instrument header size */
+        offset += READ_U32(offset);
+
+        for(int j = 0; j < instr->num_samples; ++j) {
+            /* Read sample header */
+            jar_xm_sample_t* sample = instr->samples + j;
+
+            sample->length = READ_U32(offset);
+            sample->loop_start = READ_U32(offset + 4);
+            sample->loop_length = READ_U32(offset + 8);
+            sample->loop_end = sample->loop_start + sample->loop_length;
+            sample->volume = (float)(READ_U8(offset + 12) << 2) / 256.f;
+            if (sample->volume > 1.0f) {sample->volume = 1.f;};
+            sample->finetune = (int8_t)READ_U8(offset + 13);
+
+            uint8_t flags = READ_U8(offset + 14);
+            switch (flags & 3) {
+            case 2:
+            case 3:
+                sample->loop_type = jar_xm_PING_PONG_LOOP;
+            case 1:
+                sample->loop_type = jar_xm_FORWARD_LOOP;
+                break;
+            default:
+                sample->loop_type = jar_xm_NO_LOOP;
+                break;
+            };
+            sample->bits = (flags & 0x10) ? 16 : 8;
+            sample->stereo = (flags & 0x20) ? 1 : 0;
+            sample->panning = (float)READ_U8(offset + 15) / 255.f;
+            sample->relative_note = (int8_t)READ_U8(offset + 16);
+            READ_MEMCPY(sample->name, 18, SAMPLE_NAME_LENGTH);
+            sample->data = (float*)mempool;
+            if(sample->bits == 16) {
+                /* 16 bit sample */
+                mempool += sample->length * (sizeof(float) >> 1);
+                sample->loop_start >>= 1;
+                sample->loop_length >>= 1;
+                sample->loop_end >>= 1;
+                sample->length >>= 1;
+            } else {
+                /* 8 bit sample */
+                mempool += sample->length * sizeof(float);
+            }
+            // Adjust loop points to reflect half of the reported length (stereo)
+            if (sample->stereo && sample->loop_type != jar_xm_NO_LOOP) {
+                div_t lstart = div(READ_U32(offset + 4), 2);
+                sample->loop_start = lstart.quot;
+                div_t llength = div(READ_U32(offset + 8), 2);
+                sample->loop_length = llength.quot;
+                sample->loop_end = sample->loop_start + sample->loop_length;
+            };
+
+            offset += sample_header_size;
+        }
+
+        // Read all samples and convert them to float values
+        for(int j = 0; j < instr->num_samples; ++j) {
+            /* Read sample data */
+            jar_xm_sample_t* sample = instr->samples + j;
+            int length = sample->length;
+            if (sample->stereo) {
+                // Since it is stereo, we cut the sample in half (treated as single channel)
+                div_t result = div(sample->length, 2);
+                if(sample->bits == 16) {
+                    int16_t v = 0;
+                    for(int k = 0; k < length; ++k) {
+                        if (k == result.quot) { v = 0;};
+                        v = v + (int16_t)READ_U16(offset + (k << 1));
+                        sample->data[k] = (float) v / 32768.f ;//* sign;
+                        if(sample->data[k] < -1.0)  {sample->data[k] = -1.0;}  else if(sample->data[k] > 1.0)  {sample->data[k] = 1.0;};
+                    }
+                    offset += sample->length << 1;
+                } else {
+                    int8_t v = 0;
+                    for(int k = 0; k < length; ++k) {
+                        if (k == result.quot) { v = 0;};
+                        v = v + (int8_t)READ_U8(offset + k);
+                        sample->data[k] = (float)v  / 128.f ;//* sign;
+                        if(sample->data[k] < -1.0)  {sample->data[k] = -1.0;}  else if(sample->data[k] > 1.0)  {sample->data[k] = 1.0;};
+                    }
+                    offset += sample->length;
+                };
+                sample->length = result.quot;
+            } else {
+                if(sample->bits == 16) {
+                    int16_t v = 0;
+                    for(int k = 0; k < length; ++k) {
+                        v = v + (int16_t)READ_U16(offset + (k << 1));
+                        sample->data[k] = (float) v / 32768.f ;//* sign;
+                        if(sample->data[k] < -1.0)  {sample->data[k] = -1.0;}  else if(sample->data[k] > 1.0)  {sample->data[k] = 1.0;};
+                    }
+                    offset += sample->length << 1;
+                } else {
+                    int8_t v = 0;
+                    for(int k = 0; k < length; ++k) {
+                        v = v + (int8_t)READ_U8(offset + k);
+                        sample->data[k] = (float)v  / 128.f ;//* sign;
+                        if(sample->data[k] < -1.0)  {sample->data[k] = -1.0;}  else if(sample->data[k] > 1.0)  {sample->data[k] = 1.0;};
+                    }
+                    offset += sample->length;
+                }
+            }
+        };
+    };
+    return mempool;
+};
+
+//-------------------------------------------------------------------------------
+//THE FOLLOWING IS FOR PLAYING
+static float jar_xm_waveform(jar_xm_waveform_type_t, uint8_t);
+static void jar_xm_autovibrato(jar_xm_context_t*, jar_xm_channel_context_t*);
+static void jar_xm_vibrato(jar_xm_context_t*, jar_xm_channel_context_t*, uint8_t, uint16_t);
+static void jar_xm_tremolo(jar_xm_context_t*, jar_xm_channel_context_t*, uint8_t, uint16_t);
+static void jar_xm_arpeggio(jar_xm_context_t*, jar_xm_channel_context_t*, uint8_t, uint16_t);
+static void jar_xm_tone_portamento(jar_xm_context_t*, jar_xm_channel_context_t*);
+static void jar_xm_pitch_slide(jar_xm_context_t*, jar_xm_channel_context_t*, float);
+static void jar_xm_panning_slide(jar_xm_channel_context_t*, uint8_t);
+static void jar_xm_volume_slide(jar_xm_channel_context_t*, uint8_t);
+
+static float jar_xm_envelope_lerp(jar_xm_envelope_point_t*, jar_xm_envelope_point_t*, uint16_t);
+static void jar_xm_envelope_tick(jar_xm_channel_context_t*, jar_xm_envelope_t*, uint16_t*, float*);
+static void jar_xm_envelopes(jar_xm_channel_context_t*);
+
+static float jar_xm_linear_period(float);
+static float jar_xm_linear_frequency(float);
+static float jar_xm_amiga_period(float);
+static float jar_xm_amiga_frequency(float);
+static float jar_xm_period(jar_xm_context_t*, float);
+static float jar_xm_frequency(jar_xm_context_t*, float, float);
+static void jar_xm_update_frequency(jar_xm_context_t*, jar_xm_channel_context_t*);
+
+static void jar_xm_handle_note_and_instrument(jar_xm_context_t*, jar_xm_channel_context_t*, jar_xm_pattern_slot_t*);
+static void jar_xm_trigger_note(jar_xm_context_t*, jar_xm_channel_context_t*, unsigned int flags);
+static void jar_xm_cut_note(jar_xm_channel_context_t*);
+static void jar_xm_key_off(jar_xm_channel_context_t*);
+
+static void jar_xm_post_pattern_change(jar_xm_context_t*);
+static void jar_xm_row(jar_xm_context_t*);
+static void jar_xm_tick(jar_xm_context_t*);
+
+static void jar_xm_next_of_sample(jar_xm_context_t*, jar_xm_channel_context_t*, int);
+static void jar_xm_mixdown(jar_xm_context_t*, float*, float*);
+
+#define jar_xm_TRIGGER_KEEP_VOLUME (1 << 0)
+#define jar_xm_TRIGGER_KEEP_PERIOD (1 << 1)
+#define jar_xm_TRIGGER_KEEP_SAMPLE_POSITION (1 << 2)
+
+                                            // C-2, C#2, D-2, D#2, E-2, F-2, F#2, G-2, G#2, A-2, A#2, B-2, C-3
+static const uint16_t amiga_frequencies[] = { 1712, 1616, 1525, 1440, 1357, 1281, 1209, 1141, 1077, 1017,  961,  907, 856 };
+
+                                            // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f
+static const float multi_retrig_add[] = { 0.f, -1.f, -2.f, -4.f, -8.f, -16.f, 0.f, 0.f, 0.f, 1.f, 2.f, 4.f, 8.f, 16.f, 0.f, 0.f };
+
+                                            // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f
+static const float multi_retrig_multiply[] = { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, .6666667f, .5f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.5f, 2.f };
+
+#define jar_xm_CLAMP_UP1F(vol, limit) do {            \
+        if((vol) > (limit)) (vol) = (limit);    \
+    } while(0)
+#define jar_xm_CLAMP_UP(vol) jar_xm_CLAMP_UP1F((vol), 1.f)
+
+#define jar_xm_CLAMP_DOWN1F(vol, limit) do {        \
+        if((vol) < (limit)) (vol) = (limit);    \
+    } while(0)
+#define jar_xm_CLAMP_DOWN(vol) jar_xm_CLAMP_DOWN1F((vol), .0f)
+
+#define jar_xm_CLAMP2F(vol, up, down) do {            \
+        if((vol) > (up)) (vol) = (up);            \
+        else if((vol) < (down)) (vol) = (down); \
+    } while(0)
+#define jar_xm_CLAMP(vol) jar_xm_CLAMP2F((vol), 1.f, .0f)
+
+#define jar_xm_SLIDE_TOWARDS(val, goal, incr) do {        \
+        if((val) > (goal)) {                        \
+            (val) -= (incr);                        \
+            jar_xm_CLAMP_DOWN1F((val), (goal));            \
+        } else if((val) < (goal)) {                    \
+            (val) += (incr);                        \
+            jar_xm_CLAMP_UP1F((val), (goal));            \
+        }                                            \
+    } while(0)
+
+#define jar_xm_LERP(u, v, t) ((u) + (t) * ((v) - (u)))
+#define jar_xm_INVERSE_LERP(u, v, lerp) (((lerp) - (u)) / ((v) - (u)))
+
+#define HAS_TONE_PORTAMENTO(s) ((s)->effect_type == 3 \
+                                 || (s)->effect_type == 5 \
+                                 || ((s)->volume_column >> 4) == 0xF)
+#define HAS_ARPEGGIO(s) ((s)->effect_type == 0 \
+                          && (s)->effect_param != 0)
+#define HAS_VIBRATO(s) ((s)->effect_type == 4 \
+                         || (s)->effect_param == 6 \
+                         || ((s)->volume_column >> 4) == 0xB)
+#define NOTE_IS_VALID(n) ((n) > 0 && (n) < 97)
+#define NOTE_OFF 97
+
+static float jar_xm_waveform(jar_xm_waveform_type_t waveform, uint8_t step) {
+    static unsigned int next_rand = 24492;
+    step %= 0x40;
+    switch(waveform) {
+    case jar_xm_SINE_WAVEFORM: /* No SIN() table used, direct calculation. */
+        return -sinf(2.f * 3.141592f * (float)step / (float)0x40);
+    case jar_xm_RAMP_DOWN_WAVEFORM: /* Ramp down: 1.0f when step = 0; -1.0f when step = 0x40 */
+        return (float)(0x20 - step) / 0x20;
+    case jar_xm_SQUARE_WAVEFORM: /* Square with a 50% duty */
+        return (step >= 0x20) ? 1.f : -1.f;
+    case jar_xm_RANDOM_WAVEFORM: /* Use the POSIX.1-2001 example, just to be deterministic across different machines */
+        next_rand = next_rand * 1103515245 + 12345;
+        return (float)((next_rand >> 16) & 0x7FFF) / (float)0x4000 - 1.f;
+    case jar_xm_RAMP_UP_WAVEFORM: /* Ramp up: -1.f when step = 0; 1.f when step = 0x40 */
+        return (float)(step - 0x20) / 0x20;
+    default:
+        break;
+    }
+    return .0f;
+}
+
+static void jar_xm_autovibrato(jar_xm_context_t* ctx, jar_xm_channel_context_t* ch) {
+    if(ch->instrument == NULL || ch->instrument->vibrato_depth == 0) return;
+    jar_xm_instrument_t* instr = ch->instrument;
+    float sweep = 1.f;
+    if(ch->autovibrato_ticks < instr->vibrato_sweep) { sweep = jar_xm_LERP(0.f, 1.f, (float)ch->autovibrato_ticks / (float)instr->vibrato_sweep); }
+    unsigned int step = ((ch->autovibrato_ticks++) * instr->vibrato_rate) >> 2;
+    ch->autovibrato_note_offset = .25f * jar_xm_waveform(instr->vibrato_type, step) * (float)instr->vibrato_depth / (float)0xF * sweep;
+    jar_xm_update_frequency(ctx, ch);
+}
+
+static void jar_xm_vibrato(jar_xm_context_t* ctx, jar_xm_channel_context_t* ch, uint8_t param, uint16_t pos) {
+    unsigned int step = pos * (param >> 4);
+    ch->vibrato_note_offset = 2.f * jar_xm_waveform(ch->vibrato_waveform, step) * (float)(param & 0x0F) / (float)0xF;
+    jar_xm_update_frequency(ctx, ch);
+}
+
+static void jar_xm_tremolo(jar_xm_context_t* ctx, jar_xm_channel_context_t* ch, uint8_t param, uint16_t pos) {
+    unsigned int step = pos * (param >> 4);
+    ch->tremolo_volume = -1.f * jar_xm_waveform(ch->tremolo_waveform, step) * (float)(param & 0x0F) / (float)0xF;
+}
+
+static void jar_xm_arpeggio(jar_xm_context_t* ctx, jar_xm_channel_context_t* ch, uint8_t param, uint16_t tick) {
+    switch(tick % 3) {
+    case 0:
+        ch->arp_in_progress = false;
+        ch->arp_note_offset = 0;
+        break;
+    case 2:
+        ch->arp_in_progress = true;
+        ch->arp_note_offset = param >> 4;
+        break;
+    case 1:
+        ch->arp_in_progress = true;
+        ch->arp_note_offset = param & 0x0F;
+        break;
+    }
+    jar_xm_update_frequency(ctx, ch);
+}
+
+static void jar_xm_tone_portamento(jar_xm_context_t* ctx, jar_xm_channel_context_t* ch) {
+    /* 3xx called without a note, wait until we get an actual target note. */
+    if(ch->tone_portamento_target_period == 0.f) return;  /* no value, exit */
+    if(ch->period != ch->tone_portamento_target_period) {
+        jar_xm_SLIDE_TOWARDS(ch->period, ch->tone_portamento_target_period, (ctx->module.frequency_type == jar_xm_LINEAR_FREQUENCIES ? 4.f : 1.f) * ch->tone_portamento_param);
+        jar_xm_update_frequency(ctx, ch);
+    }
+}
+
+static void jar_xm_pitch_slide(jar_xm_context_t* ctx, jar_xm_channel_context_t* ch, float period_offset) {
+    /* Don't ask about the 4.f coefficient. I found mention of it nowhere. Found by ear™. */
+    if(ctx->module.frequency_type == jar_xm_LINEAR_FREQUENCIES) {period_offset *= 4.f; }
+    ch->period += period_offset;
+    jar_xm_CLAMP_DOWN(ch->period);
+    /* XXX: upper bound of period ? */
+    jar_xm_update_frequency(ctx, ch);
+}
+
+static void jar_xm_panning_slide(jar_xm_channel_context_t* ch, uint8_t rawval) {
+    if (rawval & 0xF0) {ch->panning += (float)((rawval & 0xF0 )>> 4) / (float)0xFF;};
+    if (rawval & 0x0F) {ch->panning -= (float)(rawval & 0x0F) / (float)0xFF;};
+};
+
+static void jar_xm_volume_slide(jar_xm_channel_context_t* ch, uint8_t rawval) {
+    if (rawval & 0xF0) {ch->volume += (float)((rawval & 0xF0) >> 4) / (float)0x40;};
+    if (rawval & 0x0F) {ch->volume -= (float)(rawval & 0x0F) / (float)0x40;};
+};
+
+static float jar_xm_envelope_lerp(jar_xm_envelope_point_t* a, jar_xm_envelope_point_t* b, uint16_t pos) {
+    /* Linear interpolation between two envelope points */
+    if(pos <= a->frame) return a->value;
+    else if(pos >= b->frame) return b->value;
+    else {
+        float p = (float)(pos - a->frame) / (float)(b->frame - a->frame);
+        return a->value * (1 - p) + b->value * p;
+    }
+}
+
+static void jar_xm_post_pattern_change(jar_xm_context_t* ctx) {
+    /* Loop if necessary */
+    if(ctx->current_table_index >= ctx->module.length) {
+        ctx->current_table_index = ctx->module.restart_position;
+        ctx->tempo =ctx->default_tempo; // reset to file default value
+        ctx->bpm = ctx->default_bpm; // reset to file default value
+        ctx->global_volume = ctx->default_global_volume; // reset to file default value
+    }
+}
+
+static float jar_xm_linear_period(float note) {
+    return 7680.f - note * 64.f;
+}
+
+static float jar_xm_linear_frequency(float period) {
+    return 8363.f * powf(2.f, (4608.f - period) / 768.f);
+}
+
+static float jar_xm_amiga_period(float note) {
+    unsigned int intnote = note;
+    uint8_t a = intnote % 12;
+    int8_t octave = note / 12.f - 2;
+    uint16_t p1 = amiga_frequencies[a], p2 = amiga_frequencies[a + 1];
+    if(octave > 0) {
+        p1 >>= octave;
+        p2 >>= octave;
+    } else if(octave < 0) {
+        p1 <<= -octave;
+        p2 <<= -octave;
+    }
+    return jar_xm_LERP(p1, p2, note - intnote);
+}
+
+static float jar_xm_amiga_frequency(float period) {
+    if(period == .0f) return .0f;
+    return 7093789.2f / (period * 2.f); /* This is the PAL value. (we could use the NTSC value also) */
+}
+
+static float jar_xm_period(jar_xm_context_t* ctx, float note) {
+    switch(ctx->module.frequency_type) {
+    case jar_xm_LINEAR_FREQUENCIES:
+        return jar_xm_linear_period(note);
+    case jar_xm_AMIGA_FREQUENCIES:
+        return jar_xm_amiga_period(note);
+    }
+    return .0f;
+}
+
+static float jar_xm_frequency(jar_xm_context_t* ctx, float period, float note_offset) {
+    switch(ctx->module.frequency_type) {
+    case jar_xm_LINEAR_FREQUENCIES:
+        return jar_xm_linear_frequency(period - 64.f * note_offset);
+    case jar_xm_AMIGA_FREQUENCIES:
+        if(note_offset == 0) { return jar_xm_amiga_frequency(period); };
+        int8_t octave;
+        float  note;
+        uint16_t p1, p2;
+        uint8_t a = octave = 0;
+
+        /* Find the octave of the current period */
+        if(period > amiga_frequencies[0]) {
+            --octave;
+            while(period > (amiga_frequencies[0] << -octave)) --octave;
+        } else if(period < amiga_frequencies[12]) {
+            ++octave;
+            while(period < (amiga_frequencies[12] >> octave)) ++octave;
+        }
+
+        /* Find the smallest note closest to the current period */
+        for(uint8_t i = 0; i < 12; ++i) {
+            p1 = amiga_frequencies[i], p2 = amiga_frequencies[i + 1];
+            if(octave > 0) {
+                p1 >>= octave;
+                p2 >>= octave;
+            } else if(octave < 0) {
+                p1 <<= (-octave);
+                p2 <<= (-octave);
+            }
+            if(p2 <= period && period <= p1) {
+                a = i;
+                break;
+            }
+        }
+        if(JAR_XM_DEBUG && (p1 < period || p2 > period)) { DEBUG("%i <= %f <= %i should hold but doesn't, this is a bug", p2, period, p1); }
+        note = 12.f * (octave + 2) + a + jar_xm_INVERSE_LERP(p1, p2, period);
+        return jar_xm_amiga_frequency(jar_xm_amiga_period(note + note_offset));
+    }
+
+    return .0f;
+}
+
+static void jar_xm_update_frequency(jar_xm_context_t* ctx, jar_xm_channel_context_t* ch) {
+    ch->frequency = jar_xm_frequency( ctx, ch->period, (ch->arp_note_offset > 0 ? ch->arp_note_offset : (  ch->vibrato_note_offset + ch->autovibrato_note_offset ))  );
+    ch->step = ch->frequency / ctx->rate;
+}
+
+static void jar_xm_handle_note_and_instrument(jar_xm_context_t* ctx, jar_xm_channel_context_t* ch, jar_xm_pattern_slot_t* s) {
+    jar_xm_module_t* mod = &(ctx->module);
+    if(s->instrument > 0) {
+        if(HAS_TONE_PORTAMENTO(ch->current) && ch->instrument != NULL && ch->sample != NULL) {  /* Tone portamento in effect */
+            jar_xm_trigger_note(ctx, ch, jar_xm_TRIGGER_KEEP_PERIOD | jar_xm_TRIGGER_KEEP_SAMPLE_POSITION);
+        } else if(s->instrument > ctx->module.num_instruments) {    /* Invalid instrument, Cut current note */
+            jar_xm_cut_note(ch);
+            ch->instrument = NULL;
+            ch->sample = NULL;
+        } else {
+            ch->instrument = ctx->module.instruments + (s->instrument - 1);
+            if(s->note == 0 && ch->sample != NULL) {  /* Ghost instrument, trigger note */
+                /* Sample position is kept, but envelopes are reset */
+                jar_xm_trigger_note(ctx, ch, jar_xm_TRIGGER_KEEP_SAMPLE_POSITION);
+            }
+        }
+    }
+
+    if(NOTE_IS_VALID(s->note)) {
+        // note value is s->note -1
+        jar_xm_instrument_t* instr = ch->instrument;
+        if(HAS_TONE_PORTAMENTO(ch->current) && instr != NULL && ch->sample != NULL) {
+            /* Tone portamento in effect */
+            ch->note = s->note + ch->sample->relative_note + ch->sample->finetune / 128.f - 1.f;
+            ch->tone_portamento_target_period = jar_xm_period(ctx, ch->note);
+        } else if(instr == NULL || ch->instrument->num_samples == 0) {   /* Issue on instrument */
+            jar_xm_cut_note(ch);
+        } else {
+            if(instr->sample_of_notes[s->note - 1] < instr->num_samples) {
+                if (mod->ramping) {
+                    for(int i = 0; i < jar_xm_SAMPLE_RAMPING_POINTS; ++i) {
+                        jar_xm_next_of_sample(ctx, ch, i);
+                    }
+                    ch->frame_count = 0;
+                };
+                ch->sample = instr->samples + instr->sample_of_notes[s->note - 1];
+                ch->orig_note = ch->note = s->note + ch->sample->relative_note + ch->sample->finetune / 128.f - 1.f;
+                if(s->instrument > 0) {
+                    jar_xm_trigger_note(ctx, ch, 0);
+                } else {  /* Ghost note: keep old volume */
+                    jar_xm_trigger_note(ctx, ch, jar_xm_TRIGGER_KEEP_VOLUME);
+                }
+            } else {
+                jar_xm_cut_note(ch);
+            }
+        }
+    } else if(s->note == NOTE_OFF) {
+        jar_xm_key_off(ch);
+    }
+
+    // Interpret Effect column
+    switch(s->effect_type) {
+    case 1: /* 1xx: Portamento up */
+        if(s->effect_param > 0) {    ch->portamento_up_param = s->effect_param; }
+        break;
+    case 2: /* 2xx: Portamento down */
+        if(s->effect_param > 0) {    ch->portamento_down_param = s->effect_param; }
+        break;
+    case 3: /* 3xx: Tone portamento */
+        if(s->effect_param > 0) {    ch->tone_portamento_param = s->effect_param; }
+        break;
+    case 4: /* 4xy: Vibrato */
+        if(s->effect_param & 0x0F) { ch->vibrato_param = (ch->vibrato_param & 0xF0) | (s->effect_param & 0x0F); }  /* Set vibrato depth */
+        if(s->effect_param >> 4) { ch->vibrato_param = (s->effect_param & 0xF0) | (ch->vibrato_param & 0x0F); }   /* Set vibrato speed */
+        break;
+    case 5: /* 5xy: Tone portamento + Volume slide */
+        if(s->effect_param > 0) {  ch->volume_slide_param = s->effect_param; }
+        break;
+    case 6: /* 6xy: Vibrato + Volume slide */
+        if(s->effect_param > 0) {    ch->volume_slide_param = s->effect_param; }
+        break;
+    case 7: /* 7xy: Tremolo */
+        if(s->effect_param & 0x0F) { ch->tremolo_param = (ch->tremolo_param & 0xF0) | (s->effect_param & 0x0F); } /* Set tremolo depth */
+        if(s->effect_param >> 4) { ch->tremolo_param = (s->effect_param & 0xF0) | (ch->tremolo_param & 0x0F); }  /* Set tremolo speed */
+        break;
+    case 8: /* 8xx: Set panning */
+        ch->panning = (float)s->effect_param / 255.f;
+        break;
+    case 9: /* 9xx: Sample offset */
+        if(ch->sample != 0) { //&& NOTE_IS_VALID(s->note)) {
+            uint32_t final_offset = s->effect_param << (ch->sample->bits == 16 ? 7 : 8);
+            switch (ch->sample->loop_type) {
+            case jar_xm_NO_LOOP:
+                if(final_offset >= ch->sample->length) { /* Pretend the sample dosen't loop and is done playing */
+                    ch->sample_position = -1;
+                } else {
+                    ch->sample_position = final_offset;
+                }
+                break;
+            case jar_xm_FORWARD_LOOP:
+                if (final_offset >= ch->sample->loop_end) {
+                    ch->sample_position -= ch->sample->loop_length;
+                } else if(final_offset >= ch->sample->length) {
+                    ch->sample_position = ch->sample->loop_start;
+                } else {
+                    ch->sample_position = final_offset;
+                }
+                break;
+            case jar_xm_PING_PONG_LOOP:
+                if(final_offset >= ch->sample->loop_end) {
+                    ch->ping = false;
+                    ch->sample_position = (ch->sample->loop_end << 1) - ch->sample_position;
+                } else if(final_offset >= ch->sample->length) {
+                    ch->ping = false;
+                    ch->sample_position -= ch->sample->length - 1;
+                } else {
+                    ch->sample_position = final_offset;
+                };
+                break;
+            }
+        }
+        break;
+    case 0xA: /* Axy: Volume slide */
+        if(s->effect_param > 0) {    ch->volume_slide_param = s->effect_param; }
+        break;
+    case 0xB: /* Bxx: Position jump */
+        if(s->effect_param < ctx->module.length) {
+            ctx->position_jump = true;
+            ctx->jump_dest = s->effect_param;
+        }
+        break;
+    case 0xC: /* Cxx: Set volume */
+        ch->volume = (float)((s->effect_param > 0x40) ? 0x40 : s->effect_param) / (float)0x40;
+        break;
+    case 0xD: /* Dxx: Pattern break */
+        /* Jump after playing this line */
+        ctx->pattern_break = true;
+        ctx->jump_row = (s->effect_param >> 4) * 10 + (s->effect_param & 0x0F);
+        break;
+    case 0xE: /* EXy: Extended command */
+        switch(s->effect_param >> 4) {
+        case 1: /* E1y: Fine portamento up */
+            if(s->effect_param & 0x0F) {    ch->fine_portamento_up_param = s->effect_param & 0x0F; }
+            jar_xm_pitch_slide(ctx, ch, -ch->fine_portamento_up_param);
+            break;
+        case 2: /* E2y: Fine portamento down */
+            if(s->effect_param & 0x0F) {    ch->fine_portamento_down_param = s->effect_param & 0x0F; }
+            jar_xm_pitch_slide(ctx, ch, ch->fine_portamento_down_param);
+            break;
+        case 4: /* E4y: Set vibrato control */
+            ch->vibrato_waveform = s->effect_param & 3;
+            ch->vibrato_waveform_retrigger = !((s->effect_param >> 2) & 1);
+            break;
+        case 5: /* E5y: Set finetune */
+            if(NOTE_IS_VALID(ch->current->note) && ch->sample != NULL) {
+                ch->note = ch->current->note + ch->sample->relative_note + (float)(((s->effect_param & 0x0F) - 8) << 4) / 128.f - 1.f;
+                ch->period = jar_xm_period(ctx, ch->note);
+                jar_xm_update_frequency(ctx, ch);
+            }
+            break;
+        case 6: /* E6y: Pattern loop */
+            if(s->effect_param & 0x0F) {
+                if((s->effect_param & 0x0F) == ch->pattern_loop_count) {   /* Loop is over */
+                    ch->pattern_loop_count = 0;
+                    ctx->position_jump = false;
+                } else {    /* Jump to the beginning of the loop */
+                    ch->pattern_loop_count++;
+                    ctx->position_jump = true;
+                    ctx->jump_row = ch->pattern_loop_origin;
+                    ctx->jump_dest = ctx->current_table_index;
+                }
+            } else {
+                ch->pattern_loop_origin = ctx->current_row; /* Set loop start point */
+                ctx->jump_row = ch->pattern_loop_origin;    /* Replicate FT2 E60 bug */
+            }
+            break;
+        case 7: /* E7y: Set tremolo control */
+            ch->tremolo_waveform = s->effect_param & 3;
+            ch->tremolo_waveform_retrigger = !((s->effect_param >> 2) & 1);
+            break;
+        case 0xA: /* EAy: Fine volume slide up */
+            if(s->effect_param & 0x0F) {   ch->fine_volume_slide_param = s->effect_param & 0x0F; }
+            jar_xm_volume_slide(ch, ch->fine_volume_slide_param << 4);
+            break;
+        case 0xB: /* EBy: Fine volume slide down */
+            if(s->effect_param & 0x0F) {   ch->fine_volume_slide_param = s->effect_param & 0x0F; }
+            jar_xm_volume_slide(ch, ch->fine_volume_slide_param);
+            break;
+        case 0xD: /* EDy: Note delay */
+            /* XXX: figure this out better. EDx triggers the note even when there no note and no instrument. But ED0 acts like like a ghost note, EDx (x ≠ 0) does not. */
+            if(s->note == 0 && s->instrument == 0) {
+                unsigned int flags = jar_xm_TRIGGER_KEEP_VOLUME;
+                if(ch->current->effect_param & 0x0F) {
+                    ch->note = ch->orig_note;
+                    jar_xm_trigger_note(ctx, ch, flags);
+                } else {
+                    jar_xm_trigger_note(ctx, ch, flags | jar_xm_TRIGGER_KEEP_PERIOD | jar_xm_TRIGGER_KEEP_SAMPLE_POSITION );
+                }
+            }
+            break;
+
+        case 0xE: /* EEy: Pattern delay */
+            ctx->extra_ticks = (ch->current->effect_param & 0x0F) * ctx->tempo;
+            break;
+        default:
+            break;
+        }
+        break;
+
+    case 0xF: /* Fxx: Set tempo/BPM */
+        if(s->effect_param > 0) {
+            if(s->effect_param <= 0x1F) {  // First 32 possible values adjust the ticks (goes into tempo)
+                ctx->tempo = s->effect_param;
+            } else {                       //32 and greater values adjust the BPM
+                ctx->bpm = s->effect_param;
+            }
+        }
+        break;
+
+    case 16: /* Gxx: Set global volume */
+        ctx->global_volume = (float)((s->effect_param > 0x40) ? 0x40 : s->effect_param) / (float)0x40;
+        break;
+    case 17: /* Hxy: Global volume slide */
+        if(s->effect_param > 0) {    ch->global_volume_slide_param = s->effect_param; }
+        break;
+    case 21: /* Lxx: Set envelope position */
+        ch->volume_envelope_frame_count = s->effect_param;
+        ch->panning_envelope_frame_count = s->effect_param;
+        break;
+    case 25: /* Pxy: Panning slide */
+        if(s->effect_param > 0) {   ch->panning_slide_param = s->effect_param; }
+        break;
+    case 27: /* Rxy: Multi retrig note */
+        if(s->effect_param > 0) {
+            if((s->effect_param >> 4) == 0) {    /* Keep previous x value */
+                ch->multi_retrig_param = (ch->multi_retrig_param & 0xF0) | (s->effect_param & 0x0F);
+            } else {
+                ch->multi_retrig_param = s->effect_param;
+            }
+        }
+        break;
+    case 29: /* Txy: Tremor */
+        if(s->effect_param > 0) { ch->tremor_param = s->effect_param; }  /* Tremor x and y params are not separately kept in memory, unlike Rxy */
+        break;
+    case 33: /* Xxy: Extra stuff */
+        switch(s->effect_param >> 4) {
+        case 1: /* X1y: Extra fine portamento up */
+            if(s->effect_param & 0x0F) {    ch->extra_fine_portamento_up_param = s->effect_param & 0x0F; }
+            jar_xm_pitch_slide(ctx, ch, -1.0f * ch->extra_fine_portamento_up_param);
+            break;
+        case 2: /* X2y: Extra fine portamento down */
+            if(s->effect_param & 0x0F) {   ch->extra_fine_portamento_down_param = s->effect_param & 0x0F; }
+            jar_xm_pitch_slide(ctx, ch, ch->extra_fine_portamento_down_param);
+            break;
+        default:
+            break;
+        }
+        break;
+    default:
+        break;
+    }
+}
+
+static void jar_xm_trigger_note(jar_xm_context_t* ctx, jar_xm_channel_context_t* ch, unsigned int flags) {
+    if (!(flags & jar_xm_TRIGGER_KEEP_SAMPLE_POSITION)) {
+        ch->sample_position = 0.f;
+        ch->ping = true;
+    };
+
+    if (!(flags & jar_xm_TRIGGER_KEEP_VOLUME)) {
+       if(ch->sample != NULL) {
+        ch->volume = ch->sample->volume;
+        };
+    };
+    ch->panning = ch->sample->panning;
+    ch->sustained = true;
+    ch->fadeout_volume = ch->volume_envelope_volume = 1.0f;
+    ch->panning_envelope_panning = .5f;
+    ch->volume_envelope_frame_count = ch->panning_envelope_frame_count = 0;
+    ch->vibrato_note_offset = 0.f;
+    ch->tremolo_volume = 0.f;
+    ch->tremor_on = false;
+    ch->autovibrato_ticks = 0;
+
+    if(ch->vibrato_waveform_retrigger) { ch->vibrato_ticks = 0; } /* XXX: should the waveform itself also be reset to sine? */
+    if(ch->tremolo_waveform_retrigger) { ch->tremolo_ticks = 0; }
+    if(!(flags & jar_xm_TRIGGER_KEEP_PERIOD)) {
+        ch->period = jar_xm_period(ctx, ch->note);
+        jar_xm_update_frequency(ctx, ch);
+    }
+    ch->latest_trigger = ctx->generated_samples;
+    if(ch->instrument != NULL) { ch->instrument->latest_trigger = ctx->generated_samples; }
+    if(ch->sample != NULL) {     ch->sample->latest_trigger = ctx->generated_samples; }
+}
+
+static void jar_xm_cut_note(jar_xm_channel_context_t* ch) {
+    ch->volume = .0f; /* NB: this is not the same as Key Off */
+//    ch->curr_left = .0f;
+//    ch->curr_right = .0f;
+}
+
+static void jar_xm_key_off(jar_xm_channel_context_t* ch) {
+    ch->sustained = false; /* Key Off */
+    if(ch->instrument == NULL || !ch->instrument->volume_envelope.enabled) { jar_xm_cut_note(ch); } /* If no volume envelope is used, also cut the note */
+}
+
+static void jar_xm_row(jar_xm_context_t* ctx) {
+    if(ctx->position_jump) {
+        ctx->current_table_index = ctx->jump_dest;
+        ctx->current_row = ctx->jump_row;
+        ctx->position_jump = false;
+        ctx->pattern_break = false;
+        ctx->jump_row = 0;
+        jar_xm_post_pattern_change(ctx);
+    } else if(ctx->pattern_break) {
+        ctx->current_table_index++;
+        ctx->current_row = ctx->jump_row;
+        ctx->pattern_break = false;
+        ctx->jump_row = 0;
+        jar_xm_post_pattern_change(ctx);
+    }
+    jar_xm_pattern_t* cur = ctx->module.patterns + ctx->module.pattern_table[ctx->current_table_index];
+    bool in_a_loop = false;
+
+    /* Read notes information for all channels into temporary pattern slot */
+    for(uint8_t i = 0; i < ctx->module.num_channels; ++i) {
+        jar_xm_pattern_slot_t* s = cur->slots + ctx->current_row * ctx->module.num_channels + i;
+        jar_xm_channel_context_t* ch = ctx->channels + i;
+        ch->current = s;
+        // If there is no note delay effect (0xED) then...
+        if(s->effect_type != 0xE || s->effect_param >> 4 != 0xD) {
+            //********** Process the channel slot information **********
+            jar_xm_handle_note_and_instrument(ctx, ch, s);
+        } else {
+            // read the note delay information
+            ch->note_delay_param = s->effect_param & 0x0F;
+        }
+        if(!in_a_loop && ch->pattern_loop_count > 0) {
+            // clarify if in a loop or not
+            in_a_loop = true;
+        }
+    }
+
+    if(!in_a_loop) {
+        /* No E6y loop is in effect (or we are in the first pass) */
+        ctx->loop_count = (ctx->row_loop_count[MAX_NUM_ROWS * ctx->current_table_index + ctx->current_row]++);
+    }
+
+    /// Move to next row
+    ctx->current_row++; /* uint8 warning: can increment from 255 to 0, in which case it is still necessary to go the next pattern. */
+    if (!ctx->position_jump && !ctx->pattern_break && (ctx->current_row >= cur->num_rows || ctx->current_row == 0)) {
+        ctx->current_table_index++;
+        ctx->current_row = ctx->jump_row; /* This will be 0 most of the time, except when E60 is used */
+        ctx->jump_row = 0;
+        jar_xm_post_pattern_change(ctx);
+    }
+}
+
+static void jar_xm_envelope_tick(jar_xm_channel_context_t *ch, jar_xm_envelope_t *env, uint16_t *counter, float *outval) {
+    if(env->num_points < 2) {
+        if(env->num_points == 1) {
+            *outval = (float)env->points[0].value / (float)0x40;
+            if(*outval > 1) { *outval = 1; };
+        } else {;
+            return;
+        };
+    } else {
+        if(env->loop_enabled) {
+            uint16_t loop_start = env->points[env->loop_start_point].frame;
+            uint16_t loop_end = env->points[env->loop_end_point].frame;
+            uint16_t loop_length = loop_end - loop_start;
+            if(*counter >= loop_end) { *counter -= loop_length; };
+        };
+        for(uint8_t j = 0; j < (env->num_points - 1); ++j) {
+            if(env->points[j].frame <= *counter && env->points[j+1].frame >= *counter) {
+                *outval = jar_xm_envelope_lerp(env->points + j, env->points + j + 1, *counter) / (float)0x40;
+                break;
+            };
+        };
+        /* Make sure it is safe to increment frame count */
+        if(!ch->sustained || !env->sustain_enabled || *counter != env->points[env->sustain_point].frame) { (*counter)++; };
+    };
+};
+
+static void jar_xm_envelopes(jar_xm_channel_context_t *ch) {
+    if(ch->instrument != NULL) {
+        if(ch->instrument->volume_envelope.enabled) {
+            if(!ch->sustained) {
+                ch->fadeout_volume -= (float)ch->instrument->volume_fadeout / 65536.f;
+                jar_xm_CLAMP_DOWN(ch->fadeout_volume);
+            };
+            jar_xm_envelope_tick(ch, &(ch->instrument->volume_envelope), &(ch->volume_envelope_frame_count), &(ch->volume_envelope_volume));
+        };
+        if(ch->instrument->panning_envelope.enabled) {
+            jar_xm_envelope_tick(ch, &(ch->instrument->panning_envelope), &(ch->panning_envelope_frame_count), &(ch->panning_envelope_panning));
+        };
+    };
+};
+
+static void jar_xm_tick(jar_xm_context_t* ctx) {
+    if(ctx->current_tick == 0) {
+        jar_xm_row(ctx);        // We have processed all ticks and we run the row
+    }
+
+    jar_xm_module_t* mod = &(ctx->module);
+    for(uint8_t i = 0; i < ctx->module.num_channels; ++i) {
+        jar_xm_channel_context_t* ch = ctx->channels + i;
+        jar_xm_envelopes(ch);
+        jar_xm_autovibrato(ctx, ch);
+        if(ch->arp_in_progress && !HAS_ARPEGGIO(ch->current)) {
+            ch->arp_in_progress = false;
+            ch->arp_note_offset = 0;
+            jar_xm_update_frequency(ctx, ch);
+        }
+        if(ch->vibrato_in_progress && !HAS_VIBRATO(ch->current)) {
+            ch->vibrato_in_progress = false;
+            ch->vibrato_note_offset = 0.f;
+            jar_xm_update_frequency(ctx, ch);
+        }
+
+        // Effects in volumne column mostly handled on a per tick basis
+        switch(ch->current->volume_column & 0xF0) {
+        case 0x50: // Checks for volume = 64
+            if(ch->current->volume_column != 0x50) break;
+        case 0x10: // Set volume 0-15
+        case 0x20: // Set volume 16-32
+        case 0x30: // Set volume 32-48
+        case 0x40: // Set volume 48-64
+            ch->volume = (float)(ch->current->volume_column - 16) / 64.0f;
+            break;
+        case 0x60: // Volume slide down
+            jar_xm_volume_slide(ch, ch->current->volume_column & 0x0F);
+            break;
+        case 0x70: // Volume slide up
+            jar_xm_volume_slide(ch, ch->current->volume_column << 4);
+            break;
+        case 0x80: // Fine volume slide down
+            jar_xm_volume_slide(ch, ch->current->volume_column & 0x0F);
+            break;
+        case 0x90: // Fine volume slide up
+            jar_xm_volume_slide(ch, ch->current->volume_column << 4);
+            break;
+        case 0xA0: // Set vibrato speed
+            ch->vibrato_param = (ch->vibrato_param & 0x0F) | ((ch->current->volume_column & 0x0F) << 4);
+            break;
+        case 0xB0: // Vibrato
+            ch->vibrato_in_progress = false;
+            jar_xm_vibrato(ctx, ch, ch->vibrato_param, ch->vibrato_ticks++);
+            break;
+        case 0xC0: // Set panning
+            if(!ctx->current_tick ) {
+                ch->panning = (float)(ch->current->volume_column & 0x0F) / 15.0f;
+            }
+            break;
+        case 0xD0: // Panning slide left
+            jar_xm_panning_slide(ch, ch->current->volume_column & 0x0F);
+            break;
+        case 0xE0: // Panning slide right
+            jar_xm_panning_slide(ch, ch->current->volume_column << 4);
+            break;
+        case 0xF0: // Tone portamento
+            if(!ctx->current_tick ) {
+                if(ch->current->volume_column & 0x0F) { ch->tone_portamento_param = ((ch->current->volume_column & 0x0F) << 4) | (ch->current->volume_column & 0x0F); }
+            };
+            jar_xm_tone_portamento(ctx, ch);
+            break;
+        default:
+            break;
+        }
+
+        // Only some standard effects handled on a per tick basis
+        // see jar_xm_handle_note_and_instrument for all effects handling on a per row basis
+        switch(ch->current->effect_type) {
+        case 0: /* 0xy: Arpeggio */
+            if(ch->current->effect_param > 0) {
+                char arp_offset = ctx->tempo % 3;
+                switch(arp_offset) {
+                case 2: /* 0 -> x -> 0 -> y -> x -> … */
+                    if(ctx->current_tick == 1) {
+                        ch->arp_in_progress = true;
+                        ch->arp_note_offset = ch->current->effect_param >> 4;
+                        jar_xm_update_frequency(ctx, ch);
+                        break;
+                    }
+                    /* No break here, this is intended */
+                case 1: /* 0 -> 0 -> y -> x -> … */
+                    if(ctx->current_tick == 0) {
+                        ch->arp_in_progress = false;
+                        ch->arp_note_offset = 0;
+                        jar_xm_update_frequency(ctx, ch);
+                        break;
+                    }
+                    /* No break here, this is intended */
+                case 0: /* 0 -> y -> x -> … */
+                    jar_xm_arpeggio(ctx, ch, ch->current->effect_param, ctx->current_tick - arp_offset);
+                default:
+                    break;
+                }
+            }
+            break;
+
+        case 1: /* 1xx: Portamento up */
+            if(ctx->current_tick == 0) break;
+            jar_xm_pitch_slide(ctx, ch, -ch->portamento_up_param);
+            break;
+        case 2: /* 2xx: Portamento down */
+            if(ctx->current_tick == 0) break;
+            jar_xm_pitch_slide(ctx, ch, ch->portamento_down_param);
+            break;
+        case 3: /* 3xx: Tone portamento */
+            if(ctx->current_tick == 0) break;
+            jar_xm_tone_portamento(ctx, ch);
+            break;
+        case 4: /* 4xy: Vibrato */
+            if(ctx->current_tick == 0) break;
+            ch->vibrato_in_progress = true;
+            jar_xm_vibrato(ctx, ch, ch->vibrato_param, ch->vibrato_ticks++);
+            break;
+        case 5: /* 5xy: Tone portamento + Volume slide */
+            if(ctx->current_tick == 0) break;
+            jar_xm_tone_portamento(ctx, ch);
+            jar_xm_volume_slide(ch, ch->volume_slide_param);
+            break;
+        case 6: /* 6xy: Vibrato + Volume slide */
+            if(ctx->current_tick == 0) break;
+            ch->vibrato_in_progress = true;
+            jar_xm_vibrato(ctx, ch, ch->vibrato_param, ch->vibrato_ticks++);
+            jar_xm_volume_slide(ch, ch->volume_slide_param);
+            break;
+        case 7: /* 7xy: Tremolo */
+            if(ctx->current_tick == 0) break;
+            jar_xm_tremolo(ctx, ch, ch->tremolo_param, ch->tremolo_ticks++);
+            break;
+        case 8: /* 8xy: Set panning */
+            break;
+        case 9: /* 9xy: Sample offset */
+            break;
+        case 0xA: /* Axy: Volume slide */
+            if(ctx->current_tick == 0) break;
+            jar_xm_volume_slide(ch, ch->volume_slide_param);
+            break;
+        case 0xE: /* EXy: Extended command */
+            switch(ch->current->effect_param >> 4) {
+            case 0x9: /* E9y: Retrigger note */
+                if(ctx->current_tick != 0 && ch->current->effect_param & 0x0F) {
+                    if(!(ctx->current_tick % (ch->current->effect_param & 0x0F))) {
+                        jar_xm_trigger_note(ctx, ch, 0);
+                        jar_xm_envelopes(ch);
+                    }
+                }
+                break;
+            case 0xC: /* ECy: Note cut */
+                if((ch->current->effect_param & 0x0F) == ctx->current_tick) {
+                    jar_xm_cut_note(ch);
+                }
+                break;
+            case 0xD: /* EDy: Note delay */
+                if(ch->note_delay_param == ctx->current_tick) {
+                    jar_xm_handle_note_and_instrument(ctx, ch, ch->current);
+                    jar_xm_envelopes(ch);
+                }
+                break;
+            default:
+                break;
+            }
+            break;
+        case 16: /* Fxy: Set tempo/BPM */
+            break;
+        case 17: /* Hxy: Global volume slide */
+            if(ctx->current_tick == 0) break;
+            if((ch->global_volume_slide_param & 0xF0) && (ch->global_volume_slide_param & 0x0F)) { break; }; /* Invalid state */
+            if(ch->global_volume_slide_param & 0xF0) {    /* Global slide up */
+                float f = (float)(ch->global_volume_slide_param >> 4) / (float)0x40;
+                ctx->global_volume += f;
+                jar_xm_CLAMP_UP(ctx->global_volume);
+            } else {                                      /* Global slide down */
+                float f = (float)(ch->global_volume_slide_param & 0x0F) / (float)0x40;
+                ctx->global_volume -= f;
+                jar_xm_CLAMP_DOWN(ctx->global_volume);
+            };
+            break;
+
+        case 20: /* Kxx: Key off */
+            if(ctx->current_tick == ch->current->effect_param) {     jar_xm_key_off(ch); };
+            break;
+        case 21: /* Lxx: Set envelope position */
+            break;
+        case 25: /* Pxy: Panning slide */
+            if(ctx->current_tick == 0) break;
+            jar_xm_panning_slide(ch, ch->panning_slide_param);
+            break;
+        case 27: /* Rxy: Multi retrig note */
+            if(ctx->current_tick == 0) break;
+            if(((ch->multi_retrig_param) & 0x0F) == 0) break;
+            if((ctx->current_tick % (ch->multi_retrig_param & 0x0F)) == 0) {
+                float v = ch->volume * multi_retrig_multiply[ch->multi_retrig_param >> 4]
+                    + multi_retrig_add[ch->multi_retrig_param >> 4];
+                jar_xm_CLAMP(v);
+                jar_xm_trigger_note(ctx, ch, 0);
+                ch->volume = v;
+            };
+            break;
+
+        case 29: /* Txy: Tremor */
+            if(ctx->current_tick == 0) break;
+            ch->tremor_on = ( (ctx->current_tick - 1) % ((ch->tremor_param >> 4) + (ch->tremor_param & 0x0F) + 2) > (ch->tremor_param >> 4)  );
+            break;
+        default:
+            break;
+        };
+
+        float panning, volume;
+        panning = ch->panning + (ch->panning_envelope_panning - .5f) * (.5f - fabs(ch->panning - .5f)) * 2.0f;
+        if(ch->tremor_on) {
+            volume = .0f;
+        } else {
+            volume = ch->volume + ch->tremolo_volume;
+            jar_xm_CLAMP(volume);
+            volume *= ch->fadeout_volume * ch->volume_envelope_volume;
+        };
+
+        if (mod->ramping) {
+            ch->target_panning = panning;
+            ch->target_volume = volume;
+        } else {
+            ch->actual_panning = panning;
+            ch->actual_volume = volume;
+        };
+    };
+
+    ctx->current_tick++; // ok so we understand that ticks increment within the row
+    if(ctx->current_tick >= ctx->tempo + ctx->extra_ticks) {
+        // This means it reached the end of the row and we reset
+        ctx->current_tick = 0;
+        ctx->extra_ticks = 0;
+    };
+
+    // Number of ticks / second = BPM * 0.4
+    ctx->remaining_samples_in_tick += (float)ctx->rate / ((float)ctx->bpm * 0.4f);
+};
+
+static void jar_xm_next_of_sample(jar_xm_context_t* ctx, jar_xm_channel_context_t* ch, int previous) {
+    jar_xm_module_t* mod = &(ctx->module);
+
+//    ch->curr_left = 0.f;
+//    ch->curr_right = 0.f;
+    if(ch->instrument == NULL || ch->sample == NULL || ch->sample_position < 0) {
+        ch->curr_left = 0.f;
+        ch->curr_right = 0.f;
+        if (mod->ramping) {
+            if (ch->frame_count < jar_xm_SAMPLE_RAMPING_POINTS) {
+                if (previous > -1) {
+                    ch->end_of_previous_sample_left[previous] = jar_xm_LERP(ch->end_of_previous_sample_left[ch->frame_count], ch->curr_left, (float)ch->frame_count / (float)jar_xm_SAMPLE_RAMPING_POINTS);
+                    ch->end_of_previous_sample_right[previous] = jar_xm_LERP(ch->end_of_previous_sample_right[ch->frame_count], ch->curr_right, (float)ch->frame_count / (float)jar_xm_SAMPLE_RAMPING_POINTS);
+                } else {
+                    ch->curr_left = jar_xm_LERP(ch->end_of_previous_sample_left[ch->frame_count], ch->curr_left, (float)ch->frame_count / (float)jar_xm_SAMPLE_RAMPING_POINTS);
+                    ch->curr_right = jar_xm_LERP(ch->end_of_previous_sample_right[ch->frame_count], ch->curr_right, (float)ch->frame_count / (float)jar_xm_SAMPLE_RAMPING_POINTS);
+                };
+            };
+        };
+        return;
+    };
+    if(ch->sample->length == 0) {
+        return;
+    };
+
+    float t = 0.f;
+    uint32_t b = 0;
+    if(mod->linear_interpolation) {
+        b = ch->sample_position + 1;
+        t = ch->sample_position - (uint32_t)ch->sample_position; /* Cheaper than fmodf(., 1.f) */
+    };
+
+    float u_left, u_right;
+    u_left = ch->sample->data[(uint32_t)ch->sample_position];
+    if (ch->sample->stereo) {
+        u_right = ch->sample->data[(uint32_t)ch->sample_position + ch->sample->length];
+    } else {
+        u_right = u_left;
+    };
+    float v_left = 0.f, v_right = 0.f;
+    switch(ch->sample->loop_type) {
+    case jar_xm_NO_LOOP:
+        if(mod->linear_interpolation) {
+            v_left = (b < ch->sample->length) ? ch->sample->data[b] : .0f;
+            if (ch->sample->stereo) {
+                v_right = (b < ch->sample->length) ? ch->sample->data[b + ch->sample->length] : .0f;
+            } else {
+                v_right = v_left;
+            };
+        };
+        ch->sample_position += ch->step;
+        if(ch->sample_position >= ch->sample->length) { ch->sample_position = -1; } // stop playing this sample
+        break;
+    case jar_xm_FORWARD_LOOP:
+        if(mod->linear_interpolation) {
+            v_left = ch->sample->data[ (b == ch->sample->loop_end) ? ch->sample->loop_start : b ];
+            if (ch->sample->stereo) {
+                v_right = ch->sample->data[ (b == ch->sample->loop_end) ? ch->sample->loop_start + ch->sample->length : b + ch->sample->length];
+            } else {
+                v_right = v_left;
+            };
+        };
+        ch->sample_position += ch->step;
+        if (ch->sample_position >= ch->sample->loop_end) {
+            ch->sample_position -= ch->sample->loop_length;
+        };
+        if(ch->sample_position >= ch->sample->length) {
+            ch->sample_position = ch->sample->loop_start;
+        };
+        break;
+    case jar_xm_PING_PONG_LOOP:
+        if(ch->ping) {
+            if(mod->linear_interpolation) {
+                v_left = (b >= ch->sample->loop_end) ? ch->sample->data[(uint32_t)ch->sample_position] : ch->sample->data[b];
+                if (ch->sample->stereo) {
+                    v_right = (b >= ch->sample->loop_end) ? ch->sample->data[(uint32_t)ch->sample_position + ch->sample->length] : ch->sample->data[b + ch->sample->length];
+                } else {
+                    v_right = v_left;
+                };
+            };
+            ch->sample_position += ch->step;
+            if(ch->sample_position >= ch->sample->loop_end) {
+                ch->ping = false;
+                ch->sample_position = (ch->sample->loop_end << 1) - ch->sample_position;
+            };
+            if(ch->sample_position >= ch->sample->length) {
+                ch->ping = false;
+                ch->sample_position -= ch->sample->length - 1;
+            };
+        } else {
+            if(mod->linear_interpolation) {
+                v_left = u_left;
+                v_right = u_right;
+                u_left = (b == 1 || b - 2 <= ch->sample->loop_start) ? ch->sample->data[(uint32_t)ch->sample_position] : ch->sample->data[b - 2];
+                if (ch->sample->stereo) {
+                    u_right = (b == 1 || b - 2 <= ch->sample->loop_start) ? ch->sample->data[(uint32_t)ch->sample_position + ch->sample->length] : ch->sample->data[b + ch->sample->length - 2];
+                } else {
+                    u_right = u_left;
+                };
+            };
+            ch->sample_position -= ch->step;
+            if(ch->sample_position <= ch->sample->loop_start) {
+                ch->ping = true;
+                ch->sample_position = (ch->sample->loop_start << 1) - ch->sample_position;
+            };
+            if (ch->sample_position <= .0f) {
+                ch->ping = true;
+                ch->sample_position = .0f;
+            };
+        };
+        break;
+
+    default:
+        v_left = .0f;
+        v_right = .0f;
+        break;
+    };
+
+    float endval_left = mod->linear_interpolation ? jar_xm_LERP(u_left, v_left, t) : u_left;
+    float endval_right = mod->linear_interpolation ? jar_xm_LERP(u_right, v_right, t) : u_right;
+
+    if (mod->ramping) {
+        if(ch->frame_count < jar_xm_SAMPLE_RAMPING_POINTS) {
+            /* Smoothly transition between old and new sample. */
+            if (previous > -1) {
+                ch->end_of_previous_sample_left[previous] = jar_xm_LERP(ch->end_of_previous_sample_left[ch->frame_count], endval_left, (float)ch->frame_count / (float)jar_xm_SAMPLE_RAMPING_POINTS);
+                ch->end_of_previous_sample_right[previous] = jar_xm_LERP(ch->end_of_previous_sample_right[ch->frame_count], endval_right, (float)ch->frame_count / (float)jar_xm_SAMPLE_RAMPING_POINTS);
+            } else {
+                ch->curr_left = jar_xm_LERP(ch->end_of_previous_sample_left[ch->frame_count], endval_left, (float)ch->frame_count / (float)jar_xm_SAMPLE_RAMPING_POINTS);
+                ch->curr_right = jar_xm_LERP(ch->end_of_previous_sample_right[ch->frame_count], endval_right, (float)ch->frame_count / (float)jar_xm_SAMPLE_RAMPING_POINTS);
+            };
+        };
+    };
+
+    if (previous > -1) {
+        ch->end_of_previous_sample_left[previous] = endval_left;
+        ch->end_of_previous_sample_right[previous] = endval_right;
+    } else {
+        ch->curr_left = endval_left;
+        ch->curr_right = endval_right;
+    };
+};
+
+// gather all channel audio into stereo float
+static void jar_xm_mixdown(jar_xm_context_t* ctx, float* left, float* right) {
+    jar_xm_module_t* mod = &(ctx->module);
+
+    if(ctx->remaining_samples_in_tick <= 0) {
+        jar_xm_tick(ctx);
+    };
+    ctx->remaining_samples_in_tick--;
+    *left = 0.f;
+    *right = 0.f;
+    if(ctx->max_loop_count > 0 && ctx->loop_count > ctx->max_loop_count) { return; }
+
+    for(uint8_t i = 0; i < ctx->module.num_channels; ++i) {
+        jar_xm_channel_context_t* ch = ctx->channels + i;
+        if(ch->instrument != NULL && ch->sample != NULL && ch->sample_position >= 0) {
+            jar_xm_next_of_sample(ctx, ch, -1);
+            if(!ch->muted && !ch->instrument->muted) {
+                *left  += ch->curr_left * ch->actual_volume * (1.f - ch->actual_panning);
+                *right += ch->curr_right * ch->actual_volume * ch->actual_panning;
+            };
+
+            if (mod->ramping) {
+                ch->frame_count++;
+                jar_xm_SLIDE_TOWARDS(ch->actual_volume, ch->target_volume, ctx->volume_ramp);
+                jar_xm_SLIDE_TOWARDS(ch->actual_panning, ch->target_panning, ctx->panning_ramp);
+            };
+        };
+    };
+    if (ctx->global_volume != 1.0f) {
+        *left *= ctx->global_volume;
+        *right *= ctx->global_volume;
+    };
+
+    // experimental
+//    float counter = (float)ctx->generated_samples * 0.0001f
+//    *left = tan(&left + sin(counter));
+//    *right = tan(&right + cos(counter));
+
+    // apply brick wall limiter when audio goes beyond bounderies
+    if(*left < -1.0)  {*left = -1.0;}  else if(*left > 1.0)  {*left = 1.0;};
+    if(*right < -1.0) {*right = -1.0;} else if(*right > 1.0) {*right = 1.0;};
+};
+
+void jar_xm_generate_samples(jar_xm_context_t* ctx, float* output, size_t numsamples) {
+    if(ctx && output) {
+        ctx->generated_samples += numsamples;
+        for(size_t i = 0; i < numsamples; i++) {
+            jar_xm_mixdown(ctx, output + (2 * i), output + (2 * i + 1));
+        };
+    };
+};
+
+uint64_t jar_xm_get_remaining_samples(jar_xm_context_t* ctx) {
+    uint64_t total = 0;
+    uint8_t currentLoopCount = jar_xm_get_loop_count(ctx);
+    jar_xm_set_max_loop_count(ctx, 0);
+    while(jar_xm_get_loop_count(ctx) == currentLoopCount) {
+        total += ctx->remaining_samples_in_tick;
+        ctx->remaining_samples_in_tick = 0;
+        jar_xm_tick(ctx);
+    }
+    ctx->loop_count = currentLoopCount;
+    return total;
+}
+
+//--------------------------------------------
+//FILE LOADER - TODO - NEEDS TO BE CLEANED UP
+//--------------------------------------------
+#undef DEBUG
+#define DEBUG(...) do {      \
+        fprintf(stderr, __VA_ARGS__); \
+        fflush(stderr); \
+    } while(0)
+
+#define DEBUG_ERR(...) do {      \
+        fprintf(stderr, __VA_ARGS__); \
+        fflush(stderr); \
+    } while(0)
+
+#define FATAL(...) do {      \
+        fprintf(stderr, __VA_ARGS__); \
+        fflush(stderr); \
+        exit(1); \
+    } while(0)
+
+#define FATAL_ERR(...) do {      \
+        fprintf(stderr, __VA_ARGS__); \
+        fflush(stderr); \
+        exit(1); \
+    } while(0)
+
+
+int jar_xm_create_context_from_file(jar_xm_context_t** ctx, uint32_t rate, const char* filename) {
+    FILE* xmf;
+    int size;
+    int ret;
+
+    xmf = fopen(filename, "rb");
+    if(xmf == NULL) {
+        DEBUG_ERR("Could not open input file");
+        *ctx = NULL;
+        return 3;
+    }
+
+    fseek(xmf, 0, SEEK_END);
+    size = ftell(xmf);
+    rewind(xmf);
+    if(size == -1) {
+        fclose(xmf);
+        DEBUG_ERR("fseek() failed");
+        *ctx = NULL;
+        return 4;
+    }
+
+    char* data = JARXM_MALLOC(size + 1);
+    if(!data || fread(data, 1, size, xmf) < size) {
+        fclose(xmf);
+        DEBUG_ERR(data ? "fread() failed" : "JARXM_MALLOC() failed");
+        JARXM_FREE(data);
+        *ctx = NULL;
+        return 5;
+    }
+
+    fclose(xmf);
+
+    ret = jar_xm_create_context_safe(ctx, data, size, rate);
+    JARXM_FREE(data);
+
+    switch(ret) {
+    case 0:
+        break;
+    case 1:        DEBUG("could not create context: module is not sane\n");
+        *ctx = NULL;
+        return 1;
+        break;
+    case 2:        FATAL("could not create context: malloc failed\n");
+        return 2;
+        break;
+    default:       FATAL("could not create context: unknown error\n");
+        return 6;
+        break;
+    }
+
+    return 0;
+}
+
+// not part of the original library
+void jar_xm_reset(jar_xm_context_t* ctx) {
+    for (uint16_t i = 0; i < jar_xm_get_number_of_channels(ctx); i++) {
+        jar_xm_cut_note(&ctx->channels[i]);
+    }
+    ctx->generated_samples = 0;
+    ctx->current_row = 0;
+    ctx->current_table_index = 0;
+    ctx->current_tick = 0;
+    ctx->tempo =ctx->default_tempo; // reset to file default value
+    ctx->bpm = ctx->default_bpm; // reset to file default value
+    ctx->global_volume = ctx->default_global_volume; // reset to file default value
+}
+
+
+void jar_xm_flip_linear_interpolation(jar_xm_context_t* ctx) {
+    if (ctx->module.linear_interpolation) {
+        ctx->module.linear_interpolation = 0;
+    } else {
+        ctx->module.linear_interpolation = 1;
+    }
+}
+
+void jar_xm_table_jump(jar_xm_context_t* ctx, int table_ptr) {
+    for (uint16_t i = 0; i < jar_xm_get_number_of_channels(ctx); i++) {
+        jar_xm_cut_note(&ctx->channels[i]);
+    }
+    ctx->current_row = 0;
+    ctx->current_tick = 0;
+    if(table_ptr > 0 && table_ptr < ctx->module.length) {
+        ctx->current_table_index = table_ptr;
+        ctx->module.restart_position = table_ptr; // The reason to jump is to start a new loop or track
+    } else {
+        ctx->current_table_index = 0;
+        ctx->module.restart_position = 0; // The reason to jump is to start a new loop or track
+        ctx->tempo =ctx->default_tempo; // reset to file default value
+        ctx->bpm = ctx->default_bpm; // reset to file default value
+        ctx->global_volume = ctx->default_global_volume; // reset to file default value
+    };
+}
+
+
+// TRANSLATE NOTE NUMBER INTO USER VALUE (ie. 1 = C-1, 2 = C#1, 3 = D-1 ... )
+const char* xm_note_chr(int number) {
+    if (number == NOTE_OFF) {
+        return "==";
+    };
+    number = number % 12;
+    switch(number) {
+    case 1: return "C-";
+    case 2: return "C#";
+    case 3: return "D-";
+    case 4: return "D#";
+    case 5: return "E-";
+    case 6: return "F-";
+    case 7: return "F#";
+    case 8: return "G-";
+    case 9: return "G#";
+    case 10: return "A-";
+    case 11: return "A#";
+    case 12: return "B-";
+    };
+    return "??";
+};
+
+const char* xm_octave_chr(int number) {
+    if (number == NOTE_OFF) {
+        return "=";
+    };
+
+    int number2 = number - number % 12;
+    int result = floor(number2 / 12) + 1;
+    switch(result) {
+    case 1: return "1";
+    case 2: return "2";
+    case 3: return "3";
+    case 4: return "4";
+    case 5: return "5";
+    case 6: return "6";
+    case 7: return "7";
+    case 8: return "8";
+    default: return "?"; /* UNKNOWN */
+    };
+
+};
+
+// TRANSLATE NOTE EFFECT CODE INTO USER VALUE
+const char* xm_effect_chr(int fx) {
+    switch(fx) {
+    case 0: return "0";  /* ZERO = NO EFFECT */
+    case 1: return "1";  /* 1xx: Portamento up */
+    case 2: return "2";  /* 2xx: Portamento down */
+    case 3: return "3";  /* 3xx: Tone portamento */
+    case 4: return "4";  /* 4xy: Vibrato */
+    case 5: return "5";  /* 5xy: Tone portamento + Volume slide */
+    case 6: return "6";  /* 6xy: Vibrato + Volume slide */
+    case 7: return "7";  /* 7xy: Tremolo */
+    case 8: return "8";  /* 8xx: Set panning */
+    case 9: return "9";  /* 9xx: Sample offset */
+    case 0xA: return "A";/* Axy: Volume slide */
+    case 0xB: return "B";/* Bxx: Position jump */
+    case 0xC: return "C";/* Cxx: Set volume */
+    case 0xD: return "D";/* Dxx: Pattern break */
+    case 0xE: return "E";/* EXy: Extended command */
+    case 0xF: return "F";/* Fxx: Set tempo/BPM */
+    case 16: return "G"; /* Gxx: Set global volume */
+    case 17: return "H"; /* Hxy: Global volume slide */
+    case 21: return "L"; /* Lxx: Set envelope position */
+    case 25: return "P"; /* Pxy: Panning slide */
+    case 27: return "R"; /* Rxy: Multi retrig note */
+    case 29: return "T"; /* Txy: Tremor */
+    case 33: return "X"; /* Xxy: Extra stuff */
+    default: return "?"; /* UNKNOWN */
+    };
+}
+
+#ifdef JAR_XM_RAYLIB
+
+#include "raylib.h" // Need RayLib API calls for the DEBUG display
+
+void jar_xm_debug(jar_xm_context_t *ctx) {
+    int size=40;
+    int x = 0, y = 0;
+
+    // DEBUG VARIABLES
+    y += size; DrawText(TextFormat("CUR TBL = %i", ctx->current_table_index),       x, y, size, WHITE);
+    y += size; DrawText(TextFormat("CUR PAT = %i", ctx->module.pattern_table[ctx->current_table_index]),   x, y, size, WHITE);
+    y += size; DrawText(TextFormat("POS JMP = %d", ctx->position_jump),             x, y, size, WHITE);
+    y += size; DrawText(TextFormat("JMP DST = %i", ctx->jump_dest),                 x, y, size, WHITE);
+    y += size; DrawText(TextFormat("PTN BRK = %d", ctx->pattern_break),             x, y, size, WHITE);
+    y += size; DrawText(TextFormat("CUR ROW = %i", ctx->current_row),               x, y, size, WHITE);
+    y += size; DrawText(TextFormat("JMP ROW = %i", ctx->jump_row),                  x, y, size, WHITE);
+    y += size; DrawText(TextFormat("ROW LCT = %i", ctx->row_loop_count),            x, y, size, WHITE);
+    y += size; DrawText(TextFormat("LCT     = %i", ctx->loop_count),                x, y, size, WHITE);
+    y += size; DrawText(TextFormat("MAX LCT = %i", ctx->max_loop_count),            x, y, size, WHITE);
+    x = size * 12; y = 0;
+
+    y += size; DrawText(TextFormat("CUR TCK = %i", ctx->current_tick),              x, y, size, WHITE);
+    y += size; DrawText(TextFormat("XTR TCK = %i", ctx->extra_ticks),               x, y, size, WHITE);
+    y += size; DrawText(TextFormat("TCK/ROW = %i", ctx->tempo),                     x, y, size, ORANGE);
+    y += size; DrawText(TextFormat("SPL TCK = %f", ctx->remaining_samples_in_tick), x, y, size, WHITE);
+    y += size; DrawText(TextFormat("GEN SPL = %i", ctx->generated_samples),         x, y, size, WHITE);
+    y += size * 7;
+
+    x = 0;
+    size=16;
+    // TIMELINE OF MODULE
+    for (int i=0; i < ctx->module.length; i++) {
+        if (i == ctx->jump_dest) {
+            if (ctx->position_jump) {
+                DrawRectangle(i * size * 2, y - size, size * 2, size, GOLD);
+            } else {
+                DrawRectangle(i * size * 2, y - size, size * 2, size, BROWN);
+            };
+        };
+        if (i == ctx->current_table_index) {
+//            DrawText(TextFormat("%02X", ctx->current_tick), i * size * 2, y - size, size, WHITE);
+            DrawRectangle(i * size * 2, y, size * 2, size, RED);
+            DrawText(TextFormat("%02X", ctx->current_row), i * size * 2, y - size, size, YELLOW);
+        } else {
+            DrawRectangle(i * size * 2, y, size * 2, size, ORANGE);
+        };
+        DrawText(TextFormat("%02X", ctx->module.pattern_table[i]), i * size * 2, y, size, WHITE);
+    };
+    y += size;
+
+    jar_xm_pattern_t* cur = ctx->module.patterns + ctx->module.pattern_table[ctx->current_table_index];
+
+    /* DISPLAY CURRENTLY PLAYING PATTERN */
+
+    x += 2 * size;
+    for(uint8_t i = 0; i < ctx->module.num_channels; i++) {
+        DrawRectangle(x, y, 8 * size, size, PURPLE);
+        DrawText("N", x, y, size, YELLOW);
+        DrawText("I", x + size * 2, y, size, YELLOW);
+        DrawText("V", x + size * 4, y, size, YELLOW);
+        DrawText("FX", x + size * 6, y, size, YELLOW);
+        x += 9 * size;
+    };
+    x += size;
+    for (int j=(ctx->current_row - 14); j<(ctx->current_row + 15); j++) {
+        y += size;
+        x = 0;
+        if (j >=0 && j < (cur->num_rows)) {
+            DrawRectangle(x, y, size * 2, size, BROWN);
+            DrawText(TextFormat("%02X",j), x, y, size, WHITE);
+            x += 2 * size;
+            for(uint8_t i = 0; i < ctx->module.num_channels; i++) {
+                if (j==(ctx->current_row)) {
+                    DrawRectangle(x, y, 8 * size, size, DARKGREEN);
+                } else {
+                    DrawRectangle(x, y, 8 * size, size, DARKGRAY);
+                };
+                jar_xm_pattern_slot_t *s = cur->slots + j * ctx->module.num_channels + i;
+           //     jar_xm_channel_context_t *ch = ctx->channels + i;
+                if (s->note > 0) {DrawText(TextFormat("%s%s", xm_note_chr(s->note), xm_octave_chr(s->note) ), x, y, size, WHITE);} else {DrawText("...", x, y, size, GRAY);};
+                if (s->instrument > 0) {
+                    DrawText(TextFormat("%02X", s->instrument), x + size * 2, y, size, WHITE);
+                    if (s->volume_column == 0) {
+                        DrawText(TextFormat("%02X", 64), x + size * 4, y, size, YELLOW);
+                    };
+                } else {
+                    DrawText("..", x + size * 2, y, size, GRAY);
+                    if (s->volume_column == 0) {
+                        DrawText("..", x + size * 4, y, size, GRAY);
+                    };
+                };
+                if (s->volume_column > 0) {DrawText(TextFormat("%02X", (s->volume_column - 16)), x + size * 4, y, size, WHITE);};
+                if (s->effect_type > 0 || s->effect_param > 0) {DrawText(TextFormat("%s%02X", xm_effect_chr(s->effect_type), s->effect_param), x + size * 6, y, size, WHITE);};
+                x += 9 * size;
+            };
+        };
+    };
+
+}
+#endif // RayLib extension
+
+#endif//end of JAR_XM_IMPLEMENTATION
+//-------------------------------------------------------------------------------
+
+#endif//end of INCLUDE_JAR_XM_H
diff --git a/shared/miniaudio.h b/shared/external/miniaudio.h
similarity index 99%
rename from shared/miniaudio.h
rename to shared/external/miniaudio.h
index 47332e1..8abd91f 100644
--- a/shared/miniaudio.h
+++ b/shared/external/miniaudio.h
@@ -92618,4 +92618,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
-*/
+*/
\ No newline at end of file
diff --git a/shared/external/qoa.h b/shared/external/qoa.h
new file mode 100644
index 0000000..fc62f47
--- /dev/null
+++ b/shared/external/qoa.h
@@ -0,0 +1,728 @@
+/*
+
+Copyright (c) 2023, Dominic Szablewski - https://phoboslab.org
+SPDX-License-Identifier: MIT
+
+QOA - The "Quite OK Audio" format for fast, lossy audio compression
+
+
+-- Data Format
+
+QOA encodes pulse-code modulated (PCM) audio data with up to 255 channels, 
+sample rates from 1 up to 16777215 hertz and a bit depth of 16 bits.
+
+The compression method employed in QOA is lossy; it discards some information
+from the uncompressed PCM data. For many types of audio signals this compression
+is "transparent", i.e. the difference from the original file is often not
+audible.
+
+QOA encodes 20 samples of 16 bit PCM data into slices of 64 bits. A single
+sample therefore requires 3.2 bits of storage space, resulting in a 5x
+compression (16 / 3.2).
+
+A QOA file consists of an 8 byte file header, followed by a number of frames.
+Each frame contains an 8 byte frame header, the current 16 byte en-/decoder
+state per channel and 256 slices per channel. Each slice is 8 bytes wide and
+encodes 20 samples of audio data.
+
+All values, including the slices, are big endian. The file layout is as follows:
+
+struct {
+	struct {
+		char     magic[4];         // magic bytes "qoaf"
+		uint32_t samples;          // samples per channel in this file
+	} file_header;             
+
+	struct {
+		struct {
+			uint8_t  num_channels; // no. of channels
+			uint24_t samplerate;   // samplerate in hz
+			uint16_t fsamples;     // samples per channel in this frame
+			uint16_t fsize;        // frame size (includes this header)
+		} frame_header;          
+
+		struct {
+			int16_t history[4];    // most recent last
+			int16_t weights[4];    // most recent last
+		} lms_state[num_channels]; 
+
+		qoa_slice_t slices[256][num_channels];
+
+	} frames[ceil(samples / (256 * 20))];
+} qoa_file_t;
+
+Each `qoa_slice_t` contains a quantized scalefactor `sf_quant` and 20 quantized
+residuals `qrNN`:
+
+.- QOA_SLICE -- 64 bits, 20 samples --------------------------/  /------------.
+|        Byte[0]         |        Byte[1]         |  Byte[2]  \  \  Byte[7]   |
+| 7  6  5  4  3  2  1  0 | 7  6  5  4  3  2  1  0 | 7  6  5   /  /    2  1  0 |
+|------------+--------+--------+--------+---------+---------+-\  \--+---------|
+|  sf_quant  |  qr00  |  qr01  |  qr02  |  qr03   |  qr04   | /  /  |  qr19   |
+`-------------------------------------------------------------\  \------------`
+
+Each frame except the last must contain exactly 256 slices per channel. The last
+frame may contain between 1 .. 256 (inclusive) slices per channel. The last
+slice (for each channel) in the last frame may contain less than 20 samples; the
+slice still must be 8 bytes wide, with the unused samples zeroed out.
+
+Channels are interleaved per slice. E.g. for 2 channel stereo: 
+slice[0] = L, slice[1] = R, slice[2] = L, slice[3] = R ...
+
+A valid QOA file or stream must have at least one frame. Each frame must contain
+at least one channel and one sample with a samplerate between 1 .. 16777215
+(inclusive).
+
+If the total number of samples is not known by the encoder, the samples in the
+file header may be set to 0x00000000 to indicate that the encoder is 
+"streaming". In a streaming context, the samplerate and number of channels may
+differ from frame to frame. For static files (those with samples set to a
+non-zero value), each frame must have the same number of channels and same
+samplerate.
+
+Note that this implementation of QOA only handles files with a known total
+number of samples.
+
+A decoder should support at least 8 channels. The channel layout for channel
+counts 1 .. 8 is:
+
+	1. Mono
+	2. L, R
+	3. L, R, C 
+	4. FL, FR, B/SL, B/SR 
+	5. FL, FR, C, B/SL, B/SR 
+	6. FL, FR, C, LFE, B/SL, B/SR
+	7. FL, FR, C, LFE, B, SL, SR 
+	8. FL, FR, C, LFE, BL, BR, SL, SR
+
+QOA predicts each audio sample based on the previously decoded ones using a
+"Sign-Sign Least Mean Squares Filter" (LMS). This prediction plus the 
+dequantized residual forms the final output sample.
+
+*/
+
+
+
+/* -----------------------------------------------------------------------------
+	Header - Public functions */
+
+#ifndef QOA_H
+#define QOA_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define QOA_MIN_FILESIZE 16
+#define QOA_MAX_CHANNELS 8
+
+#define QOA_SLICE_LEN 20
+#define QOA_SLICES_PER_FRAME 256
+#define QOA_FRAME_LEN (QOA_SLICES_PER_FRAME * QOA_SLICE_LEN)
+#define QOA_LMS_LEN 4
+#define QOA_MAGIC 0x716f6166 /* 'qoaf' */
+
+#define QOA_FRAME_SIZE(channels, slices) \
+	(8 + QOA_LMS_LEN * 4 * channels + 8 * slices * channels)
+
+typedef struct {
+	int history[QOA_LMS_LEN];
+	int weights[QOA_LMS_LEN];
+} qoa_lms_t;
+
+typedef struct {
+	unsigned int channels;
+	unsigned int samplerate;
+	unsigned int samples;
+	qoa_lms_t lms[QOA_MAX_CHANNELS];
+	#ifdef QOA_RECORD_TOTAL_ERROR
+		double error;
+	#endif
+} qoa_desc;
+
+unsigned int qoa_encode_header(qoa_desc *qoa, unsigned char *bytes);
+unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned int frame_len, unsigned char *bytes);
+void *qoa_encode(const short *sample_data, qoa_desc *qoa, unsigned int *out_len);
+
+unsigned int qoa_max_frame_size(qoa_desc *qoa);
+unsigned int qoa_decode_header(const unsigned char *bytes, int size, qoa_desc *qoa);
+unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa_desc *qoa, short *sample_data, unsigned int *frame_len);
+short *qoa_decode(const unsigned char *bytes, int size, qoa_desc *file);
+
+#ifndef QOA_NO_STDIO
+
+int qoa_write(const char *filename, const short *sample_data, qoa_desc *qoa);
+void *qoa_read(const char *filename, qoa_desc *qoa);
+
+#endif /* QOA_NO_STDIO */
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* QOA_H */
+
+
+/* -----------------------------------------------------------------------------
+	Implementation */
+
+#ifdef QOA_IMPLEMENTATION
+#include <stdlib.h>
+
+#ifndef QOA_MALLOC
+	#define QOA_MALLOC(sz) malloc(sz)
+	#define QOA_FREE(p) free(p)
+#endif
+
+typedef unsigned long long qoa_uint64_t;
+
+
+/* The quant_tab provides an index into the dequant_tab for residuals in the
+range of -8 .. 8. It maps this range to just 3bits and becomes less accurate at 
+the higher end. Note that the residual zero is identical to the lowest positive 
+value. This is mostly fine, since the qoa_div() function always rounds away 
+from zero. */
+
+static const int qoa_quant_tab[17] = {
+	7, 7, 7, 5, 5, 3, 3, 1, /* -8..-1 */
+	0,                      /*  0     */
+	0, 2, 2, 4, 4, 6, 6, 6  /*  1.. 8 */
+};
+
+
+/* We have 16 different scalefactors. Like the quantized residuals these become
+less accurate at the higher end. In theory, the highest scalefactor that we
+would need to encode the highest 16bit residual is (2**16)/8 = 8192. However we
+rely on the LMS filter to predict samples accurately enough that a maximum 
+residual of one quarter of the 16 bit range is sufficient. I.e. with the 
+scalefactor 2048 times the quant range of 8 we can encode residuals up to 2**14.
+
+The scalefactor values are computed as:
+scalefactor_tab[s] <- round(pow(s + 1, 2.75)) */
+
+static const int qoa_scalefactor_tab[16] = {
+	1, 7, 21, 45, 84, 138, 211, 304, 421, 562, 731, 928, 1157, 1419, 1715, 2048
+};
+
+
+/* The reciprocal_tab maps each of the 16 scalefactors to their rounded 
+reciprocals 1/scalefactor. This allows us to calculate the scaled residuals in 
+the encoder with just one multiplication instead of an expensive division. We 
+do this in .16 fixed point with integers, instead of floats.
+
+The reciprocal_tab is computed as:
+reciprocal_tab[s] <- ((1<<16) + scalefactor_tab[s] - 1) / scalefactor_tab[s] */
+
+static const int qoa_reciprocal_tab[16] = {
+	65536, 9363, 3121, 1457, 781, 475, 311, 216, 156, 117, 90, 71, 57, 47, 39, 32
+};
+
+
+/* The dequant_tab maps each of the scalefactors and quantized residuals to 
+their unscaled & dequantized version.
+
+Since qoa_div rounds away from the zero, the smallest entries are mapped to 3/4
+instead of 1. The dequant_tab assumes the following dequantized values for each 
+of the quant_tab indices and is computed as:
+float dqt[8] = {0.75, -0.75, 2.5, -2.5, 4.5, -4.5, 7, -7};
+dequant_tab[s][q] <- round_ties_away_from_zero(scalefactor_tab[s] * dqt[q])
+
+The rounding employed here is "to nearest, ties away from zero",  i.e. positive
+and negative values are treated symmetrically.
+*/
+
+static const int qoa_dequant_tab[16][8] = {
+	{   1,    -1,    3,    -3,    5,    -5,     7,     -7},
+	{   5,    -5,   18,   -18,   32,   -32,    49,    -49},
+	{  16,   -16,   53,   -53,   95,   -95,   147,   -147},
+	{  34,   -34,  113,  -113,  203,  -203,   315,   -315},
+	{  63,   -63,  210,  -210,  378,  -378,   588,   -588},
+	{ 104,  -104,  345,  -345,  621,  -621,   966,   -966},
+	{ 158,  -158,  528,  -528,  950,  -950,  1477,  -1477},
+	{ 228,  -228,  760,  -760, 1368, -1368,  2128,  -2128},
+	{ 316,  -316, 1053, -1053, 1895, -1895,  2947,  -2947},
+	{ 422,  -422, 1405, -1405, 2529, -2529,  3934,  -3934},
+	{ 548,  -548, 1828, -1828, 3290, -3290,  5117,  -5117},
+	{ 696,  -696, 2320, -2320, 4176, -4176,  6496,  -6496},
+	{ 868,  -868, 2893, -2893, 5207, -5207,  8099,  -8099},
+	{1064, -1064, 3548, -3548, 6386, -6386,  9933,  -9933},
+	{1286, -1286, 4288, -4288, 7718, -7718, 12005, -12005},
+	{1536, -1536, 5120, -5120, 9216, -9216, 14336, -14336},
+};
+
+
+/* The Least Mean Squares Filter is the heart of QOA. It predicts the next
+sample based on the previous 4 reconstructed samples. It does so by continuously
+adjusting 4 weights based on the residual of the previous prediction.
+
+The next sample is predicted as the sum of (weight[i] * history[i]).
+
+The adjustment of the weights is done with a "Sign-Sign-LMS" that adds or
+subtracts the residual to each weight, based on the corresponding sample from 
+the history. This, surprisingly, is sufficient to get worthwhile predictions.
+
+This is all done with fixed point integers. Hence the right-shifts when updating
+the weights and calculating the prediction. */
+
+static int qoa_lms_predict(qoa_lms_t *lms) {
+	int prediction = 0;
+	for (int i = 0; i < QOA_LMS_LEN; i++) {
+		prediction += lms->weights[i] * lms->history[i];
+	}
+	return prediction >> 13;
+}
+
+static void qoa_lms_update(qoa_lms_t *lms, int sample, int residual) {
+	int delta = residual >> 4;
+	for (int i = 0; i < QOA_LMS_LEN; i++) {
+		lms->weights[i] += lms->history[i] < 0 ? -delta : delta;
+	}
+
+	for (int i = 0; i < QOA_LMS_LEN-1; i++) {
+		lms->history[i] = lms->history[i+1];
+	}
+	lms->history[QOA_LMS_LEN-1] = sample;
+}
+
+
+/* qoa_div() implements a rounding division, but avoids rounding to zero for 
+small numbers. E.g. 0.1 will be rounded to 1. Note that 0 itself still 
+returns as 0, which is handled in the qoa_quant_tab[].
+qoa_div() takes an index into the .16 fixed point qoa_reciprocal_tab as an
+argument, so it can do the division with a cheaper integer multiplication. */
+
+static inline int qoa_div(int v, int scalefactor) {
+	int reciprocal = qoa_reciprocal_tab[scalefactor];
+	int n = (v * reciprocal + (1 << 15)) >> 16;
+	n = n + ((v > 0) - (v < 0)) - ((n > 0) - (n < 0)); /* round away from 0 */
+	return n;
+}
+
+static inline int qoa_clamp(int v, int min, int max) {
+	if (v < min) { return min; }
+	if (v > max) { return max; }
+	return v;
+}
+
+/* This specialized clamp function for the signed 16 bit range improves decode
+performance quite a bit. The extra if() statement works nicely with the CPUs
+branch prediction as this branch is rarely taken. */
+
+static inline int qoa_clamp_s16(int v) {
+	if ((unsigned int)(v + 32768) > 65535) {
+		if (v < -32768) { return -32768; }
+		if (v >  32767) { return  32767; }
+	}
+	return v;
+}
+
+static inline qoa_uint64_t qoa_read_u64(const unsigned char *bytes, unsigned int *p) {
+	bytes += *p;
+	*p += 8;
+	return 
+		((qoa_uint64_t)(bytes[0]) << 56) | ((qoa_uint64_t)(bytes[1]) << 48) |
+		((qoa_uint64_t)(bytes[2]) << 40) | ((qoa_uint64_t)(bytes[3]) << 32) |
+		((qoa_uint64_t)(bytes[4]) << 24) | ((qoa_uint64_t)(bytes[5]) << 16) |
+		((qoa_uint64_t)(bytes[6]) <<  8) | ((qoa_uint64_t)(bytes[7]) <<  0);
+}
+
+static inline void qoa_write_u64(qoa_uint64_t v, unsigned char *bytes, unsigned int *p) {
+	bytes += *p;
+	*p += 8;
+	bytes[0] = (v >> 56) & 0xff;
+	bytes[1] = (v >> 48) & 0xff;
+	bytes[2] = (v >> 40) & 0xff;
+	bytes[3] = (v >> 32) & 0xff;
+	bytes[4] = (v >> 24) & 0xff;
+	bytes[5] = (v >> 16) & 0xff;
+	bytes[6] = (v >>  8) & 0xff;
+	bytes[7] = (v >>  0) & 0xff;
+}
+
+
+/* -----------------------------------------------------------------------------
+	Encoder */
+
+unsigned int qoa_encode_header(qoa_desc *qoa, unsigned char *bytes) {
+	unsigned int p = 0;
+	qoa_write_u64(((qoa_uint64_t)QOA_MAGIC << 32) | qoa->samples, bytes, &p);
+	return p;
+}
+
+unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned int frame_len, unsigned char *bytes) {
+	unsigned int channels = qoa->channels;
+
+	unsigned int p = 0;
+	unsigned int slices = (frame_len + QOA_SLICE_LEN - 1) / QOA_SLICE_LEN;
+	unsigned int frame_size = QOA_FRAME_SIZE(channels, slices);
+	int prev_scalefactor[QOA_MAX_CHANNELS] = {0};
+
+	/* Write the frame header */
+	qoa_write_u64((
+		(qoa_uint64_t)qoa->channels   << 56 |
+		(qoa_uint64_t)qoa->samplerate << 32 |
+		(qoa_uint64_t)frame_len       << 16 |
+		(qoa_uint64_t)frame_size
+	), bytes, &p);
+
+	
+	for (int c = 0; c < channels; c++) {
+		/* If the weights have grown too large, reset them to 0. This may happen
+		with certain high-frequency sounds. This is a last resort and will 
+		introduce quite a bit of noise, but should at least prevent pops/clicks */
+		int weights_sum = 
+			qoa->lms[c].weights[0] * qoa->lms[c].weights[0] + 
+			qoa->lms[c].weights[1] * qoa->lms[c].weights[1] + 
+			qoa->lms[c].weights[2] * qoa->lms[c].weights[2] + 
+			qoa->lms[c].weights[3] * qoa->lms[c].weights[3];
+		if (weights_sum > 0x2fffffff) {
+			qoa->lms[c].weights[0] = 0;
+			qoa->lms[c].weights[1] = 0;
+			qoa->lms[c].weights[2] = 0;
+			qoa->lms[c].weights[3] = 0;
+		}
+
+		/* Write the current LMS state */
+		qoa_uint64_t weights = 0;
+		qoa_uint64_t history = 0;
+		for (int i = 0; i < QOA_LMS_LEN; i++) {
+			history = (history << 16) | (qoa->lms[c].history[i] & 0xffff);
+			weights = (weights << 16) | (qoa->lms[c].weights[i] & 0xffff);
+		}
+		qoa_write_u64(history, bytes, &p);
+		qoa_write_u64(weights, bytes, &p);
+	}
+
+	/* We encode all samples with the channels interleaved on a slice level.
+	E.g. for stereo: (ch-0, slice 0), (ch 1, slice 0), (ch 0, slice 1), ...*/
+	for (int sample_index = 0; sample_index < frame_len; sample_index += QOA_SLICE_LEN) {
+
+		for (int c = 0; c < channels; c++) {
+			int slice_len = qoa_clamp(QOA_SLICE_LEN, 0, frame_len - sample_index);
+			int slice_start = sample_index * channels + c;
+			int slice_end = (sample_index + slice_len) * channels + c;			
+
+			/* Brute for search for the best scalefactor. Just go through all
+			16 scalefactors, encode all samples for the current slice and 
+			meassure the total squared error. */
+			qoa_uint64_t best_error = -1;
+			qoa_uint64_t best_slice;
+			qoa_lms_t best_lms;
+			int best_scalefactor;
+
+			for (int sfi = 0; sfi < 16; sfi++) {
+				/* There is a strong correlation between the scalefactors of
+				neighboring slices. As an optimization, start testing
+				the best scalefactor of the previous slice first. */
+				int scalefactor = (sfi + prev_scalefactor[c]) % 16;
+
+				/* We have to reset the LMS state to the last known good one
+				before trying each scalefactor, as each pass updates the LMS
+				state when encoding. */
+				qoa_lms_t lms = qoa->lms[c];
+				qoa_uint64_t slice = scalefactor;
+				qoa_uint64_t current_error = 0;
+
+				for (int si = slice_start; si < slice_end; si += channels) {
+					int sample = sample_data[si];
+					int predicted = qoa_lms_predict(&lms);
+
+					int residual = sample - predicted;
+					int scaled = qoa_div(residual, scalefactor);
+					int clamped = qoa_clamp(scaled, -8, 8);
+					int quantized = qoa_quant_tab[clamped + 8];
+					int dequantized = qoa_dequant_tab[scalefactor][quantized];
+					int reconstructed = qoa_clamp_s16(predicted + dequantized);
+
+					long long error = (sample - reconstructed);
+					current_error += error * error;
+					if (current_error > best_error) {
+						break;
+					}
+
+					qoa_lms_update(&lms, reconstructed, dequantized);
+					slice = (slice << 3) | quantized;
+				}
+
+				if (current_error < best_error) {
+					best_error = current_error;
+					best_slice = slice;
+					best_lms = lms;
+					best_scalefactor = scalefactor;
+				}
+			}
+
+			prev_scalefactor[c] = best_scalefactor;
+
+			qoa->lms[c] = best_lms;
+			#ifdef QOA_RECORD_TOTAL_ERROR
+				qoa->error += best_error;
+			#endif
+
+			/* If this slice was shorter than QOA_SLICE_LEN, we have to left-
+			shift all encoded data, to ensure the rightmost bits are the empty
+			ones. This should only happen in the last frame of a file as all
+			slices are completely filled otherwise. */
+			best_slice <<= (QOA_SLICE_LEN - slice_len) * 3;
+			qoa_write_u64(best_slice, bytes, &p);
+		}
+	}
+	
+	return p;
+}
+
+void *qoa_encode(const short *sample_data, qoa_desc *qoa, unsigned int *out_len) {
+	if (
+		qoa->samples == 0 || 
+		qoa->samplerate == 0 || qoa->samplerate > 0xffffff ||
+		qoa->channels == 0 || qoa->channels > QOA_MAX_CHANNELS
+	) {
+		return NULL;
+	}
+
+	/* Calculate the encoded size and allocate */
+	unsigned int num_frames = (qoa->samples + QOA_FRAME_LEN-1) / QOA_FRAME_LEN;
+	unsigned int num_slices = (qoa->samples + QOA_SLICE_LEN-1) / QOA_SLICE_LEN;
+	unsigned int encoded_size = 8 +                    /* 8 byte file header */
+		num_frames * 8 +                               /* 8 byte frame headers */
+		num_frames * QOA_LMS_LEN * 4 * qoa->channels + /* 4 * 4 bytes lms state per channel */
+		num_slices * 8 * qoa->channels;                /* 8 byte slices */
+
+	unsigned char *bytes = QOA_MALLOC(encoded_size);
+
+	for (int c = 0; c < qoa->channels; c++) {
+		/* Set the initial LMS weights to {0, 0, -1, 2}. This helps with the 
+		prediction of the first few ms of a file. */
+		qoa->lms[c].weights[0] = 0;
+		qoa->lms[c].weights[1] = 0;
+		qoa->lms[c].weights[2] = -(1<<13);
+		qoa->lms[c].weights[3] =  (1<<14);
+
+		/* Explicitly set the history samples to 0, as we might have some
+		garbage in there. */
+		for (int i = 0; i < QOA_LMS_LEN; i++) {
+			qoa->lms[c].history[i] = 0;
+		}
+	}
+
+
+	/* Encode the header and go through all frames */
+	unsigned int p = qoa_encode_header(qoa, bytes);
+	#ifdef QOA_RECORD_TOTAL_ERROR
+		qoa->error = 0;
+	#endif
+
+	int frame_len = QOA_FRAME_LEN;
+	for (int sample_index = 0; sample_index < qoa->samples; sample_index += frame_len) {
+		frame_len = qoa_clamp(QOA_FRAME_LEN, 0, qoa->samples - sample_index);		
+		const short *frame_samples = sample_data + sample_index * qoa->channels;
+		unsigned int frame_size = qoa_encode_frame(frame_samples, qoa, frame_len, bytes + p);
+		p += frame_size;
+	}
+
+	*out_len = p;
+	return bytes;
+}
+
+
+
+/* -----------------------------------------------------------------------------
+	Decoder */
+
+unsigned int qoa_max_frame_size(qoa_desc *qoa) {
+	return QOA_FRAME_SIZE(qoa->channels, QOA_SLICES_PER_FRAME);
+}
+
+unsigned int qoa_decode_header(const unsigned char *bytes, int size, qoa_desc *qoa) {
+	unsigned int p = 0;
+	if (size < QOA_MIN_FILESIZE) {
+		return 0;
+	}
+
+
+	/* Read the file header, verify the magic number ('qoaf') and read the 
+	total number of samples. */
+	qoa_uint64_t file_header = qoa_read_u64(bytes, &p);
+
+	if ((file_header >> 32) != QOA_MAGIC) {
+		return 0;
+	}
+
+	qoa->samples = file_header & 0xffffffff;
+	if (!qoa->samples) {
+		return 0;
+	}
+
+	/* Peek into the first frame header to get the number of channels and
+	the samplerate. */
+	qoa_uint64_t frame_header = qoa_read_u64(bytes, &p);
+	qoa->channels   = (frame_header >> 56) & 0x0000ff;
+	qoa->samplerate = (frame_header >> 32) & 0xffffff;
+
+	if (qoa->channels == 0 || qoa->samples == 0 || qoa->samplerate == 0) {
+		return 0;
+	}
+
+	return 8;
+}
+
+unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa_desc *qoa, short *sample_data, unsigned int *frame_len) {
+	unsigned int p = 0;
+	*frame_len = 0;
+
+	if (size < 8 + QOA_LMS_LEN * 4 * qoa->channels) {
+		return 0;
+	}
+
+	/* Read and verify the frame header */
+	qoa_uint64_t frame_header = qoa_read_u64(bytes, &p);
+	int channels   = (frame_header >> 56) & 0x0000ff;
+	int samplerate = (frame_header >> 32) & 0xffffff;
+	int samples    = (frame_header >> 16) & 0x00ffff;
+	int frame_size = (frame_header      ) & 0x00ffff;
+
+	int data_size = frame_size - 8 - QOA_LMS_LEN * 4 * channels;
+	int num_slices = data_size / 8;
+	int max_total_samples = num_slices * QOA_SLICE_LEN;
+
+	if (
+		channels != qoa->channels || 
+		samplerate != qoa->samplerate ||
+		frame_size > size ||
+		samples * channels > max_total_samples
+	) {
+		return 0;
+	}
+
+
+	/* Read the LMS state: 4 x 2 bytes history, 4 x 2 bytes weights per channel */
+	for (int c = 0; c < channels; c++) {
+		qoa_uint64_t history = qoa_read_u64(bytes, &p);
+		qoa_uint64_t weights = qoa_read_u64(bytes, &p);
+		for (int i = 0; i < QOA_LMS_LEN; i++) {
+			qoa->lms[c].history[i] = ((signed short)(history >> 48));
+			history <<= 16;
+			qoa->lms[c].weights[i] = ((signed short)(weights >> 48));
+			weights <<= 16;
+		}
+	}
+
+
+	/* Decode all slices for all channels in this frame */
+	for (int sample_index = 0; sample_index < samples; sample_index += QOA_SLICE_LEN) {
+		for (int c = 0; c < channels; c++) {
+			qoa_uint64_t slice = qoa_read_u64(bytes, &p);
+
+			int scalefactor = (slice >> 60) & 0xf;
+			int slice_start = sample_index * channels + c;
+			int slice_end = qoa_clamp(sample_index + QOA_SLICE_LEN, 0, samples) * channels + c;
+
+			for (int si = slice_start; si < slice_end; si += channels) {
+				int predicted = qoa_lms_predict(&qoa->lms[c]);
+				int quantized = (slice >> 57) & 0x7;
+				int dequantized = qoa_dequant_tab[scalefactor][quantized];
+				int reconstructed = qoa_clamp_s16(predicted + dequantized);
+				
+				sample_data[si] = reconstructed;
+				slice <<= 3;
+
+				qoa_lms_update(&qoa->lms[c], reconstructed, dequantized);
+			}
+		}
+	}
+
+	*frame_len = samples;
+	return p;
+}
+
+short *qoa_decode(const unsigned char *bytes, int size, qoa_desc *qoa) {
+	unsigned int p = qoa_decode_header(bytes, size, qoa);
+	if (!p) {
+		return NULL;
+	}
+
+	/* Calculate the required size of the sample buffer and allocate */
+	int total_samples = qoa->samples * qoa->channels;
+	short *sample_data = QOA_MALLOC(total_samples * sizeof(short));
+
+	unsigned int sample_index = 0;
+	unsigned int frame_len;
+	unsigned int frame_size;
+
+	/* Decode all frames */
+	do {
+		short *sample_ptr = sample_data + sample_index * qoa->channels;
+		frame_size = qoa_decode_frame(bytes + p, size - p, qoa, sample_ptr, &frame_len);
+
+		p += frame_size;
+		sample_index += frame_len;
+	} while (frame_size && sample_index < qoa->samples);
+
+	qoa->samples = sample_index;
+	return sample_data;
+}
+
+
+
+/* -----------------------------------------------------------------------------
+	File read/write convenience functions */
+
+#ifndef QOA_NO_STDIO
+#include <stdio.h>
+
+int qoa_write(const char *filename, const short *sample_data, qoa_desc *qoa) {
+	FILE *f = fopen(filename, "wb");
+	unsigned int size;
+	void *encoded;
+
+	if (!f) {
+		return 0;
+	}
+
+	encoded = qoa_encode(sample_data, qoa, &size);
+	if (!encoded) {
+		fclose(f);
+		return 0;
+	}
+
+	fwrite(encoded, 1, size, f);
+	fclose(f);
+
+	QOA_FREE(encoded);
+	return size;
+}
+
+void *qoa_read(const char *filename, qoa_desc *qoa) {
+	FILE *f = fopen(filename, "rb");
+	int size, bytes_read;
+	void *data;
+	short *sample_data;
+
+	if (!f) {
+		return NULL;
+	}
+
+	fseek(f, 0, SEEK_END);
+	size = ftell(f);
+	if (size <= 0) {
+		fclose(f);
+		return NULL;
+	}
+	fseek(f, 0, SEEK_SET);
+
+	data = QOA_MALLOC(size);
+	if (!data) {
+		fclose(f);
+		return NULL;
+	}
+
+	bytes_read = fread(data, 1, size, f);
+	fclose(f);
+
+	sample_data = qoa_decode(data, bytes_read, qoa);
+	QOA_FREE(data);
+	return sample_data;
+}
+
+#endif /* QOA_NO_STDIO */
+#endif /* QOA_IMPLEMENTATION */
diff --git a/shared/external/qoaplay.c b/shared/external/qoaplay.c
new file mode 100644
index 0000000..039e279
--- /dev/null
+++ b/shared/external/qoaplay.c
@@ -0,0 +1,278 @@
+/*******************************************************************************************
+*
+*   qoaplay - QOA stream playing helper functions
+*
+*   qoaplay is a tiny abstraction to read and decode a QOA file "on the fly".
+*   It reads and decodes one frame at a time with minimal memory requirements.
+*   qoaplay also provides some functions to seek to a specific frame.
+*
+*   LICENSE: MIT License
+*
+*   Copyright (c) 2023 Dominic Szablewski (@phoboslab), reviewed by Ramon Santamaria (@raysan5)
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a copy
+*   of this software and associated documentation files (the "Software"), to deal
+*   in the Software without restriction, including without limitation the rights
+*   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+*   copies of the Software, and to permit persons to whom the Software is
+*   furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included in all
+*   copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+*   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+**********************************************************************************************/
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+// QOA streaming data descriptor
+typedef struct {
+    qoa_desc info;                  // QOA descriptor data
+
+    FILE *file;                     // QOA file to read, if NULL, using memory buffer -> file_data
+    unsigned char *file_data;       // QOA file data on memory
+    unsigned int file_data_size;    // QOA file data on memory size
+    unsigned int file_data_offset;  // QOA file data on memory offset for next read
+
+    unsigned int first_frame_pos;   // First frame position (after QOA header, required for offset)
+    unsigned int sample_position;   // Current streaming sample position
+
+    unsigned char *buffer;          // Buffer used to read samples from file/memory (used on decoding)
+    unsigned int buffer_len;        // Buffer length to read samples for streaming
+
+    short *sample_data;             // Sample data decoded
+    unsigned int sample_data_len;   // Sample data decoded length
+    unsigned int sample_data_pos;   // Sample data decoded position
+
+} qoaplay_desc;
+
+//----------------------------------------------------------------------------------
+// Module Functions Declaration
+//----------------------------------------------------------------------------------
+
+#if defined(__cplusplus)
+extern "C" {            // Prevents name mangling of functions
+#endif
+
+qoaplay_desc *qoaplay_open(const char *path);
+qoaplay_desc *qoaplay_open_memory(const unsigned char *data, int data_size);
+void qoaplay_close(qoaplay_desc *qoa_ctx);
+
+void qoaplay_rewind(qoaplay_desc *qoa_ctx);
+void qoaplay_seek_frame(qoaplay_desc *qoa_ctx, int frame);
+unsigned int qoaplay_decode(qoaplay_desc *qoa_ctx, float *sample_data, int num_samples);
+unsigned int qoaplay_decode_frame(qoaplay_desc *qoa_ctx);
+double qoaplay_get_duration(qoaplay_desc *qoa_ctx);
+double qoaplay_get_time(qoaplay_desc *qoa_ctx);
+int qoaplay_get_frame(qoaplay_desc *qoa_ctx);
+
+#if defined(__cplusplus)
+}            // Prevents name mangling of functions
+#endif
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition
+//----------------------------------------------------------------------------------
+
+// Open QOA file, keep FILE pointer to keep reading from file
+qoaplay_desc *qoaplay_open(const char *path)
+{
+    FILE *file = fopen(path, "rb");
+    if (!file) return NULL;
+
+    // Read and decode the file header
+    unsigned char header[QOA_MIN_FILESIZE];
+    int read = fread(header, QOA_MIN_FILESIZE, 1, file);
+    if (!read) return NULL;
+
+    qoa_desc qoa;
+    unsigned int first_frame_pos = qoa_decode_header(header, QOA_MIN_FILESIZE, &qoa);
+    if (!first_frame_pos) return NULL;
+
+    // Rewind the file back to beginning of the first frame
+    fseek(file, first_frame_pos, SEEK_SET);
+
+    // Allocate one chunk of memory for the qoaplay_desc struct
+    // + the sample data for one frame
+    // + a buffer to hold one frame of encoded data
+    unsigned int buffer_size = qoa_max_frame_size(&qoa);
+    unsigned int sample_data_size = qoa.channels*QOA_FRAME_LEN*sizeof(short)*2;
+    qoaplay_desc *qoa_ctx = QOA_MALLOC(sizeof(qoaplay_desc) + buffer_size + sample_data_size);
+    memset(qoa_ctx, 0, sizeof(qoaplay_desc));
+
+    qoa_ctx->file = file;
+    qoa_ctx->file_data = NULL;
+    qoa_ctx->file_data_size = 0;
+    qoa_ctx->file_data_offset = 0;
+    qoa_ctx->first_frame_pos = first_frame_pos;
+
+    // Setup data pointers to previously allocated data
+    qoa_ctx->buffer = ((unsigned char *)qoa_ctx) + sizeof(qoaplay_desc);
+    qoa_ctx->sample_data = (short *)(((unsigned char *)qoa_ctx) + sizeof(qoaplay_desc) + buffer_size);
+
+    qoa_ctx->info.channels = qoa.channels;
+    qoa_ctx->info.samplerate = qoa.samplerate;
+    qoa_ctx->info.samples = qoa.samples;
+
+    return qoa_ctx;
+}
+
+// Open QOA file from memory, no FILE pointer required
+qoaplay_desc *qoaplay_open_memory(const unsigned char *data, int data_size)
+{
+    // Read and decode the file header
+    unsigned char header[QOA_MIN_FILESIZE];
+    memcpy(header, data, QOA_MIN_FILESIZE);
+
+    qoa_desc qoa;
+    unsigned int first_frame_pos = qoa_decode_header(header, QOA_MIN_FILESIZE, &qoa);
+    if (!first_frame_pos) return NULL;
+
+    // Allocate one chunk of memory for the qoaplay_desc struct
+    // + the sample data for one frame
+    // + a buffer to hold one frame of encoded data
+    unsigned int buffer_size = qoa_max_frame_size(&qoa);
+    unsigned int sample_data_size = qoa.channels*QOA_FRAME_LEN*sizeof(short)*2;
+    qoaplay_desc *qoa_ctx = QOA_MALLOC(sizeof(qoaplay_desc) + buffer_size + sample_data_size);
+    memset(qoa_ctx, 0, sizeof(qoaplay_desc));
+
+    qoa_ctx->file = NULL;
+
+    // Keep a copy of file data provided to be managed internally
+    qoa_ctx->file_data = (unsigned char *)QOA_MALLOC(data_size);
+    memcpy(qoa_ctx->file_data, data, data_size);
+    qoa_ctx->file_data_size = data_size;
+    qoa_ctx->file_data_offset = 0;
+    qoa_ctx->first_frame_pos = first_frame_pos;
+
+    // Setup data pointers to previously allocated data
+    qoa_ctx->buffer = ((unsigned char *)qoa_ctx) + sizeof(qoaplay_desc);
+    qoa_ctx->sample_data = (short *)(((unsigned char *)qoa_ctx) + sizeof(qoaplay_desc) + buffer_size);
+
+    qoa_ctx->info.channels = qoa.channels;
+    qoa_ctx->info.samplerate = qoa.samplerate;
+    qoa_ctx->info.samples = qoa.samples;
+
+    return qoa_ctx;
+}
+
+// Close QOA file (if open) and free internal memory
+void qoaplay_close(qoaplay_desc *qoa_ctx)
+{
+    if (qoa_ctx->file) fclose(qoa_ctx->file);
+
+    if ((qoa_ctx->file_data) && (qoa_ctx->file_data_size > 0))
+    {
+        QOA_FREE(qoa_ctx->file_data);
+        qoa_ctx->file_data_size = 0;
+    }
+
+    QOA_FREE(qoa_ctx);
+}
+
+// Decode one frame from QOA data
+unsigned int qoaplay_decode_frame(qoaplay_desc *qoa_ctx)
+{
+    if (qoa_ctx->file) qoa_ctx->buffer_len = fread(qoa_ctx->buffer, 1, qoa_max_frame_size(&qoa_ctx->info), qoa_ctx->file);
+    else
+    {
+        qoa_ctx->buffer_len = qoa_max_frame_size(&qoa_ctx->info);
+        memcpy(qoa_ctx->buffer, qoa_ctx->file_data + qoa_ctx->file_data_offset, qoa_ctx->buffer_len);
+        qoa_ctx->file_data_offset += qoa_ctx->buffer_len;
+    }
+
+    unsigned int frame_len;
+    qoa_decode_frame(qoa_ctx->buffer, qoa_ctx->buffer_len, &qoa_ctx->info, qoa_ctx->sample_data, &frame_len);
+    qoa_ctx->sample_data_pos = 0;
+    qoa_ctx->sample_data_len = frame_len;
+
+    return frame_len;
+}
+
+// Rewind QOA file or memory pointer to beginning
+void qoaplay_rewind(qoaplay_desc *qoa_ctx)
+{
+    if (qoa_ctx->file) fseek(qoa_ctx->file, qoa_ctx->first_frame_pos, SEEK_SET);
+    else qoa_ctx->file_data_offset = 0;
+
+    qoa_ctx->sample_position = 0;
+    qoa_ctx->sample_data_len = 0;
+    qoa_ctx->sample_data_pos = 0;
+}
+
+// Decode required QOA frames
+unsigned int qoaplay_decode(qoaplay_desc *qoa_ctx, float *sample_data, int num_samples)
+{
+    int src_index = qoa_ctx->sample_data_pos*qoa_ctx->info.channels;
+    int dst_index = 0;
+
+    for (int i = 0; i < num_samples; i++)
+    {
+        // Do we have to decode more samples?
+        if (qoa_ctx->sample_data_len - qoa_ctx->sample_data_pos == 0)
+        {
+            if (!qoaplay_decode_frame(qoa_ctx))
+            {
+                // Loop to the beginning
+                qoaplay_rewind(qoa_ctx);
+                qoaplay_decode_frame(qoa_ctx);
+            }
+
+            src_index = 0;
+        }
+
+        // Normalize to -1..1 floats and write to dest
+        for (int c = 0; c < qoa_ctx->info.channels; c++)
+        {
+            sample_data[dst_index++] = qoa_ctx->sample_data[src_index++]/32768.0;
+        }
+
+        qoa_ctx->sample_data_pos++;
+        qoa_ctx->sample_position++;
+    }
+
+    return num_samples;
+}
+
+// Get QOA total time duration in seconds
+double qoaplay_get_duration(qoaplay_desc *qoa_ctx)
+{
+    return (double)qoa_ctx->info.samples/(double)qoa_ctx->info.samplerate;
+}
+
+// Get QOA current time position in seconds
+double qoaplay_get_time(qoaplay_desc *qoa_ctx)
+{
+    return (double)qoa_ctx->sample_position/(double)qoa_ctx->info.samplerate;
+}
+
+// Get QOA current audio frame
+int qoaplay_get_frame(qoaplay_desc *qoa_ctx)
+{
+    return qoa_ctx->sample_position/QOA_FRAME_LEN;
+}
+
+// Seek QOA audio frame
+void qoaplay_seek_frame(qoaplay_desc *qoa_ctx, int frame)
+{
+    if (frame < 0) frame = 0;
+
+    if (frame > qoa_ctx->info.samples/QOA_FRAME_LEN) frame = qoa_ctx->info.samples/QOA_FRAME_LEN;
+
+    qoa_ctx->sample_position = frame*QOA_FRAME_LEN;
+    qoa_ctx->sample_data_len = 0;
+    qoa_ctx->sample_data_pos = 0;
+
+    unsigned int offset = qoa_ctx->first_frame_pos + frame*qoa_max_frame_size(&qoa_ctx->info);
+
+    if (qoa_ctx->file) fseek(qoa_ctx->file, offset, SEEK_SET);
+    else qoa_ctx->file_data_offset = offset;
+}
diff --git a/shared/external/stb_vorbis.c b/shared/external/stb_vorbis.c
new file mode 100644
index 0000000..3e5c250
--- /dev/null
+++ b/shared/external/stb_vorbis.c
@@ -0,0 +1,5584 @@
+// Ogg Vorbis audio decoder - v1.22 - public domain
+// http://nothings.org/stb_vorbis/
+//
+// Original version written by Sean Barrett in 2007.
+//
+// Originally sponsored by RAD Game Tools. Seeking implementation
+// sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
+// Elias Software, Aras Pranckevicius, and Sean Barrett.
+//
+// LICENSE
+//
+//   See end of file for license information.
+//
+// Limitations:
+//
+//   - floor 0 not supported (used in old ogg vorbis files pre-2004)
+//   - lossless sample-truncation at beginning ignored
+//   - cannot concatenate multiple vorbis streams
+//   - sample positions are 32-bit, limiting seekable 192Khz
+//       files to around 6 hours (Ogg supports 64-bit)
+//
+// Feature contributors:
+//    Dougall Johnson (sample-exact seeking)
+//
+// Bugfix/warning contributors:
+//    Terje Mathisen     Niklas Frykholm     Andy Hill
+//    Casey Muratori     John Bolton         Gargaj
+//    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
+//    Bernhard Wodo      Evan Balster        github:alxprd
+//    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
+//    Phillip Bennefall  Rohit               Thiago Goulart
+//    github:manxorist   Saga Musix          github:infatum
+//    Timur Gagiev       Maxwell Koo         Peter Waller
+//    github:audinowho   Dougall Johnson     David Reid
+//    github:Clownacy    Pedro J. Estebanez  Remi Verschelde
+//    AnthoFoxo          github:morlat       Gabriel Ravier
+//
+// Partial history:
+//    1.22    - 2021-07-11 - various small fixes
+//    1.21    - 2021-07-02 - fix bug for files with no comments
+//    1.20    - 2020-07-11 - several small fixes
+//    1.19    - 2020-02-05 - warnings
+//    1.18    - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
+//    1.17    - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
+//    1.16    - 2019-03-04 - fix warnings
+//    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
+//    1.14    - 2018-02-11 - delete bogus dealloca usage
+//    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
+//    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
+//    1.11    - 2017-07-23 - fix MinGW compilation
+//    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
+//    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
+//    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
+//    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
+//    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
+//                           some crash fixes when out of memory or with corrupt files
+//                           fix some inappropriately signed shifts
+//    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
+//    1.04    - 2014-08-27 - fix missing const-correct case in API
+//    1.03    - 2014-08-07 - warning fixes
+//    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
+//    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
+//    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
+//                           (API change) report sample rate for decode-full-file funcs
+//
+// See end of file for full version history.
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  HEADER BEGINS HERE
+//
+
+#ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
+#define STB_VORBIS_INCLUDE_STB_VORBIS_H
+
+#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
+#define STB_VORBIS_NO_STDIO 1
+#endif
+
+#ifndef STB_VORBIS_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+///////////   THREAD SAFETY
+
+// Individual stb_vorbis* handles are not thread-safe; you cannot decode from
+// them from multiple threads at the same time. However, you can have multiple
+// stb_vorbis* handles and decode from them independently in multiple thrads.
+
+
+///////////   MEMORY ALLOCATION
+
+// normally stb_vorbis uses malloc() to allocate memory at startup,
+// and alloca() to allocate temporary memory during a frame on the
+// stack. (Memory consumption will depend on the amount of setup
+// data in the file and how you set the compile flags for speed
+// vs. size. In my test files the maximal-size usage is ~150KB.)
+//
+// You can modify the wrapper functions in the source (setup_malloc,
+// setup_temp_malloc, temp_malloc) to change this behavior, or you
+// can use a simpler allocation model: you pass in a buffer from
+// which stb_vorbis will allocate _all_ its memory (including the
+// temp memory). "open" may fail with a VORBIS_outofmem if you
+// do not pass in enough data; there is no way to determine how
+// much you do need except to succeed (at which point you can
+// query get_info to find the exact amount required. yes I know
+// this is lame).
+//
+// If you pass in a non-NULL buffer of the type below, allocation
+// will occur from it as described above. Otherwise just pass NULL
+// to use malloc()/alloca()
+
+typedef struct
+{
+   char *alloc_buffer;
+   int   alloc_buffer_length_in_bytes;
+} stb_vorbis_alloc;
+
+
+///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
+
+typedef struct stb_vorbis stb_vorbis;
+
+typedef struct
+{
+   unsigned int sample_rate;
+   int channels;
+
+   unsigned int setup_memory_required;
+   unsigned int setup_temp_memory_required;
+   unsigned int temp_memory_required;
+
+   int max_frame_size;
+} stb_vorbis_info;
+
+typedef struct
+{
+   char *vendor;
+
+   int comment_list_length;
+   char **comment_list;
+} stb_vorbis_comment;
+
+// get general information about the file
+extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
+
+// get ogg comments
+extern stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f);
+
+// get the last error detected (clears it, too)
+extern int stb_vorbis_get_error(stb_vorbis *f);
+
+// close an ogg vorbis file and free all memory in use
+extern void stb_vorbis_close(stb_vorbis *f);
+
+// this function returns the offset (in samples) from the beginning of the
+// file that will be returned by the next decode, if it is known, or -1
+// otherwise. after a flush_pushdata() call, this may take a while before
+// it becomes valid again.
+// NOT WORKING YET after a seek with PULLDATA API
+extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
+
+// returns the current seek point within the file, or offset from the beginning
+// of the memory buffer. In pushdata mode it returns 0.
+extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
+
+///////////   PUSHDATA API
+
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+
+// this API allows you to get blocks of data from any source and hand
+// them to stb_vorbis. you have to buffer them; stb_vorbis will tell
+// you how much it used, and you have to give it the rest next time;
+// and stb_vorbis may not have enough data to work with and you will
+// need to give it the same data again PLUS more. Note that the Vorbis
+// specification does not bound the size of an individual frame.
+
+extern stb_vorbis *stb_vorbis_open_pushdata(
+         const unsigned char * datablock, int datablock_length_in_bytes,
+         int *datablock_memory_consumed_in_bytes,
+         int *error,
+         const stb_vorbis_alloc *alloc_buffer);
+// create a vorbis decoder by passing in the initial data block containing
+//    the ogg&vorbis headers (you don't need to do parse them, just provide
+//    the first N bytes of the file--you're told if it's not enough, see below)
+// on success, returns an stb_vorbis *, does not set error, returns the amount of
+//    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
+// on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
+// if returns NULL and *error is VORBIS_need_more_data, then the input block was
+//       incomplete and you need to pass in a larger block from the start of the file
+
+extern int stb_vorbis_decode_frame_pushdata(
+         stb_vorbis *f,
+         const unsigned char *datablock, int datablock_length_in_bytes,
+         int *channels,             // place to write number of float * buffers
+         float ***output,           // place to write float ** array of float * buffers
+         int *samples               // place to write number of output samples
+     );
+// decode a frame of audio sample data if possible from the passed-in data block
+//
+// return value: number of bytes we used from datablock
+//
+// possible cases:
+//     0 bytes used, 0 samples output (need more data)
+//     N bytes used, 0 samples output (resynching the stream, keep going)
+//     N bytes used, M samples output (one frame of data)
+// note that after opening a file, you will ALWAYS get one N-bytes,0-sample
+// frame, because Vorbis always "discards" the first frame.
+//
+// Note that on resynch, stb_vorbis will rarely consume all of the buffer,
+// instead only datablock_length_in_bytes-3 or less. This is because it wants
+// to avoid missing parts of a page header if they cross a datablock boundary,
+// without writing state-machiney code to record a partial detection.
+//
+// The number of channels returned are stored in *channels (which can be
+// NULL--it is always the same as the number of channels reported by
+// get_info). *output will contain an array of float* buffers, one per
+// channel. In other words, (*output)[0][0] contains the first sample from
+// the first channel, and (*output)[1][0] contains the first sample from
+// the second channel.
+//
+// *output points into stb_vorbis's internal output buffer storage; these
+// buffers are owned by stb_vorbis and application code should not free
+// them or modify their contents. They are transient and will be overwritten
+// once you ask for more data to get decoded, so be sure to grab any data
+// you need before then.
+
+extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
+// inform stb_vorbis that your next datablock will not be contiguous with
+// previous ones (e.g. you've seeked in the data); future attempts to decode
+// frames will cause stb_vorbis to resynchronize (as noted above), and
+// once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
+// will begin decoding the _next_ frame.
+//
+// if you want to seek using pushdata, you need to seek in your file, then
+// call stb_vorbis_flush_pushdata(), then start calling decoding, then once
+// decoding is returning you data, call stb_vorbis_get_sample_offset, and
+// if you don't like the result, seek your file again and repeat.
+#endif
+
+
+//////////   PULLING INPUT API
+
+#ifndef STB_VORBIS_NO_PULLDATA_API
+// This API assumes stb_vorbis is allowed to pull data from a source--
+// either a block of memory containing the _entire_ vorbis stream, or a
+// FILE * that you or it create, or possibly some other reading mechanism
+// if you go modify the source to replace the FILE * case with some kind
+// of callback to your code. (But if you don't support seeking, you may
+// just want to go ahead and use pushdata.)
+
+#if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
+extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
+#endif
+#if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
+extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
+#endif
+// decode an entire file and output the data interleaved into a malloc()ed
+// buffer stored in *output. The return value is the number of samples
+// decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
+// When you're done with it, just free() the pointer returned in *output.
+
+extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
+                                  int *error, const stb_vorbis_alloc *alloc_buffer);
+// create an ogg vorbis decoder from an ogg vorbis stream in memory (note
+// this must be the entire stream!). on failure, returns NULL and sets *error
+
+#ifndef STB_VORBIS_NO_STDIO
+extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
+                                  int *error, const stb_vorbis_alloc *alloc_buffer);
+// create an ogg vorbis decoder from a filename via fopen(). on failure,
+// returns NULL and sets *error (possibly to VORBIS_file_open_failure).
+
+extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
+                                  int *error, const stb_vorbis_alloc *alloc_buffer);
+// create an ogg vorbis decoder from an open FILE *, looking for a stream at
+// the _current_ seek point (ftell). on failure, returns NULL and sets *error.
+// note that stb_vorbis must "own" this stream; if you seek it in between
+// calls to stb_vorbis, it will become confused. Moreover, if you attempt to
+// perform stb_vorbis_seek_*() operations on this file, it will assume it
+// owns the _entire_ rest of the file after the start point. Use the next
+// function, stb_vorbis_open_file_section(), to limit it.
+
+extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
+                int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
+// create an ogg vorbis decoder from an open FILE *, looking for a stream at
+// the _current_ seek point (ftell); the stream will be of length 'len' bytes.
+// on failure, returns NULL and sets *error. note that stb_vorbis must "own"
+// this stream; if you seek it in between calls to stb_vorbis, it will become
+// confused.
+#endif
+
+extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
+extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
+// these functions seek in the Vorbis file to (approximately) 'sample_number'.
+// after calling seek_frame(), the next call to get_frame_*() will include
+// the specified sample. after calling stb_vorbis_seek(), the next call to
+// stb_vorbis_get_samples_* will start with the specified sample. If you
+// do not need to seek to EXACTLY the target sample when using get_samples_*,
+// you can also use seek_frame().
+
+extern int stb_vorbis_seek_start(stb_vorbis *f);
+// this function is equivalent to stb_vorbis_seek(f,0)
+
+extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
+extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
+// these functions return the total length of the vorbis stream
+
+extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
+// decode the next frame and return the number of samples. the number of
+// channels returned are stored in *channels (which can be NULL--it is always
+// the same as the number of channels reported by get_info). *output will
+// contain an array of float* buffers, one per channel. These outputs will
+// be overwritten on the next call to stb_vorbis_get_frame_*.
+//
+// You generally should not intermix calls to stb_vorbis_get_frame_*()
+// and stb_vorbis_get_samples_*(), since the latter calls the former.
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
+extern int stb_vorbis_get_frame_short            (stb_vorbis *f, int num_c, short **buffer, int num_samples);
+#endif
+// decode the next frame and return the number of *samples* per channel.
+// Note that for interleaved data, you pass in the number of shorts (the
+// size of your array), but the return value is the number of samples per
+// channel, not the total number of samples.
+//
+// The data is coerced to the number of channels you request according to the
+// channel coercion rules (see below). You must pass in the size of your
+// buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
+// The maximum buffer size needed can be gotten from get_info(); however,
+// the Vorbis I specification implies an absolute maximum of 4096 samples
+// per channel.
+
+// Channel coercion rules:
+//    Let M be the number of channels requested, and N the number of channels present,
+//    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
+//    and stereo R be the sum of all R and center channels (channel assignment from the
+//    vorbis spec).
+//        M    N       output
+//        1    k      sum(Ck) for all k
+//        2    *      stereo L, stereo R
+//        k    l      k > l, the first l channels, then 0s
+//        k    l      k <= l, the first k channels
+//    Note that this is not _good_ surround etc. mixing at all! It's just so
+//    you get something useful.
+
+extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
+extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
+// gets num_samples samples, not necessarily on a frame boundary--this requires
+// buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
+// Returns the number of samples stored per channel; it may be less than requested
+// at the end of the file. If there are no more samples in the file, returns 0.
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
+extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
+#endif
+// gets num_samples samples, not necessarily on a frame boundary--this requires
+// buffering so you have to supply the buffers. Applies the coercion rules above
+// to produce 'channels' channels. Returns the number of samples stored per channel;
+// it may be less than requested at the end of the file. If there are no more
+// samples in the file, returns 0.
+
+#endif
+
+////////   ERROR CODES
+
+enum STBVorbisError
+{
+   VORBIS__no_error,
+
+   VORBIS_need_more_data=1,             // not a real error
+
+   VORBIS_invalid_api_mixing,           // can't mix API modes
+   VORBIS_outofmem,                     // not enough memory
+   VORBIS_feature_not_supported,        // uses floor 0
+   VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
+   VORBIS_file_open_failure,            // fopen() failed
+   VORBIS_seek_without_length,          // can't seek in unknown-length file
+
+   VORBIS_unexpected_eof=10,            // file is truncated?
+   VORBIS_seek_invalid,                 // seek past EOF
+
+   // decoding errors (corrupt/invalid stream) -- you probably
+   // don't care about the exact details of these
+
+   // vorbis errors:
+   VORBIS_invalid_setup=20,
+   VORBIS_invalid_stream,
+
+   // ogg errors:
+   VORBIS_missing_capture_pattern=30,
+   VORBIS_invalid_stream_structure_version,
+   VORBIS_continued_packet_flag_invalid,
+   VORBIS_incorrect_stream_serial_number,
+   VORBIS_invalid_first_page,
+   VORBIS_bad_packet_type,
+   VORBIS_cant_find_last_page,
+   VORBIS_seek_failed,
+   VORBIS_ogg_skeleton_not_supported
+};
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
+//
+//  HEADER ENDS HERE
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#ifndef STB_VORBIS_HEADER_ONLY
+
+// global configuration settings (e.g. set these in the project/makefile),
+// or just set them in this file at the top (although ideally the first few
+// should be visible when the header file is compiled too, although it's not
+// crucial)
+
+// STB_VORBIS_NO_PUSHDATA_API
+//     does not compile the code for the various stb_vorbis_*_pushdata()
+//     functions
+// #define STB_VORBIS_NO_PUSHDATA_API
+
+// STB_VORBIS_NO_PULLDATA_API
+//     does not compile the code for the non-pushdata APIs
+// #define STB_VORBIS_NO_PULLDATA_API
+
+// STB_VORBIS_NO_STDIO
+//     does not compile the code for the APIs that use FILE *s internally
+//     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
+// #define STB_VORBIS_NO_STDIO
+
+// STB_VORBIS_NO_INTEGER_CONVERSION
+//     does not compile the code for converting audio sample data from
+//     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
+// #define STB_VORBIS_NO_INTEGER_CONVERSION
+
+// STB_VORBIS_NO_FAST_SCALED_FLOAT
+//      does not use a fast float-to-int trick to accelerate float-to-int on
+//      most platforms which requires endianness be defined correctly.
+//#define STB_VORBIS_NO_FAST_SCALED_FLOAT
+
+
+// STB_VORBIS_MAX_CHANNELS [number]
+//     globally define this to the maximum number of channels you need.
+//     The spec does not put a restriction on channels except that
+//     the count is stored in a byte, so 255 is the hard limit.
+//     Reducing this saves about 16 bytes per value, so using 16 saves
+//     (255-16)*16 or around 4KB. Plus anything other memory usage
+//     I forgot to account for. Can probably go as low as 8 (7.1 audio),
+//     6 (5.1 audio), or 2 (stereo only).
+#ifndef STB_VORBIS_MAX_CHANNELS
+#define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
+#endif
+
+// STB_VORBIS_PUSHDATA_CRC_COUNT [number]
+//     after a flush_pushdata(), stb_vorbis begins scanning for the
+//     next valid page, without backtracking. when it finds something
+//     that looks like a page, it streams through it and verifies its
+//     CRC32. Should that validation fail, it keeps scanning. But it's
+//     possible that _while_ streaming through to check the CRC32 of
+//     one candidate page, it sees another candidate page. This #define
+//     determines how many "overlapping" candidate pages it can search
+//     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
+//     garbage pages could be as big as 64KB, but probably average ~16KB.
+//     So don't hose ourselves by scanning an apparent 64KB page and
+//     missing a ton of real ones in the interim; so minimum of 2
+#ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
+#define STB_VORBIS_PUSHDATA_CRC_COUNT  4
+#endif
+
+// STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
+//     sets the log size of the huffman-acceleration table.  Maximum
+//     supported value is 24. with larger numbers, more decodings are O(1),
+//     but the table size is larger so worse cache missing, so you'll have
+//     to probe (and try multiple ogg vorbis files) to find the sweet spot.
+#ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
+#define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
+#endif
+
+// STB_VORBIS_FAST_BINARY_LENGTH [number]
+//     sets the log size of the binary-search acceleration table. this
+//     is used in similar fashion to the fast-huffman size to set initial
+//     parameters for the binary search
+
+// STB_VORBIS_FAST_HUFFMAN_INT
+//     The fast huffman tables are much more efficient if they can be
+//     stored as 16-bit results instead of 32-bit results. This restricts
+//     the codebooks to having only 65535 possible outcomes, though.
+//     (At least, accelerated by the huffman table.)
+#ifndef STB_VORBIS_FAST_HUFFMAN_INT
+#define STB_VORBIS_FAST_HUFFMAN_SHORT
+#endif
+
+// STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
+//     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
+//     back on binary searching for the correct one. This requires storing
+//     extra tables with the huffman codes in sorted order. Defining this
+//     symbol trades off space for speed by forcing a linear search in the
+//     non-fast case, except for "sparse" codebooks.
+// #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
+
+// STB_VORBIS_DIVIDES_IN_RESIDUE
+//     stb_vorbis precomputes the result of the scalar residue decoding
+//     that would otherwise require a divide per chunk. you can trade off
+//     space for time by defining this symbol.
+// #define STB_VORBIS_DIVIDES_IN_RESIDUE
+
+// STB_VORBIS_DIVIDES_IN_CODEBOOK
+//     vorbis VQ codebooks can be encoded two ways: with every case explicitly
+//     stored, or with all elements being chosen from a small range of values,
+//     and all values possible in all elements. By default, stb_vorbis expands
+//     this latter kind out to look like the former kind for ease of decoding,
+//     because otherwise an integer divide-per-vector-element is required to
+//     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
+//     trade off storage for speed.
+//#define STB_VORBIS_DIVIDES_IN_CODEBOOK
+
+#ifdef STB_VORBIS_CODEBOOK_SHORTS
+#error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
+#endif
+
+// STB_VORBIS_DIVIDE_TABLE
+//     this replaces small integer divides in the floor decode loop with
+//     table lookups. made less than 1% difference, so disabled by default.
+
+// STB_VORBIS_NO_INLINE_DECODE
+//     disables the inlining of the scalar codebook fast-huffman decode.
+//     might save a little codespace; useful for debugging
+// #define STB_VORBIS_NO_INLINE_DECODE
+
+// STB_VORBIS_NO_DEFER_FLOOR
+//     Normally we only decode the floor without synthesizing the actual
+//     full curve. We can instead synthesize the curve immediately. This
+//     requires more memory and is very likely slower, so I don't think
+//     you'd ever want to do it except for debugging.
+// #define STB_VORBIS_NO_DEFER_FLOOR
+
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+#ifdef STB_VORBIS_NO_PULLDATA_API
+   #define STB_VORBIS_NO_INTEGER_CONVERSION
+   #define STB_VORBIS_NO_STDIO
+#endif
+
+#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
+   #define STB_VORBIS_NO_STDIO 1
+#endif
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
+
+   // only need endianness for fast-float-to-int, which we don't
+   // use for pushdata
+
+   #ifndef STB_VORBIS_BIG_ENDIAN
+     #define STB_VORBIS_ENDIAN  0
+   #else
+     #define STB_VORBIS_ENDIAN  1
+   #endif
+
+#endif
+#endif
+
+
+#ifndef STB_VORBIS_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifndef STB_VORBIS_NO_CRT
+   #include <stdlib.h>
+   #include <string.h>
+   #include <assert.h>
+   #include <math.h>
+
+   // find definition of alloca if it's not in stdlib.h:
+   #if defined(_MSC_VER) || defined(__MINGW32__)
+      #include <malloc.h>
+   #endif
+   #if defined(__linux__) || defined(__linux) || defined(__sun__) || defined(__EMSCRIPTEN__) || defined(__NEWLIB__)
+      #include <alloca.h>
+   #endif
+#else // STB_VORBIS_NO_CRT
+   #define NULL 0
+   #define malloc(s)   0
+   #define free(s)     ((void) 0)
+   #define realloc(s)  0
+#endif // STB_VORBIS_NO_CRT
+
+#include <limits.h>
+
+#ifdef __MINGW32__
+   // eff you mingw:
+   //     "fixed":
+   //         http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
+   //     "no that broke the build, reverted, who cares about C":
+   //         http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
+   #ifdef __forceinline
+   #undef __forceinline
+   #endif
+   #define __forceinline
+   #ifndef alloca
+   #define alloca __builtin_alloca
+   #endif
+#elif !defined(_MSC_VER)
+   #if __GNUC__
+      #define __forceinline inline
+   #else
+      #define __forceinline
+   #endif
+#endif
+
+#if STB_VORBIS_MAX_CHANNELS > 256
+#error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
+#endif
+
+#if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
+#error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
+#endif
+
+
+#if 0
+#include <crtdbg.h>
+#define CHECK(f)   _CrtIsValidHeapPointer(f->channel_buffers[1])
+#else
+#define CHECK(f)   ((void) 0)
+#endif
+
+#define MAX_BLOCKSIZE_LOG  13   // from specification
+#define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
+
+
+typedef unsigned char  uint8;
+typedef   signed char   int8;
+typedef unsigned short uint16;
+typedef   signed short  int16;
+typedef unsigned int   uint32;
+typedef   signed int    int32;
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+typedef float codetype;
+
+#ifdef _MSC_VER
+#define STBV_NOTUSED(v)  (void)(v)
+#else
+#define STBV_NOTUSED(v)  (void)sizeof(v)
+#endif
+
+// @NOTE
+//
+// Some arrays below are tagged "//varies", which means it's actually
+// a variable-sized piece of data, but rather than malloc I assume it's
+// small enough it's better to just allocate it all together with the
+// main thing
+//
+// Most of the variables are specified with the smallest size I could pack
+// them into. It might give better performance to make them all full-sized
+// integers. It should be safe to freely rearrange the structures or change
+// the sizes larger--nothing relies on silently truncating etc., nor the
+// order of variables.
+
+#define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
+#define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
+
+typedef struct
+{
+   int dimensions, entries;
+   uint8 *codeword_lengths;
+   float  minimum_value;
+   float  delta_value;
+   uint8  value_bits;
+   uint8  lookup_type;
+   uint8  sequence_p;
+   uint8  sparse;
+   uint32 lookup_values;
+   codetype *multiplicands;
+   uint32 *codewords;
+   #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
+    int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
+   #else
+    int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
+   #endif
+   uint32 *sorted_codewords;
+   int    *sorted_values;
+   int     sorted_entries;
+} Codebook;
+
+typedef struct
+{
+   uint8 order;
+   uint16 rate;
+   uint16 bark_map_size;
+   uint8 amplitude_bits;
+   uint8 amplitude_offset;
+   uint8 number_of_books;
+   uint8 book_list[16]; // varies
+} Floor0;
+
+typedef struct
+{
+   uint8 partitions;
+   uint8 partition_class_list[32]; // varies
+   uint8 class_dimensions[16]; // varies
+   uint8 class_subclasses[16]; // varies
+   uint8 class_masterbooks[16]; // varies
+   int16 subclass_books[16][8]; // varies
+   uint16 Xlist[31*8+2]; // varies
+   uint8 sorted_order[31*8+2];
+   uint8 neighbors[31*8+2][2];
+   uint8 floor1_multiplier;
+   uint8 rangebits;
+   int values;
+} Floor1;
+
+typedef union
+{
+   Floor0 floor0;
+   Floor1 floor1;
+} Floor;
+
+typedef struct
+{
+   uint32 begin, end;
+   uint32 part_size;
+   uint8 classifications;
+   uint8 classbook;
+   uint8 **classdata;
+   int16 (*residue_books)[8];
+} Residue;
+
+typedef struct
+{
+   uint8 magnitude;
+   uint8 angle;
+   uint8 mux;
+} MappingChannel;
+
+typedef struct
+{
+   uint16 coupling_steps;
+   MappingChannel *chan;
+   uint8  submaps;
+   uint8  submap_floor[15]; // varies
+   uint8  submap_residue[15]; // varies
+} Mapping;
+
+typedef struct
+{
+   uint8 blockflag;
+   uint8 mapping;
+   uint16 windowtype;
+   uint16 transformtype;
+} Mode;
+
+typedef struct
+{
+   uint32  goal_crc;    // expected crc if match
+   int     bytes_left;  // bytes left in packet
+   uint32  crc_so_far;  // running crc
+   int     bytes_done;  // bytes processed in _current_ chunk
+   uint32  sample_loc;  // granule pos encoded in page
+} CRCscan;
+
+typedef struct
+{
+   uint32 page_start, page_end;
+   uint32 last_decoded_sample;
+} ProbedPage;
+
+struct stb_vorbis
+{
+  // user-accessible info
+   unsigned int sample_rate;
+   int channels;
+
+   unsigned int setup_memory_required;
+   unsigned int temp_memory_required;
+   unsigned int setup_temp_memory_required;
+
+   char *vendor;
+   int comment_list_length;
+   char **comment_list;
+
+  // input config
+#ifndef STB_VORBIS_NO_STDIO
+   FILE *f;
+   uint32 f_start;
+   int close_on_free;
+#endif
+
+   uint8 *stream;
+   uint8 *stream_start;
+   uint8 *stream_end;
+
+   uint32 stream_len;
+
+   uint8  push_mode;
+
+   // the page to seek to when seeking to start, may be zero
+   uint32 first_audio_page_offset;
+
+   // p_first is the page on which the first audio packet ends
+   // (but not necessarily the page on which it starts)
+   ProbedPage p_first, p_last;
+
+  // memory management
+   stb_vorbis_alloc alloc;
+   int setup_offset;
+   int temp_offset;
+
+  // run-time results
+   int eof;
+   enum STBVorbisError error;
+
+  // user-useful data
+
+  // header info
+   int blocksize[2];
+   int blocksize_0, blocksize_1;
+   int codebook_count;
+   Codebook *codebooks;
+   int floor_count;
+   uint16 floor_types[64]; // varies
+   Floor *floor_config;
+   int residue_count;
+   uint16 residue_types[64]; // varies
+   Residue *residue_config;
+   int mapping_count;
+   Mapping *mapping;
+   int mode_count;
+   Mode mode_config[64];  // varies
+
+   uint32 total_samples;
+
+  // decode buffer
+   float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
+   float *outputs        [STB_VORBIS_MAX_CHANNELS];
+
+   float *previous_window[STB_VORBIS_MAX_CHANNELS];
+   int previous_length;
+
+   #ifndef STB_VORBIS_NO_DEFER_FLOOR
+   int16 *finalY[STB_VORBIS_MAX_CHANNELS];
+   #else
+   float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
+   #endif
+
+   uint32 current_loc; // sample location of next frame to decode
+   int    current_loc_valid;
+
+  // per-blocksize precomputed data
+
+   // twiddle factors
+   float *A[2],*B[2],*C[2];
+   float *window[2];
+   uint16 *bit_reverse[2];
+
+  // current page/packet/segment streaming info
+   uint32 serial; // stream serial number for verification
+   int last_page;
+   int segment_count;
+   uint8 segments[255];
+   uint8 page_flag;
+   uint8 bytes_in_seg;
+   uint8 first_decode;
+   int next_seg;
+   int last_seg;  // flag that we're on the last segment
+   int last_seg_which; // what was the segment number of the last seg?
+   uint32 acc;
+   int valid_bits;
+   int packet_bytes;
+   int end_seg_with_known_loc;
+   uint32 known_loc_for_packet;
+   int discard_samples_deferred;
+   uint32 samples_output;
+
+  // push mode scanning
+   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+   CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
+#endif
+
+  // sample-access
+   int channel_buffer_start;
+   int channel_buffer_end;
+};
+
+#if defined(STB_VORBIS_NO_PUSHDATA_API)
+   #define IS_PUSH_MODE(f)   FALSE
+#elif defined(STB_VORBIS_NO_PULLDATA_API)
+   #define IS_PUSH_MODE(f)   TRUE
+#else
+   #define IS_PUSH_MODE(f)   ((f)->push_mode)
+#endif
+
+typedef struct stb_vorbis vorb;
+
+static int error(vorb *f, enum STBVorbisError e)
+{
+   f->error = e;
+   if (!f->eof && e != VORBIS_need_more_data) {
+      f->error=e; // breakpoint for debugging
+   }
+   return 0;
+}
+
+
+// these functions are used for allocating temporary memory
+// while decoding. if you can afford the stack space, use
+// alloca(); otherwise, provide a temp buffer and it will
+// allocate out of those.
+
+#define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
+
+#define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
+#define temp_free(f,p)                  (void)0
+#define temp_alloc_save(f)              ((f)->temp_offset)
+#define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
+
+#define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
+
+// given a sufficiently large block of memory, make an array of pointers to subblocks of it
+static void *make_block_array(void *mem, int count, int size)
+{
+   int i;
+   void ** p = (void **) mem;
+   char *q = (char *) (p + count);
+   for (i=0; i < count; ++i) {
+      p[i] = q;
+      q += size;
+   }
+   return p;
+}
+
+static void *setup_malloc(vorb *f, int sz)
+{
+   sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
+   f->setup_memory_required += sz;
+   if (f->alloc.alloc_buffer) {
+      void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
+      if (f->setup_offset + sz > f->temp_offset) return NULL;
+      f->setup_offset += sz;
+      return p;
+   }
+   return sz ? malloc(sz) : NULL;
+}
+
+static void setup_free(vorb *f, void *p)
+{
+   if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
+   free(p);
+}
+
+static void *setup_temp_malloc(vorb *f, int sz)
+{
+   sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
+   if (f->alloc.alloc_buffer) {
+      if (f->temp_offset - sz < f->setup_offset) return NULL;
+      f->temp_offset -= sz;
+      return (char *) f->alloc.alloc_buffer + f->temp_offset;
+   }
+   return malloc(sz);
+}
+
+static void setup_temp_free(vorb *f, void *p, int sz)
+{
+   if (f->alloc.alloc_buffer) {
+      f->temp_offset += (sz+7)&~7;
+      return;
+   }
+   free(p);
+}
+
+#define CRC32_POLY    0x04c11db7   // from spec
+
+static uint32 crc_table[256];
+static void crc32_init(void)
+{
+   int i,j;
+   uint32 s;
+   for(i=0; i < 256; i++) {
+      for (s=(uint32) i << 24, j=0; j < 8; ++j)
+         s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
+      crc_table[i] = s;
+   }
+}
+
+static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
+{
+   return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
+}
+
+
+// used in setup, and for huffman that doesn't go fast path
+static unsigned int bit_reverse(unsigned int n)
+{
+  n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
+  n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
+  n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
+  n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
+  return (n >> 16) | (n << 16);
+}
+
+static float square(float x)
+{
+   return x*x;
+}
+
+// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
+// as required by the specification. fast(?) implementation from stb.h
+// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
+static int ilog(int32 n)
+{
+   static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
+
+   if (n < 0) return 0; // signed n returns 0
+
+   // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
+   if (n < (1 << 14))
+        if (n < (1 <<  4))            return  0 + log2_4[n      ];
+        else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
+             else                     return 10 + log2_4[n >> 10];
+   else if (n < (1 << 24))
+             if (n < (1 << 19))       return 15 + log2_4[n >> 15];
+             else                     return 20 + log2_4[n >> 20];
+        else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
+             else                     return 30 + log2_4[n >> 30];
+}
+
+#ifndef M_PI
+  #define M_PI  3.14159265358979323846264f  // from CRC
+#endif
+
+// code length assigned to a value with no huffman encoding
+#define NO_CODE   255
+
+/////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
+//
+// these functions are only called at setup, and only a few times
+// per file
+
+static float float32_unpack(uint32 x)
+{
+   // from the specification
+   uint32 mantissa = x & 0x1fffff;
+   uint32 sign = x & 0x80000000;
+   uint32 exp = (x & 0x7fe00000) >> 21;
+   double res = sign ? -(double)mantissa : (double)mantissa;
+   return (float) ldexp((float)res, (int)exp-788);
+}
+
+
+// zlib & jpeg huffman tables assume that the output symbols
+// can either be arbitrarily arranged, or have monotonically
+// increasing frequencies--they rely on the lengths being sorted;
+// this makes for a very simple generation algorithm.
+// vorbis allows a huffman table with non-sorted lengths. This
+// requires a more sophisticated construction, since symbols in
+// order do not map to huffman codes "in order".
+static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
+{
+   if (!c->sparse) {
+      c->codewords      [symbol] = huff_code;
+   } else {
+      c->codewords       [count] = huff_code;
+      c->codeword_lengths[count] = len;
+      values             [count] = symbol;
+   }
+}
+
+static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
+{
+   int i,k,m=0;
+   uint32 available[32];
+
+   memset(available, 0, sizeof(available));
+   // find the first entry
+   for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
+   if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
+   assert(len[k] < 32); // no error return required, code reading lens checks this
+   // add to the list
+   add_entry(c, 0, k, m++, len[k], values);
+   // add all available leaves
+   for (i=1; i <= len[k]; ++i)
+      available[i] = 1U << (32-i);
+   // note that the above code treats the first case specially,
+   // but it's really the same as the following code, so they
+   // could probably be combined (except the initial code is 0,
+   // and I use 0 in available[] to mean 'empty')
+   for (i=k+1; i < n; ++i) {
+      uint32 res;
+      int z = len[i], y;
+      if (z == NO_CODE) continue;
+      assert(z < 32); // no error return required, code reading lens checks this
+      // find lowest available leaf (should always be earliest,
+      // which is what the specification calls for)
+      // note that this property, and the fact we can never have
+      // more than one free leaf at a given level, isn't totally
+      // trivial to prove, but it seems true and the assert never
+      // fires, so!
+      while (z > 0 && !available[z]) --z;
+      if (z == 0) { return FALSE; }
+      res = available[z];
+      available[z] = 0;
+      add_entry(c, bit_reverse(res), i, m++, len[i], values);
+      // propagate availability up the tree
+      if (z != len[i]) {
+         for (y=len[i]; y > z; --y) {
+            assert(available[y] == 0);
+            available[y] = res + (1 << (32-y));
+         }
+      }
+   }
+   return TRUE;
+}
+
+// accelerated huffman table allows fast O(1) match of all symbols
+// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
+static void compute_accelerated_huffman(Codebook *c)
+{
+   int i, len;
+   for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
+      c->fast_huffman[i] = -1;
+
+   len = c->sparse ? c->sorted_entries : c->entries;
+   #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
+   if (len > 32767) len = 32767; // largest possible value we can encode!
+   #endif
+   for (i=0; i < len; ++i) {
+      if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
+         uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
+         // set table entries for all bit combinations in the higher bits
+         while (z < FAST_HUFFMAN_TABLE_SIZE) {
+             c->fast_huffman[z] = i;
+             z += 1 << c->codeword_lengths[i];
+         }
+      }
+   }
+}
+
+#ifdef _MSC_VER
+#define STBV_CDECL __cdecl
+#else
+#define STBV_CDECL
+#endif
+
+static int STBV_CDECL uint32_compare(const void *p, const void *q)
+{
+   uint32 x = * (uint32 *) p;
+   uint32 y = * (uint32 *) q;
+   return x < y ? -1 : x > y;
+}
+
+static int include_in_sort(Codebook *c, uint8 len)
+{
+   if (c->sparse) { assert(len != NO_CODE); return TRUE; }
+   if (len == NO_CODE) return FALSE;
+   if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
+   return FALSE;
+}
+
+// if the fast table above doesn't work, we want to binary
+// search them... need to reverse the bits
+static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
+{
+   int i, len;
+   // build a list of all the entries
+   // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
+   // this is kind of a frivolous optimization--I don't see any performance improvement,
+   // but it's like 4 extra lines of code, so.
+   if (!c->sparse) {
+      int k = 0;
+      for (i=0; i < c->entries; ++i)
+         if (include_in_sort(c, lengths[i]))
+            c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
+      assert(k == c->sorted_entries);
+   } else {
+      for (i=0; i < c->sorted_entries; ++i)
+         c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
+   }
+
+   qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
+   c->sorted_codewords[c->sorted_entries] = 0xffffffff;
+
+   len = c->sparse ? c->sorted_entries : c->entries;
+   // now we need to indicate how they correspond; we could either
+   //   #1: sort a different data structure that says who they correspond to
+   //   #2: for each sorted entry, search the original list to find who corresponds
+   //   #3: for each original entry, find the sorted entry
+   // #1 requires extra storage, #2 is slow, #3 can use binary search!
+   for (i=0; i < len; ++i) {
+      int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
+      if (include_in_sort(c,huff_len)) {
+         uint32 code = bit_reverse(c->codewords[i]);
+         int x=0, n=c->sorted_entries;
+         while (n > 1) {
+            // invariant: sc[x] <= code < sc[x+n]
+            int m = x + (n >> 1);
+            if (c->sorted_codewords[m] <= code) {
+               x = m;
+               n -= (n>>1);
+            } else {
+               n >>= 1;
+            }
+         }
+         assert(c->sorted_codewords[x] == code);
+         if (c->sparse) {
+            c->sorted_values[x] = values[i];
+            c->codeword_lengths[x] = huff_len;
+         } else {
+            c->sorted_values[x] = i;
+         }
+      }
+   }
+}
+
+// only run while parsing the header (3 times)
+static int vorbis_validate(uint8 *data)
+{
+   static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
+   return memcmp(data, vorbis, 6) == 0;
+}
+
+// called from setup only, once per code book
+// (formula implied by specification)
+static int lookup1_values(int entries, int dim)
+{
+   int r = (int) floor(exp((float) log((float) entries) / dim));
+   if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
+      ++r;                                              // floor() to avoid _ftol() when non-CRT
+   if (pow((float) r+1, dim) <= entries)
+      return -1;
+   if ((int) floor(pow((float) r, dim)) > entries)
+      return -1;
+   return r;
+}
+
+// called twice per file
+static void compute_twiddle_factors(int n, float *A, float *B, float *C)
+{
+   int n4 = n >> 2, n8 = n >> 3;
+   int k,k2;
+
+   for (k=k2=0; k < n4; ++k,k2+=2) {
+      A[k2  ] = (float)  cos(4*k*M_PI/n);
+      A[k2+1] = (float) -sin(4*k*M_PI/n);
+      B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
+      B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
+   }
+   for (k=k2=0; k < n8; ++k,k2+=2) {
+      C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
+      C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
+   }
+}
+
+static void compute_window(int n, float *window)
+{
+   int n2 = n >> 1, i;
+   for (i=0; i < n2; ++i)
+      window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
+}
+
+static void compute_bitreverse(int n, uint16 *rev)
+{
+   int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
+   int i, n8 = n >> 3;
+   for (i=0; i < n8; ++i)
+      rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
+}
+
+static int init_blocksize(vorb *f, int b, int n)
+{
+   int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
+   f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
+   f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
+   f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
+   if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
+   compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
+   f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
+   if (!f->window[b]) return error(f, VORBIS_outofmem);
+   compute_window(n, f->window[b]);
+   f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
+   if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
+   compute_bitreverse(n, f->bit_reverse[b]);
+   return TRUE;
+}
+
+static void neighbors(uint16 *x, int n, int *plow, int *phigh)
+{
+   int low = -1;
+   int high = 65536;
+   int i;
+   for (i=0; i < n; ++i) {
+      if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
+      if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
+   }
+}
+
+// this has been repurposed so y is now the original index instead of y
+typedef struct
+{
+   uint16 x,id;
+} stbv__floor_ordering;
+
+static int STBV_CDECL point_compare(const void *p, const void *q)
+{
+   stbv__floor_ordering *a = (stbv__floor_ordering *) p;
+   stbv__floor_ordering *b = (stbv__floor_ordering *) q;
+   return a->x < b->x ? -1 : a->x > b->x;
+}
+
+//
+/////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
+
+
+#if defined(STB_VORBIS_NO_STDIO)
+   #define USE_MEMORY(z)    TRUE
+#else
+   #define USE_MEMORY(z)    ((z)->stream)
+#endif
+
+static uint8 get8(vorb *z)
+{
+   if (USE_MEMORY(z)) {
+      if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
+      return *z->stream++;
+   }
+
+   #ifndef STB_VORBIS_NO_STDIO
+   {
+   int c = fgetc(z->f);
+   if (c == EOF) { z->eof = TRUE; return 0; }
+   return c;
+   }
+   #endif
+}
+
+static uint32 get32(vorb *f)
+{
+   uint32 x;
+   x = get8(f);
+   x += get8(f) << 8;
+   x += get8(f) << 16;
+   x += (uint32) get8(f) << 24;
+   return x;
+}
+
+static int getn(vorb *z, uint8 *data, int n)
+{
+   if (USE_MEMORY(z)) {
+      if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
+      memcpy(data, z->stream, n);
+      z->stream += n;
+      return 1;
+   }
+
+   #ifndef STB_VORBIS_NO_STDIO
+   if (fread(data, n, 1, z->f) == 1)
+      return 1;
+   else {
+      z->eof = 1;
+      return 0;
+   }
+   #endif
+}
+
+static void skip(vorb *z, int n)
+{
+   if (USE_MEMORY(z)) {
+      z->stream += n;
+      if (z->stream >= z->stream_end) z->eof = 1;
+      return;
+   }
+   #ifndef STB_VORBIS_NO_STDIO
+   {
+      long x = ftell(z->f);
+      fseek(z->f, x+n, SEEK_SET);
+   }
+   #endif
+}
+
+static int set_file_offset(stb_vorbis *f, unsigned int loc)
+{
+   #ifndef STB_VORBIS_NO_PUSHDATA_API
+   if (f->push_mode) return 0;
+   #endif
+   f->eof = 0;
+   if (USE_MEMORY(f)) {
+      if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
+         f->stream = f->stream_end;
+         f->eof = 1;
+         return 0;
+      } else {
+         f->stream = f->stream_start + loc;
+         return 1;
+      }
+   }
+   #ifndef STB_VORBIS_NO_STDIO
+   if (loc + f->f_start < loc || loc >= 0x80000000) {
+      loc = 0x7fffffff;
+      f->eof = 1;
+   } else {
+      loc += f->f_start;
+   }
+   if (!fseek(f->f, loc, SEEK_SET))
+      return 1;
+   f->eof = 1;
+   fseek(f->f, f->f_start, SEEK_END);
+   return 0;
+   #endif
+}
+
+
+static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
+
+static int capture_pattern(vorb *f)
+{
+   if (0x4f != get8(f)) return FALSE;
+   if (0x67 != get8(f)) return FALSE;
+   if (0x67 != get8(f)) return FALSE;
+   if (0x53 != get8(f)) return FALSE;
+   return TRUE;
+}
+
+#define PAGEFLAG_continued_packet   1
+#define PAGEFLAG_first_page         2
+#define PAGEFLAG_last_page          4
+
+static int start_page_no_capturepattern(vorb *f)
+{
+   uint32 loc0,loc1,n;
+   if (f->first_decode && !IS_PUSH_MODE(f)) {
+      f->p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
+   }
+   // stream structure version
+   if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
+   // header flag
+   f->page_flag = get8(f);
+   // absolute granule position
+   loc0 = get32(f);
+   loc1 = get32(f);
+   // @TODO: validate loc0,loc1 as valid positions?
+   // stream serial number -- vorbis doesn't interleave, so discard
+   get32(f);
+   //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
+   // page sequence number
+   n = get32(f);
+   f->last_page = n;
+   // CRC32
+   get32(f);
+   // page_segments
+   f->segment_count = get8(f);
+   if (!getn(f, f->segments, f->segment_count))
+      return error(f, VORBIS_unexpected_eof);
+   // assume we _don't_ know any the sample position of any segments
+   f->end_seg_with_known_loc = -2;
+   if (loc0 != ~0U || loc1 != ~0U) {
+      int i;
+      // determine which packet is the last one that will complete
+      for (i=f->segment_count-1; i >= 0; --i)
+         if (f->segments[i] < 255)
+            break;
+      // 'i' is now the index of the _last_ segment of a packet that ends
+      if (i >= 0) {
+         f->end_seg_with_known_loc = i;
+         f->known_loc_for_packet   = loc0;
+      }
+   }
+   if (f->first_decode) {
+      int i,len;
+      len = 0;
+      for (i=0; i < f->segment_count; ++i)
+         len += f->segments[i];
+      len += 27 + f->segment_count;
+      f->p_first.page_end = f->p_first.page_start + len;
+      f->p_first.last_decoded_sample = loc0;
+   }
+   f->next_seg = 0;
+   return TRUE;
+}
+
+static int start_page(vorb *f)
+{
+   if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
+   return start_page_no_capturepattern(f);
+}
+
+static int start_packet(vorb *f)
+{
+   while (f->next_seg == -1) {
+      if (!start_page(f)) return FALSE;
+      if (f->page_flag & PAGEFLAG_continued_packet)
+         return error(f, VORBIS_continued_packet_flag_invalid);
+   }
+   f->last_seg = FALSE;
+   f->valid_bits = 0;
+   f->packet_bytes = 0;
+   f->bytes_in_seg = 0;
+   // f->next_seg is now valid
+   return TRUE;
+}
+
+static int maybe_start_packet(vorb *f)
+{
+   if (f->next_seg == -1) {
+      int x = get8(f);
+      if (f->eof) return FALSE; // EOF at page boundary is not an error!
+      if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
+      if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
+      if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
+      if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
+      if (!start_page_no_capturepattern(f)) return FALSE;
+      if (f->page_flag & PAGEFLAG_continued_packet) {
+         // set up enough state that we can read this packet if we want,
+         // e.g. during recovery
+         f->last_seg = FALSE;
+         f->bytes_in_seg = 0;
+         return error(f, VORBIS_continued_packet_flag_invalid);
+      }
+   }
+   return start_packet(f);
+}
+
+static int next_segment(vorb *f)
+{
+   int len;
+   if (f->last_seg) return 0;
+   if (f->next_seg == -1) {
+      f->last_seg_which = f->segment_count-1; // in case start_page fails
+      if (!start_page(f)) { f->last_seg = 1; return 0; }
+      if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
+   }
+   len = f->segments[f->next_seg++];
+   if (len < 255) {
+      f->last_seg = TRUE;
+      f->last_seg_which = f->next_seg-1;
+   }
+   if (f->next_seg >= f->segment_count)
+      f->next_seg = -1;
+   assert(f->bytes_in_seg == 0);
+   f->bytes_in_seg = len;
+   return len;
+}
+
+#define EOP    (-1)
+#define INVALID_BITS  (-1)
+
+static int get8_packet_raw(vorb *f)
+{
+   if (!f->bytes_in_seg) {  // CLANG!
+      if (f->last_seg) return EOP;
+      else if (!next_segment(f)) return EOP;
+   }
+   assert(f->bytes_in_seg > 0);
+   --f->bytes_in_seg;
+   ++f->packet_bytes;
+   return get8(f);
+}
+
+static int get8_packet(vorb *f)
+{
+   int x = get8_packet_raw(f);
+   f->valid_bits = 0;
+   return x;
+}
+
+static int get32_packet(vorb *f)
+{
+   uint32 x;
+   x = get8_packet(f);
+   x += get8_packet(f) << 8;
+   x += get8_packet(f) << 16;
+   x += (uint32) get8_packet(f) << 24;
+   return x;
+}
+
+static void flush_packet(vorb *f)
+{
+   while (get8_packet_raw(f) != EOP);
+}
+
+// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
+// as the huffman decoder?
+static uint32 get_bits(vorb *f, int n)
+{
+   uint32 z;
+
+   if (f->valid_bits < 0) return 0;
+   if (f->valid_bits < n) {
+      if (n > 24) {
+         // the accumulator technique below would not work correctly in this case
+         z = get_bits(f, 24);
+         z += get_bits(f, n-24) << 24;
+         return z;
+      }
+      if (f->valid_bits == 0) f->acc = 0;
+      while (f->valid_bits < n) {
+         int z = get8_packet_raw(f);
+         if (z == EOP) {
+            f->valid_bits = INVALID_BITS;
+            return 0;
+         }
+         f->acc += z << f->valid_bits;
+         f->valid_bits += 8;
+      }
+   }
+
+   assert(f->valid_bits >= n);
+   z = f->acc & ((1 << n)-1);
+   f->acc >>= n;
+   f->valid_bits -= n;
+   return z;
+}
+
+// @OPTIMIZE: primary accumulator for huffman
+// expand the buffer to as many bits as possible without reading off end of packet
+// it might be nice to allow f->valid_bits and f->acc to be stored in registers,
+// e.g. cache them locally and decode locally
+static __forceinline void prep_huffman(vorb *f)
+{
+   if (f->valid_bits <= 24) {
+      if (f->valid_bits == 0) f->acc = 0;
+      do {
+         int z;
+         if (f->last_seg && !f->bytes_in_seg) return;
+         z = get8_packet_raw(f);
+         if (z == EOP) return;
+         f->acc += (unsigned) z << f->valid_bits;
+         f->valid_bits += 8;
+      } while (f->valid_bits <= 24);
+   }
+}
+
+enum
+{
+   VORBIS_packet_id = 1,
+   VORBIS_packet_comment = 3,
+   VORBIS_packet_setup = 5
+};
+
+static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
+{
+   int i;
+   prep_huffman(f);
+
+   if (c->codewords == NULL && c->sorted_codewords == NULL)
+      return -1;
+
+   // cases to use binary search: sorted_codewords && !c->codewords
+   //                             sorted_codewords && c->entries > 8
+   if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
+      // binary search
+      uint32 code = bit_reverse(f->acc);
+      int x=0, n=c->sorted_entries, len;
+
+      while (n > 1) {
+         // invariant: sc[x] <= code < sc[x+n]
+         int m = x + (n >> 1);
+         if (c->sorted_codewords[m] <= code) {
+            x = m;
+            n -= (n>>1);
+         } else {
+            n >>= 1;
+         }
+      }
+      // x is now the sorted index
+      if (!c->sparse) x = c->sorted_values[x];
+      // x is now sorted index if sparse, or symbol otherwise
+      len = c->codeword_lengths[x];
+      if (f->valid_bits >= len) {
+         f->acc >>= len;
+         f->valid_bits -= len;
+         return x;
+      }
+
+      f->valid_bits = 0;
+      return -1;
+   }
+
+   // if small, linear search
+   assert(!c->sparse);
+   for (i=0; i < c->entries; ++i) {
+      if (c->codeword_lengths[i] == NO_CODE) continue;
+      if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
+         if (f->valid_bits >= c->codeword_lengths[i]) {
+            f->acc >>= c->codeword_lengths[i];
+            f->valid_bits -= c->codeword_lengths[i];
+            return i;
+         }
+         f->valid_bits = 0;
+         return -1;
+      }
+   }
+
+   error(f, VORBIS_invalid_stream);
+   f->valid_bits = 0;
+   return -1;
+}
+
+#ifndef STB_VORBIS_NO_INLINE_DECODE
+
+#define DECODE_RAW(var, f,c)                                  \
+   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
+      prep_huffman(f);                                        \
+   var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
+   var = c->fast_huffman[var];                                \
+   if (var >= 0) {                                            \
+      int n = c->codeword_lengths[var];                       \
+      f->acc >>= n;                                           \
+      f->valid_bits -= n;                                     \
+      if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
+   } else {                                                   \
+      var = codebook_decode_scalar_raw(f,c);                  \
+   }
+
+#else
+
+static int codebook_decode_scalar(vorb *f, Codebook *c)
+{
+   int i;
+   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
+      prep_huffman(f);
+   // fast huffman table lookup
+   i = f->acc & FAST_HUFFMAN_TABLE_MASK;
+   i = c->fast_huffman[i];
+   if (i >= 0) {
+      f->acc >>= c->codeword_lengths[i];
+      f->valid_bits -= c->codeword_lengths[i];
+      if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
+      return i;
+   }
+   return codebook_decode_scalar_raw(f,c);
+}
+
+#define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
+
+#endif
+
+#define DECODE(var,f,c)                                       \
+   DECODE_RAW(var,f,c)                                        \
+   if (c->sparse) var = c->sorted_values[var];
+
+#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
+  #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
+#else
+  #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
+#endif
+
+
+
+
+
+
+// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
+// where we avoid one addition
+#define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
+#define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
+#define CODEBOOK_ELEMENT_BASE(c)         (0)
+
+static int codebook_decode_start(vorb *f, Codebook *c)
+{
+   int z = -1;
+
+   // type 0 is only legal in a scalar context
+   if (c->lookup_type == 0)
+      error(f, VORBIS_invalid_stream);
+   else {
+      DECODE_VQ(z,f,c);
+      if (c->sparse) assert(z < c->sorted_entries);
+      if (z < 0) {  // check for EOP
+         if (!f->bytes_in_seg)
+            if (f->last_seg)
+               return z;
+         error(f, VORBIS_invalid_stream);
+      }
+   }
+   return z;
+}
+
+static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
+{
+   int i,z = codebook_decode_start(f,c);
+   if (z < 0) return FALSE;
+   if (len > c->dimensions) len = c->dimensions;
+
+#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+   if (c->lookup_type == 1) {
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      int div = 1;
+      for (i=0; i < len; ++i) {
+         int off = (z / div) % c->lookup_values;
+         float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
+         output[i] += val;
+         if (c->sequence_p) last = val + c->minimum_value;
+         div *= c->lookup_values;
+      }
+      return TRUE;
+   }
+#endif
+
+   z *= c->dimensions;
+   if (c->sequence_p) {
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      for (i=0; i < len; ++i) {
+         float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+         output[i] += val;
+         last = val + c->minimum_value;
+      }
+   } else {
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      for (i=0; i < len; ++i) {
+         output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+      }
+   }
+
+   return TRUE;
+}
+
+static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
+{
+   int i,z = codebook_decode_start(f,c);
+   float last = CODEBOOK_ELEMENT_BASE(c);
+   if (z < 0) return FALSE;
+   if (len > c->dimensions) len = c->dimensions;
+
+#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+   if (c->lookup_type == 1) {
+      int div = 1;
+      for (i=0; i < len; ++i) {
+         int off = (z / div) % c->lookup_values;
+         float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
+         output[i*step] += val;
+         if (c->sequence_p) last = val;
+         div *= c->lookup_values;
+      }
+      return TRUE;
+   }
+#endif
+
+   z *= c->dimensions;
+   for (i=0; i < len; ++i) {
+      float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+      output[i*step] += val;
+      if (c->sequence_p) last = val;
+   }
+
+   return TRUE;
+}
+
+static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
+{
+   int c_inter = *c_inter_p;
+   int p_inter = *p_inter_p;
+   int i,z, effective = c->dimensions;
+
+   // type 0 is only legal in a scalar context
+   if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
+
+   while (total_decode > 0) {
+      float last = CODEBOOK_ELEMENT_BASE(c);
+      DECODE_VQ(z,f,c);
+      #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
+      assert(!c->sparse || z < c->sorted_entries);
+      #endif
+      if (z < 0) {
+         if (!f->bytes_in_seg)
+            if (f->last_seg) return FALSE;
+         return error(f, VORBIS_invalid_stream);
+      }
+
+      // if this will take us off the end of the buffers, stop short!
+      // we check by computing the length of the virtual interleaved
+      // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
+      // and the length we'll be using (effective)
+      if (c_inter + p_inter*ch + effective > len * ch) {
+         effective = len*ch - (p_inter*ch - c_inter);
+      }
+
+   #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+      if (c->lookup_type == 1) {
+         int div = 1;
+         for (i=0; i < effective; ++i) {
+            int off = (z / div) % c->lookup_values;
+            float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
+            if (outputs[c_inter])
+               outputs[c_inter][p_inter] += val;
+            if (++c_inter == ch) { c_inter = 0; ++p_inter; }
+            if (c->sequence_p) last = val;
+            div *= c->lookup_values;
+         }
+      } else
+   #endif
+      {
+         z *= c->dimensions;
+         if (c->sequence_p) {
+            for (i=0; i < effective; ++i) {
+               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+               if (outputs[c_inter])
+                  outputs[c_inter][p_inter] += val;
+               if (++c_inter == ch) { c_inter = 0; ++p_inter; }
+               last = val;
+            }
+         } else {
+            for (i=0; i < effective; ++i) {
+               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
+               if (outputs[c_inter])
+                  outputs[c_inter][p_inter] += val;
+               if (++c_inter == ch) { c_inter = 0; ++p_inter; }
+            }
+         }
+      }
+
+      total_decode -= effective;
+   }
+   *c_inter_p = c_inter;
+   *p_inter_p = p_inter;
+   return TRUE;
+}
+
+static int predict_point(int x, int x0, int x1, int y0, int y1)
+{
+   int dy = y1 - y0;
+   int adx = x1 - x0;
+   // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
+   int err = abs(dy) * (x - x0);
+   int off = err / adx;
+   return dy < 0 ? y0 - off : y0 + off;
+}
+
+// the following table is block-copied from the specification
+static float inverse_db_table[256] =
+{
+  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
+  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
+  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
+  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
+  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
+  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
+  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
+  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
+  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
+  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
+  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
+  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
+  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
+  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
+  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
+  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
+  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
+  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
+  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
+  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
+  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
+  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
+  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
+  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
+  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
+  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
+  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
+  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
+  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
+  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
+  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
+  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
+  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
+  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
+  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
+  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
+  0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
+  0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
+  0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
+  0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
+  0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
+  0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
+  0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
+  0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
+  0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
+  0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
+  0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
+  0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
+  0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
+  0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
+  0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
+  0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
+  0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
+  0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
+  0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
+  0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
+  0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
+  0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
+  0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
+  0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
+  0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
+  0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
+  0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
+  0.82788260f,    0.88168307f,    0.9389798f,     1.0f
+};
+
+
+// @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
+// note that you must produce bit-identical output to decode correctly;
+// this specific sequence of operations is specified in the spec (it's
+// drawing integer-quantized frequency-space lines that the encoder
+// expects to be exactly the same)
+//     ... also, isn't the whole point of Bresenham's algorithm to NOT
+// have to divide in the setup? sigh.
+#ifndef STB_VORBIS_NO_DEFER_FLOOR
+#define LINE_OP(a,b)   a *= b
+#else
+#define LINE_OP(a,b)   a = b
+#endif
+
+#ifdef STB_VORBIS_DIVIDE_TABLE
+#define DIVTAB_NUMER   32
+#define DIVTAB_DENOM   64
+int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
+#endif
+
+static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
+{
+   int dy = y1 - y0;
+   int adx = x1 - x0;
+   int ady = abs(dy);
+   int base;
+   int x=x0,y=y0;
+   int err = 0;
+   int sy;
+
+#ifdef STB_VORBIS_DIVIDE_TABLE
+   if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
+      if (dy < 0) {
+         base = -integer_divide_table[ady][adx];
+         sy = base-1;
+      } else {
+         base =  integer_divide_table[ady][adx];
+         sy = base+1;
+      }
+   } else {
+      base = dy / adx;
+      if (dy < 0)
+         sy = base - 1;
+      else
+         sy = base+1;
+   }
+#else
+   base = dy / adx;
+   if (dy < 0)
+      sy = base - 1;
+   else
+      sy = base+1;
+#endif
+   ady -= abs(base) * adx;
+   if (x1 > n) x1 = n;
+   if (x < x1) {
+      LINE_OP(output[x], inverse_db_table[y&255]);
+      for (++x; x < x1; ++x) {
+         err += ady;
+         if (err >= adx) {
+            err -= adx;
+            y += sy;
+         } else
+            y += base;
+         LINE_OP(output[x], inverse_db_table[y&255]);
+      }
+   }
+}
+
+static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
+{
+   int k;
+   if (rtype == 0) {
+      int step = n / book->dimensions;
+      for (k=0; k < step; ++k)
+         if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
+            return FALSE;
+   } else {
+      for (k=0; k < n; ) {
+         if (!codebook_decode(f, book, target+offset, n-k))
+            return FALSE;
+         k += book->dimensions;
+         offset += book->dimensions;
+      }
+   }
+   return TRUE;
+}
+
+// n is 1/2 of the blocksize --
+// specification: "Correct per-vector decode length is [n]/2"
+static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
+{
+   int i,j,pass;
+   Residue *r = f->residue_config + rn;
+   int rtype = f->residue_types[rn];
+   int c = r->classbook;
+   int classwords = f->codebooks[c].dimensions;
+   unsigned int actual_size = rtype == 2 ? n*2 : n;
+   unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
+   unsigned int limit_r_end   = (r->end   < actual_size ? r->end   : actual_size);
+   int n_read = limit_r_end - limit_r_begin;
+   int part_read = n_read / r->part_size;
+   int temp_alloc_point = temp_alloc_save(f);
+   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+   uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
+   #else
+   int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
+   #endif
+
+   CHECK(f);
+
+   for (i=0; i < ch; ++i)
+      if (!do_not_decode[i])
+         memset(residue_buffers[i], 0, sizeof(float) * n);
+
+   if (rtype == 2 && ch != 1) {
+      for (j=0; j < ch; ++j)
+         if (!do_not_decode[j])
+            break;
+      if (j == ch)
+         goto done;
+
+      for (pass=0; pass < 8; ++pass) {
+         int pcount = 0, class_set = 0;
+         if (ch == 2) {
+            while (pcount < part_read) {
+               int z = r->begin + pcount*r->part_size;
+               int c_inter = (z & 1), p_inter = z>>1;
+               if (pass == 0) {
+                  Codebook *c = f->codebooks+r->classbook;
+                  int q;
+                  DECODE(q,f,c);
+                  if (q == EOP) goto done;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  part_classdata[0][class_set] = r->classdata[q];
+                  #else
+                  for (i=classwords-1; i >= 0; --i) {
+                     classifications[0][i+pcount] = q % r->classifications;
+                     q /= r->classifications;
+                  }
+                  #endif
+               }
+               for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
+                  int z = r->begin + pcount*r->part_size;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  int c = part_classdata[0][class_set][i];
+                  #else
+                  int c = classifications[0][pcount];
+                  #endif
+                  int b = r->residue_books[c][pass];
+                  if (b >= 0) {
+                     Codebook *book = f->codebooks + b;
+                     #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
+                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
+                        goto done;
+                     #else
+                     // saves 1%
+                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
+                        goto done;
+                     #endif
+                  } else {
+                     z += r->part_size;
+                     c_inter = z & 1;
+                     p_inter = z >> 1;
+                  }
+               }
+               #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+               ++class_set;
+               #endif
+            }
+         } else if (ch > 2) {
+            while (pcount < part_read) {
+               int z = r->begin + pcount*r->part_size;
+               int c_inter = z % ch, p_inter = z/ch;
+               if (pass == 0) {
+                  Codebook *c = f->codebooks+r->classbook;
+                  int q;
+                  DECODE(q,f,c);
+                  if (q == EOP) goto done;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  part_classdata[0][class_set] = r->classdata[q];
+                  #else
+                  for (i=classwords-1; i >= 0; --i) {
+                     classifications[0][i+pcount] = q % r->classifications;
+                     q /= r->classifications;
+                  }
+                  #endif
+               }
+               for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
+                  int z = r->begin + pcount*r->part_size;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  int c = part_classdata[0][class_set][i];
+                  #else
+                  int c = classifications[0][pcount];
+                  #endif
+                  int b = r->residue_books[c][pass];
+                  if (b >= 0) {
+                     Codebook *book = f->codebooks + b;
+                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
+                        goto done;
+                  } else {
+                     z += r->part_size;
+                     c_inter = z % ch;
+                     p_inter = z / ch;
+                  }
+               }
+               #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+               ++class_set;
+               #endif
+            }
+         }
+      }
+      goto done;
+   }
+   CHECK(f);
+
+   for (pass=0; pass < 8; ++pass) {
+      int pcount = 0, class_set=0;
+      while (pcount < part_read) {
+         if (pass == 0) {
+            for (j=0; j < ch; ++j) {
+               if (!do_not_decode[j]) {
+                  Codebook *c = f->codebooks+r->classbook;
+                  int temp;
+                  DECODE(temp,f,c);
+                  if (temp == EOP) goto done;
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  part_classdata[j][class_set] = r->classdata[temp];
+                  #else
+                  for (i=classwords-1; i >= 0; --i) {
+                     classifications[j][i+pcount] = temp % r->classifications;
+                     temp /= r->classifications;
+                  }
+                  #endif
+               }
+            }
+         }
+         for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
+            for (j=0; j < ch; ++j) {
+               if (!do_not_decode[j]) {
+                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+                  int c = part_classdata[j][class_set][i];
+                  #else
+                  int c = classifications[j][pcount];
+                  #endif
+                  int b = r->residue_books[c][pass];
+                  if (b >= 0) {
+                     float *target = residue_buffers[j];
+                     int offset = r->begin + pcount * r->part_size;
+                     int n = r->part_size;
+                     Codebook *book = f->codebooks + b;
+                     if (!residue_decode(f, book, target, offset, n, rtype))
+                        goto done;
+                  }
+               }
+            }
+         }
+         #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+         ++class_set;
+         #endif
+      }
+   }
+  done:
+   CHECK(f);
+   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+   temp_free(f,part_classdata);
+   #else
+   temp_free(f,classifications);
+   #endif
+   temp_alloc_restore(f,temp_alloc_point);
+}
+
+
+#if 0
+// slow way for debugging
+void inverse_mdct_slow(float *buffer, int n)
+{
+   int i,j;
+   int n2 = n >> 1;
+   float *x = (float *) malloc(sizeof(*x) * n2);
+   memcpy(x, buffer, sizeof(*x) * n2);
+   for (i=0; i < n; ++i) {
+      float acc = 0;
+      for (j=0; j < n2; ++j)
+         // formula from paper:
+         //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
+         // formula from wikipedia
+         //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
+         // these are equivalent, except the formula from the paper inverts the multiplier!
+         // however, what actually works is NO MULTIPLIER!?!
+         //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
+         acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
+      buffer[i] = acc;
+   }
+   free(x);
+}
+#elif 0
+// same as above, but just barely able to run in real time on modern machines
+void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
+{
+   float mcos[16384];
+   int i,j;
+   int n2 = n >> 1, nmask = (n << 2) -1;
+   float *x = (float *) malloc(sizeof(*x) * n2);
+   memcpy(x, buffer, sizeof(*x) * n2);
+   for (i=0; i < 4*n; ++i)
+      mcos[i] = (float) cos(M_PI / 2 * i / n);
+
+   for (i=0; i < n; ++i) {
+      float acc = 0;
+      for (j=0; j < n2; ++j)
+         acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
+      buffer[i] = acc;
+   }
+   free(x);
+}
+#elif 0
+// transform to use a slow dct-iv; this is STILL basically trivial,
+// but only requires half as many ops
+void dct_iv_slow(float *buffer, int n)
+{
+   float mcos[16384];
+   float x[2048];
+   int i,j;
+   int n2 = n >> 1, nmask = (n << 3) - 1;
+   memcpy(x, buffer, sizeof(*x) * n);
+   for (i=0; i < 8*n; ++i)
+      mcos[i] = (float) cos(M_PI / 4 * i / n);
+   for (i=0; i < n; ++i) {
+      float acc = 0;
+      for (j=0; j < n; ++j)
+         acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
+      buffer[i] = acc;
+   }
+}
+
+void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
+{
+   int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
+   float temp[4096];
+
+   memcpy(temp, buffer, n2 * sizeof(float));
+   dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
+
+   for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
+   for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
+   for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
+}
+#endif
+
+#ifndef LIBVORBIS_MDCT
+#define LIBVORBIS_MDCT 0
+#endif
+
+#if LIBVORBIS_MDCT
+// directly call the vorbis MDCT using an interface documented
+// by Jeff Roberts... useful for performance comparison
+typedef struct
+{
+  int n;
+  int log2n;
+
+  float *trig;
+  int   *bitrev;
+
+  float scale;
+} mdct_lookup;
+
+extern void mdct_init(mdct_lookup *lookup, int n);
+extern void mdct_clear(mdct_lookup *l);
+extern void mdct_backward(mdct_lookup *init, float *in, float *out);
+
+mdct_lookup M1,M2;
+
+void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
+{
+   mdct_lookup *M;
+   if (M1.n == n) M = &M1;
+   else if (M2.n == n) M = &M2;
+   else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
+   else {
+      if (M2.n) __asm int 3;
+      mdct_init(&M2, n);
+      M = &M2;
+   }
+
+   mdct_backward(M, buffer, buffer);
+}
+#endif
+
+
+// the following were split out into separate functions while optimizing;
+// they could be pushed back up but eh. __forceinline showed no change;
+// they're probably already being inlined.
+static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
+{
+   float *ee0 = e + i_off;
+   float *ee2 = ee0 + k_off;
+   int i;
+
+   assert((n & 3) == 0);
+   for (i=(n>>2); i > 0; --i) {
+      float k00_20, k01_21;
+      k00_20  = ee0[ 0] - ee2[ 0];
+      k01_21  = ee0[-1] - ee2[-1];
+      ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
+      ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
+      ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
+      ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;
+
+      k00_20  = ee0[-2] - ee2[-2];
+      k01_21  = ee0[-3] - ee2[-3];
+      ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
+      ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
+      ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
+      ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;
+
+      k00_20  = ee0[-4] - ee2[-4];
+      k01_21  = ee0[-5] - ee2[-5];
+      ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
+      ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
+      ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
+      ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;
+
+      k00_20  = ee0[-6] - ee2[-6];
+      k01_21  = ee0[-7] - ee2[-7];
+      ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
+      ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
+      ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
+      ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
+      A += 8;
+      ee0 -= 8;
+      ee2 -= 8;
+   }
+}
+
+static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
+{
+   int i;
+   float k00_20, k01_21;
+
+   float *e0 = e + d0;
+   float *e2 = e0 + k_off;
+
+   for (i=lim >> 2; i > 0; --i) {
+      k00_20 = e0[-0] - e2[-0];
+      k01_21 = e0[-1] - e2[-1];
+      e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
+      e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
+      e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
+      e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      A += k1;
+
+      k00_20 = e0[-2] - e2[-2];
+      k01_21 = e0[-3] - e2[-3];
+      e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
+      e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
+      e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
+      e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      A += k1;
+
+      k00_20 = e0[-4] - e2[-4];
+      k01_21 = e0[-5] - e2[-5];
+      e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
+      e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
+      e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
+      e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      A += k1;
+
+      k00_20 = e0[-6] - e2[-6];
+      k01_21 = e0[-7] - e2[-7];
+      e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
+      e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
+      e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
+      e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
+
+      e0 -= 8;
+      e2 -= 8;
+
+      A += k1;
+   }
+}
+
+static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
+{
+   int i;
+   float A0 = A[0];
+   float A1 = A[0+1];
+   float A2 = A[0+a_off];
+   float A3 = A[0+a_off+1];
+   float A4 = A[0+a_off*2+0];
+   float A5 = A[0+a_off*2+1];
+   float A6 = A[0+a_off*3+0];
+   float A7 = A[0+a_off*3+1];
+
+   float k00,k11;
+
+   float *ee0 = e  +i_off;
+   float *ee2 = ee0+k_off;
+
+   for (i=n; i > 0; --i) {
+      k00     = ee0[ 0] - ee2[ 0];
+      k11     = ee0[-1] - ee2[-1];
+      ee0[ 0] =  ee0[ 0] + ee2[ 0];
+      ee0[-1] =  ee0[-1] + ee2[-1];
+      ee2[ 0] = (k00) * A0 - (k11) * A1;
+      ee2[-1] = (k11) * A0 + (k00) * A1;
+
+      k00     = ee0[-2] - ee2[-2];
+      k11     = ee0[-3] - ee2[-3];
+      ee0[-2] =  ee0[-2] + ee2[-2];
+      ee0[-3] =  ee0[-3] + ee2[-3];
+      ee2[-2] = (k00) * A2 - (k11) * A3;
+      ee2[-3] = (k11) * A2 + (k00) * A3;
+
+      k00     = ee0[-4] - ee2[-4];
+      k11     = ee0[-5] - ee2[-5];
+      ee0[-4] =  ee0[-4] + ee2[-4];
+      ee0[-5] =  ee0[-5] + ee2[-5];
+      ee2[-4] = (k00) * A4 - (k11) * A5;
+      ee2[-5] = (k11) * A4 + (k00) * A5;
+
+      k00     = ee0[-6] - ee2[-6];
+      k11     = ee0[-7] - ee2[-7];
+      ee0[-6] =  ee0[-6] + ee2[-6];
+      ee0[-7] =  ee0[-7] + ee2[-7];
+      ee2[-6] = (k00) * A6 - (k11) * A7;
+      ee2[-7] = (k11) * A6 + (k00) * A7;
+
+      ee0 -= k0;
+      ee2 -= k0;
+   }
+}
+
+static __forceinline void iter_54(float *z)
+{
+   float k00,k11,k22,k33;
+   float y0,y1,y2,y3;
+
+   k00  = z[ 0] - z[-4];
+   y0   = z[ 0] + z[-4];
+   y2   = z[-2] + z[-6];
+   k22  = z[-2] - z[-6];
+
+   z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
+   z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
+
+   // done with y0,y2
+
+   k33  = z[-3] - z[-7];
+
+   z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
+   z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
+
+   // done with k33
+
+   k11  = z[-1] - z[-5];
+   y1   = z[-1] + z[-5];
+   y3   = z[-3] + z[-7];
+
+   z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
+   z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
+   z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
+   z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
+}
+
+static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
+{
+   int a_off = base_n >> 3;
+   float A2 = A[0+a_off];
+   float *z = e + i_off;
+   float *base = z - 16 * n;
+
+   while (z > base) {
+      float k00,k11;
+      float l00,l11;
+
+      k00    = z[-0] - z[ -8];
+      k11    = z[-1] - z[ -9];
+      l00    = z[-2] - z[-10];
+      l11    = z[-3] - z[-11];
+      z[ -0] = z[-0] + z[ -8];
+      z[ -1] = z[-1] + z[ -9];
+      z[ -2] = z[-2] + z[-10];
+      z[ -3] = z[-3] + z[-11];
+      z[ -8] = k00;
+      z[ -9] = k11;
+      z[-10] = (l00+l11) * A2;
+      z[-11] = (l11-l00) * A2;
+
+      k00    = z[ -4] - z[-12];
+      k11    = z[ -5] - z[-13];
+      l00    = z[ -6] - z[-14];
+      l11    = z[ -7] - z[-15];
+      z[ -4] = z[ -4] + z[-12];
+      z[ -5] = z[ -5] + z[-13];
+      z[ -6] = z[ -6] + z[-14];
+      z[ -7] = z[ -7] + z[-15];
+      z[-12] = k11;
+      z[-13] = -k00;
+      z[-14] = (l11-l00) * A2;
+      z[-15] = (l00+l11) * -A2;
+
+      iter_54(z);
+      iter_54(z-8);
+      z -= 16;
+   }
+}
+
+static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
+{
+   int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
+   int ld;
+   // @OPTIMIZE: reduce register pressure by using fewer variables?
+   int save_point = temp_alloc_save(f);
+   float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
+   float *u=NULL,*v=NULL;
+   // twiddle factors
+   float *A = f->A[blocktype];
+
+   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
+   // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
+
+   // kernel from paper
+
+
+   // merged:
+   //   copy and reflect spectral data
+   //   step 0
+
+   // note that it turns out that the items added together during
+   // this step are, in fact, being added to themselves (as reflected
+   // by step 0). inexplicable inefficiency! this became obvious
+   // once I combined the passes.
+
+   // so there's a missing 'times 2' here (for adding X to itself).
+   // this propagates through linearly to the end, where the numbers
+   // are 1/2 too small, and need to be compensated for.
+
+   {
+      float *d,*e, *AA, *e_stop;
+      d = &buf2[n2-2];
+      AA = A;
+      e = &buffer[0];
+      e_stop = &buffer[n2];
+      while (e != e_stop) {
+         d[1] = (e[0] * AA[0] - e[2]*AA[1]);
+         d[0] = (e[0] * AA[1] + e[2]*AA[0]);
+         d -= 2;
+         AA += 2;
+         e += 4;
+      }
+
+      e = &buffer[n2-3];
+      while (d >= buf2) {
+         d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
+         d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
+         d -= 2;
+         AA += 2;
+         e -= 4;
+      }
+   }
+
+   // now we use symbolic names for these, so that we can
+   // possibly swap their meaning as we change which operations
+   // are in place
+
+   u = buffer;
+   v = buf2;
+
+   // step 2    (paper output is w, now u)
+   // this could be in place, but the data ends up in the wrong
+   // place... _somebody_'s got to swap it, so this is nominated
+   {
+      float *AA = &A[n2-8];
+      float *d0,*d1, *e0, *e1;
+
+      e0 = &v[n4];
+      e1 = &v[0];
+
+      d0 = &u[n4];
+      d1 = &u[0];
+
+      while (AA >= A) {
+         float v40_20, v41_21;
+
+         v41_21 = e0[1] - e1[1];
+         v40_20 = e0[0] - e1[0];
+         d0[1]  = e0[1] + e1[1];
+         d0[0]  = e0[0] + e1[0];
+         d1[1]  = v41_21*AA[4] - v40_20*AA[5];
+         d1[0]  = v40_20*AA[4] + v41_21*AA[5];
+
+         v41_21 = e0[3] - e1[3];
+         v40_20 = e0[2] - e1[2];
+         d0[3]  = e0[3] + e1[3];
+         d0[2]  = e0[2] + e1[2];
+         d1[3]  = v41_21*AA[0] - v40_20*AA[1];
+         d1[2]  = v40_20*AA[0] + v41_21*AA[1];
+
+         AA -= 8;
+
+         d0 += 4;
+         d1 += 4;
+         e0 += 4;
+         e1 += 4;
+      }
+   }
+
+   // step 3
+   ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
+
+   // optimized step 3:
+
+   // the original step3 loop can be nested r inside s or s inside r;
+   // it's written originally as s inside r, but this is dumb when r
+   // iterates many times, and s few. So I have two copies of it and
+   // switch between them halfway.
+
+   // this is iteration 0 of step 3
+   imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
+   imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
+
+   // this is iteration 1 of step 3
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
+   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
+
+   l=2;
+   for (; l < (ld-3)>>1; ++l) {
+      int k0 = n >> (l+2), k0_2 = k0>>1;
+      int lim = 1 << (l+1);
+      int i;
+      for (i=0; i < lim; ++i)
+         imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
+   }
+
+   for (; l < ld-6; ++l) {
+      int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
+      int rlim = n >> (l+6), r;
+      int lim = 1 << (l+1);
+      int i_off;
+      float *A0 = A;
+      i_off = n2-1;
+      for (r=rlim; r > 0; --r) {
+         imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
+         A0 += k1*4;
+         i_off -= 8;
+      }
+   }
+
+   // iterations with count:
+   //   ld-6,-5,-4 all interleaved together
+   //       the big win comes from getting rid of needless flops
+   //         due to the constants on pass 5 & 4 being all 1 and 0;
+   //       combining them to be simultaneous to improve cache made little difference
+   imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
+
+   // output is u
+
+   // step 4, 5, and 6
+   // cannot be in-place because of step 5
+   {
+      uint16 *bitrev = f->bit_reverse[blocktype];
+      // weirdly, I'd have thought reading sequentially and writing
+      // erratically would have been better than vice-versa, but in
+      // fact that's not what my testing showed. (That is, with
+      // j = bitreverse(i), do you read i and write j, or read j and write i.)
+
+      float *d0 = &v[n4-4];
+      float *d1 = &v[n2-4];
+      while (d0 >= v) {
+         int k4;
+
+         k4 = bitrev[0];
+         d1[3] = u[k4+0];
+         d1[2] = u[k4+1];
+         d0[3] = u[k4+2];
+         d0[2] = u[k4+3];
+
+         k4 = bitrev[1];
+         d1[1] = u[k4+0];
+         d1[0] = u[k4+1];
+         d0[1] = u[k4+2];
+         d0[0] = u[k4+3];
+
+         d0 -= 4;
+         d1 -= 4;
+         bitrev += 2;
+      }
+   }
+   // (paper output is u, now v)
+
+
+   // data must be in buf2
+   assert(v == buf2);
+
+   // step 7   (paper output is v, now v)
+   // this is now in place
+   {
+      float *C = f->C[blocktype];
+      float *d, *e;
+
+      d = v;
+      e = v + n2 - 4;
+
+      while (d < e) {
+         float a02,a11,b0,b1,b2,b3;
+
+         a02 = d[0] - e[2];
+         a11 = d[1] + e[3];
+
+         b0 = C[1]*a02 + C[0]*a11;
+         b1 = C[1]*a11 - C[0]*a02;
+
+         b2 = d[0] + e[ 2];
+         b3 = d[1] - e[ 3];
+
+         d[0] = b2 + b0;
+         d[1] = b3 + b1;
+         e[2] = b2 - b0;
+         e[3] = b1 - b3;
+
+         a02 = d[2] - e[0];
+         a11 = d[3] + e[1];
+
+         b0 = C[3]*a02 + C[2]*a11;
+         b1 = C[3]*a11 - C[2]*a02;
+
+         b2 = d[2] + e[ 0];
+         b3 = d[3] - e[ 1];
+
+         d[2] = b2 + b0;
+         d[3] = b3 + b1;
+         e[0] = b2 - b0;
+         e[1] = b1 - b3;
+
+         C += 4;
+         d += 4;
+         e -= 4;
+      }
+   }
+
+   // data must be in buf2
+
+
+   // step 8+decode   (paper output is X, now buffer)
+   // this generates pairs of data a la 8 and pushes them directly through
+   // the decode kernel (pushing rather than pulling) to avoid having
+   // to make another pass later
+
+   // this cannot POSSIBLY be in place, so we refer to the buffers directly
+
+   {
+      float *d0,*d1,*d2,*d3;
+
+      float *B = f->B[blocktype] + n2 - 8;
+      float *e = buf2 + n2 - 8;
+      d0 = &buffer[0];
+      d1 = &buffer[n2-4];
+      d2 = &buffer[n2];
+      d3 = &buffer[n-4];
+      while (e >= v) {
+         float p0,p1,p2,p3;
+
+         p3 =  e[6]*B[7] - e[7]*B[6];
+         p2 = -e[6]*B[6] - e[7]*B[7];
+
+         d0[0] =   p3;
+         d1[3] = - p3;
+         d2[0] =   p2;
+         d3[3] =   p2;
+
+         p1 =  e[4]*B[5] - e[5]*B[4];
+         p0 = -e[4]*B[4] - e[5]*B[5];
+
+         d0[1] =   p1;
+         d1[2] = - p1;
+         d2[1] =   p0;
+         d3[2] =   p0;
+
+         p3 =  e[2]*B[3] - e[3]*B[2];
+         p2 = -e[2]*B[2] - e[3]*B[3];
+
+         d0[2] =   p3;
+         d1[1] = - p3;
+         d2[2] =   p2;
+         d3[1] =   p2;
+
+         p1 =  e[0]*B[1] - e[1]*B[0];
+         p0 = -e[0]*B[0] - e[1]*B[1];
+
+         d0[3] =   p1;
+         d1[0] = - p1;
+         d2[3] =   p0;
+         d3[0] =   p0;
+
+         B -= 8;
+         e -= 8;
+         d0 += 4;
+         d2 += 4;
+         d1 -= 4;
+         d3 -= 4;
+      }
+   }
+
+   temp_free(f,buf2);
+   temp_alloc_restore(f,save_point);
+}
+
+#if 0
+// this is the original version of the above code, if you want to optimize it from scratch
+void inverse_mdct_naive(float *buffer, int n)
+{
+   float s;
+   float A[1 << 12], B[1 << 12], C[1 << 11];
+   int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
+   int n3_4 = n - n4, ld;
+   // how can they claim this only uses N words?!
+   // oh, because they're only used sparsely, whoops
+   float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
+   // set up twiddle factors
+
+   for (k=k2=0; k < n4; ++k,k2+=2) {
+      A[k2  ] = (float)  cos(4*k*M_PI/n);
+      A[k2+1] = (float) -sin(4*k*M_PI/n);
+      B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
+      B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
+   }
+   for (k=k2=0; k < n8; ++k,k2+=2) {
+      C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
+      C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
+   }
+
+   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
+   // Note there are bugs in that pseudocode, presumably due to them attempting
+   // to rename the arrays nicely rather than representing the way their actual
+   // implementation bounces buffers back and forth. As a result, even in the
+   // "some formulars corrected" version, a direct implementation fails. These
+   // are noted below as "paper bug".
+
+   // copy and reflect spectral data
+   for (k=0; k < n2; ++k) u[k] = buffer[k];
+   for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
+   // kernel from paper
+   // step 1
+   for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
+      v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
+      v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
+   }
+   // step 2
+   for (k=k4=0; k < n8; k+=1, k4+=4) {
+      w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
+      w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
+      w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
+      w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
+   }
+   // step 3
+   ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
+   for (l=0; l < ld-3; ++l) {
+      int k0 = n >> (l+2), k1 = 1 << (l+3);
+      int rlim = n >> (l+4), r4, r;
+      int s2lim = 1 << (l+2), s2;
+      for (r=r4=0; r < rlim; r4+=4,++r) {
+         for (s2=0; s2 < s2lim; s2+=2) {
+            u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
+            u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
+            u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
+                                - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
+            u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
+                                + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
+         }
+      }
+      if (l+1 < ld-3) {
+         // paper bug: ping-ponging of u&w here is omitted
+         memcpy(w, u, sizeof(u));
+      }
+   }
+
+   // step 4
+   for (i=0; i < n8; ++i) {
+      int j = bit_reverse(i) >> (32-ld+3);
+      assert(j < n8);
+      if (i == j) {
+         // paper bug: original code probably swapped in place; if copying,
+         //            need to directly copy in this case
+         int i8 = i << 3;
+         v[i8+1] = u[i8+1];
+         v[i8+3] = u[i8+3];
+         v[i8+5] = u[i8+5];
+         v[i8+7] = u[i8+7];
+      } else if (i < j) {
+         int i8 = i << 3, j8 = j << 3;
+         v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
+         v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
+         v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
+         v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
+      }
+   }
+   // step 5
+   for (k=0; k < n2; ++k) {
+      w[k] = v[k*2+1];
+   }
+   // step 6
+   for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
+      u[n-1-k2] = w[k4];
+      u[n-2-k2] = w[k4+1];
+      u[n3_4 - 1 - k2] = w[k4+2];
+      u[n3_4 - 2 - k2] = w[k4+3];
+   }
+   // step 7
+   for (k=k2=0; k < n8; ++k, k2 += 2) {
+      v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
+      v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
+      v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
+      v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
+   }
+   // step 8
+   for (k=k2=0; k < n4; ++k,k2 += 2) {
+      X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
+      X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
+   }
+
+   // decode kernel to output
+   // determined the following value experimentally
+   // (by first figuring out what made inverse_mdct_slow work); then matching that here
+   // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
+   s = 0.5; // theoretically would be n4
+
+   // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
+   //     so it needs to use the "old" B values to behave correctly, or else
+   //     set s to 1.0 ]]]
+   for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
+   for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
+   for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
+}
+#endif
+
+static float *get_window(vorb *f, int len)
+{
+   len <<= 1;
+   if (len == f->blocksize_0) return f->window[0];
+   if (len == f->blocksize_1) return f->window[1];
+   return NULL;
+}
+
+#ifndef STB_VORBIS_NO_DEFER_FLOOR
+typedef int16 YTYPE;
+#else
+typedef int YTYPE;
+#endif
+static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
+{
+   int n2 = n >> 1;
+   int s = map->chan[i].mux, floor;
+   floor = map->submap_floor[s];
+   if (f->floor_types[floor] == 0) {
+      return error(f, VORBIS_invalid_stream);
+   } else {
+      Floor1 *g = &f->floor_config[floor].floor1;
+      int j,q;
+      int lx = 0, ly = finalY[0] * g->floor1_multiplier;
+      for (q=1; q < g->values; ++q) {
+         j = g->sorted_order[q];
+         #ifndef STB_VORBIS_NO_DEFER_FLOOR
+         STBV_NOTUSED(step2_flag);
+         if (finalY[j] >= 0)
+         #else
+         if (step2_flag[j])
+         #endif
+         {
+            int hy = finalY[j] * g->floor1_multiplier;
+            int hx = g->Xlist[j];
+            if (lx != hx)
+               draw_line(target, lx,ly, hx,hy, n2);
+            CHECK(f);
+            lx = hx, ly = hy;
+         }
+      }
+      if (lx < n2) {
+         // optimization of: draw_line(target, lx,ly, n,ly, n2);
+         for (j=lx; j < n2; ++j)
+            LINE_OP(target[j], inverse_db_table[ly]);
+         CHECK(f);
+      }
+   }
+   return TRUE;
+}
+
+// The meaning of "left" and "right"
+//
+// For a given frame:
+//     we compute samples from 0..n
+//     window_center is n/2
+//     we'll window and mix the samples from left_start to left_end with data from the previous frame
+//     all of the samples from left_end to right_start can be output without mixing; however,
+//        this interval is 0-length except when transitioning between short and long frames
+//     all of the samples from right_start to right_end need to be mixed with the next frame,
+//        which we don't have, so those get saved in a buffer
+//     frame N's right_end-right_start, the number of samples to mix with the next frame,
+//        has to be the same as frame N+1's left_end-left_start (which they are by
+//        construction)
+
+static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
+{
+   Mode *m;
+   int i, n, prev, next, window_center;
+   f->channel_buffer_start = f->channel_buffer_end = 0;
+
+  retry:
+   if (f->eof) return FALSE;
+   if (!maybe_start_packet(f))
+      return FALSE;
+   // check packet type
+   if (get_bits(f,1) != 0) {
+      if (IS_PUSH_MODE(f))
+         return error(f,VORBIS_bad_packet_type);
+      while (EOP != get8_packet(f));
+      goto retry;
+   }
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+
+   i = get_bits(f, ilog(f->mode_count-1));
+   if (i == EOP) return FALSE;
+   if (i >= f->mode_count) return FALSE;
+   *mode = i;
+   m = f->mode_config + i;
+   if (m->blockflag) {
+      n = f->blocksize_1;
+      prev = get_bits(f,1);
+      next = get_bits(f,1);
+   } else {
+      prev = next = 0;
+      n = f->blocksize_0;
+   }
+
+// WINDOWING
+
+   window_center = n >> 1;
+   if (m->blockflag && !prev) {
+      *p_left_start = (n - f->blocksize_0) >> 2;
+      *p_left_end   = (n + f->blocksize_0) >> 2;
+   } else {
+      *p_left_start = 0;
+      *p_left_end   = window_center;
+   }
+   if (m->blockflag && !next) {
+      *p_right_start = (n*3 - f->blocksize_0) >> 2;
+      *p_right_end   = (n*3 + f->blocksize_0) >> 2;
+   } else {
+      *p_right_start = window_center;
+      *p_right_end   = n;
+   }
+
+   return TRUE;
+}
+
+static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
+{
+   Mapping *map;
+   int i,j,k,n,n2;
+   int zero_channel[256];
+   int really_zero_channel[256];
+
+// WINDOWING
+
+   STBV_NOTUSED(left_end);
+   n = f->blocksize[m->blockflag];
+   map = &f->mapping[m->mapping];
+
+// FLOORS
+   n2 = n >> 1;
+
+   CHECK(f);
+
+   for (i=0; i < f->channels; ++i) {
+      int s = map->chan[i].mux, floor;
+      zero_channel[i] = FALSE;
+      floor = map->submap_floor[s];
+      if (f->floor_types[floor] == 0) {
+         return error(f, VORBIS_invalid_stream);
+      } else {
+         Floor1 *g = &f->floor_config[floor].floor1;
+         if (get_bits(f, 1)) {
+            short *finalY;
+            uint8 step2_flag[256];
+            static int range_list[4] = { 256, 128, 86, 64 };
+            int range = range_list[g->floor1_multiplier-1];
+            int offset = 2;
+            finalY = f->finalY[i];
+            finalY[0] = get_bits(f, ilog(range)-1);
+            finalY[1] = get_bits(f, ilog(range)-1);
+            for (j=0; j < g->partitions; ++j) {
+               int pclass = g->partition_class_list[j];
+               int cdim = g->class_dimensions[pclass];
+               int cbits = g->class_subclasses[pclass];
+               int csub = (1 << cbits)-1;
+               int cval = 0;
+               if (cbits) {
+                  Codebook *c = f->codebooks + g->class_masterbooks[pclass];
+                  DECODE(cval,f,c);
+               }
+               for (k=0; k < cdim; ++k) {
+                  int book = g->subclass_books[pclass][cval & csub];
+                  cval = cval >> cbits;
+                  if (book >= 0) {
+                     int temp;
+                     Codebook *c = f->codebooks + book;
+                     DECODE(temp,f,c);
+                     finalY[offset++] = temp;
+                  } else
+                     finalY[offset++] = 0;
+               }
+            }
+            if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
+            step2_flag[0] = step2_flag[1] = 1;
+            for (j=2; j < g->values; ++j) {
+               int low, high, pred, highroom, lowroom, room, val;
+               low = g->neighbors[j][0];
+               high = g->neighbors[j][1];
+               //neighbors(g->Xlist, j, &low, &high);
+               pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
+               val = finalY[j];
+               highroom = range - pred;
+               lowroom = pred;
+               if (highroom < lowroom)
+                  room = highroom * 2;
+               else
+                  room = lowroom * 2;
+               if (val) {
+                  step2_flag[low] = step2_flag[high] = 1;
+                  step2_flag[j] = 1;
+                  if (val >= room)
+                     if (highroom > lowroom)
+                        finalY[j] = val - lowroom + pred;
+                     else
+                        finalY[j] = pred - val + highroom - 1;
+                  else
+                     if (val & 1)
+                        finalY[j] = pred - ((val+1)>>1);
+                     else
+                        finalY[j] = pred + (val>>1);
+               } else {
+                  step2_flag[j] = 0;
+                  finalY[j] = pred;
+               }
+            }
+
+#ifdef STB_VORBIS_NO_DEFER_FLOOR
+            do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
+#else
+            // defer final floor computation until _after_ residue
+            for (j=0; j < g->values; ++j) {
+               if (!step2_flag[j])
+                  finalY[j] = -1;
+            }
+#endif
+         } else {
+           error:
+            zero_channel[i] = TRUE;
+         }
+         // So we just defer everything else to later
+
+         // at this point we've decoded the floor into buffer
+      }
+   }
+   CHECK(f);
+   // at this point we've decoded all floors
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+
+   // re-enable coupled channels if necessary
+   memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
+   for (i=0; i < map->coupling_steps; ++i)
+      if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
+         zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
+      }
+
+   CHECK(f);
+// RESIDUE DECODE
+   for (i=0; i < map->submaps; ++i) {
+      float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
+      int r;
+      uint8 do_not_decode[256];
+      int ch = 0;
+      for (j=0; j < f->channels; ++j) {
+         if (map->chan[j].mux == i) {
+            if (zero_channel[j]) {
+               do_not_decode[ch] = TRUE;
+               residue_buffers[ch] = NULL;
+            } else {
+               do_not_decode[ch] = FALSE;
+               residue_buffers[ch] = f->channel_buffers[j];
+            }
+            ++ch;
+         }
+      }
+      r = map->submap_residue[i];
+      decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
+   }
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+   CHECK(f);
+
+// INVERSE COUPLING
+   for (i = map->coupling_steps-1; i >= 0; --i) {
+      int n2 = n >> 1;
+      float *m = f->channel_buffers[map->chan[i].magnitude];
+      float *a = f->channel_buffers[map->chan[i].angle    ];
+      for (j=0; j < n2; ++j) {
+         float a2,m2;
+         if (m[j] > 0)
+            if (a[j] > 0)
+               m2 = m[j], a2 = m[j] - a[j];
+            else
+               a2 = m[j], m2 = m[j] + a[j];
+         else
+            if (a[j] > 0)
+               m2 = m[j], a2 = m[j] + a[j];
+            else
+               a2 = m[j], m2 = m[j] - a[j];
+         m[j] = m2;
+         a[j] = a2;
+      }
+   }
+   CHECK(f);
+
+   // finish decoding the floors
+#ifndef STB_VORBIS_NO_DEFER_FLOOR
+   for (i=0; i < f->channels; ++i) {
+      if (really_zero_channel[i]) {
+         memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
+      } else {
+         do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
+      }
+   }
+#else
+   for (i=0; i < f->channels; ++i) {
+      if (really_zero_channel[i]) {
+         memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
+      } else {
+         for (j=0; j < n2; ++j)
+            f->channel_buffers[i][j] *= f->floor_buffers[i][j];
+      }
+   }
+#endif
+
+// INVERSE MDCT
+   CHECK(f);
+   for (i=0; i < f->channels; ++i)
+      inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
+   CHECK(f);
+
+   // this shouldn't be necessary, unless we exited on an error
+   // and want to flush to get to the next packet
+   flush_packet(f);
+
+   if (f->first_decode) {
+      // assume we start so first non-discarded sample is sample 0
+      // this isn't to spec, but spec would require us to read ahead
+      // and decode the size of all current frames--could be done,
+      // but presumably it's not a commonly used feature
+      f->current_loc = 0u - n2; // start of first frame is positioned for discard (NB this is an intentional unsigned overflow/wrap-around)
+      // we might have to discard samples "from" the next frame too,
+      // if we're lapping a large block then a small at the start?
+      f->discard_samples_deferred = n - right_end;
+      f->current_loc_valid = TRUE;
+      f->first_decode = FALSE;
+   } else if (f->discard_samples_deferred) {
+      if (f->discard_samples_deferred >= right_start - left_start) {
+         f->discard_samples_deferred -= (right_start - left_start);
+         left_start = right_start;
+         *p_left = left_start;
+      } else {
+         left_start += f->discard_samples_deferred;
+         *p_left = left_start;
+         f->discard_samples_deferred = 0;
+      }
+   } else if (f->previous_length == 0 && f->current_loc_valid) {
+      // we're recovering from a seek... that means we're going to discard
+      // the samples from this packet even though we know our position from
+      // the last page header, so we need to update the position based on
+      // the discarded samples here
+      // but wait, the code below is going to add this in itself even
+      // on a discard, so we don't need to do it here...
+   }
+
+   // check if we have ogg information about the sample # for this packet
+   if (f->last_seg_which == f->end_seg_with_known_loc) {
+      // if we have a valid current loc, and this is final:
+      if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
+         uint32 current_end = f->known_loc_for_packet;
+         // then let's infer the size of the (probably) short final frame
+         if (current_end < f->current_loc + (right_end-left_start)) {
+            if (current_end < f->current_loc) {
+               // negative truncation, that's impossible!
+               *len = 0;
+            } else {
+               *len = current_end - f->current_loc;
+            }
+            *len += left_start; // this doesn't seem right, but has no ill effect on my test files
+            if (*len > right_end) *len = right_end; // this should never happen
+            f->current_loc += *len;
+            return TRUE;
+         }
+      }
+      // otherwise, just set our sample loc
+      // guess that the ogg granule pos refers to the _middle_ of the
+      // last frame?
+      // set f->current_loc to the position of left_start
+      f->current_loc = f->known_loc_for_packet - (n2-left_start);
+      f->current_loc_valid = TRUE;
+   }
+   if (f->current_loc_valid)
+      f->current_loc += (right_start - left_start);
+
+   if (f->alloc.alloc_buffer)
+      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
+   *len = right_end;  // ignore samples after the window goes to 0
+   CHECK(f);
+
+   return TRUE;
+}
+
+static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
+{
+   int mode, left_end, right_end;
+   if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
+   return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
+}
+
+static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
+{
+   int prev,i,j;
+   // we use right&left (the start of the right- and left-window sin()-regions)
+   // to determine how much to return, rather than inferring from the rules
+   // (same result, clearer code); 'left' indicates where our sin() window
+   // starts, therefore where the previous window's right edge starts, and
+   // therefore where to start mixing from the previous buffer. 'right'
+   // indicates where our sin() ending-window starts, therefore that's where
+   // we start saving, and where our returned-data ends.
+
+   // mixin from previous window
+   if (f->previous_length) {
+      int i,j, n = f->previous_length;
+      float *w = get_window(f, n);
+      if (w == NULL) return 0;
+      for (i=0; i < f->channels; ++i) {
+         for (j=0; j < n; ++j)
+            f->channel_buffers[i][left+j] =
+               f->channel_buffers[i][left+j]*w[    j] +
+               f->previous_window[i][     j]*w[n-1-j];
+      }
+   }
+
+   prev = f->previous_length;
+
+   // last half of this data becomes previous window
+   f->previous_length = len - right;
+
+   // @OPTIMIZE: could avoid this copy by double-buffering the
+   // output (flipping previous_window with channel_buffers), but
+   // then previous_window would have to be 2x as large, and
+   // channel_buffers couldn't be temp mem (although they're NOT
+   // currently temp mem, they could be (unless we want to level
+   // performance by spreading out the computation))
+   for (i=0; i < f->channels; ++i)
+      for (j=0; right+j < len; ++j)
+         f->previous_window[i][j] = f->channel_buffers[i][right+j];
+
+   if (!prev)
+      // there was no previous packet, so this data isn't valid...
+      // this isn't entirely true, only the would-have-overlapped data
+      // isn't valid, but this seems to be what the spec requires
+      return 0;
+
+   // truncate a short frame
+   if (len < right) right = len;
+
+   f->samples_output += right-left;
+
+   return right - left;
+}
+
+static int vorbis_pump_first_frame(stb_vorbis *f)
+{
+   int len, right, left, res;
+   res = vorbis_decode_packet(f, &len, &left, &right);
+   if (res)
+      vorbis_finish_frame(f, len, left, right);
+   return res;
+}
+
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+static int is_whole_packet_present(stb_vorbis *f)
+{
+   // make sure that we have the packet available before continuing...
+   // this requires a full ogg parse, but we know we can fetch from f->stream
+
+   // instead of coding this out explicitly, we could save the current read state,
+   // read the next packet with get8() until end-of-packet, check f->eof, then
+   // reset the state? but that would be slower, esp. since we'd have over 256 bytes
+   // of state to restore (primarily the page segment table)
+
+   int s = f->next_seg, first = TRUE;
+   uint8 *p = f->stream;
+
+   if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
+      for (; s < f->segment_count; ++s) {
+         p += f->segments[s];
+         if (f->segments[s] < 255)               // stop at first short segment
+            break;
+      }
+      // either this continues, or it ends it...
+      if (s == f->segment_count)
+         s = -1; // set 'crosses page' flag
+      if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
+      first = FALSE;
+   }
+   for (; s == -1;) {
+      uint8 *q;
+      int n;
+
+      // check that we have the page header ready
+      if (p + 26 >= f->stream_end)               return error(f, VORBIS_need_more_data);
+      // validate the page
+      if (memcmp(p, ogg_page_header, 4))         return error(f, VORBIS_invalid_stream);
+      if (p[4] != 0)                             return error(f, VORBIS_invalid_stream);
+      if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
+         if (f->previous_length)
+            if ((p[5] & PAGEFLAG_continued_packet))  return error(f, VORBIS_invalid_stream);
+         // if no previous length, we're resynching, so we can come in on a continued-packet,
+         // which we'll just drop
+      } else {
+         if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
+      }
+      n = p[26]; // segment counts
+      q = p+27;  // q points to segment table
+      p = q + n; // advance past header
+      // make sure we've read the segment table
+      if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
+      for (s=0; s < n; ++s) {
+         p += q[s];
+         if (q[s] < 255)
+            break;
+      }
+      if (s == n)
+         s = -1; // set 'crosses page' flag
+      if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
+      first = FALSE;
+   }
+   return TRUE;
+}
+#endif // !STB_VORBIS_NO_PUSHDATA_API
+
+static int start_decoder(vorb *f)
+{
+   uint8 header[6], x,y;
+   int len,i,j,k, max_submaps = 0;
+   int longest_floorlist=0;
+
+   // first page, first packet
+   f->first_decode = TRUE;
+
+   if (!start_page(f))                              return FALSE;
+   // validate page flag
+   if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
+   if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
+   if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
+   // check for expected packet length
+   if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
+   if (f->segments[0] != 30) {
+      // check for the Ogg skeleton fishead identifying header to refine our error
+      if (f->segments[0] == 64 &&
+          getn(f, header, 6) &&
+          header[0] == 'f' &&
+          header[1] == 'i' &&
+          header[2] == 's' &&
+          header[3] == 'h' &&
+          header[4] == 'e' &&
+          header[5] == 'a' &&
+          get8(f)   == 'd' &&
+          get8(f)   == '\0')                        return error(f, VORBIS_ogg_skeleton_not_supported);
+      else
+                                                    return error(f, VORBIS_invalid_first_page);
+   }
+
+   // read packet
+   // check packet header
+   if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
+   if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
+   if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
+   // vorbis_version
+   if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
+   f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
+   if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
+   f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
+   get32(f); // bitrate_maximum
+   get32(f); // bitrate_nominal
+   get32(f); // bitrate_minimum
+   x = get8(f);
+   {
+      int log0,log1;
+      log0 = x & 15;
+      log1 = x >> 4;
+      f->blocksize_0 = 1 << log0;
+      f->blocksize_1 = 1 << log1;
+      if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
+      if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
+      if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
+   }
+
+   // framing_flag
+   x = get8(f);
+   if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
+
+   // second packet!
+   if (!start_page(f))                              return FALSE;
+
+   if (!start_packet(f))                            return FALSE;
+
+   if (!next_segment(f))                            return FALSE;
+
+   if (get8_packet(f) != VORBIS_packet_comment)            return error(f, VORBIS_invalid_setup);
+   for (i=0; i < 6; ++i) header[i] = get8_packet(f);
+   if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
+   //file vendor
+   len = get32_packet(f);
+   f->vendor = (char*)setup_malloc(f, sizeof(char) * (len+1));
+   if (f->vendor == NULL)                           return error(f, VORBIS_outofmem);
+   for(i=0; i < len; ++i) {
+      f->vendor[i] = get8_packet(f);
+   }
+   f->vendor[len] = (char)'\0';
+   //user comments
+   f->comment_list_length = get32_packet(f);
+   f->comment_list = NULL;
+   if (f->comment_list_length > 0)
+   {
+      f->comment_list = (char**) setup_malloc(f, sizeof(char*) * (f->comment_list_length));
+      if (f->comment_list == NULL)                  return error(f, VORBIS_outofmem);
+   }
+
+   for(i=0; i < f->comment_list_length; ++i) {
+      len = get32_packet(f);
+      f->comment_list[i] = (char*)setup_malloc(f, sizeof(char) * (len+1));
+      if (f->comment_list[i] == NULL)               return error(f, VORBIS_outofmem);
+
+      for(j=0; j < len; ++j) {
+         f->comment_list[i][j] = get8_packet(f);
+      }
+      f->comment_list[i][len] = (char)'\0';
+   }
+
+   // framing_flag
+   x = get8_packet(f);
+   if (!(x & 1))                                    return error(f, VORBIS_invalid_setup);
+
+
+   skip(f, f->bytes_in_seg);
+   f->bytes_in_seg = 0;
+
+   do {
+      len = next_segment(f);
+      skip(f, len);
+      f->bytes_in_seg = 0;
+   } while (len);
+
+   // third packet!
+   if (!start_packet(f))                            return FALSE;
+
+   #ifndef STB_VORBIS_NO_PUSHDATA_API
+   if (IS_PUSH_MODE(f)) {
+      if (!is_whole_packet_present(f)) {
+         // convert error in ogg header to write type
+         if (f->error == VORBIS_invalid_stream)
+            f->error = VORBIS_invalid_setup;
+         return FALSE;
+      }
+   }
+   #endif
+
+   crc32_init(); // always init it, to avoid multithread race conditions
+
+   if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
+   for (i=0; i < 6; ++i) header[i] = get8_packet(f);
+   if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
+
+   // codebooks
+
+   f->codebook_count = get_bits(f,8) + 1;
+   f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
+   if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
+   memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
+   for (i=0; i < f->codebook_count; ++i) {
+      uint32 *values;
+      int ordered, sorted_count;
+      int total=0;
+      uint8 *lengths;
+      Codebook *c = f->codebooks+i;
+      CHECK(f);
+      x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
+      x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
+      x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
+      x = get_bits(f, 8);
+      c->dimensions = (get_bits(f, 8)<<8) + x;
+      x = get_bits(f, 8);
+      y = get_bits(f, 8);
+      c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
+      ordered = get_bits(f,1);
+      c->sparse = ordered ? 0 : get_bits(f,1);
+
+      if (c->dimensions == 0 && c->entries != 0)    return error(f, VORBIS_invalid_setup);
+
+      if (c->sparse)
+         lengths = (uint8 *) setup_temp_malloc(f, c->entries);
+      else
+         lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
+
+      if (!lengths) return error(f, VORBIS_outofmem);
+
+      if (ordered) {
+         int current_entry = 0;
+         int current_length = get_bits(f,5) + 1;
+         while (current_entry < c->entries) {
+            int limit = c->entries - current_entry;
+            int n = get_bits(f, ilog(limit));
+            if (current_length >= 32) return error(f, VORBIS_invalid_setup);
+            if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
+            memset(lengths + current_entry, current_length, n);
+            current_entry += n;
+            ++current_length;
+         }
+      } else {
+         for (j=0; j < c->entries; ++j) {
+            int present = c->sparse ? get_bits(f,1) : 1;
+            if (present) {
+               lengths[j] = get_bits(f, 5) + 1;
+               ++total;
+               if (lengths[j] == 32)
+                  return error(f, VORBIS_invalid_setup);
+            } else {
+               lengths[j] = NO_CODE;
+            }
+         }
+      }
+
+      if (c->sparse && total >= c->entries >> 2) {
+         // convert sparse items to non-sparse!
+         if (c->entries > (int) f->setup_temp_memory_required)
+            f->setup_temp_memory_required = c->entries;
+
+         c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
+         if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
+         memcpy(c->codeword_lengths, lengths, c->entries);
+         setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
+         lengths = c->codeword_lengths;
+         c->sparse = 0;
+      }
+
+      // compute the size of the sorted tables
+      if (c->sparse) {
+         sorted_count = total;
+      } else {
+         sorted_count = 0;
+         #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
+         for (j=0; j < c->entries; ++j)
+            if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
+               ++sorted_count;
+         #endif
+      }
+
+      c->sorted_entries = sorted_count;
+      values = NULL;
+
+      CHECK(f);
+      if (!c->sparse) {
+         c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
+         if (!c->codewords)                  return error(f, VORBIS_outofmem);
+      } else {
+         unsigned int size;
+         if (c->sorted_entries) {
+            c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
+            if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
+            c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
+            if (!c->codewords)                  return error(f, VORBIS_outofmem);
+            values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
+            if (!values)                        return error(f, VORBIS_outofmem);
+         }
+         size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
+         if (size > f->setup_temp_memory_required)
+            f->setup_temp_memory_required = size;
+      }
+
+      if (!compute_codewords(c, lengths, c->entries, values)) {
+         if (c->sparse) setup_temp_free(f, values, 0);
+         return error(f, VORBIS_invalid_setup);
+      }
+
+      if (c->sorted_entries) {
+         // allocate an extra slot for sentinels
+         c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
+         if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
+         // allocate an extra slot at the front so that c->sorted_values[-1] is defined
+         // so that we can catch that case without an extra if
+         c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
+         if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
+         ++c->sorted_values;
+         c->sorted_values[-1] = -1;
+         compute_sorted_huffman(c, lengths, values);
+      }
+
+      if (c->sparse) {
+         setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
+         setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
+         setup_temp_free(f, lengths, c->entries);
+         c->codewords = NULL;
+      }
+
+      compute_accelerated_huffman(c);
+
+      CHECK(f);
+      c->lookup_type = get_bits(f, 4);
+      if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
+      if (c->lookup_type > 0) {
+         uint16 *mults;
+         c->minimum_value = float32_unpack(get_bits(f, 32));
+         c->delta_value = float32_unpack(get_bits(f, 32));
+         c->value_bits = get_bits(f, 4)+1;
+         c->sequence_p = get_bits(f,1);
+         if (c->lookup_type == 1) {
+            int values = lookup1_values(c->entries, c->dimensions);
+            if (values < 0) return error(f, VORBIS_invalid_setup);
+            c->lookup_values = (uint32) values;
+         } else {
+            c->lookup_values = c->entries * c->dimensions;
+         }
+         if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
+         mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
+         if (mults == NULL) return error(f, VORBIS_outofmem);
+         for (j=0; j < (int) c->lookup_values; ++j) {
+            int q = get_bits(f, c->value_bits);
+            if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
+            mults[j] = q;
+         }
+
+#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
+         if (c->lookup_type == 1) {
+            int len, sparse = c->sparse;
+            float last=0;
+            // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
+            if (sparse) {
+               if (c->sorted_entries == 0) goto skip;
+               c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
+            } else
+               c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
+            if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
+            len = sparse ? c->sorted_entries : c->entries;
+            for (j=0; j < len; ++j) {
+               unsigned int z = sparse ? c->sorted_values[j] : j;
+               unsigned int div=1;
+               for (k=0; k < c->dimensions; ++k) {
+                  int off = (z / div) % c->lookup_values;
+                  float val = mults[off]*c->delta_value + c->minimum_value + last;
+                  c->multiplicands[j*c->dimensions + k] = val;
+                  if (c->sequence_p)
+                     last = val;
+                  if (k+1 < c->dimensions) {
+                     if (div > UINT_MAX / (unsigned int) c->lookup_values) {
+                        setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
+                        return error(f, VORBIS_invalid_setup);
+                     }
+                     div *= c->lookup_values;
+                  }
+               }
+            }
+            c->lookup_type = 2;
+         }
+         else
+#endif
+         {
+            float last=0;
+            CHECK(f);
+            c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
+            if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
+            for (j=0; j < (int) c->lookup_values; ++j) {
+               float val = mults[j] * c->delta_value + c->minimum_value + last;
+               c->multiplicands[j] = val;
+               if (c->sequence_p)
+                  last = val;
+            }
+         }
+#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
+        skip:;
+#endif
+         setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
+
+         CHECK(f);
+      }
+      CHECK(f);
+   }
+
+   // time domain transfers (notused)
+
+   x = get_bits(f, 6) + 1;
+   for (i=0; i < x; ++i) {
+      uint32 z = get_bits(f, 16);
+      if (z != 0) return error(f, VORBIS_invalid_setup);
+   }
+
+   // Floors
+   f->floor_count = get_bits(f, 6)+1;
+   f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
+   if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
+   for (i=0; i < f->floor_count; ++i) {
+      f->floor_types[i] = get_bits(f, 16);
+      if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
+      if (f->floor_types[i] == 0) {
+         Floor0 *g = &f->floor_config[i].floor0;
+         g->order = get_bits(f,8);
+         g->rate = get_bits(f,16);
+         g->bark_map_size = get_bits(f,16);
+         g->amplitude_bits = get_bits(f,6);
+         g->amplitude_offset = get_bits(f,8);
+         g->number_of_books = get_bits(f,4) + 1;
+         for (j=0; j < g->number_of_books; ++j)
+            g->book_list[j] = get_bits(f,8);
+         return error(f, VORBIS_feature_not_supported);
+      } else {
+         stbv__floor_ordering p[31*8+2];
+         Floor1 *g = &f->floor_config[i].floor1;
+         int max_class = -1;
+         g->partitions = get_bits(f, 5);
+         for (j=0; j < g->partitions; ++j) {
+            g->partition_class_list[j] = get_bits(f, 4);
+            if (g->partition_class_list[j] > max_class)
+               max_class = g->partition_class_list[j];
+         }
+         for (j=0; j <= max_class; ++j) {
+            g->class_dimensions[j] = get_bits(f, 3)+1;
+            g->class_subclasses[j] = get_bits(f, 2);
+            if (g->class_subclasses[j]) {
+               g->class_masterbooks[j] = get_bits(f, 8);
+               if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+            }
+            for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
+               g->subclass_books[j][k] = (int16)get_bits(f,8)-1;
+               if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+            }
+         }
+         g->floor1_multiplier = get_bits(f,2)+1;
+         g->rangebits = get_bits(f,4);
+         g->Xlist[0] = 0;
+         g->Xlist[1] = 1 << g->rangebits;
+         g->values = 2;
+         for (j=0; j < g->partitions; ++j) {
+            int c = g->partition_class_list[j];
+            for (k=0; k < g->class_dimensions[c]; ++k) {
+               g->Xlist[g->values] = get_bits(f, g->rangebits);
+               ++g->values;
+            }
+         }
+         // precompute the sorting
+         for (j=0; j < g->values; ++j) {
+            p[j].x = g->Xlist[j];
+            p[j].id = j;
+         }
+         qsort(p, g->values, sizeof(p[0]), point_compare);
+         for (j=0; j < g->values-1; ++j)
+            if (p[j].x == p[j+1].x)
+               return error(f, VORBIS_invalid_setup);
+         for (j=0; j < g->values; ++j)
+            g->sorted_order[j] = (uint8) p[j].id;
+         // precompute the neighbors
+         for (j=2; j < g->values; ++j) {
+            int low = 0,hi = 0;
+            neighbors(g->Xlist, j, &low,&hi);
+            g->neighbors[j][0] = low;
+            g->neighbors[j][1] = hi;
+         }
+
+         if (g->values > longest_floorlist)
+            longest_floorlist = g->values;
+      }
+   }
+
+   // Residue
+   f->residue_count = get_bits(f, 6)+1;
+   f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
+   if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
+   memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
+   for (i=0; i < f->residue_count; ++i) {
+      uint8 residue_cascade[64];
+      Residue *r = f->residue_config+i;
+      f->residue_types[i] = get_bits(f, 16);
+      if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
+      r->begin = get_bits(f, 24);
+      r->end = get_bits(f, 24);
+      if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
+      r->part_size = get_bits(f,24)+1;
+      r->classifications = get_bits(f,6)+1;
+      r->classbook = get_bits(f,8);
+      if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+      for (j=0; j < r->classifications; ++j) {
+         uint8 high_bits=0;
+         uint8 low_bits=get_bits(f,3);
+         if (get_bits(f,1))
+            high_bits = get_bits(f,5);
+         residue_cascade[j] = high_bits*8 + low_bits;
+      }
+      r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
+      if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
+      for (j=0; j < r->classifications; ++j) {
+         for (k=0; k < 8; ++k) {
+            if (residue_cascade[j] & (1 << k)) {
+               r->residue_books[j][k] = get_bits(f, 8);
+               if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
+            } else {
+               r->residue_books[j][k] = -1;
+            }
+         }
+      }
+      // precompute the classifications[] array to avoid inner-loop mod/divide
+      // call it 'classdata' since we already have r->classifications
+      r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
+      if (!r->classdata) return error(f, VORBIS_outofmem);
+      memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
+      for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
+         int classwords = f->codebooks[r->classbook].dimensions;
+         int temp = j;
+         r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
+         if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
+         for (k=classwords-1; k >= 0; --k) {
+            r->classdata[j][k] = temp % r->classifications;
+            temp /= r->classifications;
+         }
+      }
+   }
+
+   f->mapping_count = get_bits(f,6)+1;
+   f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
+   if (f->mapping == NULL) return error(f, VORBIS_outofmem);
+   memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
+   for (i=0; i < f->mapping_count; ++i) {
+      Mapping *m = f->mapping + i;
+      int mapping_type = get_bits(f,16);
+      if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
+      m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
+      if (m->chan == NULL) return error(f, VORBIS_outofmem);
+      if (get_bits(f,1))
+         m->submaps = get_bits(f,4)+1;
+      else
+         m->submaps = 1;
+      if (m->submaps > max_submaps)
+         max_submaps = m->submaps;
+      if (get_bits(f,1)) {
+         m->coupling_steps = get_bits(f,8)+1;
+         if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
+         for (k=0; k < m->coupling_steps; ++k) {
+            m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
+            m->chan[k].angle = get_bits(f, ilog(f->channels-1));
+            if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
+            if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
+            if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
+         }
+      } else
+         m->coupling_steps = 0;
+
+      // reserved field
+      if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
+      if (m->submaps > 1) {
+         for (j=0; j < f->channels; ++j) {
+            m->chan[j].mux = get_bits(f, 4);
+            if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
+         }
+      } else
+         // @SPECIFICATION: this case is missing from the spec
+         for (j=0; j < f->channels; ++j)
+            m->chan[j].mux = 0;
+
+      for (j=0; j < m->submaps; ++j) {
+         get_bits(f,8); // discard
+         m->submap_floor[j] = get_bits(f,8);
+         m->submap_residue[j] = get_bits(f,8);
+         if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
+         if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
+      }
+   }
+
+   // Modes
+   f->mode_count = get_bits(f, 6)+1;
+   for (i=0; i < f->mode_count; ++i) {
+      Mode *m = f->mode_config+i;
+      m->blockflag = get_bits(f,1);
+      m->windowtype = get_bits(f,16);
+      m->transformtype = get_bits(f,16);
+      m->mapping = get_bits(f,8);
+      if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
+      if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
+      if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
+   }
+
+   flush_packet(f);
+
+   f->previous_length = 0;
+
+   for (i=0; i < f->channels; ++i) {
+      f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
+      f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
+      f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
+      if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
+      memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
+      #ifdef STB_VORBIS_NO_DEFER_FLOOR
+      f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
+      if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
+      #endif
+   }
+
+   if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
+   if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
+   f->blocksize[0] = f->blocksize_0;
+   f->blocksize[1] = f->blocksize_1;
+
+#ifdef STB_VORBIS_DIVIDE_TABLE
+   if (integer_divide_table[1][1]==0)
+      for (i=0; i < DIVTAB_NUMER; ++i)
+         for (j=1; j < DIVTAB_DENOM; ++j)
+            integer_divide_table[i][j] = i / j;
+#endif
+
+   // compute how much temporary memory is needed
+
+   // 1.
+   {
+      uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
+      uint32 classify_mem;
+      int i,max_part_read=0;
+      for (i=0; i < f->residue_count; ++i) {
+         Residue *r = f->residue_config + i;
+         unsigned int actual_size = f->blocksize_1 / 2;
+         unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
+         unsigned int limit_r_end   = r->end   < actual_size ? r->end   : actual_size;
+         int n_read = limit_r_end - limit_r_begin;
+         int part_read = n_read / r->part_size;
+         if (part_read > max_part_read)
+            max_part_read = part_read;
+      }
+      #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
+      classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
+      #else
+      classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
+      #endif
+
+      // maximum reasonable partition size is f->blocksize_1
+
+      f->temp_memory_required = classify_mem;
+      if (imdct_mem > f->temp_memory_required)
+         f->temp_memory_required = imdct_mem;
+   }
+
+
+   if (f->alloc.alloc_buffer) {
+      assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
+      // check if there's enough temp memory so we don't error later
+      if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
+         return error(f, VORBIS_outofmem);
+   }
+
+   // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
+   // without PAGEFLAG_continued_packet, so this either points to the first page, or
+   // the page after the end of the headers. It might be cleaner to point to a page
+   // in the middle of the headers, when that's the page where the first audio packet
+   // starts, but we'd have to also correctly skip the end of any continued packet in
+   // stb_vorbis_seek_start.
+   if (f->next_seg == -1) {
+      f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
+   } else {
+      f->first_audio_page_offset = 0;
+   }
+
+   return TRUE;
+}
+
+static void vorbis_deinit(stb_vorbis *p)
+{
+   int i,j;
+
+   setup_free(p, p->vendor);
+   for (i=0; i < p->comment_list_length; ++i) {
+      setup_free(p, p->comment_list[i]);
+   }
+   setup_free(p, p->comment_list);
+
+   if (p->residue_config) {
+      for (i=0; i < p->residue_count; ++i) {
+         Residue *r = p->residue_config+i;
+         if (r->classdata) {
+            for (j=0; j < p->codebooks[r->classbook].entries; ++j)
+               setup_free(p, r->classdata[j]);
+            setup_free(p, r->classdata);
+         }
+         setup_free(p, r->residue_books);
+      }
+   }
+
+   if (p->codebooks) {
+      CHECK(p);
+      for (i=0; i < p->codebook_count; ++i) {
+         Codebook *c = p->codebooks + i;
+         setup_free(p, c->codeword_lengths);
+         setup_free(p, c->multiplicands);
+         setup_free(p, c->codewords);
+         setup_free(p, c->sorted_codewords);
+         // c->sorted_values[-1] is the first entry in the array
+         setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
+      }
+      setup_free(p, p->codebooks);
+   }
+   setup_free(p, p->floor_config);
+   setup_free(p, p->residue_config);
+   if (p->mapping) {
+      for (i=0; i < p->mapping_count; ++i)
+         setup_free(p, p->mapping[i].chan);
+      setup_free(p, p->mapping);
+   }
+   CHECK(p);
+   for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
+      setup_free(p, p->channel_buffers[i]);
+      setup_free(p, p->previous_window[i]);
+      #ifdef STB_VORBIS_NO_DEFER_FLOOR
+      setup_free(p, p->floor_buffers[i]);
+      #endif
+      setup_free(p, p->finalY[i]);
+   }
+   for (i=0; i < 2; ++i) {
+      setup_free(p, p->A[i]);
+      setup_free(p, p->B[i]);
+      setup_free(p, p->C[i]);
+      setup_free(p, p->window[i]);
+      setup_free(p, p->bit_reverse[i]);
+   }
+   #ifndef STB_VORBIS_NO_STDIO
+   if (p->close_on_free) fclose(p->f);
+   #endif
+}
+
+void stb_vorbis_close(stb_vorbis *p)
+{
+   if (p == NULL) return;
+   vorbis_deinit(p);
+   setup_free(p,p);
+}
+
+static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
+{
+   memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
+   if (z) {
+      p->alloc = *z;
+      p->alloc.alloc_buffer_length_in_bytes &= ~7;
+      p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
+   }
+   p->eof = 0;
+   p->error = VORBIS__no_error;
+   p->stream = NULL;
+   p->codebooks = NULL;
+   p->page_crc_tests = -1;
+   #ifndef STB_VORBIS_NO_STDIO
+   p->close_on_free = FALSE;
+   p->f = NULL;
+   #endif
+}
+
+int stb_vorbis_get_sample_offset(stb_vorbis *f)
+{
+   if (f->current_loc_valid)
+      return f->current_loc;
+   else
+      return -1;
+}
+
+stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
+{
+   stb_vorbis_info d;
+   d.channels = f->channels;
+   d.sample_rate = f->sample_rate;
+   d.setup_memory_required = f->setup_memory_required;
+   d.setup_temp_memory_required = f->setup_temp_memory_required;
+   d.temp_memory_required = f->temp_memory_required;
+   d.max_frame_size = f->blocksize_1 >> 1;
+   return d;
+}
+
+stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
+{
+   stb_vorbis_comment d;
+   d.vendor = f->vendor;
+   d.comment_list_length = f->comment_list_length;
+   d.comment_list = f->comment_list;
+   return d;
+}
+
+int stb_vorbis_get_error(stb_vorbis *f)
+{
+   int e = f->error;
+   f->error = VORBIS__no_error;
+   return e;
+}
+
+static stb_vorbis * vorbis_alloc(stb_vorbis *f)
+{
+   stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
+   return p;
+}
+
+#ifndef STB_VORBIS_NO_PUSHDATA_API
+
+void stb_vorbis_flush_pushdata(stb_vorbis *f)
+{
+   f->previous_length = 0;
+   f->page_crc_tests  = 0;
+   f->discard_samples_deferred = 0;
+   f->current_loc_valid = FALSE;
+   f->first_decode = FALSE;
+   f->samples_output = 0;
+   f->channel_buffer_start = 0;
+   f->channel_buffer_end = 0;
+}
+
+static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
+{
+   int i,n;
+   for (i=0; i < f->page_crc_tests; ++i)
+      f->scan[i].bytes_done = 0;
+
+   // if we have room for more scans, search for them first, because
+   // they may cause us to stop early if their header is incomplete
+   if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
+      if (data_len < 4) return 0;
+      data_len -= 3; // need to look for 4-byte sequence, so don't miss
+                     // one that straddles a boundary
+      for (i=0; i < data_len; ++i) {
+         if (data[i] == 0x4f) {
+            if (0==memcmp(data+i, ogg_page_header, 4)) {
+               int j,len;
+               uint32 crc;
+               // make sure we have the whole page header
+               if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
+                  // only read up to this page start, so hopefully we'll
+                  // have the whole page header start next time
+                  data_len = i;
+                  break;
+               }
+               // ok, we have it all; compute the length of the page
+               len = 27 + data[i+26];
+               for (j=0; j < data[i+26]; ++j)
+                  len += data[i+27+j];
+               // scan everything up to the embedded crc (which we must 0)
+               crc = 0;
+               for (j=0; j < 22; ++j)
+                  crc = crc32_update(crc, data[i+j]);
+               // now process 4 0-bytes
+               for (   ; j < 26; ++j)
+                  crc = crc32_update(crc, 0);
+               // len is the total number of bytes we need to scan
+               n = f->page_crc_tests++;
+               f->scan[n].bytes_left = len-j;
+               f->scan[n].crc_so_far = crc;
+               f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
+               // if the last frame on a page is continued to the next, then
+               // we can't recover the sample_loc immediately
+               if (data[i+27+data[i+26]-1] == 255)
+                  f->scan[n].sample_loc = ~0;
+               else
+                  f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
+               f->scan[n].bytes_done = i+j;
+               if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
+                  break;
+               // keep going if we still have room for more
+            }
+         }
+      }
+   }
+
+   for (i=0; i < f->page_crc_tests;) {
+      uint32 crc;
+      int j;
+      int n = f->scan[i].bytes_done;
+      int m = f->scan[i].bytes_left;
+      if (m > data_len - n) m = data_len - n;
+      // m is the bytes to scan in the current chunk
+      crc = f->scan[i].crc_so_far;
+      for (j=0; j < m; ++j)
+         crc = crc32_update(crc, data[n+j]);
+      f->scan[i].bytes_left -= m;
+      f->scan[i].crc_so_far = crc;
+      if (f->scan[i].bytes_left == 0) {
+         // does it match?
+         if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
+            // Houston, we have page
+            data_len = n+m; // consumption amount is wherever that scan ended
+            f->page_crc_tests = -1; // drop out of page scan mode
+            f->previous_length = 0; // decode-but-don't-output one frame
+            f->next_seg = -1;       // start a new page
+            f->current_loc = f->scan[i].sample_loc; // set the current sample location
+                                    // to the amount we'd have decoded had we decoded this page
+            f->current_loc_valid = f->current_loc != ~0U;
+            return data_len;
+         }
+         // delete entry
+         f->scan[i] = f->scan[--f->page_crc_tests];
+      } else {
+         ++i;
+      }
+   }
+
+   return data_len;
+}
+
+// return value: number of bytes we used
+int stb_vorbis_decode_frame_pushdata(
+         stb_vorbis *f,                   // the file we're decoding
+         const uint8 *data, int data_len, // the memory available for decoding
+         int *channels,                   // place to write number of float * buffers
+         float ***output,                 // place to write float ** array of float * buffers
+         int *samples                     // place to write number of output samples
+     )
+{
+   int i;
+   int len,right,left;
+
+   if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
+
+   if (f->page_crc_tests >= 0) {
+      *samples = 0;
+      return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
+   }
+
+   f->stream     = (uint8 *) data;
+   f->stream_end = (uint8 *) data + data_len;
+   f->error      = VORBIS__no_error;
+
+   // check that we have the entire packet in memory
+   if (!is_whole_packet_present(f)) {
+      *samples = 0;
+      return 0;
+   }
+
+   if (!vorbis_decode_packet(f, &len, &left, &right)) {
+      // save the actual error we encountered
+      enum STBVorbisError error = f->error;
+      if (error == VORBIS_bad_packet_type) {
+         // flush and resynch
+         f->error = VORBIS__no_error;
+         while (get8_packet(f) != EOP)
+            if (f->eof) break;
+         *samples = 0;
+         return (int) (f->stream - data);
+      }
+      if (error == VORBIS_continued_packet_flag_invalid) {
+         if (f->previous_length == 0) {
+            // we may be resynching, in which case it's ok to hit one
+            // of these; just discard the packet
+            f->error = VORBIS__no_error;
+            while (get8_packet(f) != EOP)
+               if (f->eof) break;
+            *samples = 0;
+            return (int) (f->stream - data);
+         }
+      }
+      // if we get an error while parsing, what to do?
+      // well, it DEFINITELY won't work to continue from where we are!
+      stb_vorbis_flush_pushdata(f);
+      // restore the error that actually made us bail
+      f->error = error;
+      *samples = 0;
+      return 1;
+   }
+
+   // success!
+   len = vorbis_finish_frame(f, len, left, right);
+   for (i=0; i < f->channels; ++i)
+      f->outputs[i] = f->channel_buffers[i] + left;
+
+   if (channels) *channels = f->channels;
+   *samples = len;
+   *output = f->outputs;
+   return (int) (f->stream - data);
+}
+
+stb_vorbis *stb_vorbis_open_pushdata(
+         const unsigned char *data, int data_len, // the memory available for decoding
+         int *data_used,              // only defined if result is not NULL
+         int *error, const stb_vorbis_alloc *alloc)
+{
+   stb_vorbis *f, p;
+   vorbis_init(&p, alloc);
+   p.stream     = (uint8 *) data;
+   p.stream_end = (uint8 *) data + data_len;
+   p.push_mode  = TRUE;
+   if (!start_decoder(&p)) {
+      if (p.eof)
+         *error = VORBIS_need_more_data;
+      else
+         *error = p.error;
+      vorbis_deinit(&p);
+      return NULL;
+   }
+   f = vorbis_alloc(&p);
+   if (f) {
+      *f = p;
+      *data_used = (int) (f->stream - data);
+      *error = 0;
+      return f;
+   } else {
+      vorbis_deinit(&p);
+      return NULL;
+   }
+}
+#endif // STB_VORBIS_NO_PUSHDATA_API
+
+unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
+{
+   #ifndef STB_VORBIS_NO_PUSHDATA_API
+   if (f->push_mode) return 0;
+   #endif
+   if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
+   #ifndef STB_VORBIS_NO_STDIO
+   return (unsigned int) (ftell(f->f) - f->f_start);
+   #endif
+}
+
+#ifndef STB_VORBIS_NO_PULLDATA_API
+//
+// DATA-PULLING API
+//
+
+static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
+{
+   for(;;) {
+      int n;
+      if (f->eof) return 0;
+      n = get8(f);
+      if (n == 0x4f) { // page header candidate
+         unsigned int retry_loc = stb_vorbis_get_file_offset(f);
+         int i;
+         // check if we're off the end of a file_section stream
+         if (retry_loc - 25 > f->stream_len)
+            return 0;
+         // check the rest of the header
+         for (i=1; i < 4; ++i)
+            if (get8(f) != ogg_page_header[i])
+               break;
+         if (f->eof) return 0;
+         if (i == 4) {
+            uint8 header[27];
+            uint32 i, crc, goal, len;
+            for (i=0; i < 4; ++i)
+               header[i] = ogg_page_header[i];
+            for (; i < 27; ++i)
+               header[i] = get8(f);
+            if (f->eof) return 0;
+            if (header[4] != 0) goto invalid;
+            goal = header[22] + (header[23] << 8) + (header[24]<<16) + ((uint32)header[25]<<24);
+            for (i=22; i < 26; ++i)
+               header[i] = 0;
+            crc = 0;
+            for (i=0; i < 27; ++i)
+               crc = crc32_update(crc, header[i]);
+            len = 0;
+            for (i=0; i < header[26]; ++i) {
+               int s = get8(f);
+               crc = crc32_update(crc, s);
+               len += s;
+            }
+            if (len && f->eof) return 0;
+            for (i=0; i < len; ++i)
+               crc = crc32_update(crc, get8(f));
+            // finished parsing probable page
+            if (crc == goal) {
+               // we could now check that it's either got the last
+               // page flag set, OR it's followed by the capture
+               // pattern, but I guess TECHNICALLY you could have
+               // a file with garbage between each ogg page and recover
+               // from it automatically? So even though that paranoia
+               // might decrease the chance of an invalid decode by
+               // another 2^32, not worth it since it would hose those
+               // invalid-but-useful files?
+               if (end)
+                  *end = stb_vorbis_get_file_offset(f);
+               if (last) {
+                  if (header[5] & 0x04)
+                     *last = 1;
+                  else
+                     *last = 0;
+               }
+               set_file_offset(f, retry_loc-1);
+               return 1;
+            }
+         }
+        invalid:
+         // not a valid page, so rewind and look for next one
+         set_file_offset(f, retry_loc);
+      }
+   }
+}
+
+
+#define SAMPLE_unknown  0xffffffff
+
+// seeking is implemented with a binary search, which narrows down the range to
+// 64K, before using a linear search (because finding the synchronization
+// pattern can be expensive, and the chance we'd find the end page again is
+// relatively high for small ranges)
+//
+// two initial interpolation-style probes are used at the start of the search
+// to try to bound either side of the binary search sensibly, while still
+// working in O(log n) time if they fail.
+
+static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
+{
+   uint8 header[27], lacing[255];
+   int i,len;
+
+   // record where the page starts
+   z->page_start = stb_vorbis_get_file_offset(f);
+
+   // parse the header
+   getn(f, header, 27);
+   if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
+      return 0;
+   getn(f, lacing, header[26]);
+
+   // determine the length of the payload
+   len = 0;
+   for (i=0; i < header[26]; ++i)
+      len += lacing[i];
+
+   // this implies where the page ends
+   z->page_end = z->page_start + 27 + header[26] + len;
+
+   // read the last-decoded sample out of the data
+   z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
+
+   // restore file state to where we were
+   set_file_offset(f, z->page_start);
+   return 1;
+}
+
+// rarely used function to seek back to the preceding page while finding the
+// start of a packet
+static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
+{
+   unsigned int previous_safe, end;
+
+   // now we want to seek back 64K from the limit
+   if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
+      previous_safe = limit_offset - 65536;
+   else
+      previous_safe = f->first_audio_page_offset;
+
+   set_file_offset(f, previous_safe);
+
+   while (vorbis_find_page(f, &end, NULL)) {
+      if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
+         return 1;
+      set_file_offset(f, end);
+   }
+
+   return 0;
+}
+
+// implements the search logic for finding a page and starting decoding. if
+// the function succeeds, current_loc_valid will be true and current_loc will
+// be less than or equal to the provided sample number (the closer the
+// better).
+static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
+{
+   ProbedPage left, right, mid;
+   int i, start_seg_with_known_loc, end_pos, page_start;
+   uint32 delta, stream_length, padding, last_sample_limit;
+   double offset = 0.0, bytes_per_sample = 0.0;
+   int probe = 0;
+
+   // find the last page and validate the target sample
+   stream_length = stb_vorbis_stream_length_in_samples(f);
+   if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
+   if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
+
+   // this is the maximum difference between the window-center (which is the
+   // actual granule position value), and the right-start (which the spec
+   // indicates should be the granule position (give or take one)).
+   padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
+   if (sample_number < padding)
+      last_sample_limit = 0;
+   else
+      last_sample_limit = sample_number - padding;
+
+   left = f->p_first;
+   while (left.last_decoded_sample == ~0U) {
+      // (untested) the first page does not have a 'last_decoded_sample'
+      set_file_offset(f, left.page_end);
+      if (!get_seek_page_info(f, &left)) goto error;
+   }
+
+   right = f->p_last;
+   assert(right.last_decoded_sample != ~0U);
+
+   // starting from the start is handled differently
+   if (last_sample_limit <= left.last_decoded_sample) {
+      if (stb_vorbis_seek_start(f)) {
+         if (f->current_loc > sample_number)
+            return error(f, VORBIS_seek_failed);
+         return 1;
+      }
+      return 0;
+   }
+
+   while (left.page_end != right.page_start) {
+      assert(left.page_end < right.page_start);
+      // search range in bytes
+      delta = right.page_start - left.page_end;
+      if (delta <= 65536) {
+         // there's only 64K left to search - handle it linearly
+         set_file_offset(f, left.page_end);
+      } else {
+         if (probe < 2) {
+            if (probe == 0) {
+               // first probe (interpolate)
+               double data_bytes = right.page_end - left.page_start;
+               bytes_per_sample = data_bytes / right.last_decoded_sample;
+               offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
+            } else {
+               // second probe (try to bound the other side)
+               double error = ((double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
+               if (error >= 0 && error <  8000) error =  8000;
+               if (error <  0 && error > -8000) error = -8000;
+               offset += error * 2;
+            }
+
+            // ensure the offset is valid
+            if (offset < left.page_end)
+               offset = left.page_end;
+            if (offset > right.page_start - 65536)
+               offset = right.page_start - 65536;
+
+            set_file_offset(f, (unsigned int) offset);
+         } else {
+            // binary search for large ranges (offset by 32K to ensure
+            // we don't hit the right page)
+            set_file_offset(f, left.page_end + (delta / 2) - 32768);
+         }
+
+         if (!vorbis_find_page(f, NULL, NULL)) goto error;
+      }
+
+      for (;;) {
+         if (!get_seek_page_info(f, &mid)) goto error;
+         if (mid.last_decoded_sample != ~0U) break;
+         // (untested) no frames end on this page
+         set_file_offset(f, mid.page_end);
+         assert(mid.page_start < right.page_start);
+      }
+
+      // if we've just found the last page again then we're in a tricky file,
+      // and we're close enough (if it wasn't an interpolation probe).
+      if (mid.page_start == right.page_start) {
+         if (probe >= 2 || delta <= 65536)
+            break;
+      } else {
+         if (last_sample_limit < mid.last_decoded_sample)
+            right = mid;
+         else
+            left = mid;
+      }
+
+      ++probe;
+   }
+
+   // seek back to start of the last packet
+   page_start = left.page_start;
+   set_file_offset(f, page_start);
+   if (!start_page(f)) return error(f, VORBIS_seek_failed);
+   end_pos = f->end_seg_with_known_loc;
+   assert(end_pos >= 0);
+
+   for (;;) {
+      for (i = end_pos; i > 0; --i)
+         if (f->segments[i-1] != 255)
+            break;
+
+      start_seg_with_known_loc = i;
+
+      if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
+         break;
+
+      // (untested) the final packet begins on an earlier page
+      if (!go_to_page_before(f, page_start))
+         goto error;
+
+      page_start = stb_vorbis_get_file_offset(f);
+      if (!start_page(f)) goto error;
+      end_pos = f->segment_count - 1;
+   }
+
+   // prepare to start decoding
+   f->current_loc_valid = FALSE;
+   f->last_seg = FALSE;
+   f->valid_bits = 0;
+   f->packet_bytes = 0;
+   f->bytes_in_seg = 0;
+   f->previous_length = 0;
+   f->next_seg = start_seg_with_known_loc;
+
+   for (i = 0; i < start_seg_with_known_loc; i++)
+      skip(f, f->segments[i]);
+
+   // start decoding (optimizable - this frame is generally discarded)
+   if (!vorbis_pump_first_frame(f))
+      return 0;
+   if (f->current_loc > sample_number)
+      return error(f, VORBIS_seek_failed);
+   return 1;
+
+error:
+   // try to restore the file to a valid state
+   stb_vorbis_seek_start(f);
+   return error(f, VORBIS_seek_failed);
+}
+
+// the same as vorbis_decode_initial, but without advancing
+static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
+{
+   int bits_read, bytes_read;
+
+   if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
+      return 0;
+
+   // either 1 or 2 bytes were read, figure out which so we can rewind
+   bits_read = 1 + ilog(f->mode_count-1);
+   if (f->mode_config[*mode].blockflag)
+      bits_read += 2;
+   bytes_read = (bits_read + 7) / 8;
+
+   f->bytes_in_seg += bytes_read;
+   f->packet_bytes -= bytes_read;
+   skip(f, -bytes_read);
+   if (f->next_seg == -1)
+      f->next_seg = f->segment_count - 1;
+   else
+      f->next_seg--;
+   f->valid_bits = 0;
+
+   return 1;
+}
+
+int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
+{
+   uint32 max_frame_samples;
+
+   if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
+
+   // fast page-level search
+   if (!seek_to_sample_coarse(f, sample_number))
+      return 0;
+
+   assert(f->current_loc_valid);
+   assert(f->current_loc <= sample_number);
+
+   // linear search for the relevant packet
+   max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
+   while (f->current_loc < sample_number) {
+      int left_start, left_end, right_start, right_end, mode, frame_samples;
+      if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
+         return error(f, VORBIS_seek_failed);
+      // calculate the number of samples returned by the next frame
+      frame_samples = right_start - left_start;
+      if (f->current_loc + frame_samples > sample_number) {
+         return 1; // the next frame will contain the sample
+      } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
+         // there's a chance the frame after this could contain the sample
+         vorbis_pump_first_frame(f);
+      } else {
+         // this frame is too early to be relevant
+         f->current_loc += frame_samples;
+         f->previous_length = 0;
+         maybe_start_packet(f);
+         flush_packet(f);
+      }
+   }
+   // the next frame should start with the sample
+   if (f->current_loc != sample_number) return error(f, VORBIS_seek_failed);
+   return 1;
+}
+
+int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
+{
+   if (!stb_vorbis_seek_frame(f, sample_number))
+      return 0;
+
+   if (sample_number != f->current_loc) {
+      int n;
+      uint32 frame_start = f->current_loc;
+      stb_vorbis_get_frame_float(f, &n, NULL);
+      assert(sample_number > frame_start);
+      assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
+      f->channel_buffer_start += (sample_number - frame_start);
+   }
+
+   return 1;
+}
+
+int stb_vorbis_seek_start(stb_vorbis *f)
+{
+   if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
+   set_file_offset(f, f->first_audio_page_offset);
+   f->previous_length = 0;
+   f->first_decode = TRUE;
+   f->next_seg = -1;
+   return vorbis_pump_first_frame(f);
+}
+
+unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
+{
+   unsigned int restore_offset, previous_safe;
+   unsigned int end, last_page_loc;
+
+   if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
+   if (!f->total_samples) {
+      unsigned int last;
+      uint32 lo,hi;
+      char header[6];
+
+      // first, store the current decode position so we can restore it
+      restore_offset = stb_vorbis_get_file_offset(f);
+
+      // now we want to seek back 64K from the end (the last page must
+      // be at most a little less than 64K, but let's allow a little slop)
+      if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
+         previous_safe = f->stream_len - 65536;
+      else
+         previous_safe = f->first_audio_page_offset;
+
+      set_file_offset(f, previous_safe);
+      // previous_safe is now our candidate 'earliest known place that seeking
+      // to will lead to the final page'
+
+      if (!vorbis_find_page(f, &end, &last)) {
+         // if we can't find a page, we're hosed!
+         f->error = VORBIS_cant_find_last_page;
+         f->total_samples = 0xffffffff;
+         goto done;
+      }
+
+      // check if there are more pages
+      last_page_loc = stb_vorbis_get_file_offset(f);
+
+      // stop when the last_page flag is set, not when we reach eof;
+      // this allows us to stop short of a 'file_section' end without
+      // explicitly checking the length of the section
+      while (!last) {
+         set_file_offset(f, end);
+         if (!vorbis_find_page(f, &end, &last)) {
+            // the last page we found didn't have the 'last page' flag
+            // set. whoops!
+            break;
+         }
+         //previous_safe = last_page_loc+1; // NOTE: not used after this point, but note for debugging
+         last_page_loc = stb_vorbis_get_file_offset(f);
+      }
+
+      set_file_offset(f, last_page_loc);
+
+      // parse the header
+      getn(f, (unsigned char *)header, 6);
+      // extract the absolute granule position
+      lo = get32(f);
+      hi = get32(f);
+      if (lo == 0xffffffff && hi == 0xffffffff) {
+         f->error = VORBIS_cant_find_last_page;
+         f->total_samples = SAMPLE_unknown;
+         goto done;
+      }
+      if (hi)
+         lo = 0xfffffffe; // saturate
+      f->total_samples = lo;
+
+      f->p_last.page_start = last_page_loc;
+      f->p_last.page_end   = end;
+      f->p_last.last_decoded_sample = lo;
+
+     done:
+      set_file_offset(f, restore_offset);
+   }
+   return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
+}
+
+float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
+{
+   return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
+}
+
+
+
+int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
+{
+   int len, right,left,i;
+   if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
+
+   if (!vorbis_decode_packet(f, &len, &left, &right)) {
+      f->channel_buffer_start = f->channel_buffer_end = 0;
+      return 0;
+   }
+
+   len = vorbis_finish_frame(f, len, left, right);
+   for (i=0; i < f->channels; ++i)
+      f->outputs[i] = f->channel_buffers[i] + left;
+
+   f->channel_buffer_start = left;
+   f->channel_buffer_end   = left+len;
+
+   if (channels) *channels = f->channels;
+   if (output)   *output = f->outputs;
+   return len;
+}
+
+#ifndef STB_VORBIS_NO_STDIO
+
+stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
+{
+   stb_vorbis *f, p;
+   vorbis_init(&p, alloc);
+   p.f = file;
+   p.f_start = (uint32) ftell(file);
+   p.stream_len   = length;
+   p.close_on_free = close_on_free;
+   if (start_decoder(&p)) {
+      f = vorbis_alloc(&p);
+      if (f) {
+         *f = p;
+         vorbis_pump_first_frame(f);
+         return f;
+      }
+   }
+   if (error) *error = p.error;
+   vorbis_deinit(&p);
+   return NULL;
+}
+
+stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
+{
+   unsigned int len, start;
+   start = (unsigned int) ftell(file);
+   fseek(file, 0, SEEK_END);
+   len = (unsigned int) (ftell(file) - start);
+   fseek(file, start, SEEK_SET);
+   return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
+}
+
+stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
+{
+   FILE *f;
+#if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
+   if (0 != fopen_s(&f, filename, "rb"))
+      f = NULL;
+#else
+   f = fopen(filename, "rb");
+#endif
+   if (f)
+      return stb_vorbis_open_file(f, TRUE, error, alloc);
+   if (error) *error = VORBIS_file_open_failure;
+   return NULL;
+}
+#endif // STB_VORBIS_NO_STDIO
+
+stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
+{
+   stb_vorbis *f, p;
+   if (!data) {
+      if (error) *error = VORBIS_unexpected_eof;
+      return NULL;
+   }
+   vorbis_init(&p, alloc);
+   p.stream = (uint8 *) data;
+   p.stream_end = (uint8 *) data + len;
+   p.stream_start = (uint8 *) p.stream;
+   p.stream_len = len;
+   p.push_mode = FALSE;
+   if (start_decoder(&p)) {
+      f = vorbis_alloc(&p);
+      if (f) {
+         *f = p;
+         vorbis_pump_first_frame(f);
+         if (error) *error = VORBIS__no_error;
+         return f;
+      }
+   }
+   if (error) *error = p.error;
+   vorbis_deinit(&p);
+   return NULL;
+}
+
+#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
+#define PLAYBACK_MONO     1
+#define PLAYBACK_LEFT     2
+#define PLAYBACK_RIGHT    4
+
+#define L  (PLAYBACK_LEFT  | PLAYBACK_MONO)
+#define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO)
+#define R  (PLAYBACK_RIGHT | PLAYBACK_MONO)
+
+static int8 channel_position[7][6] =
+{
+   { 0 },
+   { C },
+   { L, R },
+   { L, C, R },
+   { L, R, L, R },
+   { L, C, R, L, R },
+   { L, C, R, L, R, C },
+};
+
+
+#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
+   typedef union {
+      float f;
+      int i;
+   } float_conv;
+   typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
+   #define FASTDEF(x) float_conv x
+   // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
+   #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
+   #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
+   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
+   #define check_endianness()
+#else
+   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
+   #define check_endianness()
+   #define FASTDEF(x)
+#endif
+
+static void copy_samples(short *dest, float *src, int len)
+{
+   int i;
+   check_endianness();
+   for (i=0; i < len; ++i) {
+      FASTDEF(temp);
+      int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
+      if ((unsigned int) (v + 32768) > 65535)
+         v = v < 0 ? -32768 : 32767;
+      dest[i] = v;
+   }
+}
+
+static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
+{
+   #define STB_BUFFER_SIZE  32
+   float buffer[STB_BUFFER_SIZE];
+   int i,j,o,n = STB_BUFFER_SIZE;
+   check_endianness();
+   for (o = 0; o < len; o += STB_BUFFER_SIZE) {
+      memset(buffer, 0, sizeof(buffer));
+      if (o + n > len) n = len - o;
+      for (j=0; j < num_c; ++j) {
+         if (channel_position[num_c][j] & mask) {
+            for (i=0; i < n; ++i)
+               buffer[i] += data[j][d_offset+o+i];
+         }
+      }
+      for (i=0; i < n; ++i) {
+         FASTDEF(temp);
+         int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
+         if ((unsigned int) (v + 32768) > 65535)
+            v = v < 0 ? -32768 : 32767;
+         output[o+i] = v;
+      }
+   }
+   #undef STB_BUFFER_SIZE
+}
+
+static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
+{
+   #define STB_BUFFER_SIZE  32
+   float buffer[STB_BUFFER_SIZE];
+   int i,j,o,n = STB_BUFFER_SIZE >> 1;
+   // o is the offset in the source data
+   check_endianness();
+   for (o = 0; o < len; o += STB_BUFFER_SIZE >> 1) {
+      // o2 is the offset in the output data
+      int o2 = o << 1;
+      memset(buffer, 0, sizeof(buffer));
+      if (o + n > len) n = len - o;
+      for (j=0; j < num_c; ++j) {
+         int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
+         if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
+            for (i=0; i < n; ++i) {
+               buffer[i*2+0] += data[j][d_offset+o+i];
+               buffer[i*2+1] += data[j][d_offset+o+i];
+            }
+         } else if (m == PLAYBACK_LEFT) {
+            for (i=0; i < n; ++i) {
+               buffer[i*2+0] += data[j][d_offset+o+i];
+            }
+         } else if (m == PLAYBACK_RIGHT) {
+            for (i=0; i < n; ++i) {
+               buffer[i*2+1] += data[j][d_offset+o+i];
+            }
+         }
+      }
+      for (i=0; i < (n<<1); ++i) {
+         FASTDEF(temp);
+         int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
+         if ((unsigned int) (v + 32768) > 65535)
+            v = v < 0 ? -32768 : 32767;
+         output[o2+i] = v;
+      }
+   }
+   #undef STB_BUFFER_SIZE
+}
+
+static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
+{
+   int i;
+   if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
+      static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
+      for (i=0; i < buf_c; ++i)
+         compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
+   } else {
+      int limit = buf_c < data_c ? buf_c : data_c;
+      for (i=0; i < limit; ++i)
+         copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
+      for (   ; i < buf_c; ++i)
+         memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
+   }
+}
+
+int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
+{
+   float **output = NULL;
+   int len = stb_vorbis_get_frame_float(f, NULL, &output);
+   if (len > num_samples) len = num_samples;
+   if (len)
+      convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
+   return len;
+}
+
+static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
+{
+   int i;
+   check_endianness();
+   if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
+      assert(buf_c == 2);
+      for (i=0; i < buf_c; ++i)
+         compute_stereo_samples(buffer, data_c, data, d_offset, len);
+   } else {
+      int limit = buf_c < data_c ? buf_c : data_c;
+      int j;
+      for (j=0; j < len; ++j) {
+         for (i=0; i < limit; ++i) {
+            FASTDEF(temp);
+            float f = data[i][d_offset+j];
+            int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
+            if ((unsigned int) (v + 32768) > 65535)
+               v = v < 0 ? -32768 : 32767;
+            *buffer++ = v;
+         }
+         for (   ; i < buf_c; ++i)
+            *buffer++ = 0;
+      }
+   }
+}
+
+int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
+{
+   float **output;
+   int len;
+   if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
+   len = stb_vorbis_get_frame_float(f, NULL, &output);
+   if (len) {
+      if (len*num_c > num_shorts) len = num_shorts / num_c;
+      convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
+   }
+   return len;
+}
+
+int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
+{
+   float **outputs;
+   int len = num_shorts / channels;
+   int n=0;
+   while (n < len) {
+      int k = f->channel_buffer_end - f->channel_buffer_start;
+      if (n+k >= len) k = len - n;
+      if (k)
+         convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
+      buffer += k*channels;
+      n += k;
+      f->channel_buffer_start += k;
+      if (n == len) break;
+      if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
+   }
+   return n;
+}
+
+int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
+{
+   float **outputs;
+   int n=0;
+   while (n < len) {
+      int k = f->channel_buffer_end - f->channel_buffer_start;
+      if (n+k >= len) k = len - n;
+      if (k)
+         convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
+      n += k;
+      f->channel_buffer_start += k;
+      if (n == len) break;
+      if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
+   }
+   return n;
+}
+
+#ifndef STB_VORBIS_NO_STDIO
+int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
+{
+   int data_len, offset, total, limit, error;
+   short *data;
+   stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
+   if (v == NULL) return -1;
+   limit = v->channels * 4096;
+   *channels = v->channels;
+   if (sample_rate)
+      *sample_rate = v->sample_rate;
+   offset = data_len = 0;
+   total = limit;
+   data = (short *) malloc(total * sizeof(*data));
+   if (data == NULL) {
+      stb_vorbis_close(v);
+      return -2;
+   }
+   for (;;) {
+      int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
+      if (n == 0) break;
+      data_len += n;
+      offset += n * v->channels;
+      if (offset + limit > total) {
+         short *data2;
+         total *= 2;
+         data2 = (short *) realloc(data, total * sizeof(*data));
+         if (data2 == NULL) {
+            free(data);
+            stb_vorbis_close(v);
+            return -2;
+         }
+         data = data2;
+      }
+   }
+   *output = data;
+   stb_vorbis_close(v);
+   return data_len;
+}
+#endif // NO_STDIO
+
+int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
+{
+   int data_len, offset, total, limit, error;
+   short *data;
+   stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
+   if (v == NULL) return -1;
+   limit = v->channels * 4096;
+   *channels = v->channels;
+   if (sample_rate)
+      *sample_rate = v->sample_rate;
+   offset = data_len = 0;
+   total = limit;
+   data = (short *) malloc(total * sizeof(*data));
+   if (data == NULL) {
+      stb_vorbis_close(v);
+      return -2;
+   }
+   for (;;) {
+      int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
+      if (n == 0) break;
+      data_len += n;
+      offset += n * v->channels;
+      if (offset + limit > total) {
+         short *data2;
+         total *= 2;
+         data2 = (short *) realloc(data, total * sizeof(*data));
+         if (data2 == NULL) {
+            free(data);
+            stb_vorbis_close(v);
+            return -2;
+         }
+         data = data2;
+      }
+   }
+   *output = data;
+   stb_vorbis_close(v);
+   return data_len;
+}
+#endif // STB_VORBIS_NO_INTEGER_CONVERSION
+
+int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
+{
+   float **outputs;
+   int len = num_floats / channels;
+   int n=0;
+   int z = f->channels;
+   if (z > channels) z = channels;
+   while (n < len) {
+      int i,j;
+      int k = f->channel_buffer_end - f->channel_buffer_start;
+      if (n+k >= len) k = len - n;
+      for (j=0; j < k; ++j) {
+         for (i=0; i < z; ++i)
+            *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
+         for (   ; i < channels; ++i)
+            *buffer++ = 0;
+      }
+      n += k;
+      f->channel_buffer_start += k;
+      if (n == len)
+         break;
+      if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
+         break;
+   }
+   return n;
+}
+
+int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
+{
+   float **outputs;
+   int n=0;
+   int z = f->channels;
+   if (z > channels) z = channels;
+   while (n < num_samples) {
+      int i;
+      int k = f->channel_buffer_end - f->channel_buffer_start;
+      if (n+k >= num_samples) k = num_samples - n;
+      if (k) {
+         for (i=0; i < z; ++i)
+            memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
+         for (   ; i < channels; ++i)
+            memset(buffer[i]+n, 0, sizeof(float) * k);
+      }
+      n += k;
+      f->channel_buffer_start += k;
+      if (n == num_samples)
+         break;
+      if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
+         break;
+   }
+   return n;
+}
+#endif // STB_VORBIS_NO_PULLDATA_API
+
+/* Version history
+    1.17    - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
+                           found with Mayhem by ForAllSecure
+    1.16    - 2019-03-04 - fix warnings
+    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
+    1.14    - 2018-02-11 - delete bogus dealloca usage
+    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
+    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
+    1.11    - 2017-07-23 - fix MinGW compilation
+    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
+    1.09    - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
+    1.08    - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
+                           avoid discarding last frame of audio data
+    1.07    - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
+                           some more crash fixes when out of memory or with corrupt files
+    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
+                           some crash fixes when out of memory or with corrupt files
+    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
+    1.04    - 2014-08-27 - fix missing const-correct case in API
+    1.03    - 2014-08-07 - Warning fixes
+    1.02    - 2014-07-09 - Declare qsort compare function _cdecl on windows
+    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float
+    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
+                           (API change) report sample rate for decode-full-file funcs
+    0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
+    0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
+    0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
+    0.99993 - remove assert that fired on legal files with empty tables
+    0.99992 - rewind-to-start
+    0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
+    0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
+    0.9998 - add a full-decode function with a memory source
+    0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
+    0.9996 - query length of vorbis stream in samples/seconds
+    0.9995 - bugfix to another optimization that only happened in certain files
+    0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
+    0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
+    0.9992 - performance improvement of IMDCT; now performs close to reference implementation
+    0.9991 - performance improvement of IMDCT
+    0.999 - (should have been 0.9990) performance improvement of IMDCT
+    0.998 - no-CRT support from Casey Muratori
+    0.997 - bugfixes for bugs found by Terje Mathisen
+    0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
+    0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
+    0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
+    0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
+    0.992 - fixes for MinGW warning
+    0.991 - turn fast-float-conversion on by default
+    0.990 - fix push-mode seek recovery if you seek into the headers
+    0.98b - fix to bad release of 0.98
+    0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
+    0.97 - builds under c++ (typecasting, don't use 'class' keyword)
+    0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
+    0.95 - clamping code for 16-bit functions
+    0.94 - not publically released
+    0.93 - fixed all-zero-floor case (was decoding garbage)
+    0.92 - fixed a memory leak
+    0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
+    0.90 - first public release
+*/
+
+#endif // STB_VORBIS_HEADER_ONLY
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/firmware/library/json.hpp b/shared/json.hpp
similarity index 100%
rename from firmware/library/json.hpp
rename to shared/json.hpp
diff --git a/story-editor/src/miniz.c b/shared/miniz.c
similarity index 100%
rename from story-editor/src/miniz.c
rename to shared/miniz.c
diff --git a/story-editor/src/miniz.h b/shared/miniz.h
similarity index 100%
rename from story-editor/src/miniz.h
rename to shared/miniz.h
diff --git a/story-editor/src/platform_folders.cpp b/shared/platform_folders.cpp
similarity index 100%
rename from story-editor/src/platform_folders.cpp
rename to shared/platform_folders.cpp
diff --git a/story-editor/src/platform_folders.h b/shared/platform_folders.h
similarity index 100%
rename from story-editor/src/platform_folders.h
rename to shared/platform_folders.h
diff --git a/shared/qoixx.hpp b/shared/qoixx.hpp
new file mode 100644
index 0000000..c466404
--- /dev/null
+++ b/shared/qoixx.hpp
@@ -0,0 +1,1400 @@
+#ifndef QOIXX_HPP_INCLUDED_
+#define QOIXX_HPP_INCLUDED_
+
+#include<cstdint>
+#include<cstddef>
+#include<cstring>
+#include<vector>
+#include<type_traits>
+#include<memory>
+#include<stdexcept>
+#include<bit>
+#include<numeric>
+#include<array>
+#include<utility>
+
+#ifndef QOIXX_NO_SIMD
+#if defined(__ARM_FEATURE_SVE)
+#include<arm_sve.h>
+#include<arm_neon.h>
+#elif defined(__aarch64__)
+#include<arm_neon.h>
+#elif defined(__AVX2__)
+#include<immintrin.h>
+#endif
+#endif
+
+namespace qoixx{
+
+namespace detail{
+
+template<typename T>
+requires(sizeof(T) == 1 && !std::same_as<T, bool>)
+struct contiguous_puller{
+  static constexpr bool is_contiguous = true;
+  const T* t;
+  inline std::uint8_t pull()noexcept{
+    return static_cast<std::uint8_t>(*t++);
+  }
+  inline const std::uint8_t* raw_pointer()noexcept{
+    return reinterpret_cast<const std::uint8_t*>(t);
+  }
+  inline void advance(std::size_t n)noexcept{
+    t += n;
+  }
+};
+
+template<typename T>
+struct default_container_operator;
+
+template<typename T, typename A>
+requires(sizeof(T) == 1)
+struct default_container_operator<std::vector<T, A>>{
+  using target_type = std::vector<T, A>;
+  static inline target_type construct(std::size_t size){
+    target_type t(size);
+    return t;
+  }
+  struct pusher{
+    static constexpr bool is_contiguous = true;
+    target_type* t;
+    std::size_t i = 0;
+    inline void push(std::uint8_t x)noexcept{
+      (*t)[i++] = static_cast<T>(x);
+    }
+    template<typename U>
+    requires std::unsigned_integral<U> && (sizeof(U) != 1)
+    inline void push(U t)noexcept{
+      this->push(static_cast<std::uint8_t>(t));
+    }
+    inline target_type finalize()noexcept{
+      t->resize(i);
+      return std::move(*t);
+    }
+    inline std::uint8_t* raw_pointer()noexcept{
+      return reinterpret_cast<std::uint8_t*>(t->data())+i;
+    }
+    inline void advance(std::size_t n)noexcept{
+      i += n;
+    }
+  };
+  static constexpr pusher create_pusher(target_type& t)noexcept{
+    return {&t};
+  }
+  using puller = contiguous_puller<T>;
+  static constexpr puller create_puller(const target_type& t)noexcept{
+    return {t.data()};
+  }
+  static inline std::size_t size(const target_type& t)noexcept{
+    return t.size();
+  }
+  static constexpr bool valid(const target_type& t)noexcept{
+    return t.capacity() != 0;
+  }
+};
+
+template<typename T>
+requires(sizeof(T) == 1)
+struct default_container_operator<std::pair<std::unique_ptr<T[]>, std::size_t>>{
+  using target_type = std::pair<std::unique_ptr<T[]>, std::size_t>;
+  static inline target_type construct(std::size_t size){
+    return {typename target_type::first_type{static_cast<T*>(::operator new[](size))}, 0};
+  }
+  struct pusher{
+    static constexpr bool is_contiguous = true;
+    target_type* t;
+    inline void push(std::uint8_t x)noexcept{
+      t->first[t->second++] = static_cast<T>(x);
+    }
+    template<typename U>
+    requires std::unsigned_integral<U> && (sizeof(U) != 1)
+    inline void push(U t)noexcept{
+      this->push(static_cast<std::uint8_t>(t));
+    }
+    inline target_type finalize()noexcept{
+      return std::move(*t);
+    }
+    inline std::uint8_t* raw_pointer()noexcept{
+      return reinterpret_cast<std::uint8_t*>(t->first.get())+t->second;
+    }
+    inline void advance(std::size_t n)noexcept{
+      t->second += n;
+    }
+  };
+  static constexpr pusher create_pusher(target_type& t)noexcept{
+    return {&t};
+  }
+  using puller = contiguous_puller<T>;
+  static constexpr puller create_puller(const target_type& t)noexcept{
+    return {t.first.get()};
+  }
+  static inline std::size_t size(const target_type& t)noexcept{
+    return t.second;
+  }
+  static constexpr bool valid(const target_type& t)noexcept{
+    return t.first != nullptr;
+  }
+};
+
+template<typename T>
+requires(sizeof(T) == 1)
+struct default_container_operator<std::pair<T*, std::size_t>>{
+  using target_type = std::pair<T*, std::size_t>;
+  using puller = contiguous_puller<T>;
+  static constexpr puller create_puller(const target_type& t)noexcept{
+    return {t.first};
+  }
+  static inline std::size_t size(const target_type& t)noexcept{
+    return t.second;
+  }
+  static inline bool valid(const target_type& t)noexcept{
+    return t.first != nullptr;
+  }
+};
+
+}
+
+template<typename T>
+struct container_operator : detail::default_container_operator<T>{};
+
+class qoi{
+  template<std::size_t Size>
+  static inline void efficient_memcpy(void* dst, const void* src){
+    if constexpr(Size == 3){
+      std::memcpy(dst, src, 2);
+      std::memcpy(static_cast<std::byte*>(dst)+2, static_cast<const std::byte*>(src)+2, 1);
+    }
+    else
+      std::memcpy(dst, src, Size);
+  }
+  template<std::size_t Size, typename T>
+  static inline void push(T& dst, const void* src){
+    if constexpr(T::is_contiguous){
+      auto*const ptr = dst.raw_pointer();
+      dst.advance(Size);
+      efficient_memcpy<Size>(ptr, src);
+    }
+    else{
+      const auto* ptr = static_cast<const std::uint8_t*>(src);
+      auto size = Size;
+      while(size --> 0)
+        dst.push(*ptr++);
+    }
+  }
+  template<std::size_t Size, typename T>
+  static inline void pull(void* dst, T& src){
+    if constexpr(T::is_contiguous){
+      const auto*const ptr = src.raw_pointer();
+      src.advance(Size);
+      efficient_memcpy<Size>(dst, ptr);
+    }
+    else{
+      auto* ptr = static_cast<std::uint8_t*>(dst);
+      auto size = Size;
+      while(size --> 0)
+        *ptr++ = src.pull();
+    }
+  }
+  enum chunk_tag : std::uint32_t{
+    index = 0b0000'0000u,
+    diff  = 0b0100'0000u,
+    luma  = 0b1000'0000u,
+    run   = 0b1100'0000u,
+    rgb   = 0b1111'1110u,
+    rgba  = 0b1111'1111u,
+  };
+  static constexpr std::size_t index_size = 64u;
+ public:
+  enum class colorspace : std::uint8_t{
+    srgb = 0,
+    linear = 1,
+  };
+  struct desc{
+    std::uint32_t width;
+    std::uint32_t height;
+    std::uint8_t channels;
+    qoi::colorspace colorspace;
+    constexpr bool operator==(const desc&)const noexcept = default;
+  };
+  struct rgba_t{
+    std::uint8_t r, g, b, a;
+    inline std::uint32_t v()const{
+      static_assert(sizeof(rgba_t) == sizeof(std::uint32_t));
+      if constexpr(std::endian::native == std::endian::little){
+        std::uint32_t x;
+        std::memcpy(&x, this, sizeof(std::uint32_t));
+        return x;
+      }
+      else
+        return std::uint32_t{r}       |
+               std::uint32_t{g} <<  8 |
+               std::uint32_t{b} << 16 |
+               std::uint32_t{a} << 24;
+    }
+    inline std::uint_fast32_t hash()const{
+      static constexpr std::uint64_t constant =
+        static_cast<std::uint64_t>(3u) << 56 |
+                                   5u  << 16 |
+        static_cast<std::uint64_t>(7u) << 40 |
+                                  11u;
+      const auto v = static_cast<std::uint64_t>(this->v());
+      return (((v<<32|v)&0xFF00FF0000FF00FF)*constant)>>56;
+    }
+    inline bool operator==(const rgba_t& rhs)const{
+      return v() == rhs.v();
+    }
+    inline bool operator!=(const rgba_t& rhs)const{
+      return v() != rhs.v();
+    }
+  };
+  struct rgb_t{
+    std::uint8_t r, g, b;
+    inline std::uint32_t v()const{
+      static_assert(sizeof(rgb_t) == 3u);
+      if constexpr(std::endian::native == std::endian::little){
+        std::uint32_t x = 255u << 24u;
+        efficient_memcpy<3>(&x, this);
+        return x;
+      }
+      else
+        return std::uint32_t{r}       |
+               std::uint32_t{g} <<  8 |
+               std::uint32_t{b} << 16 |
+                           255u << 24;
+    }
+    inline std::uint_fast32_t hash()const{
+      static constexpr std::uint64_t constant =
+        static_cast<std::uint64_t>(3u) << 56 |
+                                   5u  << 16 |
+        static_cast<std::uint64_t>(7u) << 40 |
+                                  11u;
+      const auto v =
+        static_cast<std::uint64_t>(r)          |
+        static_cast<std::uint64_t>(g)    << 40 |
+        static_cast<std::uint64_t>(b)    << 16 |
+        static_cast<std::uint64_t>(0xff) << 56 ;
+      return (v*constant)>>56;
+    }
+    inline bool operator==(const rgb_t& rhs)const{
+      return ((this->r^rhs.r)|(this->g^rhs.g)|(this->b^rhs.b)) == 0;
+    }
+  };
+  static constexpr std::uint32_t magic = 
+    113u /*q*/ << 24 | 111u /*o*/ << 16 | 105u /*i*/ <<  8 | 102u /*f*/ ;
+  static constexpr std::size_t header_size =
+    sizeof(magic) +
+    sizeof(std::declval<desc>().width) +
+    sizeof(std::declval<desc>().height) +
+    sizeof(std::declval<desc>().channels) +
+    sizeof(std::declval<desc>().colorspace);
+  static constexpr std::size_t pixels_max = 400000000u;
+  static constexpr std::uint8_t padding[8] = {0, 0, 0, 0, 0, 0, 0, 1};
+  template<typename Puller>
+  static inline std::uint32_t read_32(Puller& p){
+    if constexpr(std::endian::native == std::endian::big && Puller::is_contiguous){
+      std::uint32_t x;
+      pull<sizeof(x)>(&x, p);
+      return x;
+    }
+    else{
+      const auto _1 = p.pull();
+      const auto _2 = p.pull();
+      const auto _3 = p.pull();
+      const auto _4 = p.pull();
+      return static_cast<std::uint32_t>(_1 << 24 | _2 << 16 | _3 << 8 | _4);
+    }
+  }
+  template<typename Pusher>
+  static inline void write_32(Pusher& p, std::uint32_t value){
+    if constexpr(std::endian::native == std::endian::big && Pusher::is_contiguous)
+      push<sizeof(value)>(p, value);
+    else{
+      p.push((value & 0xff000000) >> 24);
+      p.push((value & 0x00ff0000) >> 16);
+      p.push((value & 0x0000ff00) >>  8);
+      p.push( value & 0x000000ff       );
+    }
+  }
+ private:
+  template<bool Alpha>
+  using local_rgba_pixel_t = std::conditional_t<Alpha, rgba_t, rgb_t>;
+  template<bool Alpha>
+  static constexpr local_rgba_pixel_t<Alpha> default_pixel()noexcept{
+    if constexpr(Alpha)
+      return {0, 0, 0, 255};
+    else
+      return {};
+  }
+  template<bool Alpha>
+  struct local_pixel{
+    std::uint8_t rgb = static_cast<std::uint8_t>(chunk_tag::rgb);
+    local_rgba_pixel_t<Alpha> v;
+  };
+  static_assert(std::has_unique_object_representations_v<local_pixel<true>> and std::has_unique_object_representations_v<local_pixel<false>>);
+  template<std::uint_fast8_t Channels, typename Pusher, typename Puller>
+  static inline void encode_body(Pusher& p, Puller& pixels, rgba_t (&index)[index_size], std::size_t px_len, local_rgba_pixel_t<Channels == 4u> px_prev = default_pixel<Channels == 4u>(), std::uint8_t prev_hash = static_cast<std::uint8_t>(index_size), std::size_t run = 0){
+    local_pixel<Channels == 4u> px;
+    while(px_len--)[[likely]]{
+      pull<Channels>(&px.v, pixels);
+      if(px.v.v() == px_prev.v()){
+        ++run;
+        continue;
+      }
+      if(run > 0){
+        while(run >= 62)[[unlikely]]{
+          static constexpr std::uint8_t x = chunk_tag::run | 61;
+          p.push(x);
+          run -= 62;
+        }
+        if(run > 1){
+          p.push(chunk_tag::run | (run-1));
+          run = 0;
+        }
+        else if(run == 1){
+          if(prev_hash == index_size)[[unlikely]]
+            p.push(chunk_tag::run);
+          else
+            p.push(chunk_tag::index | prev_hash);
+          run = 0;
+        }
+      }
+
+      const auto index_pos = px.v.hash() % index_size;
+      prev_hash = index_pos;
+
+      do{
+        if(index[index_pos].v() == px.v.v()){
+          p.push(chunk_tag::index | index_pos);
+          break;
+        }
+        efficient_memcpy<Channels>(index + index_pos, &px.v);
+        if constexpr(Channels == 3)
+          index[index_pos].a = 255u;
+
+        if constexpr(Channels == 4)
+          if(px.v.a != px_prev.a){
+            p.push(chunk_tag::rgba);
+            push<4>(p, &px.v);
+            break;
+          }
+        const auto vg_2 = static_cast<int>(px.v.g) - static_cast<int>(px_prev.g);
+        if(const std::uint8_t g = vg_2+32; g < 64){
+          const auto vr = static_cast<int>(px.v.r) - static_cast<int>(px_prev.r) + 2;
+          const auto vg = vg_2 + 2;
+          const auto vb = static_cast<int>(px.v.b) - static_cast<int>(px_prev.b) + 2;
+
+          if(static_cast<std::uint8_t>(vr|vg|vb) < 4){
+            p.push(chunk_tag::diff | vr << 4 | vg << 2 | vb);
+            break;
+          }
+          const auto vg_r = vr - vg + 8;
+          const auto vg_b = vb - vg + 8;
+          if(static_cast<std::uint8_t>(vg_r|vg_b) < 16){
+            p.push(chunk_tag::luma | g);
+            p.push(vg_r << 4 | vg_b);
+          }
+          else
+            push<4>(p, &px);
+        }
+        else
+          push<4>(p, &px);
+      }while(false);
+      efficient_memcpy<Channels>(&px_prev, &px.v);
+    }
+    while(run >= 62)[[unlikely]]{
+      static constexpr std::uint8_t x = chunk_tag::run | 61;
+      p.push(x);
+      run -= 62;
+    }
+    if(run > 0)
+      p.push(chunk_tag::run | (run-1));
+  }
+#ifndef QOIXX_NO_SIMD
+#if defined(__ARM_FEATURE_SVE)
+  template<bool Alpha>
+  using pixels_type = std::conditional_t<Alpha, svuint8x4_t, svuint8x3_t>;
+  template<typename... Args>
+  requires (std::same_as<std::decay_t<Args>, svuint8_t> && ...)
+  static inline pixels_type<sizeof...(Args) == 4> create(Args&&... args)noexcept{
+    if constexpr(sizeof...(Args) == 4)
+      return svcreate4_u8(std::forward<Args>(args)...);
+    else
+      return svcreate3_u8(std::forward<Args>(args)...);
+  }
+  template<std::size_t ImmIndex>
+  static inline svuint8_t get(svuint8x4_t t)noexcept{
+    return svget4_u8(t, ImmIndex);
+  }
+  template<std::size_t ImmIndex>
+  static inline svuint8_t get(svuint8x3_t t)noexcept{
+    return svget3_u8(t, ImmIndex);
+  }
+  template<bool Alpha>
+  static inline pixels_type<Alpha> load(svbool_t pg, const std::uint8_t* ptr)noexcept{
+    if constexpr(Alpha)
+      return svld4_u8(pg, ptr);
+    else
+      return svld3_u8(pg, ptr);
+  }
+  template<std::size_t SVERegisterSize, std::uint_fast8_t Channels, typename Pusher, typename Puller>
+  static inline void encode_sve(Pusher& p_, Puller& pixels_, const desc& desc){
+    static constexpr bool Alpha = Channels == 4;
+    std::uint8_t* p = p_.raw_pointer();
+    const std::uint8_t* pixels = pixels_.raw_pointer();
+
+    rgba_t index[index_size] = {};
+
+    const auto zero = svdup_n_u8(0);
+    const auto iota = svindex_u8(0, 1);
+
+    pixels_type<Alpha> prev;
+    if constexpr(Alpha)
+      prev = create(zero, zero, zero, svdup_n_u8(255));
+    else
+      prev = create(zero, zero, zero);
+
+    std::size_t run = 0;
+    rgba_t px = {0, 0, 0, 255};
+    auto prev_hash = static_cast<std::uint8_t>(index_size);
+
+    const std::size_t px_len = desc.width * desc.height;
+    static constexpr auto vector_lanes = SVERegisterSize/8;
+    for(std::size_t i = 0; i < px_len; i += vector_lanes){
+      const auto mask = svwhilelt_b8_u64(i, px_len);
+      const auto num = std::min(px_len-i, vector_lanes);
+      const auto pxs = load<Alpha>(mask, pixels);
+      static constexpr std::uint64_t imm = SVERegisterSize/8-1;
+      auto rv = svsub_u8_x(mask, get<0>(pxs), svext_u8(get<0>(prev), get<0>(pxs), imm));
+      auto gv = svsub_u8_x(mask, get<1>(pxs), svext_u8(get<1>(prev), get<1>(pxs), imm));
+      auto bv = svsub_u8_x(mask, get<2>(pxs), svext_u8(get<2>(prev), get<2>(pxs), imm));
+      [[maybe_unused]] svbool_t av;
+      bool alpha = true;
+      if constexpr(Alpha){
+        av = svcmpeq_n_u8(mask, svsub_u8_x(mask, get<3>(pxs), svext_u8(get<3>(prev), get<3>(pxs), imm)), 0);
+        alpha = !svptest_any(mask, svnot_b_z(mask, av));
+      }
+      auto runv = svcmpeq_n_u8(mask, svorr_u8_x(mask, svorr_u8_x(mask, rv, gv), bv), 0);
+      if constexpr(Alpha)
+        runv = svand_b_z(mask, runv, av);
+      const auto not_runv = svnot_b_z(mask, runv);
+      if(!svptest_any(mask, not_runv)){
+        run += num;
+        pixels += num*Channels;
+        continue;
+      }
+      const auto r = svminv_u8(not_runv, iota);
+      run += r;
+      pixels += r*Channels;
+      if(run > 0){
+        while(run >= 62)[[unlikely]]{
+          static constexpr std::uint8_t x = chunk_tag::run | 61;
+          *p++ = x;
+          run -= 62;
+        }
+        if(run > 1){
+          *p++ = chunk_tag::run | (run-1);
+          run = 0;
+        }
+        else if(run == 1){
+          if(prev_hash == index_size)[[unlikely]]
+            *p++ = chunk_tag::run;
+          else
+            *p++ = chunk_tag::index | prev_hash;
+          run = 0;
+        }
+      }
+      rv = svadd_n_u8_x(mask, rv, 2);
+      gv = svadd_n_u8_x(mask, gv, 2);
+      bv = svadd_n_u8_x(mask, bv, 2);
+      const auto diffv = svorr_u8_z(svcmplt_n_u8(mask, svorr_u8_z(mask, svorr_u8_x(mask, rv, gv), bv), 4), svorr_n_u8_x(mask, svlsl_n_u8_x(mask, rv, 4), chunk_tag::diff), svorr_u8_x(mask, svlsl_n_u8_x(mask, gv, 2), bv));
+      rv = svadd_n_u8_x(mask, svsub_u8_x(mask, rv, gv), 8);
+      bv = svadd_n_u8_x(mask, svsub_u8_x(mask, bv, gv), 8);
+      gv = svadd_n_u8_x(mask, gv, 30);
+      const auto lu = svorr_n_u8_z(svcmpeq_n_u8(mask, svorr_u8_x(mask, svand_n_u8_x(mask, svorr_u8_x(mask, rv, bv), 0xf0), svand_n_u8_x(mask, gv, 0xc0)), 0), gv, chunk_tag::luma);
+      const auto ma = svorr_u8_x(mask, svlsl_n_u8_x(mask, rv, 4), bv);
+      svuint8_t hash;
+      if constexpr(Alpha)
+        hash = svand_n_u8_x(mask, svadd_u8_x(mask, svadd_u8_x(mask, svmul_n_u8_x(mask, get<0>(pxs), 3), svmul_n_u8_x(mask, get<1>(pxs), 5)), svadd_u8_x(mask, svmul_n_u8_x(mask, get<2>(pxs), 7), svmul_n_u8_x(mask, get<3>(pxs), 11))), 63);
+      else
+        hash = svand_n_u8_x(mask, svadd_u8_x(mask, svadd_u8_x(mask, svmul_n_u8_x(mask, get<0>(pxs), 3), svmul_n_u8_x(mask, get<1>(pxs), 5)), svadd_n_u8_x(mask, svmul_n_u8_x(mask, get<2>(pxs), 7), static_cast<std::uint8_t>(255*11))), 63);
+      std::uint8_t runs[SVERegisterSize/8], diffs[SVERegisterSize/8], lumas[SVERegisterSize/8*2], hashs[SVERegisterSize/8];
+      [[maybe_unused]] std::uint8_t alphas[SVERegisterSize/8];
+      svst1_u8(mask, runs, svadd_n_u8_m(runv, zero, 1));
+      svst1_u8(mask, diffs, diffv);
+      const auto luma = svcreate2_u8(lu, ma);
+      svst2_u8(mask, lumas, luma);
+      svst1_u8(mask, hashs, hash);
+      if constexpr(Alpha)
+        if(!alpha)
+          svst1_u8(mask, alphas, svadd_n_u8_m(av, zero, 1));
+      for(std::size_t i = r; i < num; ++i){
+        if(runs[i]){
+          ++run;
+          pixels += Channels;
+          continue;
+        }
+        if(run > 1){
+          *p++ = chunk_tag::run | (run-1);
+          run = 0;
+        }
+        else if(run == 1){
+          if(prev_hash == index_size)[[unlikely]]
+            *p++ = chunk_tag::run;
+          else
+            *p++ = chunk_tag::index | prev_hash;
+          run = 0;
+        }
+        const auto index_pos = hashs[i];
+        prev_hash = index_pos;
+        efficient_memcpy<Channels>(&px, pixels);
+        pixels += Channels;
+        if(index[index_pos] == px){
+          *p++ = chunk_tag::index | index_pos;
+          continue;
+        }
+        index[index_pos] = px;
+
+        if constexpr(Alpha)
+          if(!alpha && !alphas[i]){
+            *p++ = chunk_tag::rgba;
+            std::memcpy(p, &px, 4);
+            p += 4;
+            continue;
+          }
+        if(diffs[i])
+          *p++ = diffs[i];
+        else if(lumas[i*2]){
+          std::memcpy(p, lumas + i*2, 2);
+          p += 2;
+        }
+        else{
+          *p++ = chunk_tag::rgb;
+          efficient_memcpy<3>(p, &px);
+          p += 3;
+        }
+      }
+      prev = pxs;
+    }
+    while(run >= 62)[[unlikely]]{
+      static constexpr std::uint8_t x = chunk_tag::run | 61;
+      *p++ = x;
+      run -= 62;
+    }
+    if(run > 0){
+      *p++ = chunk_tag::run | (run-1);
+      run = 0;
+    }
+    p_.advance(p-p_.raw_pointer());
+    pixels_.advance(px_len*Channels);
+
+    push<sizeof(padding)>(p_, padding);
+  }
+#elif defined(__aarch64__)
+  template<bool Alpha>
+  using pixels_type = std::conditional_t<Alpha, uint8x16x4_t, uint8x16x3_t>;
+  template<bool Alpha>
+  static inline pixels_type<Alpha> load(const std::uint8_t* ptr)noexcept{
+    if constexpr(Alpha)
+      return vld4q_u8(ptr);
+    else
+      return vld3q_u8(ptr);
+  }
+  static constexpr std::size_t simd_lanes = 16;
+  template<std::uint_fast8_t Channels, typename Pusher, typename Puller>
+  static inline void encode_neon(Pusher& p_, Puller& pixels_, const desc& desc){
+    static constexpr bool Alpha = Channels == 4;
+    std::uint8_t* p = p_.raw_pointer();
+    const std::uint8_t* pixels = pixels_.raw_pointer();
+
+    rgba_t index[index_size] = {};
+
+    const auto zero = vdupq_n_u8(0);
+    static constexpr std::uint8_t iota_[simd_lanes] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    const auto iota = vld1q_u8(iota_);
+
+    pixels_type<Alpha> prev;
+    prev.val[0] = prev.val[1] = prev.val[2] = zero;
+    if constexpr(Alpha)
+      prev.val[3] = vdupq_n_u8(255);
+
+    std::size_t run = 0;
+    rgba_t px = {0, 0, 0, 255};
+    auto prev_hash = static_cast<std::uint8_t>(index_size);
+
+    std::size_t px_len = desc.width * desc.height;
+    std::size_t simd_len = px_len / simd_lanes;
+    const std::size_t simd_len_16 = simd_len * simd_lanes;
+    px_len -= simd_len_16;
+    pixels_.advance(simd_len_16*Channels);
+    while(simd_len--){
+      const auto pxs = load<Alpha>(pixels);
+      pixels_type<Alpha> diff;
+      diff.val[0] = vsubq_u8(pxs.val[0], vextq_u8(prev.val[0], pxs.val[0], simd_lanes-1));
+      diff.val[1] = vsubq_u8(pxs.val[1], vextq_u8(prev.val[1], pxs.val[1], simd_lanes-1));
+      diff.val[2] = vsubq_u8(pxs.val[2], vextq_u8(prev.val[2], pxs.val[2], simd_lanes-1));
+      bool alpha = true;
+      if constexpr(Alpha){
+        diff.val[3] = vsubq_u8(pxs.val[3], vextq_u8(prev.val[3], pxs.val[3], simd_lanes-1));
+        diff.val[3] = vceqq_u8(diff.val[3], zero);
+        alpha = vminvq_u8(diff.val[3]) != 0;
+      }
+      auto runv = vceqq_u8(vorrq_u8(vorrq_u8(diff.val[0], diff.val[1]), diff.val[2]), zero);
+      if(vminvq_u8(runv) != 0 && alpha){
+        run += simd_lanes;
+        pixels += simd_lanes*Channels;
+        continue;
+      }
+      if constexpr(Alpha)
+        runv = vandq_u8(runv, diff.val[3]);
+      const auto r = vminvq_u8(vorrq_u8(vandq_u8(vmvnq_u8(runv), iota), runv));
+      run += r;
+      pixels += r*Channels;
+      if(run > 0){
+        while(run >= 62)[[unlikely]]{
+          static constexpr std::uint8_t x = chunk_tag::run | 61;
+          *p++ = x;
+          run -= 62;
+        }
+        if(run > 1){
+          *p++ = chunk_tag::run | (run-1);
+          run = 0;
+        }
+        else if(run == 1){
+          if(prev_hash == index_size)[[unlikely]]
+            *p++ = chunk_tag::run;
+          else
+            *p++ = chunk_tag::index | prev_hash;
+          run = 0;
+        }
+      }
+      const auto two = vdupq_n_u8(2);
+      diff.val[0] = vaddq_u8(diff.val[0], two);
+      diff.val[1] = vaddq_u8(diff.val[1], two);
+      diff.val[2] = vaddq_u8(diff.val[2], two);
+      const auto four = vdupq_n_u8(4);
+      const auto diffv = vandq_u8(vorrq_u8(vorrq_u8(vdupq_n_u8(chunk_tag::diff), vshlq_n_u8(diff.val[0], 4)), vorrq_u8(vshlq_n_u8(diff.val[1], 2), diff.val[2])), vcltq_u8(vorrq_u8(vorrq_u8(diff.val[0], diff.val[1]), diff.val[2]), four));
+      const auto eight = vdupq_n_u8(8);
+      diff.val[0] = vaddq_u8(vsubq_u8(diff.val[0], diff.val[1]), eight);
+      diff.val[2] = vaddq_u8(vsubq_u8(diff.val[2], diff.val[1]), eight);
+      diff.val[1] = vaddq_u8(diff.val[1], vdupq_n_u8(30));
+      const auto lu = vandq_u8(vorrq_u8(vdupq_n_u8(chunk_tag::luma), diff.val[1]), vceqq_u8(vorrq_u8(vandq_u8(vorrq_u8(diff.val[0], diff.val[2]), vdupq_n_u8(0xf0)), vandq_u8(diff.val[1], vdupq_n_u8(0xc0))), zero));
+      const auto ma = vorrq_u8(vshlq_n_u8(diff.val[0], 4), diff.val[2]);
+      uint8x16_t hash;
+      if constexpr(Alpha)
+        hash = vandq_u8(vaddq_u8(vaddq_u8(vmulq_u8(pxs.val[0], vdupq_n_u8(3)), vmulq_u8(pxs.val[1], vdupq_n_u8(5))), vaddq_u8(vmulq_u8(pxs.val[2], vdupq_n_u8(7)), vmulq_u8(pxs.val[3], vdupq_n_u8(11)))), vdupq_n_u8(63));
+      else
+        hash = vandq_u8(vaddq_u8(vaddq_u8(vmulq_u8(pxs.val[0], vdupq_n_u8(3)), vmulq_u8(pxs.val[1], vdupq_n_u8(5))), vaddq_u8(vmulq_u8(pxs.val[2], vdupq_n_u8(7)), vdupq_n_u8(static_cast<std::uint8_t>(255*11)))), vdupq_n_u8(63));
+      std::uint8_t runs[simd_lanes], diffs[simd_lanes], lumas[simd_lanes*2], hashs[simd_lanes];
+      [[maybe_unused]] std::uint8_t alphas[simd_lanes];
+      vst1q_u8(runs, runv);
+      vst1q_u8(diffs, diffv);
+      vst2q_u8(lumas, (uint8x16x2_t{lu, ma}));
+      vst1q_u8(hashs, hash);
+      if constexpr(Alpha)
+        if(!alpha)
+          vst1q_u8(alphas, diff.val[3]);
+      for(std::size_t i = r; i < simd_lanes; ++i){
+        if(runs[i]){
+          ++run;
+          pixels += Channels;
+          continue;
+        }
+        if(run > 1){
+          *p++ = chunk_tag::run | (run-1);
+          run = 0;
+        }
+        else if(run == 1){
+          if(prev_hash == index_size)[[unlikely]]
+            *p++ = chunk_tag::run;
+          else
+            *p++ = chunk_tag::index | prev_hash;
+          run = 0;
+        }
+        const auto index_pos = hashs[i];
+        prev_hash = index_pos;
+        efficient_memcpy<Channels>(&px, pixels);
+        pixels += Channels;
+        if(index[index_pos] == px){
+          *p++ = chunk_tag::index | index_pos;
+          continue;
+        }
+        index[index_pos] = px;
+
+        if constexpr(Alpha)
+          if(!alpha && !alphas[i]){
+            *p++ = chunk_tag::rgba;
+            std::memcpy(p, &px, 4);
+            p += 4;
+            continue;
+          }
+        if(diffs[i])
+          *p++ = diffs[i];
+        else if(lumas[i*2]){
+          std::memcpy(p, lumas + i*2, 2);
+          p += 2;
+        }
+        else{
+          *p++ = chunk_tag::rgb;
+          efficient_memcpy<3>(p, &px);
+          p += 3;
+        }
+      }
+      prev = pxs;
+    }
+    p_.advance(p-p_.raw_pointer());
+
+    if constexpr(Alpha)
+      encode_body<Channels>(p_, pixels_, index, px_len, px, prev_hash, run);
+    else{
+      rgb_t px_prev;
+      efficient_memcpy<3>(&px_prev, &px);
+      encode_body<Channels>(p_, pixels_, index, px_len, px_prev, prev_hash, run);
+    }
+
+    push<sizeof(padding)>(p_, padding);
+  }
+#elif defined(__AVX2__)
+  static constexpr unsigned de_bruijn_bit_position_sequence[32] = {
+    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+  };
+  static constexpr unsigned lsb32(std::uint32_t x)noexcept{
+    return de_bruijn_bit_position_sequence[(static_cast<std::uint32_t>(x&-static_cast<std::int32_t>(x))*0x077cb531u) >> 27];
+  }
+  template<std::uint8_t M>
+  static inline __m256i slli_epi8(__m256i v)noexcept{
+    const auto mask = _mm256_set1_epi8(static_cast<std::uint8_t>(0xff << M) >> M);
+    return _mm256_slli_epi16(_mm256_and_si256(v, mask), M);
+  }
+  template<std::uint8_t M>
+  static inline __m256i mul_epi8(__m256i v)noexcept{
+    if constexpr(M == 0)
+      return _mm256_setzero_si256();
+    else if constexpr(M == 1)
+      return v;
+    else if constexpr(M == 2)
+      return slli_epi8<1>(v);
+    else if constexpr(M == 3)
+      return _mm256_add_epi8(slli_epi8<1>(v), v);
+    else if constexpr(M == 4)
+      return slli_epi8<2>(v);
+    else if constexpr(M == 5)
+      return _mm256_add_epi8(slli_epi8<2>(v), v);
+    else if constexpr(M == 6)
+      return _mm256_add_epi8(slli_epi8<2>(v), slli_epi8<1>(v));
+    else if constexpr(M == 7)
+      return _mm256_sub_epi8(slli_epi8<3>(v), v);
+    else if constexpr(M == 8)
+      return slli_epi8<3>(v);
+    else if constexpr(M == 9)
+      return _mm256_add_epi8(slli_epi8<3>(v), v);
+    else if constexpr(M == 10)
+      return _mm256_add_epi8(slli_epi8<3>(v), slli_epi8<1>(v));
+    else if constexpr(M == 11)
+      return _mm256_add_epi8(_mm256_add_epi8(slli_epi8<3>(v), slli_epi8<1>(v)), v);
+    else if constexpr(M == 12)
+      return _mm256_add_epi8(slli_epi8<3>(v), slli_epi8<2>(v));
+    else if constexpr(M == 13)
+      return _mm256_add_epi8(_mm256_add_epi8(slli_epi8<3>(v), slli_epi8<2>(v)), v);
+    else if constexpr(M == 14)
+      return _mm256_sub_epi8(slli_epi8<4>(v), slli_epi8<1>(v));
+    else if constexpr(M == 15)
+      return _mm256_sub_epi8(slli_epi8<4>(v), v);
+    else
+      static_assert(M <= 15);
+  }
+  static inline __m256i prev_vector(__m256i pxs, __m256i prev)noexcept{
+    const auto permute = _mm256_permute2x128_si256(pxs, pxs, 0x08);
+    const auto inserted = _mm256_inserti128_si256(permute, _mm256_extracti128_si256(prev, 1), 0);
+    return _mm256_alignr_epi8(pxs, inserted, 15);
+  }
+  template<bool Alpha>
+  struct pixels_type{
+    __m256i val[3+Alpha];
+  };
+  static constexpr std::size_t simd_lanes = 256/8;
+  template<bool Alpha>
+  static inline pixels_type<Alpha> load(const std::uint8_t* ptr)noexcept{
+    if constexpr(Alpha){
+      const auto t1 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
+      const auto t2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr+simd_lanes));
+      const auto t3 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr+simd_lanes*2));
+      const auto t4 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr+simd_lanes*3));
+      const auto lo12 = _mm256_unpacklo_epi8(t1, t2);
+      const auto lo34 = _mm256_unpacklo_epi8(t3, t4);
+      const auto lolo12lo34 = _mm256_unpacklo_epi16(lo12, lo34);
+      const auto hilo12lo34 = _mm256_unpackhi_epi16(lo12, lo34);
+      const auto lololo12lo34hilo12lo34 = _mm256_unpacklo_epi32(lolo12lo34, hilo12lo34);
+      const auto hilolo12lo34hilo12lo34 = _mm256_unpackhi_epi32(lolo12lo34, hilo12lo34);
+      const auto hi12 = _mm256_unpackhi_epi8(t1, t2);
+      const auto hi34 = _mm256_unpackhi_epi8(t3, t4);
+      const auto lohi12hi34 = _mm256_unpacklo_epi16(hi12, hi34);
+      const auto hihi12hi34 = _mm256_unpackhi_epi16(hi12, hi34);
+      const auto lolohi12hi34hihi12hi34 = _mm256_unpacklo_epi32(lohi12hi34, hihi12hi34);
+      const auto lolololo12lo34hilo12lo34lolohi12hi34hihi12hi34 = _mm256_unpacklo_epi64(lololo12lo34hilo12lo34, lolohi12hi34hihi12hi34);
+      const auto hilololo12lo34hilo12lo34lolohi12hi34hihi12hi34 = _mm256_unpackhi_epi64(lololo12lo34hilo12lo34, lolohi12hi34hihi12hi34);
+      const auto hilohi12hi34hihi12hi34 = _mm256_unpackhi_epi32(lohi12hi34, hihi12hi34);
+      const auto lohilolo12lo34hilo12lo34hilohi12hi34hihi12hi34 = _mm256_unpacklo_epi64(hilolo12lo34hilo12lo34, hilohi12hi34hihi12hi34);
+      const auto hihilolo12lo34hilo12lo34hilohi12hi34hihi12hi34 = _mm256_unpackhi_epi64(hilolo12lo34hilo12lo34, hilohi12hi34hihi12hi34);
+      const auto mask1 = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
+      const auto mask2 = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);
+      const auto r = _mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(lolololo12lo34hilo12lo34lolohi12hi34hihi12hi34, mask1), mask2);
+      const auto g = _mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(hilololo12lo34hilo12lo34lolohi12hi34hihi12hi34, mask1), mask2);
+      const auto b = _mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(lohilolo12lo34hilo12lo34hilohi12hi34hihi12hi34, mask1), mask2);
+      const auto a = _mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(hihilolo12lo34hilo12lo34hilohi12hi34hihi12hi34, mask1), mask2);
+      return {{r, g, b, a}};
+    }
+    else{
+      const auto t1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr));
+      const auto t2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr+simd_lanes/2));
+      const auto t3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr+simd_lanes));
+      const auto t4 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr+simd_lanes*3/2));
+      const auto t5 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr+simd_lanes*2));
+      const auto t6 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr+simd_lanes*5/2));
+      const auto mask01 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14);
+      const auto mask02 = _mm_setr_epi8(2, 5, 8, 11, 14, 0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13);
+      const auto mask03 = _mm_setr_epi8(1, 4, 7, 10, 13, 2, 5, 8, 11, 14, 0, 3, 6, 9, 12, 15);
+      static constexpr char _128 = static_cast<char>(0b1000'0000);
+      const auto mask11 = _mm_setr_epi8(0, 0, 0, 0, 0, 0, _128, _128, _128, _128, _128, _128, _128, _128, _128, _128);
+      const auto mask21 = _mm_setr_epi8(_128, _128, _128, _128, _128, _128, _128, _128, _128, _128, _128, 0, 0, 0, 0, 0);
+      const auto mask12 = _mm_setr_epi8(_128, _128, _128, _128, _128, 0, 0, 0, 0, 0, 0, _128, _128, _128, _128, _128);
+      const auto mask22 = _mm_setr_epi8(0, 0, 0, 0, 0, _128, _128, _128, _128, _128, _128, _128, _128, _128, _128, _128);
+      const auto mask13 = _mm_setr_epi8(_128, _128, _128, _128, _128, _128, _128, _128, _128, _128, 0, 0, 0, 0, 0, 0);
+      const auto mask23 = _mm_setr_epi8(_128, _128, _128, _128, _128, 0, 0, 0, 0, 0, _128, _128, _128, _128, _128, _128);
+      const auto x1 = _mm_shuffle_epi8(t1, mask01);
+      const auto x2 = _mm_shuffle_epi8(t2, mask02);
+      const auto x3 = _mm_shuffle_epi8(t3, mask03);
+      const auto x4 = _mm_shuffle_epi8(t4, mask01);
+      const auto x5 = _mm_shuffle_epi8(t5, mask02);
+      const auto x6 = _mm_shuffle_epi8(t6, mask03);
+      const auto r1 = _mm_blendv_epi8(_mm_alignr_epi8(x3, x3, 5), _mm_blendv_epi8(x1, _mm_alignr_epi8(x2, x2, 10), mask11), mask21);
+      const auto g1 = _mm_blendv_epi8(_mm_alignr_epi8(x1, x1, 6), _mm_blendv_epi8(x2, _mm_alignr_epi8(x3, x3, 10), mask12), mask22);
+      const auto b1 = _mm_blendv_epi8(_mm_alignr_epi8(x2, x2, 6), _mm_blendv_epi8(x3, _mm_alignr_epi8(x1, x1, 11), mask13), mask23);
+      const auto r2 = _mm_blendv_epi8(_mm_alignr_epi8(x6, x6, 5), _mm_blendv_epi8(x4, _mm_alignr_epi8(x5, x5, 10), mask11), mask21);
+      const auto g2 = _mm_blendv_epi8(_mm_alignr_epi8(x4, x4, 6), _mm_blendv_epi8(x5, _mm_alignr_epi8(x6, x6, 10), mask12), mask22);
+      const auto b2 = _mm_blendv_epi8(_mm_alignr_epi8(x5, x5, 6), _mm_blendv_epi8(x6, _mm_alignr_epi8(x4, x4, 11), mask13), mask23);
+      const auto r = _mm256_inserti128_si256(_mm256_castsi128_si256(r1), r2, 1);
+      const auto g = _mm256_inserti128_si256(_mm256_castsi128_si256(g1), g2, 1);
+      const auto b = _mm256_inserti128_si256(_mm256_castsi128_si256(b1), b2, 1);
+      return {{r, g, b}};
+    }
+  }
+  template<std::uint_fast8_t Channels, typename Pusher, typename Puller>
+  static inline void encode_avx2(Pusher& p_, Puller& pixels_, const desc& desc){
+    static constexpr bool Alpha = Channels == 4;
+    std::uint8_t* p = p_.raw_pointer();
+    const std::uint8_t* pixels = pixels_.raw_pointer();
+
+    rgba_t index[index_size] = {};
+
+    const auto zero = _mm256_setzero_si256();
+
+    pixels_type<Alpha> prev;
+    prev.val[0] = prev.val[1] = prev.val[2] = zero;
+    if constexpr(Alpha)
+      prev.val[3] = _mm256_set1_epi8(static_cast<char>(0xff));
+
+    std::size_t run = 0;
+    rgba_t px = {0, 0, 0, 255};
+    auto prev_hash = static_cast<std::uint8_t>(index_size);
+
+    std::size_t px_len = desc.width * desc.height;
+    std::size_t simd_len = px_len / simd_lanes;
+    const std::size_t simd_len_32 = simd_len * simd_lanes;
+    px_len -= simd_len_32;
+    pixels_.advance(simd_len_32*Channels);
+    while(simd_len--){
+      const auto pxs = load<Alpha>(pixels);
+      pixels_type<Alpha> diff;
+      diff.val[0] = _mm256_sub_epi8(pxs.val[0], prev_vector(pxs.val[0], prev.val[0]));
+      diff.val[1] = _mm256_sub_epi8(pxs.val[1], prev_vector(pxs.val[1], prev.val[1]));
+      diff.val[2] = _mm256_sub_epi8(pxs.val[2], prev_vector(pxs.val[2], prev.val[2]));
+      bool alpha = true;
+      if constexpr(Alpha){
+        diff.val[3] = _mm256_sub_epi8(pxs.val[3], prev_vector(pxs.val[3], prev.val[3]));
+        alpha = _mm256_testz_si256(diff.val[3], diff.val[3]);
+        diff.val[3] = _mm256_cmpeq_epi8(diff.val[3], zero);
+      }
+      const auto ored = _mm256_or_si256(_mm256_or_si256(diff.val[0], diff.val[1]), diff.val[2]);
+      auto runv = _mm256_cmpeq_epi8(ored, zero);
+      if(_mm256_testz_si256(ored, ored) && alpha){
+        run += simd_lanes;
+        pixels += simd_lanes*Channels;
+        continue;
+      }
+      if constexpr(Alpha)
+        runv = _mm256_and_si256(runv, diff.val[3]);
+      const auto r = lsb32(~_mm256_movemask_epi8(runv));
+      run += r;
+      pixels += r*Channels;
+      if(run > 0){
+        while(run >= 62)[[unlikely]]{
+          static constexpr std::uint8_t x = chunk_tag::run | 61;
+          *p++ = x;
+          run -= 62;
+        }
+        if(run > 1){
+          *p++ = static_cast<std::uint8_t>(chunk_tag::run | (run-1));
+          run = 0;
+        }
+        else if(run == 1){
+          if(prev_hash == index_size)[[unlikely]]
+            *p++ = chunk_tag::run;
+          else
+            *p++ = chunk_tag::index | prev_hash;
+          run = 0;
+        }
+      }
+      const auto two = _mm256_set1_epi8(2);
+      diff.val[0] = _mm256_add_epi8(diff.val[0], two);
+      diff.val[1] = _mm256_add_epi8(diff.val[1], two);
+      diff.val[2] = _mm256_add_epi8(diff.val[2], two);
+      const auto diffor = _mm256_or_si256(_mm256_or_si256(diff.val[0], diff.val[1]), diff.val[2]);
+      const auto diffv = _mm256_and_si256(_mm256_or_si256(_mm256_or_si256(_mm256_set1_epi8(chunk_tag::diff), slli_epi8<4>(diff.val[0])), _mm256_or_si256(slli_epi8<2>(diff.val[1]), diff.val[2])), _mm256_cmpeq_epi8(_mm256_and_si256(diffor, _mm256_set1_epi8(0b11)), diffor));
+      const auto eight = _mm256_set1_epi8(8);
+      diff.val[0] = _mm256_add_epi8(_mm256_sub_epi8(diff.val[0], diff.val[1]), eight);
+      diff.val[2] = _mm256_add_epi8(_mm256_sub_epi8(diff.val[2], diff.val[1]), eight);
+      diff.val[1] = _mm256_add_epi8(diff.val[1], _mm256_set1_epi8(30));
+      const auto luma_mask = _mm256_setr_epi32(0, 1, 4, 5, 2, 3, 6, 7);
+      const auto lu = _mm256_permutevar8x32_epi32(_mm256_and_si256(_mm256_or_si256(_mm256_set1_epi8(static_cast<char>(chunk_tag::luma)), diff.val[1]), _mm256_cmpeq_epi8(_mm256_or_si256(_mm256_and_si256(_mm256_or_si256(diff.val[0], diff.val[2]), _mm256_set1_epi8(static_cast<char>(0xf0))), _mm256_and_si256(diff.val[1], _mm256_set1_epi8(static_cast<char>(0xc0)))), zero)), luma_mask);
+      const auto ma = _mm256_permutevar8x32_epi32(_mm256_or_si256(slli_epi8<4>(diff.val[0]), diff.val[2]), luma_mask);
+      __m256i hash;
+      if constexpr(Alpha)
+        hash = _mm256_and_si256(_mm256_add_epi8(_mm256_add_epi8(mul_epi8<3>(pxs.val[0]), mul_epi8<5>(pxs.val[1])), _mm256_add_epi8(mul_epi8<7>(pxs.val[2]), mul_epi8<11>(pxs.val[3]))), _mm256_set1_epi8(63));
+      else
+        hash = _mm256_and_si256(_mm256_add_epi8(_mm256_add_epi8(mul_epi8<3>(pxs.val[0]), mul_epi8<5>(pxs.val[1])), _mm256_add_epi8(mul_epi8<7>(pxs.val[2]), _mm256_set1_epi8(static_cast<std::uint8_t>(255*11)))), _mm256_set1_epi8(63));
+      alignas(alignof(__m256i)) std::uint8_t runs[simd_lanes], diffs[simd_lanes], lumas[simd_lanes*2], hashs[simd_lanes];
+      [[maybe_unused]] alignas(alignof(__m256i)) std::uint8_t alphas[simd_lanes];
+      _mm256_store_si256(reinterpret_cast<__m256i*>(runs), runv);
+      _mm256_store_si256(reinterpret_cast<__m256i*>(diffs), diffv);
+      _mm256_store_si256(reinterpret_cast<__m256i*>(lumas), _mm256_unpacklo_epi8(lu, ma));
+      _mm256_store_si256(reinterpret_cast<__m256i*>(lumas)+1, _mm256_unpackhi_epi8(lu, ma));
+      _mm256_store_si256(reinterpret_cast<__m256i*>(hashs), hash);
+      if constexpr(Alpha)
+        if(!alpha)
+          _mm256_store_si256(reinterpret_cast<__m256i*>(alphas), diff.val[3]);
+      for(std::size_t i = r; i < simd_lanes; ++i){
+        if(runs[i]){
+          ++run;
+          pixels += Channels;
+          continue;
+        }
+        if(run > 1){
+          *p++ = static_cast<std::uint8_t>(chunk_tag::run | (run-1));
+          run = 0;
+        }
+        else if(run == 1){
+          if(prev_hash == index_size)[[unlikely]]
+            *p++ = chunk_tag::run;
+          else
+            *p++ = chunk_tag::index | prev_hash;
+          run = 0;
+        }
+        const auto index_pos = hashs[i];
+        prev_hash = index_pos;
+        efficient_memcpy<Channels>(&px, pixels);
+        pixels += Channels;
+        if(index[index_pos] == px){
+          *p++ = chunk_tag::index | index_pos;
+          continue;
+        }
+        index[index_pos] = px;
+
+        if constexpr(Alpha)
+          if(!alpha && !alphas[i]){
+            *p++ = chunk_tag::rgba;
+            std::memcpy(p, &px, 4);
+            p += 4;
+            continue;
+          }
+        if(diffs[i])
+          *p++ = diffs[i];
+        else if(lumas[i*2]){
+          std::memcpy(p, lumas + i*2, 2);
+          p += 2;
+        }
+        else{
+          *p++ = chunk_tag::rgb;
+          efficient_memcpy<3>(p, &px);
+          p += 3;
+        }
+      }
+      prev = pxs;
+    }
+    p_.advance(p-p_.raw_pointer());
+
+    if constexpr(Alpha)
+      encode_body<Channels>(p_, pixels_, index, px_len, px, prev_hash, run);
+    else{
+      rgb_t px_prev;
+      efficient_memcpy<3>(&px_prev, &px);
+      encode_body<Channels>(p_, pixels_, index, px_len, px_prev, prev_hash, run);
+    }
+
+    push<sizeof(padding)>(p_, padding);
+  }
+#endif
+#endif
+
+  template<std::uint_fast8_t Channels, typename Pusher, typename Puller>
+  static inline void encode_impl(Pusher& p, Puller& pixels, const desc& desc){
+    rgba_t index[index_size] = {};
+
+    std::size_t px_len = desc.width * desc.height;
+    encode_body<Channels>(p, pixels, index, px_len);
+
+    push<sizeof(padding)>(p, padding);
+  }
+
+  template<typename Puller>
+  static inline desc decode_header(Puller& p){
+    desc d;
+    const auto magic_ = read_32(p);
+    d.width = read_32(p);
+    d.height = read_32(p);
+    d.channels = p.pull();
+    d.colorspace = static_cast<qoi::colorspace>(p.pull());
+    if(
+      d.width == 0 || d.height == 0 || magic_ != magic ||
+      d.channels < 3 || d.channels > 4 ||
+      d.height >= pixels_max / d.width
+    )[[unlikely]]
+      throw std::runtime_error("qoixx::qoi::decode: invalid header");
+    return d;
+  }
+
+#ifndef QOIXX_DECODE_WITH_TABLES
+#define QOIXX_HPP_DECODE_WITH_TABLES_NOT_DEFINED
+#ifdef __aarch64__
+#define QOIXX_DECODE_WITH_TABLES 0
+#else
+#define QOIXX_DECODE_WITH_TABLES 1
+#endif
+#endif
+
+#if QOIXX_DECODE_WITH_TABLES
+  static constexpr std::size_t hash_table_offset = std::numeric_limits<std::uint8_t>::max()+1 - chunk_tag::diff;
+  static constexpr std::array<int, std::numeric_limits<std::uint8_t>::max()+1+chunk_tag::run-chunk_tag::diff> create_hash_diff_table(){
+    std::array<int, std::numeric_limits<std::uint8_t>::max()+1+chunk_tag::run-chunk_tag::diff> table = {};
+    for(std::size_t i = 0; i <= std::numeric_limits<std::uint8_t>::max(); ++i){
+      constexpr std::uint32_t mask_tail_4 = 0b0000'1111u;
+      const auto vr = (i >> 4);
+      const auto vb = (i & mask_tail_4);
+      table[i] = (vr*3 + vb*7) % index_size;
+    }
+    for(std::size_t i = chunk_tag::diff; i < chunk_tag::luma; ++i){
+      constexpr std::uint32_t mask_tail_2 = 0b0000'0011u;
+      const auto vr = static_cast<int>((i >> 4) & mask_tail_2) - 2;
+      const auto vg = static_cast<int>((i >> 2) & mask_tail_2) - 2;
+      const auto vb = static_cast<int>( i       & mask_tail_2) - 2;
+      table[i+hash_table_offset] = static_cast<std::uint8_t>((vr*3 + vg*5 + vb*7) % index_size);
+    }
+    for(std::size_t i = chunk_tag::luma; i < chunk_tag::run; ++i){
+        constexpr int vgv = chunk_tag::luma+40;
+        const int vg = i - vgv;
+        table[i+hash_table_offset] = static_cast<std::uint8_t>((vg*3 + (vg+8)*5 + vg*7) % index_size);
+    }
+    return table;
+  }
+  static constexpr std::array<std::array<std::uint8_t, 2>, std::numeric_limits<std::uint8_t>::max()+1> create_luma_table(){
+    std::array<std::array<std::uint8_t, 2>, std::numeric_limits<std::uint8_t>::max()+1> table = {};
+    for(std::size_t i = 0; i <= std::numeric_limits<std::uint8_t>::max(); ++i){
+      constexpr std::uint32_t mask_tail_4 = 0b0000'1111u;
+      const auto vr = (i >> 4);
+      const auto vb = (i & mask_tail_4);
+      table[i][0] = static_cast<uint8_t>(vr);
+      table[i][1] = static_cast<uint8_t>(vb);
+    }
+    return table;
+  }
+  static constexpr std::array<std::array<std::int8_t, 3>, chunk_tag::luma> create_diff_table(){
+    std::array<std::array<std::int8_t, 3>, chunk_tag::luma> table = {};
+    for(std::size_t i = chunk_tag::diff; i < chunk_tag::luma; ++i){
+      constexpr std::uint32_t mask_tail_2 = 0b0000'0011u;
+      const auto vr = ((i >> 4) & mask_tail_2) - 2;
+      const auto vg = ((i >> 2) & mask_tail_2) - 2;
+      const auto vb = ( i       & mask_tail_2) - 2;
+      table[i][0] = static_cast<uint8_t>(vr);
+      table[i][1] = static_cast<uint8_t>(vg);
+      table[i][2] = static_cast<uint8_t>(vb);
+    }
+    return table;
+  }
+#endif
+
+  template<std::size_t Channels, typename Pusher, typename Puller>
+  static inline void decode_impl(Pusher& pixels, Puller& p, std::size_t px_len, std::size_t size){
+#ifndef __aarch64__
+    using rgba_t = std::conditional_t<Channels == 4, qoi::rgba_t, qoi::rgb_t>;
+#endif
+    rgba_t px = {};
+    if constexpr(std::is_same<rgba_t, qoi::rgba_t>::value)
+      px.a = 255;
+    rgba_t index[index_size];
+    if constexpr(std::is_same<rgba_t, qoi::rgba_t>::value){
+      index[(0*3+0*5+0*7+0*11)%index_size] = {};
+      index[(0*3+0*5+0*7+255*11)%index_size] = px;
+    }
+    else
+      index[(0*3+0*5+0*7+255*11)%index_size] = {};
+
+#if QOIXX_DECODE_WITH_TABLES
+#define QOIXX_HPP_WITH_TABLES(...) __VA_ARGS__
+#define QOIXX_HPP_WITHOUT_TABLES(...)
+#else
+#define QOIXX_HPP_WITH_TABLES(...)
+#define QOIXX_HPP_WITHOUT_TABLES(...) __VA_ARGS__
+#endif
+
+    QOIXX_HPP_WITH_TABLES(
+    auto hash = px.hash() % index_size;
+    static constexpr auto luma_hash_diff_table = create_hash_diff_table();
+    static constexpr auto hash_diff_table = luma_hash_diff_table.data() + hash_table_offset;
+    )
+
+    const auto f = [&pixels, &p, &px_len, &size, &px, &index QOIXX_HPP_WITH_TABLES(, &hash)]{
+      const auto b1 = p.pull();
+      --size;
+
+#if defined(__aarch64__) and not defined(QOIXX_NO_SIMD)
+#define QOIXX_HPP_DECODE_RUN(px, run) { \
+    if constexpr(Pusher::is_contiguous){ \
+      ++run; \
+      if(run >= 8){ \
+        std::conditional_t<Channels == 4, uint8x8x4_t, uint8x8x3_t> data = {vdup_n_u8(px.r), vdup_n_u8(px.g), vdup_n_u8(px.b)}; \
+        if constexpr(Channels == 4) \
+          data.val[3] = vdup_n_u8(px.a); \
+        while(run>=8){ \
+          if constexpr(Channels == 4) \
+            vst4_u8(pixels.raw_pointer(), data); \
+          else \
+            vst3_u8(pixels.raw_pointer(), data); \
+          pixels.advance(Channels*8); \
+          run -= 8; \
+        } \
+      } \
+      while(run--){push<Channels>(pixels, &px);} \
+    } \
+    else \
+      do{push<Channels>(pixels, &px);}while(run--); \
+  }
+#else
+#define QOIXX_HPP_DECODE_RUN(px, run) do{push<Channels>(pixels, &px);}while(run--);
+#endif
+
+      if(b1 >= chunk_tag::run){
+        if(b1 < chunk_tag::rgb){
+          /*run*/
+          static constexpr std::uint32_t mask_tail_6 = 0b0011'1111u;
+          std::size_t run = b1 & mask_tail_6;
+          if(run >= px_len)[[unlikely]]
+            run = px_len;
+          px_len -= run;
+          QOIXX_HPP_DECODE_RUN(px, run)
+          return;
+        }
+        if(b1 == chunk_tag::rgb){
+          pull<3>(&px, p);
+          size -= 3;
+          QOIXX_HPP_WITH_TABLES(hash = px.hash() % index_size;)
+        }
+        if constexpr(Channels == 4){
+          if(b1 == chunk_tag::rgba){
+            pull<4>(&px, p);
+            size -= 4;
+            QOIXX_HPP_WITH_TABLES(hash = px.hash() % index_size;)
+          }
+        }
+        else{
+          if(b1 == chunk_tag::rgba)[[unlikely]]{
+            pull<3>(&px, p);
+            p.advance(1);
+            size -= 4;
+            QOIXX_HPP_WITH_TABLES(hash = px.hash() % index_size;)
+          }
+        }
+      }
+      else if(b1 < chunk_tag::diff){
+        /*index*/
+        if constexpr(std::is_same<rgba_t, qoi::rgba_t>::value)
+          px = index[b1];
+        else
+          efficient_memcpy<Channels>(&px, index + b1);
+        push<Channels>(pixels, &px);
+        QOIXX_HPP_WITH_TABLES(hash = b1;)
+        return;
+      }
+      else if(b1 >= chunk_tag::luma){
+        /*luma*/
+        const auto b2 = p.pull();
+        --size;
+        QOIXX_HPP_WITH_TABLES(
+        static constexpr auto table = create_luma_table();
+        const auto drb = table[b2];
+        )
+        static constexpr int vgv = chunk_tag::luma+40;
+        const int vg = b1 - vgv;
+        QOIXX_HPP_WITH_TABLES(
+        px.r += vg + drb[0];
+        px.g += vg + 8;
+        px.b += vg + drb[1];
+        hash = (static_cast<int>(hash)+hash_diff_table[b1]+luma_hash_diff_table[b2]) % index_size;
+        ) QOIXX_HPP_WITHOUT_TABLES(
+        static constexpr std::uint32_t mask_tail_4 = 0b0000'1111u;
+        px.r += vg + (b2 >> 4);
+        px.g += vg + 8;
+        px.b += vg + (b2 & mask_tail_4);
+        )
+      }
+      else{
+        /*diff*/
+        QOIXX_HPP_WITH_TABLES(
+        static constexpr auto table = create_diff_table();
+        const auto drgb = table[b1];
+        px.r += drgb[0];
+        px.g += drgb[1];
+        px.b += drgb[2];
+        hash = (static_cast<int>(hash)+hash_diff_table[b1]) % index_size;
+        ) QOIXX_HPP_WITHOUT_TABLES(
+        static constexpr std::uint32_t mask_tail_2 = 0b0000'0011u;
+        px.r += ((b1 >> 4) & mask_tail_2) - 2;
+        px.g += ((b1 >> 2) & mask_tail_2) - 2;
+        px.b += ( b1       & mask_tail_2) - 2;
+        )
+      }
+#undef QOIXX_HPP_DECODE_RUN
+      if constexpr(std::is_same<rgba_t, qoi::rgba_t>::value)
+        index[QOIXX_HPP_WITH_TABLES(hash) QOIXX_HPP_WITHOUT_TABLES(px.hash() % index_size)] = px;
+      else
+        efficient_memcpy<Channels>(index + QOIXX_HPP_WITH_TABLES(hash) QOIXX_HPP_WITHOUT_TABLES(px.hash() % index_size), &px);
+#undef QOIXX_HPP_WITHOUT_TABLES
+#undef QOIXX_HPP_WITH_TABLES
+#ifdef QOIXX_HPP_DECODE_WITH_TABLES_NOT_DEFINED
+#undef QOIXX_DECODE_WITH_TABLES
+#undef QOIXX_HPP_DECODE_WITH_TABLES_NOT_DEFINED
+#endif
+
+      push<Channels>(pixels, &px);
+    };
+
+    while(px_len--)[[likely]]{
+      f();
+      if(size < sizeof(padding))[[unlikely]]{
+        throw std::runtime_error("qoixx::qoi::decode: insufficient input data");
+      }
+    }
+  }
+ public:
+  template<typename T, typename U>
+  static inline T encode(const U& u, const desc& desc){
+    using coU = container_operator<U>;
+    if(!coU::valid(u) || coU::size(u) < desc.width*desc.height*desc.channels || desc.width == 0 || desc.height == 0 || desc.channels < 3 || desc.channels > 4 || desc.height >= pixels_max / desc.width)[[unlikely]]
+      throw std::invalid_argument{"qoixx::qoi::encode: invalid argument"};
+
+    const auto max_size = static_cast<std::size_t>(desc.width) * desc.height * (desc.channels + 1) + header_size + sizeof(padding);
+    using coT = container_operator<T>;
+    T data = coT::construct(max_size);
+    auto p = coT::create_pusher(data);
+    auto puller = coU::create_puller(u);
+
+    write_32(p, magic);
+    write_32(p, desc.width);
+    write_32(p, desc.height);
+    p.push(desc.channels);
+    p.push(static_cast<std::uint8_t>(desc.colorspace));
+
+#ifndef QOIXX_NO_SIMD
+#if defined(__ARM_FEATURE_SVE)
+    if constexpr(coT::pusher::is_contiguous && coU::puller::is_contiguous)
+      if(desc.channels == 4)
+#define QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH \
+        switch(svcntb()){ \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(128); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(256); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(384); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(512); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(640); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(768); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(896); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1024); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1152); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1280); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1408); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1536); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1664); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1792); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1920); \
+          QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(2048); \
+          default: while(true){/*unreachable*/} \
+        }
+#define QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(i) case i/8: encode_sve<i, 4>(p, puller, desc); break
+        QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH
+#undef QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE
+      else
+#define QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(i) case i/8: encode_sve<i, 3>(p, puller, desc); break;
+        QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH
+#undef QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE
+#undef QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH
+    else
+#elif defined(__aarch64__)
+    if constexpr(coT::pusher::is_contiguous && coU::puller::is_contiguous)
+      if(desc.channels == 4)
+        encode_neon<4>(p, puller, desc);
+      else
+        encode_neon<3>(p, puller, desc);
+    else
+#elif defined(__AVX2__)
+    if constexpr(coT::pusher::is_contiguous && coU::puller::is_contiguous)
+      if(desc.channels == 4)
+        encode_avx2<4>(p, puller, desc);
+      else
+        encode_avx2<3>(p, puller, desc);
+    else
+#endif
+#endif
+      if(desc.channels == 4)
+        encode_impl<4>(p, puller, desc);
+      else
+        encode_impl<3>(p, puller, desc);
+
+    return p.finalize();
+  }
+  template<typename T, typename U>
+  requires(sizeof(U) == 1)
+  static inline T encode(const U* pixels, std::size_t size, const desc& desc){
+    return encode<T>(std::make_pair(pixels, size), desc);
+  }
+  template<typename T, typename U>
+  requires (!std::is_pointer_v<U>)
+  static inline std::pair<T, desc> decode(const U& u, std::uint8_t channels = 0){
+    using coU = container_operator<U>;
+    const auto size = coU::size(u);
+    if(!coU::valid(u) || size < header_size + sizeof(padding) || (channels != 0 && channels != 3 && channels != 4))[[unlikely]]
+      throw std::invalid_argument{"qoixx::qoi::decode: invalid argument"};
+    auto puller = coU::create_puller(u);
+
+    const auto d = decode_header(puller);
+    if(channels == 0)
+      channels = d.channels;
+
+    const std::size_t px_len = static_cast<std::size_t>(d.width) * d.height;
+    using coT = container_operator<T>;
+    T data = coT::construct(px_len*channels);
+    auto p = coT::create_pusher(data);
+
+    if(channels == 4)
+      decode_impl<4>(p, puller, px_len, size);
+    else
+      decode_impl<3>(p, puller, px_len, size);
+
+    return std::make_pair(std::move(p.finalize()), d);
+  }
+  template<typename T, typename U>
+  requires(sizeof(U) == 1)
+  static inline std::pair<T, desc> decode(const U* pixels, std::size_t size, std::uint8_t channels = 0){
+    return decode<T>(std::make_pair(pixels, size), channels);
+  }
+};
+
+}
+
+#endif //QOIXX_HPP_INCLUDED_
\ No newline at end of file
diff --git a/shared/raudio.c b/shared/raudio.c
new file mode 100644
index 0000000..50438f8
--- /dev/null
+++ b/shared/raudio.c
@@ -0,0 +1,2797 @@
+/**********************************************************************************************
+*
+*   raudio v1.1 - A simple and easy-to-use audio library based on miniaudio
+*
+*   FEATURES:
+*       - Manage audio device (init/close)
+*       - Manage raw audio context
+*       - Manage mixing channels
+*       - Load and unload audio files
+*       - Format wave data (sample rate, size, channels)
+*       - Play/Stop/Pause/Resume loaded audio
+*
+*   CONFIGURATION:
+*       #define SUPPORT_MODULE_RAUDIO
+*           raudio module is included in the build
+*
+*       #define RAUDIO_STANDALONE
+*           Define to use the module as standalone library (independently of raylib).
+*           Required types and functions are defined in the same module.
+*
+*       #define SUPPORT_FILEFORMAT_WAV
+*       #define SUPPORT_FILEFORMAT_OGG
+*       #define SUPPORT_FILEFORMAT_MP3
+*       #define SUPPORT_FILEFORMAT_QOA
+*       #define SUPPORT_FILEFORMAT_FLAC
+*       #define SUPPORT_FILEFORMAT_XM
+*       #define SUPPORT_FILEFORMAT_MOD
+*           Selected desired fileformats to be supported for loading. Some of those formats are
+*           supported by default, to remove support, just comment unrequired #define in this module
+*
+*   DEPENDENCIES:
+*       miniaudio.h  - Audio device management lib (https://github.com/mackron/miniaudio)
+*       stb_vorbis.h - Ogg audio files loading (http://www.nothings.org/stb_vorbis/)
+*       dr_wav.h     - WAV audio files loading (http://github.com/mackron/dr_libs)
+*       dr_mp3.h     - MP3 audio file loading (https://github.com/mackron/dr_libs)
+*       dr_flac.h    - FLAC audio file loading (https://github.com/mackron/dr_libs)
+*       jar_xm.h     - XM module file loading
+*       jar_mod.h    - MOD audio file loading
+*
+*   CONTRIBUTORS:
+*       David Reid (github: @mackron) (Nov. 2017):
+*           - Complete port to miniaudio library
+*
+*       Joshua Reisenauer (github: @kd7tck) (2015):
+*           - XM audio module support (jar_xm)
+*           - MOD audio module support (jar_mod)
+*           - Mixing channels support
+*           - Raw audio context support
+*
+*
+*   LICENSE: zlib/libpng
+*
+*   Copyright (c) 2013-2025 Ramon Santamaria (@raysan5)
+*
+*   This software is provided "as-is", without any express or implied warranty. In no event
+*   will the authors be held liable for any damages arising from the use of this software.
+*
+*   Permission is granted to anyone to use this software for any purpose, including commercial
+*   applications, and to alter it and redistribute it freely, subject to the following restrictions:
+*
+*     1. The origin of this software must not be misrepresented; you must not claim that you
+*     wrote the original software. If you use this software in a product, an acknowledgment
+*     in the product documentation would be appreciated but is not required.
+*
+*     2. Altered source versions must be plainly marked as such, and must not be misrepresented
+*     as being the original software.
+*
+*     3. This notice may not be removed or altered from any source distribution.
+*
+**********************************************************************************************/
+
+#if defined(RAUDIO_STANDALONE)
+    #include "raudio.h"
+#else
+    #include "raylib.h"         // Declares module functions
+
+    // Check if config flags have been externally provided on compilation line
+    #if !defined(EXTERNAL_CONFIG_FLAGS)
+        #include "config.h"     // Defines module configuration flags
+    #endif
+    #include "utils.h"          // Required for: fopen() Android mapping
+#endif
+
+#if defined(SUPPORT_MODULE_RAUDIO)
+
+#if defined(_WIN32)
+// To avoid conflicting windows.h symbols with raylib, some flags are defined
+// WARNING: Those flags avoid inclusion of some Win32 headers that could be required
+// by user at some point and won't be included...
+//-------------------------------------------------------------------------------------
+
+// If defined, the following flags inhibit definition of the indicated items.
+#define NOGDICAPMASKS     // CC_*, LC_*, PC_*, CP_*, TC_*, RC_
+#define NOVIRTUALKEYCODES // VK_*
+#define NOWINMESSAGES     // WM_*, EM_*, LB_*, CB_*
+#define NOWINSTYLES       // WS_*, CS_*, ES_*, LBS_*, SBS_*, CBS_*
+#define NOSYSMETRICS      // SM_*
+#define NOMENUS           // MF_*
+#define NOICONS           // IDI_*
+#define NOKEYSTATES       // MK_*
+#define NOSYSCOMMANDS     // SC_*
+#define NORASTEROPS       // Binary and Tertiary raster ops
+#define NOSHOWWINDOW      // SW_*
+#define OEMRESOURCE       // OEM Resource values
+#define NOATOM            // Atom Manager routines
+#define NOCLIPBOARD       // Clipboard routines
+#define NOCOLOR           // Screen colors
+#define NOCTLMGR          // Control and Dialog routines
+#define NODRAWTEXT        // DrawText() and DT_*
+#define NOGDI             // All GDI defines and routines
+#define NOKERNEL          // All KERNEL defines and routines
+#define NOUSER            // All USER defines and routines
+//#define NONLS             // All NLS defines and routines
+#define NOMB              // MB_* and MessageBox()
+#define NOMEMMGR          // GMEM_*, LMEM_*, GHND, LHND, associated routines
+#define NOMETAFILE        // typedef METAFILEPICT
+#define NOMINMAX          // Macros min(a,b) and max(a,b)
+#define NOMSG             // typedef MSG and associated routines
+#define NOOPENFILE        // OpenFile(), OemToAnsi, AnsiToOem, and OF_*
+#define NOSCROLL          // SB_* and scrolling routines
+#define NOSERVICE         // All Service Controller routines, SERVICE_ equates, etc.
+#define NOSOUND           // Sound driver routines
+#define NOTEXTMETRIC      // typedef TEXTMETRIC and associated routines
+#define NOWH              // SetWindowsHook and WH_*
+#define NOWINOFFSETS      // GWL_*, GCL_*, associated routines
+#define NOCOMM            // COMM driver routines
+#define NOKANJI           // Kanji support stuff.
+#define NOHELP            // Help engine interface.
+#define NOPROFILER        // Profiler interface.
+#define NODEFERWINDOWPOS  // DeferWindowPos routines
+#define NOMCX             // Modem Configuration Extensions
+
+// Type required before windows.h inclusion
+typedef struct tagMSG *LPMSG;
+
+#include <windows.h>        // Windows functionality (miniaudio)
+
+// Type required by some unused function...
+typedef struct tagBITMAPINFOHEADER {
+  DWORD biSize;
+  LONG  biWidth;
+  LONG  biHeight;
+  WORD  biPlanes;
+  WORD  biBitCount;
+  DWORD biCompression;
+  DWORD biSizeImage;
+  LONG  biXPelsPerMeter;
+  LONG  biYPelsPerMeter;
+  DWORD biClrUsed;
+  DWORD biClrImportant;
+} BITMAPINFOHEADER, *PBITMAPINFOHEADER;
+
+#include <objbase.h>        // Component Object Model (COM) header
+#include <mmreg.h>          // Windows Multimedia, defines some WAVE structs
+#include <mmsystem.h>       // Windows Multimedia, used by Windows GDI, defines DIBINDEX macro
+
+// Some required types defined for MSVC/TinyC compiler
+#if defined(_MSC_VER) || defined(__TINYC__)
+    #include "propidl.h"
+#endif
+#endif
+
+#define MA_MALLOC RL_MALLOC
+#define MA_FREE RL_FREE
+
+#define MA_NO_JACK
+#define MA_NO_WAV
+#define MA_NO_FLAC
+#define MA_NO_MP3
+#define MA_NO_RESOURCE_MANAGER
+#define MA_NO_NODE_GRAPH
+#define MA_NO_ENGINE
+#define MA_NO_GENERATION
+
+// Threading model: Default: [0] COINIT_MULTITHREADED: COM calls objects on any thread (free threading)
+#define MA_COINIT_VALUE  2              // [2] COINIT_APARTMENTTHREADED: Each object has its own thread (apartment model)
+
+#define MINIAUDIO_IMPLEMENTATION
+//#define MA_DEBUG_OUTPUT
+#include "external/miniaudio.h"         // Audio device initialization and management
+#undef PlaySound                        // Win32 API: windows.h > mmsystem.h defines PlaySound macro
+
+#include <stdlib.h>                     // Required for: malloc(), free()
+#include <stdio.h>                      // Required for: FILE, fopen(), fclose(), fread()
+#include <string.h>                     // Required for: strcmp() [Used in IsFileExtension(), LoadWaveFromMemory(), LoadMusicStreamFromMemory()]
+
+#if defined(RAUDIO_STANDALONE)
+    #ifndef TRACELOG
+        #define TRACELOG(level, ...)    printf(__VA_ARGS__)
+    #endif
+
+    // Allow custom memory allocators
+    #ifndef RL_MALLOC
+        #define RL_MALLOC(sz)           malloc(sz)
+    #endif
+    #ifndef RL_CALLOC
+        #define RL_CALLOC(n,sz)         calloc(n,sz)
+    #endif
+    #ifndef RL_REALLOC
+        #define RL_REALLOC(ptr,sz)      realloc(ptr,sz)
+    #endif
+    #ifndef RL_FREE
+        #define RL_FREE(ptr)            free(ptr)
+    #endif
+#endif
+
+#if defined(SUPPORT_FILEFORMAT_WAV)
+    #define DRWAV_MALLOC RL_MALLOC
+    #define DRWAV_REALLOC RL_REALLOC
+    #define DRWAV_FREE RL_FREE
+
+    #define DR_WAV_IMPLEMENTATION
+    #include "external/dr_wav.h"        // WAV loading functions
+#endif
+
+#if defined(SUPPORT_FILEFORMAT_OGG)
+    // TODO: Remap stb_vorbis malloc()/free() calls to RL_MALLOC/RL_FREE
+    #include "external/stb_vorbis.c"    // OGG loading functions
+#endif
+
+#if defined(SUPPORT_FILEFORMAT_MP3)
+    #define DRMP3_MALLOC RL_MALLOC
+    #define DRMP3_REALLOC RL_REALLOC
+    #define DRMP3_FREE RL_FREE
+
+    #define DR_MP3_IMPLEMENTATION
+    #include "external/dr_mp3.h"        // MP3 loading functions
+#endif
+
+#if defined(SUPPORT_FILEFORMAT_QOA)
+    #define QOA_MALLOC RL_MALLOC
+    #define QOA_FREE RL_FREE
+
+    #if defined(_MSC_VER)           // Disable some MSVC warning
+        #pragma warning(push)
+        #pragma warning(disable : 4018)
+        #pragma warning(disable : 4267)
+        #pragma warning(disable : 4244)
+    #endif
+
+    #define QOA_IMPLEMENTATION
+    #include "external/qoa.h"           // QOA loading and saving functions
+    #include "external/qoaplay.c"       // QOA stream playing helper functions
+
+    #if defined(_MSC_VER)
+        #pragma warning(pop)        // Disable MSVC warning suppression
+    #endif
+#endif
+
+#if defined(SUPPORT_FILEFORMAT_FLAC)
+    #define DRFLAC_MALLOC RL_MALLOC
+    #define DRFLAC_REALLOC RL_REALLOC
+    #define DRFLAC_FREE RL_FREE
+
+    #define DR_FLAC_IMPLEMENTATION
+    #define DR_FLAC_NO_WIN32_IO
+    #include "external/dr_flac.h"       // FLAC loading functions
+#endif
+
+#if defined(SUPPORT_FILEFORMAT_XM)
+    #define JARXM_MALLOC RL_MALLOC
+    #define JARXM_FREE RL_FREE
+
+    #if defined(_MSC_VER)           // Disable some MSVC warning
+        #pragma warning(push)
+        #pragma warning(disable : 4244)
+    #endif
+
+    #define JAR_XM_IMPLEMENTATION
+    #include "external/jar_xm.h"        // XM loading functions
+
+    #if defined(_MSC_VER)
+        #pragma warning(pop)        // Disable MSVC warning suppression
+    #endif
+#endif
+
+#if defined(SUPPORT_FILEFORMAT_MOD)
+    #define JARMOD_MALLOC RL_MALLOC
+    #define JARMOD_FREE RL_FREE
+
+    #define JAR_MOD_IMPLEMENTATION
+    #include "external/jar_mod.h"       // MOD loading functions
+#endif
+
+//----------------------------------------------------------------------------------
+// Defines and Macros
+//----------------------------------------------------------------------------------
+#ifndef AUDIO_DEVICE_FORMAT
+    #define AUDIO_DEVICE_FORMAT    ma_format_f32    // Device output format (float-32bit)
+#endif
+#ifndef AUDIO_DEVICE_CHANNELS
+    #define AUDIO_DEVICE_CHANNELS              2    // Device output channels: stereo
+#endif
+#ifndef AUDIO_DEVICE_SAMPLE_RATE
+    #define AUDIO_DEVICE_SAMPLE_RATE           0    // Device output sample rate
+#endif
+
+#ifndef MAX_AUDIO_BUFFER_POOL_CHANNELS
+    #define MAX_AUDIO_BUFFER_POOL_CHANNELS    16    // Audio pool channels
+#endif
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+#if defined(RAUDIO_STANDALONE)
+// Trace log level
+// NOTE: Organized by priority level
+typedef enum {
+    LOG_ALL = 0,        // Display all logs
+    LOG_TRACE,          // Trace logging, intended for internal use only
+    LOG_DEBUG,          // Debug logging, used for internal debugging, it should be disabled on release builds
+    LOG_INFO,           // Info logging, used for program execution info
+    LOG_WARNING,        // Warning logging, used on recoverable failures
+    LOG_ERROR,          // Error logging, used on unrecoverable failures
+    LOG_FATAL,          // Fatal logging, used to abort program: exit(EXIT_FAILURE)
+    LOG_NONE            // Disable logging
+} TraceLogLevel;
+#endif
+
+// Music context type
+// NOTE: Depends on data structure provided by the library
+// in charge of reading the different file types
+typedef enum {
+    MUSIC_AUDIO_NONE = 0,   // No audio context loaded
+    MUSIC_AUDIO_WAV,        // WAV audio context
+    MUSIC_AUDIO_OGG,        // OGG audio context
+    MUSIC_AUDIO_FLAC,       // FLAC audio context
+    MUSIC_AUDIO_MP3,        // MP3 audio context
+    MUSIC_AUDIO_QOA,        // QOA audio context
+    MUSIC_MODULE_XM,        // XM module audio context
+    MUSIC_MODULE_MOD        // MOD module audio context
+} MusicContextType;
+
+// NOTE: Different logic is used when feeding data to the playback device
+// depending on whether data is streamed (Music vs Sound)
+typedef enum {
+    AUDIO_BUFFER_USAGE_STATIC = 0,
+    AUDIO_BUFFER_USAGE_STREAM
+} AudioBufferUsage;
+
+// Audio buffer struct
+struct rAudioBuffer {
+    ma_data_converter converter;    // Audio data converter
+
+    AudioCallback callback;         // Audio buffer callback for buffer filling on audio threads
+    rAudioProcessor *processor;     // Audio processor
+
+    float volume;                   // Audio buffer volume
+    float pitch;                    // Audio buffer pitch
+    float pan;                      // Audio buffer pan (0.0f to 1.0f)
+
+    bool playing;                   // Audio buffer state: AUDIO_PLAYING
+    bool paused;                    // Audio buffer state: AUDIO_PAUSED
+    bool looping;                   // Audio buffer looping, default to true for AudioStreams
+    int usage;                      // Audio buffer usage mode: STATIC or STREAM
+
+    bool isSubBufferProcessed[2];   // SubBuffer processed (virtual double buffer)
+    unsigned int sizeInFrames;      // Total buffer size in frames
+    unsigned int frameCursorPos;    // Frame cursor position
+    unsigned int framesProcessed;   // Total frames processed in this buffer (required for play timing)
+
+    unsigned char *data;            // Data buffer, on music stream keeps filling
+
+    rAudioBuffer *next;             // Next audio buffer on the list
+    rAudioBuffer *prev;             // Previous audio buffer on the list
+};
+
+// Audio processor struct
+// NOTE: Useful to apply effects to an AudioBuffer
+struct rAudioProcessor {
+    AudioCallback process;          // Processor callback function
+    rAudioProcessor *next;          // Next audio processor on the list
+    rAudioProcessor *prev;          // Previous audio processor on the list
+};
+
+#define AudioBuffer rAudioBuffer    // HACK: To avoid CoreAudio (macOS) symbol collision
+
+// Audio data context
+typedef struct AudioData {
+    struct {
+        ma_context context;         // miniaudio context data
+        ma_device device;           // miniaudio device
+        ma_mutex lock;              // miniaudio mutex lock
+        bool isReady;               // Check if audio device is ready
+        size_t pcmBufferSize;       // Pre-allocated buffer size
+        void *pcmBuffer;            // Pre-allocated buffer to read audio data from file/memory
+    } System;
+    struct {
+        AudioBuffer *first;         // Pointer to first AudioBuffer in the list
+        AudioBuffer *last;          // Pointer to last AudioBuffer in the list
+        int defaultSize;            // Default audio buffer size for audio streams
+    } Buffer;
+    rAudioProcessor *mixedProcessor;
+} AudioData;
+
+//----------------------------------------------------------------------------------
+// Global Variables Definition
+//----------------------------------------------------------------------------------
+static AudioData AUDIO = {          // Global AUDIO context
+
+    // NOTE: Music buffer size is defined by number of samples, independent of sample size and channels number
+    // After some math, considering a sampleRate of 48000, a buffer refill rate of 1/60 seconds and a
+    // standard double-buffering system, a 4096 samples buffer has been chosen, it should be enough
+    // In case of music-stalls, just increase this number
+    .Buffer.defaultSize = 0,
+    .mixedProcessor = NULL
+};
+
+//----------------------------------------------------------------------------------
+// Module specific Functions Declaration
+//----------------------------------------------------------------------------------
+static void OnLog(void *pUserData, ma_uint32 level, const char *pMessage);
+static void OnSendAudioDataToDevice(ma_device *pDevice, void *pFramesOut, const void *pFramesInput, ma_uint32 frameCount);
+static void MixAudioFrames(float *framesOut, const float *framesIn, ma_uint32 frameCount, AudioBuffer *buffer);
+
+#if defined(RAUDIO_STANDALONE)
+static bool IsFileExtension(const char *fileName, const char *ext); // Check file extension
+static const char *GetFileExtension(const char *fileName);          // Get pointer to extension for a filename string (includes the dot: .png)
+static const char *GetFileName(const char *filePath);               // Get pointer to filename for a path string
+static const char *GetFileNameWithoutExt(const char *filePath);     // Get filename string without extension (uses static string)
+
+static unsigned char *LoadFileData(const char *fileName, int *dataSize);    // Load file data as byte array (read)
+static bool SaveFileData(const char *fileName, void *data, int dataSize);   // Save data to file from byte array (write)
+static bool SaveFileText(const char *fileName, char *text);         // Save text data to file (write), string must be '\0' terminated
+#endif
+
+//----------------------------------------------------------------------------------
+// AudioBuffer management functions declaration
+// NOTE: Those functions are not exposed by raylib... for the moment
+//----------------------------------------------------------------------------------
+AudioBuffer *LoadAudioBuffer(ma_format format, ma_uint32 channels, ma_uint32 sampleRate, ma_uint32 sizeInFrames, int usage);
+void UnloadAudioBuffer(AudioBuffer *buffer);
+
+bool IsAudioBufferPlaying(AudioBuffer *buffer);
+void PlayAudioBuffer(AudioBuffer *buffer);
+void StopAudioBuffer(AudioBuffer *buffer);
+void PauseAudioBuffer(AudioBuffer *buffer);
+void ResumeAudioBuffer(AudioBuffer *buffer);
+void SetAudioBufferVolume(AudioBuffer *buffer, float volume);
+void SetAudioBufferPitch(AudioBuffer *buffer, float pitch);
+void SetAudioBufferPan(AudioBuffer *buffer, float pan);
+void TrackAudioBuffer(AudioBuffer *buffer);
+void UntrackAudioBuffer(AudioBuffer *buffer);
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition - Audio Device initialization and Closing
+//----------------------------------------------------------------------------------
+// Initialize audio device
+void InitAudioDevice(void)
+{
+    // Init audio context
+    ma_context_config ctxConfig = ma_context_config_init();
+    ma_log_callback_init(OnLog, NULL);
+
+    ma_result result = ma_context_init(NULL, 0, &ctxConfig, &AUDIO.System.context);
+    if (result != MA_SUCCESS)
+    {
+        TRACELOG(LOG_WARNING, "AUDIO: Failed to initialize context");
+        return;
+    }
+
+    // Init audio device
+    // NOTE: Using the default device. Format is floating point because it simplifies mixing.
+    ma_device_config config = ma_device_config_init(ma_device_type_playback);
+    config.playback.pDeviceID = NULL;  // NULL for the default playback AUDIO.System.device.
+    config.playback.format = AUDIO_DEVICE_FORMAT;
+    config.playback.channels = AUDIO_DEVICE_CHANNELS;
+    config.capture.pDeviceID = NULL;  // NULL for the default capture AUDIO.System.device.
+    config.capture.format = ma_format_s16;
+    config.capture.channels = 1;
+    config.sampleRate = AUDIO_DEVICE_SAMPLE_RATE;
+    config.dataCallback = OnSendAudioDataToDevice;
+    config.pUserData = NULL;
+
+    result = ma_device_init(&AUDIO.System.context, &config, &AUDIO.System.device);
+    if (result != MA_SUCCESS)
+    {
+        TRACELOG(LOG_WARNING, "AUDIO: Failed to initialize playback device");
+        ma_context_uninit(&AUDIO.System.context);
+        return;
+    }
+
+    // Mixing happens on a separate thread which means we need to synchronize. I'm using a mutex here to make things simple, but may
+    // want to look at something a bit smarter later on to keep everything real-time, if that's necessary.
+    if (ma_mutex_init(&AUDIO.System.lock) != MA_SUCCESS)
+    {
+        TRACELOG(LOG_WARNING, "AUDIO: Failed to create mutex for mixing");
+        ma_device_uninit(&AUDIO.System.device);
+        ma_context_uninit(&AUDIO.System.context);
+        return;
+    }
+
+    // Keep the device running the whole time. May want to consider doing something a bit smarter and only have the device running
+    // while there's at least one sound being played.
+    result = ma_device_start(&AUDIO.System.device);
+    if (result != MA_SUCCESS)
+    {
+        TRACELOG(LOG_WARNING, "AUDIO: Failed to start playback device");
+        ma_device_uninit(&AUDIO.System.device);
+        ma_context_uninit(&AUDIO.System.context);
+        return;
+    }
+
+    TRACELOG(LOG_INFO, "AUDIO: Device initialized successfully");
+    TRACELOG(LOG_INFO, "    > Backend:       miniaudio / %s", ma_get_backend_name(AUDIO.System.context.backend));
+    TRACELOG(LOG_INFO, "    > Format:        %s -> %s", ma_get_format_name(AUDIO.System.device.playback.format), ma_get_format_name(AUDIO.System.device.playback.internalFormat));
+    TRACELOG(LOG_INFO, "    > Channels:      %d -> %d", AUDIO.System.device.playback.channels, AUDIO.System.device.playback.internalChannels);
+    TRACELOG(LOG_INFO, "    > Sample rate:   %d -> %d", AUDIO.System.device.sampleRate, AUDIO.System.device.playback.internalSampleRate);
+    TRACELOG(LOG_INFO, "    > Periods size:  %d", AUDIO.System.device.playback.internalPeriodSizeInFrames*AUDIO.System.device.playback.internalPeriods);
+
+    AUDIO.System.isReady = true;
+}
+
+// Close the audio device for all contexts
+void CloseAudioDevice(void)
+{
+    if (AUDIO.System.isReady)
+    {
+        ma_mutex_uninit(&AUDIO.System.lock);
+        ma_device_uninit(&AUDIO.System.device);
+        ma_context_uninit(&AUDIO.System.context);
+
+        AUDIO.System.isReady = false;
+        RL_FREE(AUDIO.System.pcmBuffer);
+        AUDIO.System.pcmBuffer = NULL;
+        AUDIO.System.pcmBufferSize = 0;
+
+        TRACELOG(LOG_INFO, "AUDIO: Device closed successfully");
+    }
+    else TRACELOG(LOG_WARNING, "AUDIO: Device could not be closed, not currently initialized");
+}
+
+// Check if device has been initialized successfully
+bool IsAudioDeviceReady(void)
+{
+    return AUDIO.System.isReady;
+}
+
+// Set master volume (listener)
+void SetMasterVolume(float volume)
+{
+    ma_device_set_master_volume(&AUDIO.System.device, volume);
+}
+
+// Get master volume (listener)
+float GetMasterVolume(void)
+{
+    float volume = 0.0f;
+    ma_device_get_master_volume(&AUDIO.System.device, &volume);
+    return volume;
+}
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition - Audio Buffer management
+//----------------------------------------------------------------------------------
+
+// Initialize a new audio buffer (filled with silence)
+AudioBuffer *LoadAudioBuffer(ma_format format, ma_uint32 channels, ma_uint32 sampleRate, ma_uint32 sizeInFrames, int usage)
+{
+    AudioBuffer *audioBuffer = (AudioBuffer *)RL_CALLOC(1, sizeof(AudioBuffer));
+
+    if (audioBuffer == NULL)
+    {
+        TRACELOG(LOG_WARNING, "AUDIO: Failed to allocate memory for buffer");
+        return NULL;
+    }
+
+    if (sizeInFrames > 0) audioBuffer->data = RL_CALLOC(sizeInFrames*channels*ma_get_bytes_per_sample(format), 1);
+
+    // Audio data runs through a format converter
+    ma_data_converter_config converterConfig = ma_data_converter_config_init(format, AUDIO_DEVICE_FORMAT, channels, AUDIO_DEVICE_CHANNELS, sampleRate, AUDIO.System.device.sampleRate);
+    converterConfig.allowDynamicSampleRate = true;
+
+    ma_result result = ma_data_converter_init(&converterConfig, NULL, &audioBuffer->converter);
+
+    if (result != MA_SUCCESS)
+    {
+        TRACELOG(LOG_WARNING, "AUDIO: Failed to create data conversion pipeline");
+        RL_FREE(audioBuffer);
+        return NULL;
+    }
+
+    // Init audio buffer values
+    audioBuffer->volume = 1.0f;
+    audioBuffer->pitch = 1.0f;
+    audioBuffer->pan = 0.5f;
+
+    audioBuffer->callback = NULL;
+    audioBuffer->processor = NULL;
+
+    audioBuffer->playing = false;
+    audioBuffer->paused = false;
+    audioBuffer->looping = false;
+
+    audioBuffer->usage = usage;
+    audioBuffer->frameCursorPos = 0;
+    audioBuffer->sizeInFrames = sizeInFrames;
+
+    // Buffers should be marked as processed by default so that a call to
+    // UpdateAudioStream() immediately after initialization works correctly
+    audioBuffer->isSubBufferProcessed[0] = true;
+    audioBuffer->isSubBufferProcessed[1] = true;
+
+    // Track audio buffer to linked list next position
+    TrackAudioBuffer(audioBuffer);
+
+    return audioBuffer;
+}
+
+// Delete an audio buffer
+void UnloadAudioBuffer(AudioBuffer *buffer)
+{
+    if (buffer != NULL)
+    {
+        ma_data_converter_uninit(&buffer->converter, NULL);
+        UntrackAudioBuffer(buffer);
+        RL_FREE(buffer->data);
+        RL_FREE(buffer);
+    }
+}
+
+// Check if an audio buffer is playing
+bool IsAudioBufferPlaying(AudioBuffer *buffer)
+{
+    bool result = false;
+
+    if (buffer != NULL) result = (buffer->playing && !buffer->paused);
+
+    return result;
+}
+
+// Play an audio buffer
+// NOTE: Buffer is restarted to the start.
+// Use PauseAudioBuffer() and ResumeAudioBuffer() if the playback position should be maintained.
+void PlayAudioBuffer(AudioBuffer *buffer)
+{
+    if (buffer != NULL)
+    {
+        buffer->playing = true;
+        buffer->paused = false;
+        buffer->frameCursorPos = 0;
+    }
+}
+
+// Stop an audio buffer
+void StopAudioBuffer(AudioBuffer *buffer)
+{
+    if (buffer != NULL)
+    {
+        if (IsAudioBufferPlaying(buffer))
+        {
+            buffer->playing = false;
+            buffer->paused = false;
+            buffer->frameCursorPos = 0;
+            buffer->framesProcessed = 0;
+            buffer->isSubBufferProcessed[0] = true;
+            buffer->isSubBufferProcessed[1] = true;
+        }
+    }
+}
+
+// Pause an audio buffer
+void PauseAudioBuffer(AudioBuffer *buffer)
+{
+    if (buffer != NULL) buffer->paused = true;
+}
+
+// Resume an audio buffer
+void ResumeAudioBuffer(AudioBuffer *buffer)
+{
+    if (buffer != NULL) buffer->paused = false;
+}
+
+// Set volume for an audio buffer
+void SetAudioBufferVolume(AudioBuffer *buffer, float volume)
+{
+    if (buffer != NULL) buffer->volume = volume;
+}
+
+// Set pitch for an audio buffer
+void SetAudioBufferPitch(AudioBuffer *buffer, float pitch)
+{
+    if ((buffer != NULL) && (pitch > 0.0f))
+    {
+        // Pitching is just an adjustment of the sample rate.
+        // Note that this changes the duration of the sound:
+        //  - higher pitches will make the sound faster
+        //  - lower pitches make it slower
+        ma_uint32 outputSampleRate = (ma_uint32)((float)buffer->converter.sampleRateOut/pitch);
+        ma_data_converter_set_rate(&buffer->converter, buffer->converter.sampleRateIn, outputSampleRate);
+
+        buffer->pitch = pitch;
+    }
+}
+
+// Set pan for an audio buffer
+void SetAudioBufferPan(AudioBuffer *buffer, float pan)
+{
+    if (pan < 0.0f) pan = 0.0f;
+    else if (pan > 1.0f) pan = 1.0f;
+
+    if (buffer != NULL) buffer->pan = pan;
+}
+
+// Track audio buffer to linked list next position
+void TrackAudioBuffer(AudioBuffer *buffer)
+{
+    ma_mutex_lock(&AUDIO.System.lock);
+    {
+        if (AUDIO.Buffer.first == NULL) AUDIO.Buffer.first = buffer;
+        else
+        {
+            AUDIO.Buffer.last->next = buffer;
+            buffer->prev = AUDIO.Buffer.last;
+        }
+
+        AUDIO.Buffer.last = buffer;
+    }
+    ma_mutex_unlock(&AUDIO.System.lock);
+}
+
+// Untrack audio buffer from linked list
+void UntrackAudioBuffer(AudioBuffer *buffer)
+{
+    ma_mutex_lock(&AUDIO.System.lock);
+    {
+        if (buffer->prev == NULL) AUDIO.Buffer.first = buffer->next;
+        else buffer->prev->next = buffer->next;
+
+        if (buffer->next == NULL) AUDIO.Buffer.last = buffer->prev;
+        else buffer->next->prev = buffer->prev;
+
+        buffer->prev = NULL;
+        buffer->next = NULL;
+    }
+    ma_mutex_unlock(&AUDIO.System.lock);
+}
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition - Sounds loading and playing (.WAV)
+//----------------------------------------------------------------------------------
+
+// Load wave data from file
+Wave LoadWave(const char *fileName)
+{
+    Wave wave = { 0 };
+
+    // Loading file to memory
+    int dataSize = 0;
+    unsigned char *fileData = LoadFileData(fileName, &dataSize);
+
+    // Loading wave from memory data
+    if (fileData != NULL) wave = LoadWaveFromMemory(GetFileExtension(fileName), fileData, dataSize);
+
+    RL_FREE(fileData);
+
+    return wave;
+}
+
+// Load wave from memory buffer, fileType refers to extension: i.e. ".wav"
+// WARNING: File extension must be provided in lower-case
+Wave LoadWaveFromMemory(const char *fileType, const unsigned char *fileData, int dataSize)
+{
+    Wave wave = { 0 };
+
+    if (false) { }
+#if defined(SUPPORT_FILEFORMAT_WAV)
+    else if ((strcmp(fileType, ".wav") == 0) || (strcmp(fileType, ".WAV") == 0))
+    {
+        drwav wav = { 0 };
+        bool success = drwav_init_memory(&wav, fileData, dataSize, NULL);
+
+        if (success)
+        {
+            wave.frameCount = (unsigned int)wav.totalPCMFrameCount;
+            wave.sampleRate = wav.sampleRate;
+            wave.sampleSize = 16;
+            wave.channels = wav.channels;
+            wave.data = (short *)RL_MALLOC(wave.frameCount*wave.channels*sizeof(short));
+
+            // NOTE: We are forcing conversion to 16bit sample size on reading
+            drwav_read_pcm_frames_s16(&wav, wav.totalPCMFrameCount, wave.data);
+        }
+        else TRACELOG(LOG_WARNING, "WAVE: Failed to load WAV data");
+
+        drwav_uninit(&wav);
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_OGG)
+    else if ((strcmp(fileType, ".ogg") == 0) || (strcmp(fileType, ".OGG") == 0))
+    {
+        stb_vorbis *oggData = stb_vorbis_open_memory((unsigned char *)fileData, dataSize, NULL, NULL);
+
+        if (oggData != NULL)
+        {
+            stb_vorbis_info info = stb_vorbis_get_info(oggData);
+
+            wave.sampleRate = info.sample_rate;
+            wave.sampleSize = 16;       // By default, ogg data is 16 bit per sample (short)
+            wave.channels = info.channels;
+            wave.frameCount = (unsigned int)stb_vorbis_stream_length_in_samples(oggData);  // NOTE: It returns frames!
+            wave.data = (short *)RL_MALLOC(wave.frameCount*wave.channels*sizeof(short));
+
+            // NOTE: Get the number of samples to process (be careful! we ask for number of shorts, not bytes!)
+            stb_vorbis_get_samples_short_interleaved(oggData, info.channels, (short *)wave.data, wave.frameCount*wave.channels);
+            stb_vorbis_close(oggData);
+        }
+        else TRACELOG(LOG_WARNING, "WAVE: Failed to load OGG data");
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_MP3)
+    else if ((strcmp(fileType, ".mp3") == 0) || (strcmp(fileType, ".MP3") == 0))
+    {
+        drmp3_config config = { 0 };
+        unsigned long long int totalFrameCount = 0;
+
+        // NOTE: We are forcing conversion to 32bit float sample size on reading
+        wave.data = drmp3_open_memory_and_read_pcm_frames_f32(fileData, dataSize, &config, &totalFrameCount, NULL);
+        wave.sampleSize = 32;
+
+        if (wave.data != NULL)
+        {
+            wave.channels = config.channels;
+            wave.sampleRate = config.sampleRate;
+            wave.frameCount = (int)totalFrameCount;
+        }
+        else TRACELOG(LOG_WARNING, "WAVE: Failed to load MP3 data");
+
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_QOA)
+    else if ((strcmp(fileType, ".qoa") == 0) || (strcmp(fileType, ".QOA") == 0))
+    {
+        qoa_desc qoa = { 0 };
+
+        // NOTE: Returned sample data is always 16 bit?
+        wave.data = qoa_decode(fileData, dataSize, &qoa);
+        wave.sampleSize = 16;
+
+        if (wave.data != NULL)
+        {
+            wave.channels = qoa.channels;
+            wave.sampleRate = qoa.samplerate;
+            wave.frameCount = qoa.samples;
+        }
+        else TRACELOG(LOG_WARNING, "WAVE: Failed to load QOA data");
+
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_FLAC)
+    else if ((strcmp(fileType, ".flac") == 0) || (strcmp(fileType, ".FLAC") == 0))
+    {
+        unsigned long long int totalFrameCount = 0;
+
+        // NOTE: We are forcing conversion to 16bit sample size on reading
+        wave.data = drflac_open_memory_and_read_pcm_frames_s16(fileData, dataSize, &wave.channels, &wave.sampleRate, &totalFrameCount, NULL);
+        wave.sampleSize = 16;
+
+        if (wave.data != NULL) wave.frameCount = (unsigned int)totalFrameCount;
+        else TRACELOG(LOG_WARNING, "WAVE: Failed to load FLAC data");
+    }
+#endif
+    else TRACELOG(LOG_WARNING, "WAVE: Data format not supported");
+
+    TRACELOG(LOG_INFO, "WAVE: Data loaded successfully (%i Hz, %i bit, %i channels)", wave.sampleRate, wave.sampleSize, wave.channels);
+
+    return wave;
+}
+
+// Checks if wave data is ready
+bool IsWaveReady(Wave wave)
+{
+    return ((wave.data != NULL) &&      // Validate wave data available
+            (wave.frameCount > 0) &&    // Validate frame count
+            (wave.sampleRate > 0) &&    // Validate sample rate is supported
+            (wave.sampleSize > 0) &&    // Validate sample size is supported
+            (wave.channels > 0));       // Validate number of channels supported
+}
+
+// Load sound from file
+// NOTE: The entire file is loaded to memory to be played (no-streaming)
+Sound LoadSound(const char *fileName)
+{
+    Wave wave = LoadWave(fileName);
+
+    Sound sound = LoadSoundFromWave(wave);
+
+    UnloadWave(wave);       // Sound is loaded, we can unload wave
+
+    return sound;
+}
+
+// Load sound from wave data
+// NOTE: Wave data must be unallocated manually
+Sound LoadSoundFromWave(Wave wave)
+{
+    Sound sound = { 0 };
+
+    if (wave.data != NULL)
+    {
+        // When using miniaudio we need to do our own mixing.
+        // To simplify this we need convert the format of each sound to be consistent with
+        // the format used to open the playback AUDIO.System.device. We can do this two ways:
+        //
+        //   1) Convert the whole sound in one go at load time (here).
+        //   2) Convert the audio data in chunks at mixing time.
+        //
+        // First option has been selected, format conversion is done on the loading stage.
+        // The downside is that it uses more memory if the original sound is u8 or s16.
+        ma_format formatIn = ((wave.sampleSize == 8)? ma_format_u8 : ((wave.sampleSize == 16)? ma_format_s16 : ma_format_f32));
+        ma_uint32 frameCountIn = wave.frameCount;
+
+        ma_uint32 frameCount = (ma_uint32)ma_convert_frames(NULL, 0, AUDIO_DEVICE_FORMAT, AUDIO_DEVICE_CHANNELS, AUDIO.System.device.sampleRate, NULL, frameCountIn, formatIn, wave.channels, wave.sampleRate);
+        if (frameCount == 0) TRACELOG(LOG_WARNING, "SOUND: Failed to get frame count for format conversion");
+
+        AudioBuffer *audioBuffer = LoadAudioBuffer(AUDIO_DEVICE_FORMAT, AUDIO_DEVICE_CHANNELS, AUDIO.System.device.sampleRate, frameCount, AUDIO_BUFFER_USAGE_STATIC);
+        if (audioBuffer == NULL)
+        {
+            TRACELOG(LOG_WARNING, "SOUND: Failed to create buffer");
+            return sound; // early return to avoid dereferencing the audioBuffer null pointer
+        }
+
+        frameCount = (ma_uint32)ma_convert_frames(audioBuffer->data, frameCount, AUDIO_DEVICE_FORMAT, AUDIO_DEVICE_CHANNELS, AUDIO.System.device.sampleRate, wave.data, frameCountIn, formatIn, wave.channels, wave.sampleRate);
+        if (frameCount == 0) TRACELOG(LOG_WARNING, "SOUND: Failed format conversion");
+
+        sound.frameCount = frameCount;
+        sound.stream.sampleRate = AUDIO.System.device.sampleRate;
+        sound.stream.sampleSize = 32;
+        sound.stream.channels = AUDIO_DEVICE_CHANNELS;
+        sound.stream.buffer = audioBuffer;
+    }
+
+    return sound;
+}
+
+// Clone sound from existing sound data, clone does not own wave data
+// NOTE: Wave data must be unallocated manually and will be shared across all clones
+Sound LoadSoundAlias(Sound source)
+{
+    Sound sound = { 0 };
+
+    if (source.stream.buffer->data != NULL)
+    {
+        AudioBuffer *audioBuffer = LoadAudioBuffer(AUDIO_DEVICE_FORMAT, AUDIO_DEVICE_CHANNELS, AUDIO.System.device.sampleRate, 0, AUDIO_BUFFER_USAGE_STATIC);
+        
+        if (audioBuffer == NULL)
+        {
+            TRACELOG(LOG_WARNING, "SOUND: Failed to create buffer");
+            return sound; // Early return to avoid dereferencing the audioBuffer null pointer
+        }
+        
+        audioBuffer->sizeInFrames = source.stream.buffer->sizeInFrames;
+        audioBuffer->volume = source.stream.buffer->volume;
+        audioBuffer->data = source.stream.buffer->data;
+
+        sound.frameCount = source.frameCount;
+        sound.stream.sampleRate = AUDIO.System.device.sampleRate;
+        sound.stream.sampleSize = 32;
+        sound.stream.channels = AUDIO_DEVICE_CHANNELS;
+        sound.stream.buffer = audioBuffer;
+    }
+
+    return sound;
+}
+
+
+// Checks if a sound is ready
+bool IsSoundReady(Sound sound)
+{
+    return ((sound.frameCount > 0) &&           // Validate frame count
+            (sound.stream.buffer != NULL) &&    // Validate stream buffer
+            (sound.stream.sampleRate > 0) &&    // Validate sample rate is supported
+            (sound.stream.sampleSize > 0) &&    // Validate sample size is supported
+            (sound.stream.channels > 0));       // Validate number of channels supported
+}
+
+// Unload wave data
+void UnloadWave(Wave wave)
+{
+    RL_FREE(wave.data);
+    //TRACELOG(LOG_INFO, "WAVE: Unloaded wave data from RAM");
+}
+
+// Unload sound
+void UnloadSound(Sound sound)
+{
+    UnloadAudioBuffer(sound.stream.buffer);
+    //TRACELOG(LOG_INFO, "SOUND: Unloaded sound data from RAM");
+}
+
+void UnloadSoundAlias(Sound alias)
+{
+    // Untrack and unload just the sound buffer, not the sample data, it is shared with the source for the alias
+    if (alias.stream.buffer != NULL)
+    {
+        ma_data_converter_uninit(&alias.stream.buffer->converter, NULL);
+        UntrackAudioBuffer(alias.stream.buffer);
+        RL_FREE(alias.stream.buffer);
+    }
+}
+
+// Update sound buffer with new data
+void UpdateSound(Sound sound, const void *data, int frameCount)
+{
+    if (sound.stream.buffer != NULL)
+    {
+        StopAudioBuffer(sound.stream.buffer);
+
+        // TODO: May want to lock/unlock this since this data buffer is read at mixing time
+        memcpy(sound.stream.buffer->data, data, frameCount*ma_get_bytes_per_frame(sound.stream.buffer->converter.formatIn, sound.stream.buffer->converter.channelsIn));
+    }
+}
+
+// Export wave data to file
+bool ExportWave(Wave wave, const char *fileName)
+{
+    bool success = false;
+
+    if (false) { }
+#if defined(SUPPORT_FILEFORMAT_WAV)
+    else if (IsFileExtension(fileName, ".wav"))
+    {
+        drwav wav = { 0 };
+        drwav_data_format format = { 0 };
+        format.container = drwav_container_riff;
+        if (wave.sampleSize == 32) format.format = DR_WAVE_FORMAT_IEEE_FLOAT;
+        else format.format = DR_WAVE_FORMAT_PCM;
+        format.channels = wave.channels;
+        format.sampleRate = wave.sampleRate;
+        format.bitsPerSample = wave.sampleSize;
+
+        void *fileData = NULL;
+        size_t fileDataSize = 0;
+        success = drwav_init_memory_write(&wav, &fileData, &fileDataSize, &format, NULL);
+        if (success) success = (int)drwav_write_pcm_frames(&wav, wave.frameCount, wave.data);
+        drwav_result result = drwav_uninit(&wav);
+
+        if (result == DRWAV_SUCCESS) success = SaveFileData(fileName, (unsigned char *)fileData, (unsigned int)fileDataSize);
+
+        drwav_free(fileData, NULL);
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_QOA)
+    else if (IsFileExtension(fileName, ".qoa"))
+    {
+        if (wave.sampleSize == 16)
+        {
+            qoa_desc qoa = { 0 };
+            qoa.channels = wave.channels;
+            qoa.samplerate = wave.sampleRate;
+            qoa.samples = wave.frameCount;
+
+            int bytesWritten = qoa_write(fileName, wave.data, &qoa);
+            if (bytesWritten > 0) success = true;
+        }
+        else TRACELOG(LOG_WARNING, "AUDIO: Wave data must be 16 bit per sample for QOA format export");
+    }
+#endif
+    else if (IsFileExtension(fileName, ".raw"))
+    {
+        // Export raw sample data (without header)
+        // NOTE: It's up to the user to track wave parameters
+        success = SaveFileData(fileName, wave.data, wave.frameCount*wave.channels*wave.sampleSize/8);
+    }
+
+    if (success) TRACELOG(LOG_INFO, "FILEIO: [%s] Wave data exported successfully", fileName);
+    else TRACELOG(LOG_WARNING, "FILEIO: [%s] Failed to export wave data", fileName);
+
+    return success;
+}
+
+// Export wave sample data to code (.h)
+bool ExportWaveAsCode(Wave wave, const char *fileName)
+{
+    bool success = false;
+
+#ifndef TEXT_BYTES_PER_LINE
+    #define TEXT_BYTES_PER_LINE     20
+#endif
+
+    int waveDataSize = wave.frameCount*wave.channels*wave.sampleSize/8;
+
+    // NOTE: Text data buffer size is estimated considering wave data size in bytes
+    // and requiring 6 char bytes for every byte: "0x00, "
+    char *txtData = (char *)RL_CALLOC(waveDataSize*6 + 2000, sizeof(char));
+
+    int byteCount = 0;
+    byteCount += sprintf(txtData + byteCount, "\n//////////////////////////////////////////////////////////////////////////////////\n");
+    byteCount += sprintf(txtData + byteCount, "//                                                                              //\n");
+    byteCount += sprintf(txtData + byteCount, "// WaveAsCode exporter v1.1 - Wave data exported as an array of bytes           //\n");
+    byteCount += sprintf(txtData + byteCount, "//                                                                              //\n");
+    byteCount += sprintf(txtData + byteCount, "// more info and bugs-report:  github.com/raysan5/raylib                        //\n");
+    byteCount += sprintf(txtData + byteCount, "// feedback and support:       ray[at]raylib.com                                //\n");
+    byteCount += sprintf(txtData + byteCount, "//                                                                              //\n");
+    byteCount += sprintf(txtData + byteCount, "// Copyright (c) 2018-2025 Ramon Santamaria (@raysan5)                          //\n");
+    byteCount += sprintf(txtData + byteCount, "//                                                                              //\n");
+    byteCount += sprintf(txtData + byteCount, "//////////////////////////////////////////////////////////////////////////////////\n\n");
+
+    // Get file name from path and convert variable name to uppercase
+    char varFileName[256] = { 0 };
+    strcpy(varFileName, GetFileNameWithoutExt(fileName));
+    for (int i = 0; varFileName[i] != '\0'; i++) if (varFileName[i] >= 'a' && varFileName[i] <= 'z') { varFileName[i] = varFileName[i] - 32; }
+
+    // Add wave information
+    byteCount += sprintf(txtData + byteCount, "// Wave data information\n");
+    byteCount += sprintf(txtData + byteCount, "#define %s_FRAME_COUNT      %u\n", varFileName, wave.frameCount);
+    byteCount += sprintf(txtData + byteCount, "#define %s_SAMPLE_RATE      %u\n", varFileName, wave.sampleRate);
+    byteCount += sprintf(txtData + byteCount, "#define %s_SAMPLE_SIZE      %u\n", varFileName, wave.sampleSize);
+    byteCount += sprintf(txtData + byteCount, "#define %s_CHANNELS         %u\n\n", varFileName, wave.channels);
+
+    // Write wave data as an array of values
+    // Wave data is exported as byte array for 8/16bit and float array for 32bit float data
+    // NOTE: Frame data exported is channel-interlaced: frame01[sampleChannel1, sampleChannel2, ...], frame02[], frame03[]
+    if (wave.sampleSize == 32)
+    {
+        byteCount += sprintf(txtData + byteCount, "static float %s_DATA[%i] = {\n", varFileName, waveDataSize/4);
+        for (int i = 1; i < waveDataSize/4; i++) byteCount += sprintf(txtData + byteCount, ((i%TEXT_BYTES_PER_LINE == 0)? "%.4ff,\n    " : "%.4ff, "), ((float *)wave.data)[i - 1]);
+        byteCount += sprintf(txtData + byteCount, "%.4ff };\n", ((float *)wave.data)[waveDataSize/4 - 1]);
+    }
+    else
+    {
+        byteCount += sprintf(txtData + byteCount, "static unsigned char %s_DATA[%i] = { ", varFileName, waveDataSize);
+        for (int i = 1; i < waveDataSize; i++) byteCount += sprintf(txtData + byteCount, ((i%TEXT_BYTES_PER_LINE == 0)? "0x%x,\n    " : "0x%x, "), ((unsigned char *)wave.data)[i - 1]);
+        byteCount += sprintf(txtData + byteCount, "0x%x };\n", ((unsigned char *)wave.data)[waveDataSize - 1]);
+    }
+
+    // NOTE: Text data length exported is determined by '\0' (NULL) character
+    success = SaveFileText(fileName, txtData);
+
+    RL_FREE(txtData);
+
+    if (success != 0) TRACELOG(LOG_INFO, "FILEIO: [%s] Wave as code exported successfully", fileName);
+    else TRACELOG(LOG_WARNING, "FILEIO: [%s] Failed to export wave as code", fileName);
+
+    return success;
+}
+
+// Play a sound
+void PlaySound(Sound sound)
+{
+    PlayAudioBuffer(sound.stream.buffer);
+}
+
+// Pause a sound
+void PauseSound(Sound sound)
+{
+    PauseAudioBuffer(sound.stream.buffer);
+}
+
+// Resume a paused sound
+void ResumeSound(Sound sound)
+{
+    ResumeAudioBuffer(sound.stream.buffer);
+}
+
+// Stop reproducing a sound
+void StopSound(Sound sound)
+{
+    StopAudioBuffer(sound.stream.buffer);
+}
+
+// Check if a sound is playing
+bool IsSoundPlaying(Sound sound)
+{
+    return IsAudioBufferPlaying(sound.stream.buffer);
+}
+
+// Set volume for a sound
+void SetSoundVolume(Sound sound, float volume)
+{
+    SetAudioBufferVolume(sound.stream.buffer, volume);
+}
+
+// Set pitch for a sound
+void SetSoundPitch(Sound sound, float pitch)
+{
+    SetAudioBufferPitch(sound.stream.buffer, pitch);
+}
+
+// Set pan for a sound
+void SetSoundPan(Sound sound, float pan)
+{
+    SetAudioBufferPan(sound.stream.buffer, pan);
+}
+
+// Convert wave data to desired format
+void WaveFormat(Wave *wave, int sampleRate, int sampleSize, int channels)
+{
+    ma_format formatIn = ((wave->sampleSize == 8)? ma_format_u8 : ((wave->sampleSize == 16)? ma_format_s16 : ma_format_f32));
+    ma_format formatOut = ((sampleSize == 8)? ma_format_u8 : ((sampleSize == 16)? ma_format_s16 : ma_format_f32));
+
+    ma_uint32 frameCountIn = wave->frameCount;
+    ma_uint32 frameCount = (ma_uint32)ma_convert_frames(NULL, 0, formatOut, channels, sampleRate, NULL, frameCountIn, formatIn, wave->channels, wave->sampleRate);
+
+    if (frameCount == 0)
+    {
+        TRACELOG(LOG_WARNING, "WAVE: Failed to get frame count for format conversion");
+        return;
+    }
+
+    void *data = RL_MALLOC(frameCount*channels*(sampleSize/8));
+
+    frameCount = (ma_uint32)ma_convert_frames(data, frameCount, formatOut, channels, sampleRate, wave->data, frameCountIn, formatIn, wave->channels, wave->sampleRate);
+    if (frameCount == 0)
+    {
+        TRACELOG(LOG_WARNING, "WAVE: Failed format conversion");
+        return;
+    }
+
+    wave->frameCount = frameCount;
+    wave->sampleSize = sampleSize;
+    wave->sampleRate = sampleRate;
+    wave->channels = channels;
+
+    RL_FREE(wave->data);
+    wave->data = data;
+}
+
+// Copy a wave to a new wave
+Wave WaveCopy(Wave wave)
+{
+    Wave newWave = { 0 };
+
+    newWave.data = RL_MALLOC(wave.frameCount*wave.channels*wave.sampleSize/8);
+
+    if (newWave.data != NULL)
+    {
+        // NOTE: Size must be provided in bytes
+        memcpy(newWave.data, wave.data, wave.frameCount*wave.channels*wave.sampleSize/8);
+
+        newWave.frameCount = wave.frameCount;
+        newWave.sampleRate = wave.sampleRate;
+        newWave.sampleSize = wave.sampleSize;
+        newWave.channels = wave.channels;
+    }
+
+    return newWave;
+}
+
+// Crop a wave to defined samples range
+// NOTE: Security check in case of out-of-range
+void WaveCrop(Wave *wave, int initSample, int finalSample)
+{
+    if ((initSample >= 0) && (initSample < finalSample) && ((unsigned int)finalSample < (wave->frameCount*wave->channels)))
+    {
+        int sampleCount = finalSample - initSample;
+
+        void *data = RL_MALLOC(sampleCount*wave->sampleSize/8);
+
+        memcpy(data, (unsigned char *)wave->data + (initSample*wave->channels*wave->sampleSize/8), sampleCount*wave->sampleSize/8);
+
+        RL_FREE(wave->data);
+        wave->data = data;
+    }
+    else TRACELOG(LOG_WARNING, "WAVE: Crop range out of bounds");
+}
+
+// Load samples data from wave as a floats array
+// NOTE 1: Returned sample values are normalized to range [-1..1]
+// NOTE 2: Sample data allocated should be freed with UnloadWaveSamples()
+float *LoadWaveSamples(Wave wave)
+{
+    float *samples = (float *)RL_MALLOC(wave.frameCount*wave.channels*sizeof(float));
+
+    // NOTE: sampleCount is the total number of interlaced samples (including channels)
+
+    for (unsigned int i = 0; i < wave.frameCount*wave.channels; i++)
+    {
+        if (wave.sampleSize == 8) samples[i] = (float)(((unsigned char *)wave.data)[i] - 127)/256.0f;
+        else if (wave.sampleSize == 16) samples[i] = (float)(((short *)wave.data)[i])/32767.0f;
+        else if (wave.sampleSize == 32) samples[i] = ((float *)wave.data)[i];
+    }
+
+    return samples;
+}
+
+// Unload samples data loaded with LoadWaveSamples()
+void UnloadWaveSamples(float *samples)
+{
+    RL_FREE(samples);
+}
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition - Music loading and stream playing
+//----------------------------------------------------------------------------------
+
+// Load music stream from file
+Music LoadMusicStream(const char *fileName)
+{
+    Music music = { 0 };
+    bool musicLoaded = false;
+
+    if (false) { }
+#if defined(SUPPORT_FILEFORMAT_WAV)
+    else if (IsFileExtension(fileName, ".wav"))
+    {
+        drwav *ctxWav = RL_CALLOC(1, sizeof(drwav));
+        bool success = drwav_init_file(ctxWav, fileName, NULL);
+
+        music.ctxType = MUSIC_AUDIO_WAV;
+        music.ctxData = ctxWav;
+
+        if (success)
+        {
+            int sampleSize = ctxWav->bitsPerSample;
+            if (ctxWav->bitsPerSample == 24) sampleSize = 16;   // Forcing conversion to s16 on UpdateMusicStream()
+
+            music.stream = LoadAudioStream(ctxWav->sampleRate, sampleSize, ctxWav->channels);
+            music.frameCount = (unsigned int)ctxWav->totalPCMFrameCount;
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_OGG)
+    else if (IsFileExtension(fileName, ".ogg"))
+    {
+        // Open ogg audio stream
+        music.ctxType = MUSIC_AUDIO_OGG;
+        music.ctxData = stb_vorbis_open_filename(fileName, NULL, NULL);
+
+        if (music.ctxData != NULL)
+        {
+            stb_vorbis_info info = stb_vorbis_get_info((stb_vorbis *)music.ctxData);  // Get Ogg file info
+
+            // OGG bit rate defaults to 16 bit, it's enough for compressed format
+            music.stream = LoadAudioStream(info.sample_rate, 16, info.channels);
+
+            // WARNING: It seems this function returns length in frames, not samples, so we multiply by channels
+            music.frameCount = (unsigned int)stb_vorbis_stream_length_in_samples((stb_vorbis *)music.ctxData);
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_MP3)
+    else if (IsFileExtension(fileName, ".mp3"))
+    {
+        drmp3 *ctxMp3 = RL_CALLOC(1, sizeof(drmp3));
+        int result = drmp3_init_file(ctxMp3, fileName, NULL);
+
+        music.ctxType = MUSIC_AUDIO_MP3;
+        music.ctxData = ctxMp3;
+
+        if (result > 0)
+        {
+            music.stream = LoadAudioStream(ctxMp3->sampleRate, 32, ctxMp3->channels);
+            music.frameCount = (unsigned int)drmp3_get_pcm_frame_count(ctxMp3);
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_QOA)
+    else if (IsFileExtension(fileName, ".qoa"))
+    {
+        qoaplay_desc *ctxQoa = qoaplay_open(fileName);
+        music.ctxType = MUSIC_AUDIO_QOA;
+        music.ctxData = ctxQoa;
+
+        if (ctxQoa->file != NULL)
+        {
+            // NOTE: We are loading samples are 32bit float normalized data, so,
+            // we configure the output audio stream to also use float 32bit
+            music.stream = LoadAudioStream(ctxQoa->info.samplerate, 32, ctxQoa->info.channels);
+            music.frameCount = ctxQoa->info.samples;
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_FLAC)
+    else if (IsFileExtension(fileName, ".flac"))
+    {
+        music.ctxType = MUSIC_AUDIO_FLAC;
+        music.ctxData = drflac_open_file(fileName, NULL);
+
+        if (music.ctxData != NULL)
+        {
+            drflac *ctxFlac = (drflac *)music.ctxData;
+
+            music.stream = LoadAudioStream(ctxFlac->sampleRate, ctxFlac->bitsPerSample, ctxFlac->channels);
+            music.frameCount = (unsigned int)ctxFlac->totalPCMFrameCount;
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_XM)
+    else if (IsFileExtension(fileName, ".xm"))
+    {
+        jar_xm_context_t *ctxXm = NULL;
+        int result = jar_xm_create_context_from_file(&ctxXm, AUDIO.System.device.sampleRate, fileName);
+
+        music.ctxType = MUSIC_MODULE_XM;
+        music.ctxData = ctxXm;
+
+        if (result == 0)    // XM AUDIO.System.context created successfully
+        {
+            jar_xm_set_max_loop_count(ctxXm, 0);    // Set infinite number of loops
+
+            unsigned int bits = 32;
+            if (AUDIO_DEVICE_FORMAT == ma_format_s16) bits = 16;
+            else if (AUDIO_DEVICE_FORMAT == ma_format_u8) bits = 8;
+
+            // NOTE: Only stereo is supported for XM
+            music.stream = LoadAudioStream(AUDIO.System.device.sampleRate, bits, AUDIO_DEVICE_CHANNELS);
+            music.frameCount = (unsigned int)jar_xm_get_remaining_samples(ctxXm);    // NOTE: Always 2 channels (stereo)
+            music.looping = true;   // Looping enabled by default
+            jar_xm_reset(ctxXm);    // Make sure we start at the beginning of the song
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_MOD)
+    else if (IsFileExtension(fileName, ".mod"))
+    {
+        jar_mod_context_t *ctxMod = RL_CALLOC(1, sizeof(jar_mod_context_t));
+        jar_mod_init(ctxMod);
+        int result = jar_mod_load_file(ctxMod, fileName);
+
+        music.ctxType = MUSIC_MODULE_MOD;
+        music.ctxData = ctxMod;
+
+        if (result > 0)
+        {
+            // NOTE: Only stereo is supported for MOD
+            music.stream = LoadAudioStream(AUDIO.System.device.sampleRate, 16, AUDIO_DEVICE_CHANNELS);
+            music.frameCount = (unsigned int)jar_mod_max_samples(ctxMod);    // NOTE: Always 2 channels (stereo)
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+        }
+    }
+#endif
+    else TRACELOG(LOG_WARNING, "STREAM: [%s] File format not supported", fileName);
+
+    if (!musicLoaded)
+    {
+        if (false) { }
+    #if defined(SUPPORT_FILEFORMAT_WAV)
+        else if (music.ctxType == MUSIC_AUDIO_WAV) drwav_uninit((drwav *)music.ctxData);
+    #endif
+    #if defined(SUPPORT_FILEFORMAT_OGG)
+        else if (music.ctxType == MUSIC_AUDIO_OGG) stb_vorbis_close((stb_vorbis *)music.ctxData);
+    #endif
+    #if defined(SUPPORT_FILEFORMAT_MP3)
+        else if (music.ctxType == MUSIC_AUDIO_MP3) { drmp3_uninit((drmp3 *)music.ctxData); RL_FREE(music.ctxData); }
+    #endif
+    #if defined(SUPPORT_FILEFORMAT_QOA)
+        else if (music.ctxType == MUSIC_AUDIO_QOA) qoaplay_close((qoaplay_desc *)music.ctxData);
+    #endif
+    #if defined(SUPPORT_FILEFORMAT_FLAC)
+        else if (music.ctxType == MUSIC_AUDIO_FLAC) drflac_free((drflac *)music.ctxData, NULL);
+    #endif
+    #if defined(SUPPORT_FILEFORMAT_XM)
+        else if (music.ctxType == MUSIC_MODULE_XM) jar_xm_free_context((jar_xm_context_t *)music.ctxData);
+    #endif
+    #if defined(SUPPORT_FILEFORMAT_MOD)
+        else if (music.ctxType == MUSIC_MODULE_MOD) { jar_mod_unload((jar_mod_context_t *)music.ctxData); RL_FREE(music.ctxData); }
+    #endif
+
+        music.ctxData = NULL;
+        TRACELOG(LOG_WARNING, "FILEIO: [%s] Music file could not be opened", fileName);
+    }
+    else
+    {
+        // Show some music stream info
+        TRACELOG(LOG_INFO, "FILEIO: [%s] Music file loaded successfully", fileName);
+        TRACELOG(LOG_INFO, "    > Sample rate:   %i Hz", music.stream.sampleRate);
+        TRACELOG(LOG_INFO, "    > Sample size:   %i bits", music.stream.sampleSize);
+        TRACELOG(LOG_INFO, "    > Channels:      %i (%s)", music.stream.channels, (music.stream.channels == 1)? "Mono" : (music.stream.channels == 2)? "Stereo" : "Multi");
+        TRACELOG(LOG_INFO, "    > Total frames:  %i", music.frameCount);
+    }
+
+    return music;
+}
+
+// Load music stream from memory buffer, fileType refers to extension: i.e. ".wav"
+// WARNING: File extension must be provided in lower-case
+Music LoadMusicStreamFromMemory(const char *fileType, const unsigned char *data, int dataSize)
+{
+    Music music = { 0 };
+    bool musicLoaded = false;
+
+    if (false) { }
+#if defined(SUPPORT_FILEFORMAT_WAV)
+    else if ((strcmp(fileType, ".wav") == 0) || (strcmp(fileType, ".WAV") == 0))
+    {
+        drwav *ctxWav = RL_CALLOC(1, sizeof(drwav));
+
+        bool success = drwav_init_memory(ctxWav, (const void *)data, dataSize, NULL);
+
+        music.ctxType = MUSIC_AUDIO_WAV;
+        music.ctxData = ctxWav;
+
+        if (success)
+        {
+            int sampleSize = ctxWav->bitsPerSample;
+            if (ctxWav->bitsPerSample == 24) sampleSize = 16;   // Forcing conversion to s16 on UpdateMusicStream()
+
+            music.stream = LoadAudioStream(ctxWav->sampleRate, sampleSize, ctxWav->channels);
+            music.frameCount = (unsigned int)ctxWav->totalPCMFrameCount;
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_OGG)
+    else if ((strcmp(fileType, ".ogg") == 0) || (strcmp(fileType, ".OGG") == 0))
+    {
+        // Open ogg audio stream
+        music.ctxType = MUSIC_AUDIO_OGG;
+        //music.ctxData = stb_vorbis_open_filename(fileName, NULL, NULL);
+        music.ctxData = stb_vorbis_open_memory((const unsigned char *)data, dataSize, NULL, NULL);
+
+        if (music.ctxData != NULL)
+        {
+            stb_vorbis_info info = stb_vorbis_get_info((stb_vorbis *)music.ctxData);  // Get Ogg file info
+
+            // OGG bit rate defaults to 16 bit, it's enough for compressed format
+            music.stream = LoadAudioStream(info.sample_rate, 16, info.channels);
+
+            // WARNING: It seems this function returns length in frames, not samples, so we multiply by channels
+            music.frameCount = (unsigned int)stb_vorbis_stream_length_in_samples((stb_vorbis *)music.ctxData);
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_MP3)
+    else if ((strcmp(fileType, ".mp3") == 0) || (strcmp(fileType, ".MP3") == 0))
+    {
+        drmp3 *ctxMp3 = RL_CALLOC(1, sizeof(drmp3));
+        int success = drmp3_init_memory(ctxMp3, (const void*)data, dataSize, NULL);
+
+        music.ctxType = MUSIC_AUDIO_MP3;
+        music.ctxData = ctxMp3;
+
+        if (success)
+        {
+            music.stream = LoadAudioStream(ctxMp3->sampleRate, 32, ctxMp3->channels);
+            music.frameCount = (unsigned int)drmp3_get_pcm_frame_count(ctxMp3);
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_QOA)
+    else if ((strcmp(fileType, ".qoa") == 0) || (strcmp(fileType, ".QOA") == 0))
+    {
+        qoaplay_desc *ctxQoa = qoaplay_open_memory(data, dataSize);
+        music.ctxType = MUSIC_AUDIO_QOA;
+        music.ctxData = ctxQoa;
+
+        if ((ctxQoa->file_data != NULL) && (ctxQoa->file_data_size != 0))
+        {
+            // NOTE: We are loading samples are 32bit float normalized data, so,
+            // we configure the output audio stream to also use float 32bit
+            music.stream = LoadAudioStream(ctxQoa->info.samplerate, 32, ctxQoa->info.channels);
+            music.frameCount = ctxQoa->info.samples;
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_FLAC)
+    else if ((strcmp(fileType, ".flac") == 0) || (strcmp(fileType, ".FLAC") == 0))
+    {
+        music.ctxType = MUSIC_AUDIO_FLAC;
+        music.ctxData = drflac_open_memory((const void*)data, dataSize, NULL);
+
+        if (music.ctxData != NULL)
+        {
+            drflac *ctxFlac = (drflac *)music.ctxData;
+
+            music.stream = LoadAudioStream(ctxFlac->sampleRate, ctxFlac->bitsPerSample, ctxFlac->channels);
+            music.frameCount = (unsigned int)ctxFlac->totalPCMFrameCount;
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_XM)
+    else if ((strcmp(fileType, ".xm") == 0) || (strcmp(fileType, ".XM") == 0))
+    {
+        jar_xm_context_t *ctxXm = NULL;
+        int result = jar_xm_create_context_safe(&ctxXm, (const char *)data, dataSize, AUDIO.System.device.sampleRate);
+        if (result == 0)    // XM AUDIO.System.context created successfully
+        {
+            music.ctxType = MUSIC_MODULE_XM;
+            jar_xm_set_max_loop_count(ctxXm, 0);    // Set infinite number of loops
+
+            unsigned int bits = 32;
+            if (AUDIO_DEVICE_FORMAT == ma_format_s16) bits = 16;
+            else if (AUDIO_DEVICE_FORMAT == ma_format_u8) bits = 8;
+
+            // NOTE: Only stereo is supported for XM
+            music.stream = LoadAudioStream(AUDIO.System.device.sampleRate, bits, 2);
+            music.frameCount = (unsigned int)jar_xm_get_remaining_samples(ctxXm);    // NOTE: Always 2 channels (stereo)
+            music.looping = true;   // Looping enabled by default
+            jar_xm_reset(ctxXm);    // Make sure we start at the beginning of the song
+
+            music.ctxData = ctxXm;
+            musicLoaded = true;
+        }
+    }
+#endif
+#if defined(SUPPORT_FILEFORMAT_MOD)
+    else if ((strcmp(fileType, ".mod") == 0) || (strcmp(fileType, ".MOD") == 0))
+    {
+        jar_mod_context_t *ctxMod = (jar_mod_context_t *)RL_MALLOC(sizeof(jar_mod_context_t));
+        int result = 0;
+
+        jar_mod_init(ctxMod);
+
+        // Copy data to allocated memory for default UnloadMusicStream
+        unsigned char *newData = (unsigned char *)RL_MALLOC(dataSize);
+        int it = dataSize/sizeof(unsigned char);
+        for (int i = 0; i < it; i++) newData[i] = data[i];
+
+        // Memory loaded version for jar_mod_load_file()
+        if (dataSize && (dataSize < 32*1024*1024))
+        {
+            ctxMod->modfilesize = dataSize;
+            ctxMod->modfile = newData;
+            if (jar_mod_load(ctxMod, (void *)ctxMod->modfile, dataSize)) result = dataSize;
+        }
+
+        if (result > 0)
+        {
+            music.ctxType = MUSIC_MODULE_MOD;
+
+            // NOTE: Only stereo is supported for MOD
+            music.stream = LoadAudioStream(AUDIO.System.device.sampleRate, 16, 2);
+            music.frameCount = (unsigned int)jar_mod_max_samples(ctxMod);    // NOTE: Always 2 channels (stereo)
+            music.looping = true;   // Looping enabled by default
+            musicLoaded = true;
+
+            music.ctxData = ctxMod;
+            musicLoaded = true;
+        }
+    }
+#endif
+    else TRACELOG(LOG_WARNING, "STREAM: Data format not supported");
+
+    if (!musicLoaded)
+    {
+        if (false) { }
+#if defined(SUPPORT_FILEFORMAT_WAV)
+        else if (music.ctxType == MUSIC_AUDIO_WAV) drwav_uninit((drwav *)music.ctxData);
+#endif
+#if defined(SUPPORT_FILEFORMAT_OGG)
+        else if (music.ctxType == MUSIC_AUDIO_OGG) stb_vorbis_close((stb_vorbis *)music.ctxData);
+#endif
+#if defined(SUPPORT_FILEFORMAT_MP3)
+        else if (music.ctxType == MUSIC_AUDIO_MP3) { drmp3_uninit((drmp3 *)music.ctxData); RL_FREE(music.ctxData); }
+#endif
+#if defined(SUPPORT_FILEFORMAT_QOA)
+        else if (music.ctxType == MUSIC_AUDIO_QOA) qoaplay_close((qoaplay_desc *)music.ctxData);
+#endif
+#if defined(SUPPORT_FILEFORMAT_FLAC)
+        else if (music.ctxType == MUSIC_AUDIO_FLAC) drflac_free((drflac *)music.ctxData, NULL);
+#endif
+#if defined(SUPPORT_FILEFORMAT_XM)
+        else if (music.ctxType == MUSIC_MODULE_XM) jar_xm_free_context((jar_xm_context_t *)music.ctxData);
+#endif
+#if defined(SUPPORT_FILEFORMAT_MOD)
+        else if (music.ctxType == MUSIC_MODULE_MOD) { jar_mod_unload((jar_mod_context_t *)music.ctxData); RL_FREE(music.ctxData); }
+#endif
+
+        music.ctxData = NULL;
+        TRACELOG(LOG_WARNING, "FILEIO: Music data could not be loaded");
+    }
+    else
+    {
+        // Show some music stream info
+        TRACELOG(LOG_INFO, "FILEIO: Music data loaded successfully");
+        TRACELOG(LOG_INFO, "    > Sample rate:   %i Hz", music.stream.sampleRate);
+        TRACELOG(LOG_INFO, "    > Sample size:   %i bits", music.stream.sampleSize);
+        TRACELOG(LOG_INFO, "    > Channels:      %i (%s)", music.stream.channels, (music.stream.channels == 1)? "Mono" : (music.stream.channels == 2)? "Stereo" : "Multi");
+        TRACELOG(LOG_INFO, "    > Total frames:  %i", music.frameCount);
+    }
+
+    return music;
+}
+
+// Checks if a music stream is ready
+bool IsMusicReady(Music music)
+{
+    return ((music.ctxData != NULL) &&          // Validate context loaded
+            (music.frameCount > 0) &&           // Validate audio frame count
+            (music.stream.sampleRate > 0) &&    // Validate sample rate is supported
+            (music.stream.sampleSize > 0) &&    // Validate sample size is supported
+            (music.stream.channels > 0));       // Validate number of channels supported
+}
+
+// Unload music stream
+void UnloadMusicStream(Music music)
+{
+    UnloadAudioStream(music.stream);
+
+    if (music.ctxData != NULL)
+    {
+        if (false) { }
+#if defined(SUPPORT_FILEFORMAT_WAV)
+        else if (music.ctxType == MUSIC_AUDIO_WAV) drwav_uninit((drwav *)music.ctxData);
+#endif
+#if defined(SUPPORT_FILEFORMAT_OGG)
+        else if (music.ctxType == MUSIC_AUDIO_OGG) stb_vorbis_close((stb_vorbis *)music.ctxData);
+#endif
+#if defined(SUPPORT_FILEFORMAT_MP3)
+        else if (music.ctxType == MUSIC_AUDIO_MP3) { drmp3_uninit((drmp3 *)music.ctxData); RL_FREE(music.ctxData); }
+#endif
+#if defined(SUPPORT_FILEFORMAT_QOA)
+        else if (music.ctxType == MUSIC_AUDIO_QOA) qoaplay_close((qoaplay_desc *)music.ctxData);
+#endif
+#if defined(SUPPORT_FILEFORMAT_FLAC)
+        else if (music.ctxType == MUSIC_AUDIO_FLAC) drflac_free((drflac *)music.ctxData, NULL);
+#endif
+#if defined(SUPPORT_FILEFORMAT_XM)
+        else if (music.ctxType == MUSIC_MODULE_XM) jar_xm_free_context((jar_xm_context_t *)music.ctxData);
+#endif
+#if defined(SUPPORT_FILEFORMAT_MOD)
+        else if (music.ctxType == MUSIC_MODULE_MOD) { jar_mod_unload((jar_mod_context_t *)music.ctxData); RL_FREE(music.ctxData); }
+#endif
+    }
+}
+
+// Start music playing (open stream)
+void PlayMusicStream(Music music)
+{
+    if (music.stream.buffer != NULL)
+    {
+        // For music streams, we need to make sure we maintain the frame cursor position
+        // This is a hack for this section of code in UpdateMusicStream()
+        // NOTE: In case window is minimized, music stream is stopped, just make sure to
+        // play again on window restore: if (IsMusicStreamPlaying(music)) PlayMusicStream(music);
+        ma_uint32 frameCursorPos = music.stream.buffer->frameCursorPos;
+        PlayAudioStream(music.stream);  // WARNING: This resets the cursor position.
+        music.stream.buffer->frameCursorPos = frameCursorPos;
+    }
+}
+
+// Pause music playing
+void PauseMusicStream(Music music)
+{
+    PauseAudioStream(music.stream);
+}
+
+// Resume music playing
+void ResumeMusicStream(Music music)
+{
+    ResumeAudioStream(music.stream);
+}
+
+// Stop music playing (close stream)
+void StopMusicStream(Music music)
+{
+    StopAudioStream(music.stream);
+
+    switch (music.ctxType)
+    {
+#if defined(SUPPORT_FILEFORMAT_WAV)
+        case MUSIC_AUDIO_WAV: drwav_seek_to_first_pcm_frame((drwav *)music.ctxData); break;
+#endif
+#if defined(SUPPORT_FILEFORMAT_OGG)
+        case MUSIC_AUDIO_OGG: stb_vorbis_seek_start((stb_vorbis *)music.ctxData); break;
+#endif
+#if defined(SUPPORT_FILEFORMAT_MP3)
+        case MUSIC_AUDIO_MP3: drmp3_seek_to_start_of_stream((drmp3 *)music.ctxData); break;
+#endif
+#if defined(SUPPORT_FILEFORMAT_QOA)
+        case MUSIC_AUDIO_QOA: qoaplay_rewind((qoaplay_desc *)music.ctxData); break;
+#endif
+#if defined(SUPPORT_FILEFORMAT_FLAC)
+        case MUSIC_AUDIO_FLAC: drflac__seek_to_first_frame((drflac *)music.ctxData); break;
+#endif
+#if defined(SUPPORT_FILEFORMAT_XM)
+        case MUSIC_MODULE_XM: jar_xm_reset((jar_xm_context_t *)music.ctxData); break;
+#endif
+#if defined(SUPPORT_FILEFORMAT_MOD)
+        case MUSIC_MODULE_MOD: jar_mod_seek_start((jar_mod_context_t *)music.ctxData); break;
+#endif
+        default: break;
+    }
+}
+
+// Seek music to a certain position (in seconds)
+void SeekMusicStream(Music music, float position)
+{
+    // Seeking is not supported in module formats
+    if ((music.ctxType == MUSIC_MODULE_XM) || (music.ctxType == MUSIC_MODULE_MOD)) return;
+
+    unsigned int positionInFrames = (unsigned int)(position*music.stream.sampleRate);
+
+    switch (music.ctxType)
+    {
+#if defined(SUPPORT_FILEFORMAT_WAV)
+        case MUSIC_AUDIO_WAV: drwav_seek_to_pcm_frame((drwav *)music.ctxData, positionInFrames); break;
+#endif
+#if defined(SUPPORT_FILEFORMAT_OGG)
+        case MUSIC_AUDIO_OGG: stb_vorbis_seek_frame((stb_vorbis *)music.ctxData, positionInFrames); break;
+#endif
+#if defined(SUPPORT_FILEFORMAT_MP3)
+        case MUSIC_AUDIO_MP3: drmp3_seek_to_pcm_frame((drmp3 *)music.ctxData, positionInFrames); break;
+#endif
+#if defined(SUPPORT_FILEFORMAT_QOA)
+        case MUSIC_AUDIO_QOA:
+        {
+            int qoaFrame = positionInFrames/QOA_FRAME_LEN;
+            qoaplay_seek_frame((qoaplay_desc *)music.ctxData, qoaFrame); // Seeks to QOA frame, not PCM frame
+
+            // We need to compute QOA frame number and update positionInFrames
+            positionInFrames = ((qoaplay_desc *)music.ctxData)->sample_position;
+        } break;
+#endif
+#if defined(SUPPORT_FILEFORMAT_FLAC)
+        case MUSIC_AUDIO_FLAC: drflac_seek_to_pcm_frame((drflac *)music.ctxData, positionInFrames); break;
+#endif
+        default: break;
+    }
+
+    music.stream.buffer->framesProcessed = positionInFrames;
+}
+
+// Update (re-fill) music buffers if data already processed
+void UpdateMusicStream(Music music)
+{
+    if (music.stream.buffer == NULL) return;
+
+    unsigned int subBufferSizeInFrames = music.stream.buffer->sizeInFrames/2;
+
+    // On first call of this function we lazily pre-allocated a temp buffer to read audio files/memory data in
+    int frameSize = music.stream.channels*music.stream.sampleSize/8;
+    unsigned int pcmSize = subBufferSizeInFrames*frameSize;
+
+    if (AUDIO.System.pcmBufferSize < pcmSize)
+    {
+        RL_FREE(AUDIO.System.pcmBuffer);
+        AUDIO.System.pcmBuffer = RL_CALLOC(1, pcmSize);
+        AUDIO.System.pcmBufferSize = pcmSize;
+    }
+
+    // Check both sub-buffers to check if they require refilling
+    for (int i = 0; i < 2; i++)
+    {
+        if ((music.stream.buffer != NULL) && !music.stream.buffer->isSubBufferProcessed[i]) continue; // No refilling required, move to next sub-buffer
+
+        unsigned int framesLeft = music.frameCount - music.stream.buffer->framesProcessed;  // Frames left to be processed
+        unsigned int framesToStream = 0;                 // Total frames to be streamed
+
+        if ((framesLeft >= subBufferSizeInFrames) || music.looping) framesToStream = subBufferSizeInFrames;
+        else framesToStream = framesLeft;
+
+        int frameCountStillNeeded = framesToStream;
+        int frameCountReadTotal = 0;
+
+        switch (music.ctxType)
+        {
+        #if defined(SUPPORT_FILEFORMAT_WAV)
+            case MUSIC_AUDIO_WAV:
+            {
+                if (music.stream.sampleSize == 16)
+                {
+                    while (true)
+                    {
+                        int frameCountRead = (int)drwav_read_pcm_frames_s16((drwav *)music.ctxData, frameCountStillNeeded, (short *)((char *)AUDIO.System.pcmBuffer + frameCountReadTotal*frameSize));
+                        frameCountReadTotal += frameCountRead;
+                        frameCountStillNeeded -= frameCountRead;
+                        if (frameCountStillNeeded == 0) break;
+                        else drwav_seek_to_first_pcm_frame((drwav *)music.ctxData);
+                    }
+                }
+                else if (music.stream.sampleSize == 32)
+                {
+                    while (true)
+                    {
+                        int frameCountRead = (int)drwav_read_pcm_frames_f32((drwav *)music.ctxData, frameCountStillNeeded, (float *)((char *)AUDIO.System.pcmBuffer + frameCountReadTotal*frameSize));
+                        frameCountReadTotal += frameCountRead;
+                        frameCountStillNeeded -= frameCountRead;
+                        if (frameCountStillNeeded == 0) break;
+                        else drwav_seek_to_first_pcm_frame((drwav *)music.ctxData);
+                    }
+                }
+            } break;
+        #endif
+        #if defined(SUPPORT_FILEFORMAT_OGG)
+            case MUSIC_AUDIO_OGG:
+            {
+                while (true)
+                {
+                    int frameCountRead = stb_vorbis_get_samples_short_interleaved((stb_vorbis *)music.ctxData, music.stream.channels, (short *)((char *)AUDIO.System.pcmBuffer + frameCountReadTotal*frameSize), frameCountStillNeeded*music.stream.channels);
+                    frameCountReadTotal += frameCountRead;
+                    frameCountStillNeeded -= frameCountRead;
+                    if (frameCountStillNeeded == 0) break;
+                    else stb_vorbis_seek_start((stb_vorbis *)music.ctxData);
+                }
+            } break;
+        #endif
+        #if defined(SUPPORT_FILEFORMAT_MP3)
+            case MUSIC_AUDIO_MP3:
+            {
+                while (true)
+                {
+                    int frameCountRead = (int)drmp3_read_pcm_frames_f32((drmp3 *)music.ctxData, frameCountStillNeeded, (float *)((char *)AUDIO.System.pcmBuffer + frameCountReadTotal*frameSize));
+                    frameCountReadTotal += frameCountRead;
+                    frameCountStillNeeded -= frameCountRead;
+                    if (frameCountStillNeeded == 0) break;
+                    else drmp3_seek_to_start_of_stream((drmp3 *)music.ctxData);
+                }
+            } break;
+        #endif
+        #if defined(SUPPORT_FILEFORMAT_QOA)
+            case MUSIC_AUDIO_QOA:
+            {
+                unsigned int frameCountRead = qoaplay_decode((qoaplay_desc *)music.ctxData, (float *)AUDIO.System.pcmBuffer, framesToStream);
+                frameCountReadTotal += frameCountRead;
+                /*
+                while (true)
+                {
+                    int frameCountRead = (int)qoaplay_decode((qoaplay_desc *)music.ctxData, (float *)((char *)AUDIO.System.pcmBuffer + frameCountReadTotal*frameSize),  frameCountStillNeeded);
+                    frameCountReadTotal += frameCountRead;
+                    frameCountStillNeeded -= frameCountRead;
+                    if (frameCountStillNeeded == 0) break;
+                    else qoaplay_rewind((qoaplay_desc *)music.ctxData);
+                }
+                */
+            } break;
+        #endif
+        #if defined(SUPPORT_FILEFORMAT_FLAC)
+            case MUSIC_AUDIO_FLAC:
+            {
+                while (true)
+                {
+                    int frameCountRead = (int)drflac_read_pcm_frames_s16((drflac *)music.ctxData, frameCountStillNeeded, (short *)((char *)AUDIO.System.pcmBuffer + frameCountReadTotal*frameSize));
+                    frameCountReadTotal += frameCountRead;
+                    frameCountStillNeeded -= frameCountRead;
+                    if (frameCountStillNeeded == 0) break;
+                    else drflac__seek_to_first_frame((drflac *)music.ctxData);
+                }
+            } break;
+        #endif
+        #if defined(SUPPORT_FILEFORMAT_XM)
+            case MUSIC_MODULE_XM:
+            {
+                // NOTE: Internally we consider 2 channels generation, so sampleCount/2
+                if (AUDIO_DEVICE_FORMAT == ma_format_f32) jar_xm_generate_samples((jar_xm_context_t *)music.ctxData, (float *)AUDIO.System.pcmBuffer, framesToStream);
+                else if (AUDIO_DEVICE_FORMAT == ma_format_s16) jar_xm_generate_samples_16bit((jar_xm_context_t *)music.ctxData, (short *)AUDIO.System.pcmBuffer, framesToStream);
+                else if (AUDIO_DEVICE_FORMAT == ma_format_u8) jar_xm_generate_samples_8bit((jar_xm_context_t *)music.ctxData, (char *)AUDIO.System.pcmBuffer, framesToStream);
+                //jar_xm_reset((jar_xm_context_t *)music.ctxData);
+
+            } break;
+        #endif
+        #if defined(SUPPORT_FILEFORMAT_MOD)
+            case MUSIC_MODULE_MOD:
+            {
+                // NOTE: 3rd parameter (nbsample) specify the number of stereo 16bits samples you want, so sampleCount/2
+                jar_mod_fillbuffer((jar_mod_context_t *)music.ctxData, (short *)AUDIO.System.pcmBuffer, framesToStream, 0);
+                //jar_mod_seek_start((jar_mod_context_t *)music.ctxData);
+
+            } break;
+        #endif
+            default: break;
+        }
+
+        UpdateAudioStream(music.stream, AUDIO.System.pcmBuffer, framesToStream);
+
+        music.stream.buffer->framesProcessed = music.stream.buffer->framesProcessed%music.frameCount;
+
+        if (framesLeft <= subBufferSizeInFrames)
+        {
+            if (!music.looping)
+            {
+                // Streaming is ending, we filled latest frames from input
+                StopMusicStream(music);
+                return;
+            }
+        }
+    }
+
+    // NOTE: In case window is minimized, music stream is stopped,
+    // just make sure to play again on window restore
+    if (IsMusicStreamPlaying(music)) PlayMusicStream(music);
+}
+
+// Check if any music is playing
+bool IsMusicStreamPlaying(Music music)
+{
+    return IsAudioStreamPlaying(music.stream);
+}
+
+// Set volume for music
+void SetMusicVolume(Music music, float volume)
+{
+    SetAudioStreamVolume(music.stream, volume);
+}
+
+// Set pitch for music
+void SetMusicPitch(Music music, float pitch)
+{
+    SetAudioBufferPitch(music.stream.buffer, pitch);
+}
+
+// Set pan for a music
+void SetMusicPan(Music music, float pan)
+{
+    SetAudioBufferPan(music.stream.buffer, pan);
+}
+
+// Get music time length (in seconds)
+float GetMusicTimeLength(Music music)
+{
+    float totalSeconds = 0.0f;
+
+    totalSeconds = (float)music.frameCount/music.stream.sampleRate;
+
+    return totalSeconds;
+}
+
+// Get current music time played (in seconds)
+float GetMusicTimePlayed(Music music)
+{
+    float secondsPlayed = 0.0f;
+    if (music.stream.buffer != NULL)
+    {
+#if defined(SUPPORT_FILEFORMAT_XM)
+        if (music.ctxType == MUSIC_MODULE_XM)
+        {
+            uint64_t framesPlayed = 0;
+
+            jar_xm_get_position(music.ctxData, NULL, NULL, NULL, &framesPlayed);
+            secondsPlayed = (float)framesPlayed/music.stream.sampleRate;
+        }
+        else
+#endif
+        {
+            //ma_uint32 frameSizeInBytes = ma_get_bytes_per_sample(music.stream.buffer->dsp.formatConverterIn.config.formatIn)*music.stream.buffer->dsp.formatConverterIn.config.channels;
+            int framesProcessed = (int)music.stream.buffer->framesProcessed;
+            int subBufferSize = (int)music.stream.buffer->sizeInFrames/2;
+            int framesInFirstBuffer = music.stream.buffer->isSubBufferProcessed[0]? 0 : subBufferSize;
+            int framesInSecondBuffer = music.stream.buffer->isSubBufferProcessed[1]? 0 : subBufferSize;
+            int framesSentToMix = music.stream.buffer->frameCursorPos%subBufferSize;
+            int framesPlayed = (framesProcessed - framesInFirstBuffer - framesInSecondBuffer + framesSentToMix)%(int)music.frameCount;
+            if (framesPlayed < 0) framesPlayed += music.frameCount;
+            secondsPlayed = (float)framesPlayed/music.stream.sampleRate;
+        }
+    }
+
+    return secondsPlayed;
+}
+
+// Load audio stream (to stream audio pcm data)
+AudioStream LoadAudioStream(unsigned int sampleRate, unsigned int sampleSize, unsigned int channels)
+{
+    AudioStream stream = { 0 };
+
+    stream.sampleRate = sampleRate;
+    stream.sampleSize = sampleSize;
+    stream.channels = channels;
+
+    ma_format formatIn = ((stream.sampleSize == 8)? ma_format_u8 : ((stream.sampleSize == 16)? ma_format_s16 : ma_format_f32));
+
+    // The size of a streaming buffer must be at least double the size of a period
+    unsigned int periodSize = AUDIO.System.device.playback.internalPeriodSizeInFrames;
+
+    // If the buffer is not set, compute one that would give us a buffer good enough for a decent frame rate
+    unsigned int subBufferSize = (AUDIO.Buffer.defaultSize == 0)? AUDIO.System.device.sampleRate/30 : AUDIO.Buffer.defaultSize;
+
+    if (subBufferSize < periodSize) subBufferSize = periodSize;
+
+    // Create a double audio buffer of defined size
+    stream.buffer = LoadAudioBuffer(formatIn, stream.channels, stream.sampleRate, subBufferSize*2, AUDIO_BUFFER_USAGE_STREAM);
+
+    if (stream.buffer != NULL)
+    {
+        stream.buffer->looping = true;    // Always loop for streaming buffers
+        TRACELOG(LOG_INFO, "STREAM: Initialized successfully (%i Hz, %i bit, %s)", stream.sampleRate, stream.sampleSize, (stream.channels == 1)? "Mono" : "Stereo");
+    }
+    else TRACELOG(LOG_WARNING, "STREAM: Failed to load audio buffer, stream could not be created");
+
+    return stream;
+}
+
+// Checks if an audio stream is ready
+bool IsAudioStreamReady(AudioStream stream)
+{
+    return ((stream.buffer != NULL) &&    // Validate stream buffer
+            (stream.sampleRate > 0) &&    // Validate sample rate is supported
+            (stream.sampleSize > 0) &&    // Validate sample size is supported
+            (stream.channels > 0));       // Validate number of channels supported
+}
+
+// Unload audio stream and free memory
+void UnloadAudioStream(AudioStream stream)
+{
+    UnloadAudioBuffer(stream.buffer);
+
+    TRACELOG(LOG_INFO, "STREAM: Unloaded audio stream data from RAM");
+}
+
+// Update audio stream buffers with data
+// NOTE 1: Only updates one buffer of the stream source: dequeue -> update -> queue
+// NOTE 2: To dequeue a buffer it needs to be processed: IsAudioStreamProcessed()
+void UpdateAudioStream(AudioStream stream, const void *data, int frameCount)
+{
+    if (stream.buffer != NULL)
+    {
+        if (stream.buffer->isSubBufferProcessed[0] || stream.buffer->isSubBufferProcessed[1])
+        {
+            ma_uint32 subBufferToUpdate = 0;
+
+            if (stream.buffer->isSubBufferProcessed[0] && stream.buffer->isSubBufferProcessed[1])
+            {
+                // Both buffers are available for updating.
+                // Update the first one and make sure the cursor is moved back to the front.
+                subBufferToUpdate = 0;
+                stream.buffer->frameCursorPos = 0;
+            }
+            else
+            {
+                // Just update whichever sub-buffer is processed.
+                subBufferToUpdate = (stream.buffer->isSubBufferProcessed[0])? 0 : 1;
+            }
+
+            ma_uint32 subBufferSizeInFrames = stream.buffer->sizeInFrames/2;
+            unsigned char *subBuffer = stream.buffer->data + ((subBufferSizeInFrames*stream.channels*(stream.sampleSize/8))*subBufferToUpdate);
+
+            // Total frames processed in buffer is always the complete size, filled with 0 if required
+            stream.buffer->framesProcessed += subBufferSizeInFrames;
+
+            // Does this API expect a whole buffer to be updated in one go?
+            // Assuming so, but if not will need to change this logic.
+            if (subBufferSizeInFrames >= (ma_uint32)frameCount)
+            {
+                ma_uint32 framesToWrite = (ma_uint32)frameCount;
+
+                ma_uint32 bytesToWrite = framesToWrite*stream.channels*(stream.sampleSize/8);
+                memcpy(subBuffer, data, bytesToWrite);
+
+                // Any leftover frames should be filled with zeros.
+                ma_uint32 leftoverFrameCount = subBufferSizeInFrames - framesToWrite;
+
+                if (leftoverFrameCount > 0) memset(subBuffer + bytesToWrite, 0, leftoverFrameCount*stream.channels*(stream.sampleSize/8));
+
+                stream.buffer->isSubBufferProcessed[subBufferToUpdate] = false;
+            }
+            else TRACELOG(LOG_WARNING, "STREAM: Attempting to write too many frames to buffer");
+        }
+        else TRACELOG(LOG_WARNING, "STREAM: Buffer not available for updating");
+    }
+}
+
+// Check if any audio stream buffers requires refill
+bool IsAudioStreamProcessed(AudioStream stream)
+{
+    if (stream.buffer == NULL) return false;
+
+    return (stream.buffer->isSubBufferProcessed[0] || stream.buffer->isSubBufferProcessed[1]);
+}
+
+// Play audio stream
+void PlayAudioStream(AudioStream stream)
+{
+    PlayAudioBuffer(stream.buffer);
+}
+
+// Play audio stream
+void PauseAudioStream(AudioStream stream)
+{
+    PauseAudioBuffer(stream.buffer);
+}
+
+// Resume audio stream playing
+void ResumeAudioStream(AudioStream stream)
+{
+    ResumeAudioBuffer(stream.buffer);
+}
+
+// Check if audio stream is playing.
+bool IsAudioStreamPlaying(AudioStream stream)
+{
+    return IsAudioBufferPlaying(stream.buffer);
+}
+
+// Stop audio stream
+void StopAudioStream(AudioStream stream)
+{
+    StopAudioBuffer(stream.buffer);
+}
+
+// Set volume for audio stream (1.0 is max level)
+void SetAudioStreamVolume(AudioStream stream, float volume)
+{
+    SetAudioBufferVolume(stream.buffer, volume);
+}
+
+// Set pitch for audio stream (1.0 is base level)
+void SetAudioStreamPitch(AudioStream stream, float pitch)
+{
+    SetAudioBufferPitch(stream.buffer, pitch);
+}
+
+// Set pan for audio stream
+void SetAudioStreamPan(AudioStream stream, float pan)
+{
+    SetAudioBufferPan(stream.buffer, pan);
+}
+
+// Default size for new audio streams
+void SetAudioStreamBufferSizeDefault(int size)
+{
+    AUDIO.Buffer.defaultSize = size;
+}
+
+// Audio thread callback to request new data
+void SetAudioStreamCallback(AudioStream stream, AudioCallback callback)
+{
+    if (stream.buffer != NULL) stream.buffer->callback = callback;
+}
+
+// Add processor to audio stream. Contrary to buffers, the order of processors is important.
+// The new processor must be added at the end. As there aren't supposed to be a lot of processors attached to
+// a given stream, we iterate through the list to find the end. That way we don't need a pointer to the last element.
+void AttachAudioStreamProcessor(AudioStream stream, AudioCallback process)
+{
+    ma_mutex_lock(&AUDIO.System.lock);
+
+    rAudioProcessor *processor = (rAudioProcessor *)RL_CALLOC(1, sizeof(rAudioProcessor));
+    processor->process = process;
+
+    rAudioProcessor *last = stream.buffer->processor;
+
+    while (last && last->next)
+    {
+        last = last->next;
+    }
+    if (last)
+    {
+        processor->prev = last;
+        last->next = processor;
+    }
+    else stream.buffer->processor = processor;
+
+    ma_mutex_unlock(&AUDIO.System.lock);
+}
+
+// Remove processor from audio stream
+void DetachAudioStreamProcessor(AudioStream stream, AudioCallback process)
+{
+    ma_mutex_lock(&AUDIO.System.lock);
+
+    rAudioProcessor *processor = stream.buffer->processor;
+
+    while (processor)
+    {
+        rAudioProcessor *next = processor->next;
+        rAudioProcessor *prev = processor->prev;
+
+        if (processor->process == process)
+        {
+            if (stream.buffer->processor == processor) stream.buffer->processor = next;
+            if (prev) prev->next = next;
+            if (next) next->prev = prev;
+
+            RL_FREE(processor);
+        }
+
+        processor = next;
+    }
+
+    ma_mutex_unlock(&AUDIO.System.lock);
+}
+
+// Add processor to audio pipeline. Order of processors is important
+// Works the same way as {Attach,Detach}AudioStreamProcessor() functions, except
+// these two work on the already mixed output just before sending it to the sound hardware
+void AttachAudioMixedProcessor(AudioCallback process)
+{
+    ma_mutex_lock(&AUDIO.System.lock);
+
+    rAudioProcessor *processor = (rAudioProcessor *)RL_CALLOC(1, sizeof(rAudioProcessor));
+    processor->process = process;
+
+    rAudioProcessor *last = AUDIO.mixedProcessor;
+
+    while (last && last->next)
+    {
+        last = last->next;
+    }
+    if (last)
+    {
+        processor->prev = last;
+        last->next = processor;
+    }
+    else AUDIO.mixedProcessor = processor;
+
+    ma_mutex_unlock(&AUDIO.System.lock);
+}
+
+// Remove processor from audio pipeline
+void DetachAudioMixedProcessor(AudioCallback process)
+{
+    ma_mutex_lock(&AUDIO.System.lock);
+
+    rAudioProcessor *processor = AUDIO.mixedProcessor;
+
+    while (processor)
+    {
+        rAudioProcessor *next = processor->next;
+        rAudioProcessor *prev = processor->prev;
+
+        if (processor->process == process)
+        {
+            if (AUDIO.mixedProcessor == processor) AUDIO.mixedProcessor = next;
+            if (prev) prev->next = next;
+            if (next) next->prev = prev;
+
+            RL_FREE(processor);
+        }
+
+        processor = next;
+    }
+
+    ma_mutex_unlock(&AUDIO.System.lock);
+}
+
+
+//----------------------------------------------------------------------------------
+// Module specific Functions Definition
+//----------------------------------------------------------------------------------
+
+// Log callback function
+static void OnLog(void *pUserData, ma_uint32 level, const char *pMessage)
+{
+    TRACELOG(LOG_WARNING, "miniaudio: %s", pMessage);   // All log messages from miniaudio are errors
+}
+
+// Reads audio data from an AudioBuffer object in internal format.
+static ma_uint32 ReadAudioBufferFramesInInternalFormat(AudioBuffer *audioBuffer, void *framesOut, ma_uint32 frameCount)
+{
+    // Using audio buffer callback
+    if (audioBuffer->callback)
+    {
+        audioBuffer->callback(framesOut, frameCount);
+        audioBuffer->framesProcessed += frameCount;
+
+        return frameCount;
+    }
+
+    ma_uint32 subBufferSizeInFrames = (audioBuffer->sizeInFrames > 1)? audioBuffer->sizeInFrames/2 : audioBuffer->sizeInFrames;
+    ma_uint32 currentSubBufferIndex = audioBuffer->frameCursorPos/subBufferSizeInFrames;
+
+    if (currentSubBufferIndex > 1) return 0;
+
+    // Another thread can update the processed state of buffers, so
+    // we just take a copy here to try and avoid potential synchronization problems
+    bool isSubBufferProcessed[2] = { 0 };
+    isSubBufferProcessed[0] = audioBuffer->isSubBufferProcessed[0];
+    isSubBufferProcessed[1] = audioBuffer->isSubBufferProcessed[1];
+
+    ma_uint32 frameSizeInBytes = ma_get_bytes_per_frame(audioBuffer->converter.formatIn, audioBuffer->converter.channelsIn);
+
+    // Fill out every frame until we find a buffer that's marked as processed. Then fill the remainder with 0
+    ma_uint32 framesRead = 0;
+    while (1)
+    {
+        // We break from this loop differently depending on the buffer's usage
+        //  - For static buffers, we simply fill as much data as we can
+        //  - For streaming buffers we only fill half of the buffer that are processed
+        //    Unprocessed halves must keep their audio data in-tact
+        if (audioBuffer->usage == AUDIO_BUFFER_USAGE_STATIC)
+        {
+            if (framesRead >= frameCount) break;
+        }
+        else
+        {
+            if (isSubBufferProcessed[currentSubBufferIndex]) break;
+        }
+
+        ma_uint32 totalFramesRemaining = (frameCount - framesRead);
+        if (totalFramesRemaining == 0) break;
+
+        ma_uint32 framesRemainingInOutputBuffer;
+        if (audioBuffer->usage == AUDIO_BUFFER_USAGE_STATIC)
+        {
+            framesRemainingInOutputBuffer = audioBuffer->sizeInFrames - audioBuffer->frameCursorPos;
+        }
+        else
+        {
+            ma_uint32 firstFrameIndexOfThisSubBuffer = subBufferSizeInFrames*currentSubBufferIndex;
+            framesRemainingInOutputBuffer = subBufferSizeInFrames - (audioBuffer->frameCursorPos - firstFrameIndexOfThisSubBuffer);
+        }
+
+        ma_uint32 framesToRead = totalFramesRemaining;
+        if (framesToRead > framesRemainingInOutputBuffer) framesToRead = framesRemainingInOutputBuffer;
+
+        memcpy((unsigned char *)framesOut + (framesRead*frameSizeInBytes), audioBuffer->data + (audioBuffer->frameCursorPos*frameSizeInBytes), framesToRead*frameSizeInBytes);
+        audioBuffer->frameCursorPos = (audioBuffer->frameCursorPos + framesToRead)%audioBuffer->sizeInFrames;
+        framesRead += framesToRead;
+
+        // If we've read to the end of the buffer, mark it as processed
+        if (framesToRead == framesRemainingInOutputBuffer)
+        {
+            audioBuffer->isSubBufferProcessed[currentSubBufferIndex] = true;
+            isSubBufferProcessed[currentSubBufferIndex] = true;
+
+            currentSubBufferIndex = (currentSubBufferIndex + 1)%2;
+
+            // We need to break from this loop if we're not looping
+            if (!audioBuffer->looping)
+            {
+                StopAudioBuffer(audioBuffer);
+                break;
+            }
+        }
+    }
+
+    // Zero-fill excess
+    ma_uint32 totalFramesRemaining = (frameCount - framesRead);
+    if (totalFramesRemaining > 0)
+    {
+        memset((unsigned char *)framesOut + (framesRead*frameSizeInBytes), 0, totalFramesRemaining*frameSizeInBytes);
+
+        // For static buffers we can fill the remaining frames with silence for safety, but we don't want
+        // to report those frames as "read". The reason for this is that the caller uses the return value
+        // to know whether a non-looping sound has finished playback.
+        if (audioBuffer->usage != AUDIO_BUFFER_USAGE_STATIC) framesRead += totalFramesRemaining;
+    }
+
+    return framesRead;
+}
+
+// Reads audio data from an AudioBuffer object in device format. Returned data will be in a format appropriate for mixing.
+static ma_uint32 ReadAudioBufferFramesInMixingFormat(AudioBuffer *audioBuffer, float *framesOut, ma_uint32 frameCount)
+{
+    // What's going on here is that we're continuously converting data from the AudioBuffer's internal format to the mixing format, which
+    // should be defined by the output format of the data converter. We do this until frameCount frames have been output. The important
+    // detail to remember here is that we never, ever attempt to read more input data than is required for the specified number of output
+    // frames. This can be achieved with ma_data_converter_get_required_input_frame_count().
+    ma_uint8 inputBuffer[4096] = { 0 };
+    ma_uint32 inputBufferFrameCap = sizeof(inputBuffer)/ma_get_bytes_per_frame(audioBuffer->converter.formatIn, audioBuffer->converter.channelsIn);
+
+    ma_uint32 totalOutputFramesProcessed = 0;
+    while (totalOutputFramesProcessed < frameCount)
+    {
+        ma_uint64 outputFramesToProcessThisIteration = frameCount - totalOutputFramesProcessed;
+        ma_uint64 inputFramesToProcessThisIteration = 0;
+
+        (void)ma_data_converter_get_required_input_frame_count(&audioBuffer->converter, outputFramesToProcessThisIteration, &inputFramesToProcessThisIteration);
+        if (inputFramesToProcessThisIteration > inputBufferFrameCap)
+        {
+            inputFramesToProcessThisIteration = inputBufferFrameCap;
+        }
+
+        float *runningFramesOut = framesOut + (totalOutputFramesProcessed*audioBuffer->converter.channelsOut);
+
+        /* At this point we can convert the data to our mixing format. */
+        ma_uint64 inputFramesProcessedThisIteration = ReadAudioBufferFramesInInternalFormat(audioBuffer, inputBuffer, (ma_uint32)inputFramesToProcessThisIteration);    /* Safe cast. */
+        ma_uint64 outputFramesProcessedThisIteration = outputFramesToProcessThisIteration;
+        ma_data_converter_process_pcm_frames(&audioBuffer->converter, inputBuffer, &inputFramesProcessedThisIteration, runningFramesOut, &outputFramesProcessedThisIteration);
+
+        totalOutputFramesProcessed += (ma_uint32)outputFramesProcessedThisIteration; /* Safe cast. */
+
+        if (inputFramesProcessedThisIteration < inputFramesToProcessThisIteration)
+        {
+            break;  /* Ran out of input data. */
+        }
+
+        /* This should never be hit, but will add it here for safety. Ensures we get out of the loop when no input nor output frames are processed. */
+        if (inputFramesProcessedThisIteration == 0 && outputFramesProcessedThisIteration == 0)
+        {
+            break;
+        }
+    }
+
+    return totalOutputFramesProcessed;
+}
+
+// Sending audio data to device callback function
+// This function will be called when miniaudio needs more data
+// NOTE: All the mixing takes place here
+static void OnSendAudioDataToDevice(ma_device *pDevice, void *pFramesOut, const void *pFramesInput, ma_uint32 frameCount)
+{
+    (void)pDevice;
+
+    // Mixing is basically just an accumulation, we need to initialize the output buffer to 0
+    memset(pFramesOut, 0, frameCount*pDevice->playback.channels*ma_get_bytes_per_sample(pDevice->playback.format));
+
+    // Using a mutex here for thread-safety which makes things not real-time
+    // This is unlikely to be necessary for this project, but may want to consider how you might want to avoid this
+    ma_mutex_lock(&AUDIO.System.lock);
+    {
+        for (AudioBuffer *audioBuffer = AUDIO.Buffer.first; audioBuffer != NULL; audioBuffer = audioBuffer->next)
+        {
+            // Ignore stopped or paused sounds
+            if (!audioBuffer->playing || audioBuffer->paused) continue;
+
+            ma_uint32 framesRead = 0;
+
+            while (1)
+            {
+                if (framesRead >= frameCount) break;
+
+                // Just read as much data as we can from the stream
+                ma_uint32 framesToRead = (frameCount - framesRead);
+
+                while (framesToRead > 0)
+                {
+                    float tempBuffer[1024] = { 0 }; // Frames for stereo
+
+                    ma_uint32 framesToReadRightNow = framesToRead;
+                    if (framesToReadRightNow > sizeof(tempBuffer)/sizeof(tempBuffer[0])/AUDIO_DEVICE_CHANNELS)
+                    {
+                        framesToReadRightNow = sizeof(tempBuffer)/sizeof(tempBuffer[0])/AUDIO_DEVICE_CHANNELS;
+                    }
+
+                    ma_uint32 framesJustRead = ReadAudioBufferFramesInMixingFormat(audioBuffer, tempBuffer, framesToReadRightNow);
+                    if (framesJustRead > 0)
+                    {
+                        float *framesOut = (float *)pFramesOut + (framesRead*AUDIO.System.device.playback.channels);
+                        float *framesIn = tempBuffer;
+
+                        // Apply processors chain if defined
+                        rAudioProcessor *processor = audioBuffer->processor;
+                        while (processor)
+                        {
+                            processor->process(framesIn, framesJustRead);
+                            processor = processor->next;
+                        }
+
+                        MixAudioFrames(framesOut, framesIn, framesJustRead, audioBuffer);
+
+                        framesToRead -= framesJustRead;
+                        framesRead += framesJustRead;
+                    }
+
+                    if (!audioBuffer->playing)
+                    {
+                        framesRead = frameCount;
+                        break;
+                    }
+
+                    // If we weren't able to read all the frames we requested, break
+                    if (framesJustRead < framesToReadRightNow)
+                    {
+                        if (!audioBuffer->looping)
+                        {
+                            StopAudioBuffer(audioBuffer);
+                            break;
+                        }
+                        else
+                        {
+                            // Should never get here, but just for safety,
+                            // move the cursor position back to the start and continue the loop
+                            audioBuffer->frameCursorPos = 0;
+                            continue;
+                        }
+                    }
+                }
+
+                // If for some reason we weren't able to read every frame we'll need to break from the loop
+                // Not doing this could theoretically put us into an infinite loop
+                if (framesToRead > 0) break;
+            }
+        }
+    }
+
+    rAudioProcessor *processor = AUDIO.mixedProcessor;
+    while (processor)
+    {
+        processor->process(pFramesOut, frameCount);
+        processor = processor->next;
+    }
+
+    ma_mutex_unlock(&AUDIO.System.lock);
+}
+
+// Main mixing function, pretty simple in this project, just an accumulation
+// NOTE: framesOut is both an input and an output, it is initially filled with zeros outside of this function
+static void MixAudioFrames(float *framesOut, const float *framesIn, ma_uint32 frameCount, AudioBuffer *buffer)
+{
+    const float localVolume = buffer->volume;
+    const ma_uint32 channels = AUDIO.System.device.playback.channels;
+
+    if (channels == 2)  // We consider panning
+    {
+        const float left = buffer->pan;
+        const float right = 1.0f - left;
+
+        // Fast sine approximation in [0..1] for pan law: y = 0.5f*x*(3 - x*x);
+        const float levels[2] = { localVolume*0.5f*left*(3.0f - left*left), localVolume*0.5f*right*(3.0f - right*right) };
+
+        float *frameOut = framesOut;
+        const float *frameIn = framesIn;
+
+        for (ma_uint32 frame = 0; frame < frameCount; frame++)
+        {
+            frameOut[0] += (frameIn[0]*levels[0]);
+            frameOut[1] += (frameIn[1]*levels[1]);
+
+            frameOut += 2;
+            frameIn += 2;
+        }
+    }
+    else  // We do not consider panning
+    {
+        for (ma_uint32 frame = 0; frame < frameCount; frame++)
+        {
+            for (ma_uint32 c = 0; c < channels; c++)
+            {
+                float *frameOut = framesOut + (frame*channels);
+                const float *frameIn = framesIn + (frame*channels);
+
+                // Output accumulates input multiplied by volume to provided output (usually 0)
+                frameOut[c] += (frameIn[c]*localVolume);
+            }
+        }
+    }
+}
+
+// Some required functions for audio standalone module version
+#if defined(RAUDIO_STANDALONE)
+// Check file extension
+static bool IsFileExtension(const char *fileName, const char *ext)
+{
+    bool result = false;
+    const char *fileExt;
+
+    if ((fileExt = strrchr(fileName, '.')) != NULL)
+    {
+        if (strcmp(fileExt, ext) == 0) result = true;
+    }
+
+    return result;
+}
+
+// Get pointer to extension for a filename string (includes the dot: .png)
+static const char *GetFileExtension(const char *fileName)
+{
+    const char *dot = strrchr(fileName, '.');
+
+    if (!dot || dot == fileName) return NULL;
+
+    return dot;
+}
+
+// String pointer reverse break: returns right-most occurrence of charset in s
+static const char *strprbrk(const char *s, const char *charset)
+{
+    const char *latestMatch = NULL;
+    for (; s = strpbrk(s, charset), s != NULL; latestMatch = s++) { }
+    return latestMatch;
+}
+
+// Get pointer to filename for a path string
+static const char *GetFileName(const char *filePath)
+{
+    const char *fileName = NULL;
+    if (filePath != NULL) fileName = strprbrk(filePath, "\\/");
+
+    if (!fileName) return filePath;
+
+    return fileName + 1;
+}
+
+// Get filename string without extension (uses static string)
+static const char *GetFileNameWithoutExt(const char *filePath)
+{
+    #define MAX_FILENAMEWITHOUTEXT_LENGTH   256
+
+    static char fileName[MAX_FILENAMEWITHOUTEXT_LENGTH] = { 0 };
+    memset(fileName, 0, MAX_FILENAMEWITHOUTEXT_LENGTH);
+
+    if (filePath != NULL) strcpy(fileName, GetFileName(filePath));   // Get filename with extension
+
+    int size = (int)strlen(fileName);   // Get size in bytes
+
+    for (int i = 0; (i < size) && (i < MAX_FILENAMEWITHOUTEXT_LENGTH); i++)
+    {
+        if (fileName[i] == '.')
+        {
+            // NOTE: We break on first '.' found
+            fileName[i] = '\0';
+            break;
+        }
+    }
+
+    return fileName;
+}
+
+// Load data from file into a buffer
+static unsigned char *LoadFileData(const char *fileName, int *dataSize)
+{
+    unsigned char *data = NULL;
+    *dataSize = 0;
+
+    if (fileName != NULL)
+    {
+        FILE *file = fopen(fileName, "rb");
+
+        if (file != NULL)
+        {
+            // WARNING: On binary streams SEEK_END could not be found,
+            // using fseek() and ftell() could not work in some (rare) cases
+            fseek(file, 0, SEEK_END);
+            int size = ftell(file);
+            fseek(file, 0, SEEK_SET);
+
+            if (size > 0)
+            {
+                data = (unsigned char *)RL_MALLOC(size*sizeof(unsigned char));
+
+                // NOTE: fread() returns number of read elements instead of bytes, so we read [1 byte, size elements]
+                unsigned int count = (unsigned int)fread(data, sizeof(unsigned char), size, file);
+                *dataSize = count;
+
+                if (count != size) TRACELOG(LOG_WARNING, "FILEIO: [%s] File partially loaded", fileName);
+                else TRACELOG(LOG_INFO, "FILEIO: [%s] File loaded successfully", fileName);
+            }
+            else TRACELOG(LOG_WARNING, "FILEIO: [%s] Failed to read file", fileName);
+
+            fclose(file);
+        }
+        else TRACELOG(LOG_WARNING, "FILEIO: [%s] Failed to open file", fileName);
+    }
+    else TRACELOG(LOG_WARNING, "FILEIO: File name provided is not valid");
+
+    return data;
+}
+
+// Save data to file from buffer
+static bool SaveFileData(const char *fileName, void *data, int dataSize)
+{
+    if (fileName != NULL)
+    {
+        FILE *file = fopen(fileName, "wb");
+
+        if (file != NULL)
+        {
+            unsigned int count = (unsigned int)fwrite(data, sizeof(unsigned char), dataSize, file);
+
+            if (count == 0) TRACELOG(LOG_WARNING, "FILEIO: [%s] Failed to write file", fileName);
+            else if (count != dataSize) TRACELOG(LOG_WARNING, "FILEIO: [%s] File partially written", fileName);
+            else TRACELOG(LOG_INFO, "FILEIO: [%s] File saved successfully", fileName);
+
+            fclose(file);
+        }
+        else
+        {
+            TRACELOG(LOG_WARNING, "FILEIO: [%s] Failed to open file", fileName);
+            return false;
+        }
+    }
+    else
+    {
+        TRACELOG(LOG_WARNING, "FILEIO: File name provided is not valid");
+        return false;
+    }
+
+    return true;
+}
+
+// Save text data to file (write), string must be '\0' terminated
+static bool SaveFileText(const char *fileName, char *text)
+{
+    if (fileName != NULL)
+    {
+        FILE *file = fopen(fileName, "wt");
+
+        if (file != NULL)
+        {
+            int count = fprintf(file, "%s", text);
+
+            if (count == 0) TRACELOG(LOG_WARNING, "FILEIO: [%s] Failed to write text file", fileName);
+            else TRACELOG(LOG_INFO, "FILEIO: [%s] Text file saved successfully", fileName);
+
+            fclose(file);
+        }
+        else
+        {
+            TRACELOG(LOG_WARNING, "FILEIO: [%s] Failed to open text file", fileName);
+            return false;
+        }
+    }
+    else
+    {
+        TRACELOG(LOG_WARNING, "FILEIO: File name provided is not valid");
+        return false;
+    }
+
+    return true;
+}
+#endif
+
+#undef AudioBuffer
+
+#endif      // SUPPORT_MODULE_RAUDIO
diff --git a/shared/raudio.h b/shared/raudio.h
new file mode 100644
index 0000000..c42e42a
--- /dev/null
+++ b/shared/raudio.h
@@ -0,0 +1,222 @@
+/**********************************************************************************************
+*
+*   raudio v1.1 - A simple and easy-to-use audio library based on miniaudio
+*
+*   FEATURES:
+*       - Manage audio device (init/close)
+*       - Manage raw audio context
+*       - Manage mixing channels
+*       - Load and unload audio files
+*       - Format wave data (sample rate, size, channels)
+*       - Play/Stop/Pause/Resume loaded audio
+*
+*   DEPENDENCIES:
+*       miniaudio.h  - Audio device management lib (https://github.com/mackron/miniaudio)
+*       stb_vorbis.h - Ogg audio files loading (http://www.nothings.org/stb_vorbis/)
+*       dr_wav.h     - WAV audio files loading (http://github.com/mackron/dr_libs)
+*       dr_mp3.h     - MP3 audio file loading (https://github.com/mackron/dr_libs)
+*       dr_flac.h    - FLAC audio file loading (https://github.com/mackron/dr_libs)
+*       jar_xm.h     - XM module file loading
+*       jar_mod.h    - MOD audio file loading
+*
+*   CONTRIBUTORS:
+*       David Reid (github: @mackron) (Nov. 2017):
+*           - Complete port to miniaudio library
+*
+*       Joshua Reisenauer (github: @kd7tck) (2015):
+*           - XM audio module support (jar_xm)
+*           - MOD audio module support (jar_mod)
+*           - Mixing channels support
+*           - Raw audio context support
+*
+*
+*   LICENSE: zlib/libpng
+*
+*   Copyright (c) 2013-2025 Ramon Santamaria (@raysan5)
+*
+*   This software is provided "as-is", without any express or implied warranty. In no event
+*   will the authors be held liable for any damages arising from the use of this software.
+*
+*   Permission is granted to anyone to use this software for any purpose, including commercial
+*   applications, and to alter it and redistribute it freely, subject to the following restrictions:
+*
+*     1. The origin of this software must not be misrepresented; you must not claim that you
+*     wrote the original software. If you use this software in a product, an acknowledgment
+*     in the product documentation would be appreciated but is not required.
+*
+*     2. Altered source versions must be plainly marked as such, and must not be misrepresented
+*     as being the original software.
+*
+*     3. This notice may not be removed or altered from any source distribution.
+*
+**********************************************************************************************/
+
+#ifndef RAUDIO_H
+#define RAUDIO_H
+
+//----------------------------------------------------------------------------------
+// Defines and Macros
+//----------------------------------------------------------------------------------
+// In case this file is included, we are using raudio in standalone mode
+#ifndef RAUDIO_STANDALONE
+#define RAUDIO_STANDALONE
+#endif // RAUDIO_STANDALONE
+
+// Allow custom memory allocators
+#ifndef RL_MALLOC
+    #define RL_MALLOC(sz)       malloc(sz)
+#endif
+#ifndef RL_CALLOC
+    #define RL_CALLOC(n,sz)     calloc(n,sz)
+#endif
+#ifndef RL_FREE
+    #define RL_FREE(p)          free(p)
+#endif
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+#if (defined(__STDC__) && __STDC_VERSION__ >= 199901L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
+    #include <stdbool.h>
+#elif !defined(__cplusplus) && !defined(bool)
+    typedef enum bool { false = 0, true = !false } bool;
+    #define RL_BOOL_TYPE
+#endif
+
+typedef void (*AudioCallback)(void *bufferData, unsigned int frames);
+
+// Wave, audio wave data
+typedef struct Wave {
+    unsigned int frameCount;    // Total number of frames (considering channels)
+    unsigned int sampleRate;    // Frequency (samples per second)
+    unsigned int sampleSize;    // Bit depth (bits per sample): 8, 16, 32 (24 not supported)
+    unsigned int channels;      // Number of channels (1-mono, 2-stereo, ...)
+    void *data;                 // Buffer data pointer
+} Wave;
+
+// Opaque structs declaration
+typedef struct rAudioBuffer rAudioBuffer;
+typedef struct rAudioProcessor rAudioProcessor;
+
+// AudioStream, custom audio stream
+typedef struct AudioStream {
+    rAudioBuffer *buffer;       // Pointer to internal data used by the audio system
+    rAudioProcessor *processor; // Pointer to internal data processor, useful for audio effects
+
+    unsigned int sampleRate;    // Frequency (samples per second)
+    unsigned int sampleSize;    // Bit depth (bits per sample): 8, 16, 32 (24 not supported)
+    unsigned int channels;      // Number of channels (1-mono, 2-stereo, ...)
+} AudioStream;
+
+// Sound
+typedef struct Sound {
+    AudioStream stream;         // Audio stream
+    unsigned int frameCount;    // Total number of frames (considering channels)
+} Sound;
+
+// Music, audio stream, anything longer than ~10 seconds should be streamed
+typedef struct Music {
+    AudioStream stream;         // Audio stream
+    unsigned int frameCount;    // Total number of frames (considering channels)
+    bool looping;               // Music looping enable
+
+    int ctxType;                // Type of music context (audio filetype)
+    void *ctxData;              // Audio context data, depends on type
+} Music;
+
+//----------------------------------------------------------------------------------
+// Global Variables Definition
+//----------------------------------------------------------------------------------
+//...
+
+//----------------------------------------------------------------------------------
+// Module Functions Declaration
+//----------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+extern "C" {            // Prevents name mangling of functions
+#endif
+
+// Audio device management functions
+void InitAudioDevice(void);                                     // Initialize audio device and context
+void CloseAudioDevice(void);                                    // Close the audio device and context
+bool IsAudioDeviceReady(void);                                  // Check if audio device has been initialized successfully
+void SetMasterVolume(float volume);                             // Set master volume (listener)
+float GetMasterVolume(void);                                    // Get master volume (listener)
+
+// Wave/Sound loading/unloading functions
+Wave LoadWave(const char *fileName);                            // Load wave data from file
+Wave LoadWaveFromMemory(const char *fileType, const unsigned char *fileData, int dataSize); // Load wave from memory buffer, fileType refers to extension: i.e. ".wav"
+bool IsWaveReady(Wave wave);                                    // Checks if wave data is ready
+Sound LoadSound(const char *fileName);                          // Load sound from file
+Sound LoadSoundFromWave(Wave wave);                             // Load sound from wave data
+Sound LoadSoundAlias(Sound source);                             // Create a new sound that shares the same sample data as the source sound, does not own the sound data
+bool IsSoundReady(Sound sound);                                 // Checks if a sound is ready
+void UpdateSound(Sound sound, const void *data, int frameCount);// Update sound buffer with new data
+void UnloadWave(Wave wave);                                     // Unload wave data
+void UnloadSound(Sound sound);                                  // Unload sound
+void UnloadSoundAlias(Sound alias);                             // Unload a sound alias (does not deallocate sample data)
+bool ExportWave(Wave wave, const char *fileName);               // Export wave data to file, returns true on success
+bool ExportWaveAsCode(Wave wave, const char *fileName);         // Export wave sample data to code (.h), returns true on success
+
+// Wave/Sound management functions
+void PlaySound(Sound sound);                                    // Play a sound
+void StopSound(Sound sound);                                    // Stop playing a sound
+void PauseSound(Sound sound);                                   // Pause a sound
+void ResumeSound(Sound sound);                                  // Resume a paused sound
+bool IsSoundPlaying(Sound sound);                               // Check if a sound is currently playing
+void SetSoundVolume(Sound sound, float volume);                 // Set volume for a sound (1.0 is max level)
+void SetSoundPitch(Sound sound, float pitch);                   // Set pitch for a sound (1.0 is base level)
+void SetSoundPan(Sound sound, float pan);                       // Set pan for a sound (0.0 to 1.0, 0.5=center)
+Wave WaveCopy(Wave wave);                                       // Copy a wave to a new wave
+void WaveCrop(Wave *wave, int initSample, int finalSample);     // Crop a wave to defined samples range
+void WaveFormat(Wave *wave, int sampleRate, int sampleSize, int channels);  // Convert wave data to desired format
+float *LoadWaveSamples(Wave wave);                              // Load samples data from wave as a floats array
+void UnloadWaveSamples(float *samples);                         // Unload samples data loaded with LoadWaveSamples()
+
+// Music management functions
+Music LoadMusicStream(const char *fileName);                    // Load music stream from file
+Music LoadMusicStreamFromMemory(const char *fileType, const unsigned char* data, int dataSize); // Load music stream from data
+bool IsMusicReady(Music music);                                 // Checks if a music stream is ready
+void UnloadMusicStream(Music music);                            // Unload music stream
+void PlayMusicStream(Music music);                              // Start music playing
+bool IsMusicStreamPlaying(Music music);                         // Check if music is playing
+void UpdateMusicStream(Music music);                            // Updates buffers for music streaming
+void StopMusicStream(Music music);                              // Stop music playing
+void PauseMusicStream(Music music);                             // Pause music playing
+void ResumeMusicStream(Music music);                            // Resume playing paused music
+void SeekMusicStream(Music music, float position);              // Seek music to a position (in seconds)
+void SetMusicVolume(Music music, float volume);                 // Set volume for music (1.0 is max level)
+void SetMusicPitch(Music music, float pitch);                   // Set pitch for a music (1.0 is base level)
+void SetMusicPan(Music music, float pan);                       // Set pan for a music (0.0 to 1.0, 0.5=center)
+float GetMusicTimeLength(Music music);                          // Get music time length (in seconds)
+float GetMusicTimePlayed(Music music);                          // Get current music time played (in seconds)
+
+// AudioStream management functions
+AudioStream LoadAudioStream(unsigned int sampleRate, unsigned int sampleSize, unsigned int channels); // Load audio stream (to stream raw audio pcm data)
+bool IsAudioStreamReady(AudioStream stream);                    // Checks if an audio stream is ready
+void UnloadAudioStream(AudioStream stream);                     // Unload audio stream and free memory
+void UpdateAudioStream(AudioStream stream, const void *data, int samplesCount); // Update audio stream buffers with data
+bool IsAudioStreamProcessed(AudioStream stream);                // Check if any audio stream buffers requires refill
+void PlayAudioStream(AudioStream stream);                       // Play audio stream
+void PauseAudioStream(AudioStream stream);                      // Pause audio stream
+void ResumeAudioStream(AudioStream stream);                     // Resume audio stream
+bool IsAudioStreamPlaying(AudioStream stream);                  // Check if audio stream is playing
+void StopAudioStream(AudioStream stream);                       // Stop audio stream
+void SetAudioStreamVolume(AudioStream stream, float volume);    // Set volume for audio stream (1.0 is max level)
+void SetAudioStreamPitch(AudioStream stream, float pitch);      // Set pitch for audio stream (1.0 is base level)
+void SetAudioStreamPan(AudioStream strean, float pan);          // Set pan for audio stream  (0.0 to 1.0, 0.5=center)
+void SetAudioStreamBufferSizeDefault(int size);                 // Default size for new audio streams
+void SetAudioStreamCallback(AudioStream stream, AudioCallback callback);  // Audio thread callback to request new data
+
+void AttachAudioStreamProcessor(AudioStream stream, AudioCallback processor); // Attach audio stream processor to stream
+void DetachAudioStreamProcessor(AudioStream stream, AudioCallback processor); // Detach audio stream processor from stream
+
+void AttachAudioMixedProcessor(AudioCallback processor); // Attach audio stream processor to the entire audio pipeline
+void DetachAudioMixedProcessor(AudioCallback processor); // Detach audio stream processor from the entire audio pipeline
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // RAUDIO_H
diff --git a/story-editor/src/zip.cpp b/shared/zip.cpp
similarity index 100%
rename from story-editor/src/zip.cpp
rename to shared/zip.cpp
diff --git a/story-editor/src/zip.h b/shared/zip.h
similarity index 100%
rename from story-editor/src/zip.h
rename to shared/zip.h
diff --git a/story-editor/00_prepare_outdir.sh b/story-editor/00_prepare_outdir.sh
new file mode 100755
index 0000000..c43bf28
--- /dev/null
+++ b/story-editor/00_prepare_outdir.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Nom de l'exécutable à analyser (à remplacer par le votre)
+executable="build-linux/story-editor"
+
+# Commande ldd et stockage de la sortie dans une variable
+ldd_output=$(ldd "$executable"  | awk '/ => / { print $1 }')
+
+echo $ldd_output
+
+
+
+# repertoire_courant=$(pwd)
+
+# # Récupère les noms des bibliothèques en filtrant sur le répertoire courant
+# bibliotheques_courant=$(ldd "$executable" | awk -v dir="$repertoire_courant" '/ => / && $3 ~ "^"dir"/" { print $3 }')
+
+# # Affiche les résultats
+# echo "Bibliothèques dans le répertoire courant :"
+# echo "$bibliotheques_courant"
diff --git a/story-editor/01_linux_create_docker_image.sh b/story-editor/01_linux_create_docker_image.sh
new file mode 100755
index 0000000..557fcde
--- /dev/null
+++ b/story-editor/01_linux_create_docker_image.sh
@@ -0,0 +1 @@
+docker buildx build -t cpp-dev-linux -f Dockerfile.linux . --no-cache --load
diff --git a/story-editor/02_linux_docker_launch_build.sh b/story-editor/02_linux_docker_launch_build.sh
new file mode 100755
index 0000000..b2cc320
--- /dev/null
+++ b/story-editor/02_linux_docker_launch_build.sh
@@ -0,0 +1,3 @@
+
+docker run -v $(pwd)/..:/workspace cpp-dev-linux /bin/bash -c "cd /workspace/story-editor && ./build_linux.sh"
+
diff --git a/story-editor/03_linux_docker_launch_package.sh b/story-editor/03_linux_docker_launch_package.sh
new file mode 100755
index 0000000..acb1029
--- /dev/null
+++ b/story-editor/03_linux_docker_launch_package.sh
@@ -0,0 +1,3 @@
+
+docker run --privileged -v $(pwd)/..:/workspace cpp-dev-linux /bin/bash -c "cd /workspace/story-editor && ./build_appimage.sh"
+
diff --git a/story-editor/CMakeLists.txt b/story-editor/CMakeLists.txt
index 482ae45..15d49e6 100644
--- a/story-editor/CMakeLists.txt
+++ b/story-editor/CMakeLists.txt
@@ -21,81 +21,11 @@ endif()
 
 find_package(OpenGL REQUIRED)
 
-# set(OPENSSL_ROOT_DIR /libs/openssl)
-# find_package(OpenSSL REQUIRED)
-
 set(IMGUI_VERSION 1.91.6)
 
 include(FetchContent)
-
-include(cmake/CPM.cmake)
-
-
-# =========================================================================================================================
-# MBedTLS
-# =========================================================================================================================
-CPMAddPackage(
-    NAME mbedtls    
-    GITHUB_REPOSITORY Mbed-TLS/mbedtls
-    VERSION 3.6.2
-    OPTIONS
-        "USE_STATIC_MBEDTLS_LIBRARY ON"
-        "ENABLE_PROGRAMS OFF"
-        "ENABLE_TESTING OFF"
-)
-
-include_directories(${mbedtls_INCLUDE_DIR})
-# set(MBEDTLS_STATIC_LIBRARY ON)
-
-# =========================================================================================================================
-# CibetWeb
-# =========================================================================================================================
-CPMAddPackage(
-    NAME civetweb
-    GITHUB_REPOSITORY  civetweb/civetweb
-    VERSION 1.16
-    OPTIONS 
-        "CIVETWEB_BUILD_TESTING OFF"
-        "CIVETWEB_ENABLE_SERVER_EXECUTABLE OFF"
-        "CIVETWEB_ENABLE_CXX ON"
-        "CIVETWEB_ENABLE_WEBSOCKETS ON"
-        "CIVETWEB_ENABLE_ASAN OFF"
-)
- 
-find_package(civetweb REQUIRED)
-include_directories(${civetweb_SOURCE_DIR}/include)
-
-# =========================================================================================================================
-# CURL
-# =========================================================================================================================
-
-
-# Définit les options de cURL pour utiliser mBedTLS
-
-set(CURL_USE_OPENSSL OFF  CACHE BOOL "Disable OpenSSL." FORCE)
-set(CURL_USE_MBEDTLS ON  CACHE BOOL "Use MBED TLS." FORCE)
-set(CURL_USE_LIBSSH2 OFF  CACHE BOOL "Disable SSH." FORCE)
-set(MBEDTLS_INCLUDE_DIRS ${mbedtls_SOURCE_DIR}/include)
-set(MBEDTLS_LIBRARY ${mbedtls_BINARY_DIR}/libmbedtls.a)
-set(MBEDX509_LIBRARY ${mbedtls_BINARY_DIR}/libmbedx509.a)
-set(MBEDCRYPTO_LIBRARY ${mbedtls_BINARY_DIR}/libmbedcrypto.a)
-
-# Télécharge et configure cURL
-FetchContent_Declare(
-  curl
-  GIT_REPOSITORY https://github.com/curl/curl.git
-  GIT_TAG        curl-8_7_1
-  GIT_SHALLOW TRUE
-  GIT_PROGRESS TRUE
-)
-
-set(BUILD_CURL_EXE FALSE)
-set(BUILD_STATIC_LIBS TRUE)
-FetchContent_MakeAvailable(curl)
-
-# Assurez-vous que votre projet trouve les headers de mBedTLS et cURL
-include_directories(${mbedtls_SOURCE_DIR}/include)
-include_directories(${curl_SOURCE_DIR}/include)
+# Adhere to GNU filesystem layout conventions
+include(GNUInstallDirs)
 
 
 # =========================================================================================================================
@@ -114,70 +44,37 @@ add_compile_definitions(CUSTOM_IMGUIFILEDIALOG_CONFIG="${CMAKE_SOURCE_DIR}/src/C
 add_compile_definitions(IMGUI_INCLUDE="imgui.h")
 add_subdirectory(libs/ImGuiFileDialog)
 
-# =========================================================================================================================
-# SDL3
-# =========================================================================================================================
-FetchContent_Declare(
-    sdl3
-    GIT_REPOSITORY https://github.com/libsdl-org/SDL.git
-    GIT_TAG 78cc5c173404488d80751af226d1eaf67033bcc4   # === preview-3.1.6
-    GIT_SHALLOW TRUE
-    GIT_PROGRESS TRUE
-)
-
-set(BUILD_SHARED_LIBS TRUE)
-set(SDL_STATIC TRUE)
-FetchContent_MakeAvailable(sdl3)
-include_directories(${sdl3_SOURCE_DIR}/include)
-# add_subdirectory(${sdl3_SOURCE_DIR}) 
 
 # =========================================================================================================================
 # SDL3 MIXER
 # =========================================================================================================================
 
-CPMAddPackage(
-   NAME sdl3_mixer
-   URL https://github.com/libsdl-org/SDL_mixer/archive/d4eba31e4ac23a81fffad02e91b17dcb2449a2cb.tar.gz
+add_subdirectory(externals/civetweb EXCLUDE_FROM_ALL)
+add_subdirectory(externals/curl EXCLUDE_FROM_ALL)
 
-    OPTIONS 
-        "BUILD_SHARED_LIBS TRUE"
-        "SDL_PULSEAUDIO_SHARED TRUE"
-        "SDL_PIPEWIRE_SHARED TRUE" 
-)
+# Configure SDL by calling its CMake file.
+# we use EXCLUDE_FROM_ALL so that its install targets and configs don't
+# pollute upwards into our configuration.
+add_subdirectory(externals/SDL EXCLUDE_FROM_ALL)
 
-# FetchContent_Declare(
-#     sdl3_mixer
-#     GIT_REPOSITORY https://github.com/libsdl-org/SDL_mixer.git
-#     GIT_TAG d4eba31e4ac23a81fffad02e91b17dcb2449a2cb
-#     # GIT_SHALLOW TRUE # Ne pas activer shallow sinon le tag n'est pas trouvé
-#     # GIT_PROGRESS TRUE
-#     GIT_SUBMODULES ""
-# )
+# SDL_mixer (used for playing audio)
+set(SDLMIXER_MIDI_NATIVE OFF)     # disable formats we don't use to make the build faster and smaller. Also some of these don't work on all platforms so you'll need to do some experimentation.
+set(SDLMIXER_GME OFF)
+set(SDLMIXER_WAVPACK OFF)     
+set(SDLMIXER_MOD OFF)
+set(SDLMIXER_OPUS OFF)
+set(SDLMIXER_VENDORED ON)   # tell SDL_mixer to build its own dependencies
+add_subdirectory(externals/SDL_mixer EXCLUDE_FROM_ALL)
 
-# set(BUILD_SHARED_LIBS TRUE)
-# set(SDL_PULSEAUDIO_SHARED TRUE)
-# set(SDL_PIPEWIRE_SHARED TRUE)
-# FetchContent_MakeAvailable(sdl3_mixer)
-# include_directories(${sdl3_mixer_SOURCE_DIR}/include)
+# SDL_image (used for loading various image formats)
+set(SDLIMAGE_VENDORED OFF)
+set(SDLIMAGE_AVIF OFF)	# disable formats we don't use to make the build faster and smaller.
+set(SDLIMAGE_BMP ON)
+set(SDLIMAGE_JPEG ON)
+set(SDLIMAGE_WEBP ON)
+add_subdirectory(externals/SDL_image EXCLUDE_FROM_ALL)
 
 
-# =========================================================================================================================
-# SDL3-Image
-# =========================================================================================================================
-FetchContent_Declare(
-    sdl_image
-    GIT_REPOSITORY https://github.com/libsdl-org/SDL_image.git
-    GIT_TAG bcc97c044266080256ef6ed0d690859677212b2b
-    GIT_PROGRESS TRUE
-    # GIT_SHALLOW TRUE # Ne pas activer shallow sinon le tag n'est pas trouvé
-) 
-
-
-set(SDL3IMAGE_INSTALL OFF)
-set(BUILD_SHARED_LIBS TRUE)
-FetchContent_MakeAvailable(sdl_image)
-include_directories(${sdl_image_SOURCE_DIR}/include)
-
 # =========================================================================================================================
 # Project sources
 # =========================================================================================================================
@@ -185,28 +82,25 @@ set(SRCS
 
     src/main.cpp
 
-    src/window_base.cpp
-    src/console_window.cpp
-    src/emulator_window.cpp
-    src/main_window.cpp
-    src/library_window.cpp
-    src/platform_folders.cpp
-
-
+    src/windows/window_base.cpp
+    src/windows/console_window.cpp
+    src/windows/emulator_window.cpp
+    src/windows/main_window.cpp
+    src/windows/library_window.cpp
+    src/windows/resources_window.cpp
+    src/windows/properties_window.cpp
+    src/windows/debugger_window.cpp
+    src/windows/cpu_window.cpp
+    src/windows/variables_window.cpp
+    
     src/node_editor/media_node_widget.cpp
     src/node_editor/base_node_widget.cpp
     src/node_editor/node_editor_window.cpp
     src/node_editor/function_node_widget.cpp
-
-    src/resources_window.cpp
-    src/properties_window.cpp
-    src/cpu_window.cpp
+    src/node_editor/variable_node_widget.cpp
+   
     src/gui.cpp
-
-    src/code_editor.cpp
     src/media_converter.cpp
-    src/miniz.c
-    src/zip.cpp
     src/web_server.cpp
 
     src/importers/pack_archive.cpp
@@ -226,28 +120,35 @@ set(SRCS
     ${imgui_SOURCE_DIR}/imgui_tables.cpp
     ${imgui_SOURCE_DIR}/imgui_draw.cpp
 
-    ../firmware/chip32/chip32_assembler.cpp
-    ../firmware/chip32/chip32_vm.c
-
     ../shared/audio_player.cpp
     ../shared/stb_vorbis.c
     ../shared/resource_manager.cpp
     ../shared/library_manager.cpp
     ../shared/downloader.cpp
     ../shared/story_db.cpp
+    ../shared/miniz.c
+    ../shared/zip.cpp
+    ../shared/platform_folders.cpp
+    ../shared/raudio.c
+
 
     # Core engine files
-    
-    ../core/src/compiler.cpp
-    ../core/src/story_project.cpp
-    ../core/src/story_page.cpp
-    ../core/src/base_node.cpp
-    ../core/src/media_node.cpp
-    ../core/src/function_node.cpp
-    ../core/src/connection.cpp
+    ../core/story-manager/src/compiler.cpp
+    ../core/story-manager/src/story_project.cpp
+    ../core/story-manager/src/story_page.cpp
+    ../core/story-manager/src/base_node.cpp
+    ../core/story-manager/src/media_node.cpp
+    ../core/story-manager/src/compare_node.cpp
+    ../core/story-manager/src/branch_node.cpp
+    ../core/story-manager/src/variable_node.cpp
+    ../core/story-manager/src/function_node.cpp
+    ../core/story-manager/src/connection.cpp
+    ../core/story-manager/lib/sys_lib.cpp
+    ../core/story-manager/lib/resource.cpp
 
-    ../core/lib/sys_lib.cpp
-    ../core/lib/resource.cpp
+
+    ../core/chip32/chip32_assembler.cpp
+    ../core/chip32/chip32_vm.c
 )
 
 if(WIN32)
@@ -271,30 +172,34 @@ endif()
 
 target_include_directories(${STORY_EDITOR_PROJECT} PUBLIC
     ${imgui_SOURCE_DIR}
-    ${sdl2_SOURCE_DIR}/include
-
-    # ${te_SOURCE_DIR}
-    
     ${imgui_SOURCE_DIR}/backends
+
     libs/ImGuiFileDialog
     libs/imgui-node-editor
+    libs
 
-    ${curl_INCLUDE_DIR}
-
-    ${CMAKE_SOURCE_DIR}/src
-    ${CMAKE_SOURCE_DIR}/src/importers
-    ${CMAKE_SOURCE_DIR}/src/node_editor
-    ${CMAKE_SOURCE_DIR}/src/node_engine
+    src
+    src/importers
+    src/node_editor
+    src/windows
 
     ../firmware/library
-    ../firmware/chip32
+    ../core/chip32
     ../shared
-    ../core/src
-    ../core/lib
-    ../core/interfaces
+    ../core/story-manager/src
+    ../core/story-manager/lib
+    ../core/story-manager/interfaces
 )
 
-add_definitions(-DIMGUI_USE_WCHAR32 -DVERSION_MAJOR=${PROJECT_VERSION_MAJOR} -DVERSION_MINOR=${PROJECT_VERSION_MINOR} -DVERSION_PATCH=${PROJECT_VERSION_PATCH})
+add_definitions(
+    -DIMGUI_USE_WCHAR32 
+    -DVERSION_MAJOR=${PROJECT_VERSION_MAJOR} 
+    -DVERSION_MINOR=${PROJECT_VERSION_MINOR} 
+    -DVERSION_PATCH=${PROJECT_VERSION_PATCH}
+
+    -DRAUDIO_STANDALONE
+    -DSUPPORT_MODULE_RAUDIO
+)
 add_link_options(-static-libgcc -static-libstdc++)
 
 if (APPLE)
@@ -318,15 +223,11 @@ set_target_properties(${PROJECT_NAME} PROPERTIES
 
 endif()
 
-# target_compile_definitions(${STORY_EDITOR_PROJECT} PUBLIC cimg_display=0)
-
-# target_compile_definitions(${STORY_EDITOR_PROJECT} PUBLIC "$<$<CONFIG:DEBUG>:DEBUG>")
 
 target_link_directories(${STORY_EDITOR_PROJECT} PUBLIC 
     ${sdl3_BINARY_DIR}
-    ${curl_BINARY_DIR}
+    ${libcurl_BINARY_DIR}
     ${mbedtls_BINARY_DIR}
-    # ${CivetWeb_BINARY_DIR}
 )
 
 # On est obligé de passer par une variable pour injecter 
@@ -337,14 +238,11 @@ set(SDL_MIXER_BIN_DIR ${sdl3_mixer_BINARY_DIR})
 
 if(UNIX)
     target_link_libraries(${STORY_EDITOR_PROJECT}
-
-        
-        SDL3::SDL3
         SDL3_image::SDL3_image
         SDL3_mixer::SDL3_mixer
-        libcurl_static
-        mbedtls
-        civetweb-cpp
+        SDL3::SDL3
+        CURL::libcurl
+        civetweb::civetweb-cpp
         pthread
         OpenGL::GL
         dl
@@ -353,34 +251,33 @@ if(UNIX)
 elseif(WIN32)
     target_link_libraries(${STORY_EDITOR_PROJECT}
         OpenGL::GL
-        SDL3::SDL3
         SDL3_image::SDL3_image
         SDL3_mixer::SDL3_mixer
+        SDL3::SDL3
         libcurl_static
         ws2_32.lib psapi.lib setupapi.lib cfgmgr32.lib advapi32.lib
     )
 endif()
 
-# =========================================================================================================================
-# CPACK INSTALLER
-# =========================================================================================================================
-install(TARGETS ${STORY_EDITOR_PROJECT} RUNTIME DESTINATION ".")
+set(CMAKE_INSTALL_PREFIX "${CMAKE_SOURCE_DIR}/OUTPUT" CACHE PATH "Directory for sbnw installation" FORCE)
 
-# Personnaliser les options d'installation
-set(CPACK_PACKAGE_NAME "Open-Story-Editor")
-set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Anthony Rabine")
-set(CPACK_PACKAGE_DESCRIPTION "Open Story Teller - Node based editor")
-set(CPACK_PACKAGE_VENDOR "D8S")
-set(CPACK_PACKAGE_VERSION_MAJOR "${PROJECT_VERSION_MAJOR}")
-set(CPACK_PACKAGE_VERSION_MINOR "${PROJECT_VERSION_MINOR}")
-set(CPACK_PACKAGE_VERSION_PATCH "${PROJECT_VERSION_PATCH}")
-set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE")
+# set_target_properties(${STORY_EDITOR_PROJECT}
+#     PROPERTIES
+#         LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/$<CONFIG>/lib
+#         RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/$<CONFIG>/bin
+# )
+
+
+# =========================================================================================================================
+# DIRECTORY INSTALLER
+# =========================================================================================================================
 
 # install(DIRECTORY "${PROJECT_SOURCE_DIR}/assets/" DESTINATION "assets")
 install(DIRECTORY "${PROJECT_SOURCE_DIR}/fonts/" DESTINATION "fonts")
 install(DIRECTORY "${PROJECT_SOURCE_DIR}/scripts/" DESTINATION "scripts")
-install_files("." FILES "${CMAKE_SOURCE_DIR}/LICENSE")
-install_files("." FILES "${CMAKE_SOURCE_DIR}/tools/imgui.ini")
+install(FILES "${CMAKE_SOURCE_DIR}/LICENSE" DESTINATION ".")
+install(FILES "${CMAKE_SOURCE_DIR}/tools/imgui.ini" DESTINATION "bin")
+install(TARGETS ${STORY_EDITOR_PROJECT} BUNDLE DESTINATION bin)
 
 if(WIN32)
     install_files("." FILES "${SDL_BIN_DIR}/SDL3.dll")
@@ -396,9 +293,13 @@ endif()
 
 if(LINUX)
 
-    install_files("." FILES "${SDL_BIN_DIR}/libSDL3.so")
-    install_files("." FILES "${SDL_IMAGE_BIN_DIR}/libSDL3_image.so")
-    install_files("." FILES "${SDL_MIXER_BIN_DIR}/libSDL3_mixer.so")
+    install(DIRECTORY "${SDL_BIN_DIR}/" DESTINATION lib FILES_MATCHING REGEX "libSDL3.so(\\..*)?$")
+    install(DIRECTORY "${SDL_IMAGE_BIN_DIR}/" DESTINATION lib FILES_MATCHING REGEX "libSDL3_image.so(\\..*)?$")
+    install(DIRECTORY "${SDL_MIXER_BIN_DIR}/" DESTINATION lib FILES_MATCHING REGEX "libSDL3_mixer.so(\\..*)?$")
+
+    # install_files("." FILES "${SDL_BIN_DIR}/libSDL3.so")
+    # install_files("." FILES "${SDL_IMAGE_BIN_DIR}/libSDL3_image.so")
+    # install_files("." FILES "${SDL_MIXER_BIN_DIR}/libSDL3_mixer.so")
 endif()
 
 if (APPLE)
@@ -407,4 +308,3 @@ if (APPLE)
     install_files("." FILES "${SDL_BIN_DIR}/libSDL2-2.0.0.dylib")
 endif()
 
-include(CPack)
diff --git a/story-editor/CMakePresets.json b/story-editor/CMakePresets.json
index b17f2fb..b41ad1f 100644
--- a/story-editor/CMakePresets.json
+++ b/story-editor/CMakePresets.json
@@ -1,5 +1,5 @@
 {
-    "version": 10,
+    "version": 3,
     "cmakeMinimumRequired": {
       "major": 3,
       "minor": 23,
@@ -13,7 +13,7 @@
         "generator": "Ninja",
         "binaryDir": "${sourceDir}/build",
         "environment": {
-          "CPM_SOURCE_CACHE": "$env{HOME}/.cache/CPM"
+
         }
       },
       {
diff --git a/story-editor/Dockerfile.linux b/story-editor/Dockerfile.linux
new file mode 100644
index 0000000..736efb1
--- /dev/null
+++ b/story-editor/Dockerfile.linux
@@ -0,0 +1,49 @@
+    # Utiliser une image de base Alpine Linux
+FROM ubuntu:24.04
+
+# Installer les dépendances nécessaires
+# DEBIAN_FRONTEND=noninteractive permet d'éviter une interaction lors de la config de tzdata
+RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
+    build-essential \
+    flatpak \
+    flatpak-builder \
+    file \
+    wget \
+    imagemagick \
+    cmake \
+    git \
+    zlib1g-dev \
+    mesa-common-dev \
+    libgl1-mesa-dev \
+    libgles2-mesa-dev \
+    libxcursor-dev \
+    libxrandr-dev  \
+    libxinerama-dev \
+    libxi-dev \
+    libasound2-dev \
+    libpulse-dev \
+    libaudio-dev \
+    libjack-dev  \
+    libsndio-dev  \
+    libx11-dev  \
+    libxext-dev \
+    libxfixes-dev \
+    libxss-dev \
+    libxkbcommon-dev  \
+    libudev-dev  \
+    fcitx-libs-dev \
+    libpipewire-0.3-dev  \
+    libwayland-dev  \
+    libdecor-0-dev  \
+    liburing-dev \
+    libdrm-dev \
+    libgbm-dev  \
+    libegl1-mesa-dev \
+    libdbus-1-dev  \
+    libibus-1.0-dev
+    
+
+RUN mkdir /workspace
+
+# Commande par défaut (modifiable dans le script Bash)
+CMD ["/bin/bash"]
diff --git a/story-editor/Dockerfile b/story-editor/Dockerfile.mingw64
similarity index 100%
rename from story-editor/Dockerfile
rename to story-editor/Dockerfile.mingw64
diff --git a/story-editor/OpenStoryEditor-x86_64.AppImage b/story-editor/OpenStoryEditor-x86_64.AppImage
new file mode 100755
index 0000000..b3bd104
Binary files /dev/null and b/story-editor/OpenStoryEditor-x86_64.AppImage differ
diff --git a/story-editor/appimagetool-x86_64.AppImage b/story-editor/appimagetool-x86_64.AppImage
new file mode 100755
index 0000000..64d66ff
Binary files /dev/null and b/story-editor/appimagetool-x86_64.AppImage differ
diff --git a/story-editor/build_appimage.sh b/story-editor/build_appimage.sh
new file mode 100755
index 0000000..1bca0be
--- /dev/null
+++ b/story-editor/build_appimage.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Variables
+APP_NAME="OpenStoryEditor"
+APP_DIR="AppDir"
+APP_EXE="story-editor"
+BUILD_DIR="build-linux"
+APPIMAGE_TOOL="appimagetool-x86_64.AppImage"
+APPIMAGE_URL="https://github.com/AppImage/AppImageKit/releases/download/continuous/appimagetool-x86_64.AppImage"
+
+# Vérification des dépendances
+if ! command -v cmake &> /dev/null; then
+    echo "Erreur : cmake n'est pas installé. Installez-le avant de continuer."
+    exit 1
+fi
+
+if ! command -v wget &> /dev/null; then
+    echo "Erreur : wget n'est pas installé. Installez-le avant de continuer."
+    exit 1
+fi
+
+# Étape 1 : Préparer l'AppDir
+echo "Préparation de l'AppDir..."
+rm -rf "${APP_DIR}"
+mkdir -p "${APP_DIR}/usr/bin"
+mkdir -p "${APP_DIR}/usr/share/applications"
+mkdir -p "${APP_DIR}/usr/share/icons/hicolor/256x256/apps"
+
+# Copier les fichiers nécessaires
+cp "${BUILD_DIR}/${APP_EXE}" "${APP_DIR}/usr/bin/"
+cat > "${APP_DIR}/${APP_NAME}.desktop" <<EOL
+[Desktop Entry]
+Type=Application
+Name=${APP_NAME}
+Exec=${APP_EXE}
+Icon=${APP_NAME}
+Terminal=false
+Categories=Utility;
+EOL
+
+cat > "${APP_DIR}/AppRun" <<EOL
+#!/bin/bash
+HERE="$(dirname "$(readlink -f "${0}")")"
+export PATH="$HERE/usr/bin:$PATH"
+export LD_LIBRARY_PATH="$HERE/usr/lib:$LD_LIBRARY_PATH"
+exec "$HERE/usr/bin/${APP_EXE}" "$@"
+EOL
+
+chmod +x ${APP_DIR}/AppRun
+
+# Ajouter une icône (optionnel, remplacer par une vraie icône si disponible)
+cp story-editor-logo-256x256.png "${APP_DIR}/usr/share/icons/hicolor/256x256/apps/${APP_NAME}.png"
+cp story-editor-logo-256x256.png "${APP_DIR}/${APP_NAME}.png"
+cp "${APP_DIR}/${APP_NAME}.desktop" "${APP_DIR}/usr/share/applications/${APP_NAME}.desktop"
+
+# Étape 2 : Télécharger l'outil AppImage
+if [ ! -f "${APPIMAGE_TOOL}" ]; then
+    echo "Téléchargement de l'outil AppImageTool..."
+    wget "${APPIMAGE_URL}" -O "${APPIMAGE_TOOL}"
+    chmod +x "${APPIMAGE_TOOL}"
+fi
+
+# Étape 3 : Générer l'AppImage
+echo "Génération de l'AppImage..."
+ARCH=x86_64 ./"${APPIMAGE_TOOL}" "${APP_DIR}" || { echo "Erreur : échec de la création de l'AppImage."; exit 1; }
+
+echo "AppImage générée avec succès : ${APP_NAME}-x86_64.AppImage"
diff --git a/story-editor/build_flatpak.sh b/story-editor/build_flatpak.sh
new file mode 100755
index 0000000..e128a4c
--- /dev/null
+++ b/story-editor/build_flatpak.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+# Variables
+APP_NAME="OpenStoryEditor"
+APP_ID="eu.d8s.OpenStoryEditor"
+VERSION="1.0.0"
+ICON_NAME="stoty-editor-logo-256x256.png"
+BUILD_DIR="build" # Répertoire où se trouve l'exécutable généré
+FLATPAK_DIR="flatpak_build"
+FLATPAK_MANIFEST="${FLATPAK_DIR}/${APP_ID}.yaml"
+
+# Vérifier les prérequis
+if ! command -v flatpak-builder &> /dev/null; then
+    echo "Erreur : flatpak-builder n'est pas installé. Installez-le avant de continuer."
+    exit 1
+fi
+
+if [ ! -f "${BUILD_DIR}/${APP_NAME}" ]; then
+    echo "Erreur : L'exécutable '${BUILD_DIR}/${APP_NAME}' est introuvable."
+    exit 1
+fi
+
+if [ ! -f "${ICON_NAME}" ]; then
+    echo "Erreur : L'icône '${ICON_NAME}' est introuvable. Placez une icône PNG de 256x256 pixels à la racine."
+    exit 1
+fi
+
+# Préparation du répertoire Flatpak
+echo "Création du répertoire Flatpak..."
+rm -rf "${FLATPAK_DIR}"
+mkdir -p "${FLATPAK_DIR}"
+
+# Génération du fichier manifeste YAML
+echo "Création du fichier manifeste Flatpak (${FLATPAK_MANIFEST})..."
+cat > "${FLATPAK_MANIFEST}" <<EOL
+app-id: ${APP_ID}
+runtime: org.freedesktop.Platform
+runtime-version: "23.08"
+sdk: org.freedesktop.Sdk
+command: ${APP_NAME}
+finish-args:
+  - --share=network
+  - --share=ipc
+  - --device=dri
+  - --socket=x11
+  - --socket=wayland
+  - --filesystem=home
+
+modules:
+  - name: ${APP_NAME}
+    buildsystem: simple
+    build-commands:
+      - install -Dm755 story-editor /app/bin/story-editor
+      - install -Dm644 story-editor.desktop /app/share/applications/story-editor.desktop
+      - install -Dm644 ${ICON_NAME} /app/share/icons/hicolor/256x256/apps/${APP_ID}.png
+    sources:
+      - type: dir
+        path: ../${BUILD_DIR}
+EOL
+
+# Création du fichier .desktop
+echo "Création du fichier .desktop..."
+cat > "${BUILD_DIR}/${APP_NAME}.desktop" <<EOL
+[Desktop Entry]
+Type=Application
+Name=${APP_NAME}
+Exec=${APP_NAME}
+Icon=${APP_ID}
+Terminal=false
+Categories=Utility;
+EOL
+
+# Construction du Flatpak
+echo "Construction du Flatpak..."
+flatpak-builder --force-clean \
+    "${FLATPAK_DIR}/build-dir" \
+    "${FLATPAK_MANIFEST}"
+
+if [ $? -ne 0 ]; then
+    echo "Erreur : Échec de la construction du Flatpak."
+    exit 1
+fi
+
+# Exportation du Flatpak en un fichier .flatpak
+echo "Exportation du Flatpak..."
+flatpak build-bundle "${FLATPAK_DIR}/build-dir" \
+    "${FLATPAK_DIR}/${APP_NAME}-${VERSION}.flatpak" \
+    "${APP_ID}" "${VERSION}"
+
+if [ $? -eq 0 ]; then
+    echo "Flatpak généré avec succès : ${FLATPAK_DIR}/${APP_NAME}-${VERSION}.flatpak"
+else
+    echo "Erreur : Échec de l'exportation du Flatpak."
+    exit 1
+fi
diff --git a/story-editor/build_linux.sh b/story-editor/build_linux.sh
new file mode 100755
index 0000000..19de05f
--- /dev/null
+++ b/story-editor/build_linux.sh
@@ -0,0 +1,6 @@
+mkdir -p /workspace/story-editor/build-linux
+cd /workspace/story-editor/build-linux
+git config --global http.sslverify false
+cmake -DCMAKE_BUILD_TYPE=Release ..
+make
+cpack
diff --git a/story-editor/cmake/CPM.cmake b/story-editor/cmake/CPM.cmake
deleted file mode 100644
index 42b637a..0000000
--- a/story-editor/cmake/CPM.cmake
+++ /dev/null
@@ -1,1285 +0,0 @@
-# CPM.cmake - CMake's missing package manager
-# ===========================================
-# See https://github.com/cpm-cmake/CPM.cmake for usage and update instructions.
-#
-# MIT License
-# -----------
-#[[
-  Copyright (c) 2019-2023 Lars Melchior and contributors
-
-  Permission is hereby granted, free of charge, to any person obtaining a copy
-  of this software and associated documentation files (the "Software"), to deal
-  in the Software without restriction, including without limitation the rights
-  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-  copies of the Software, and to permit persons to whom the Software is
-  furnished to do so, subject to the following conditions:
-
-  The above copyright notice and this permission notice shall be included in all
-  copies or substantial portions of the Software.
-
-  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-  SOFTWARE.
-]]
-
-cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
-
-# Initialize logging prefix
-if(NOT CPM_INDENT)
-  set(CPM_INDENT
-      "CPM:"
-      CACHE INTERNAL ""
-  )
-endif()
-
-if(NOT COMMAND cpm_message)
-  function(cpm_message)
-    message(${ARGV})
-  endfunction()
-endif()
-
-set(CURRENT_CPM_VERSION 0.40.5)
-
-get_filename_component(CPM_CURRENT_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" REALPATH)
-if(CPM_DIRECTORY)
-  if(NOT CPM_DIRECTORY STREQUAL CPM_CURRENT_DIRECTORY)
-    if(CPM_VERSION VERSION_LESS CURRENT_CPM_VERSION)
-      message(
-        AUTHOR_WARNING
-          "${CPM_INDENT} \
-A dependency is using a more recent CPM version (${CURRENT_CPM_VERSION}) than the current project (${CPM_VERSION}). \
-It is recommended to upgrade CPM to the most recent version. \
-See https://github.com/cpm-cmake/CPM.cmake for more information."
-      )
-    endif()
-    if(${CMAKE_VERSION} VERSION_LESS "3.17.0")
-      include(FetchContent)
-    endif()
-    return()
-  endif()
-
-  get_property(
-    CPM_INITIALIZED GLOBAL ""
-    PROPERTY CPM_INITIALIZED
-    SET
-  )
-  if(CPM_INITIALIZED)
-    return()
-  endif()
-endif()
-
-if(CURRENT_CPM_VERSION MATCHES "development-version")
-  message(
-    WARNING "${CPM_INDENT} Your project is using an unstable development version of CPM.cmake. \
-Please update to a recent release if possible. \
-See https://github.com/cpm-cmake/CPM.cmake for details."
-  )
-endif()
-
-set_property(GLOBAL PROPERTY CPM_INITIALIZED true)
-
-macro(cpm_set_policies)
-  # the policy allows us to change options without caching
-  cmake_policy(SET CMP0077 NEW)
-  set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
-
-  # the policy allows us to change set(CACHE) without caching
-  if(POLICY CMP0126)
-    cmake_policy(SET CMP0126 NEW)
-    set(CMAKE_POLICY_DEFAULT_CMP0126 NEW)
-  endif()
-
-  # The policy uses the download time for timestamp, instead of the timestamp in the archive. This
-  # allows for proper rebuilds when a projects url changes
-  if(POLICY CMP0135)
-    cmake_policy(SET CMP0135 NEW)
-    set(CMAKE_POLICY_DEFAULT_CMP0135 NEW)
-  endif()
-
-  # treat relative git repository paths as being relative to the parent project's remote
-  if(POLICY CMP0150)
-    cmake_policy(SET CMP0150 NEW)
-    set(CMAKE_POLICY_DEFAULT_CMP0150 NEW)
-  endif()
-endmacro()
-cpm_set_policies()
-
-option(CPM_USE_LOCAL_PACKAGES "Always try to use `find_package` to get dependencies"
-       $ENV{CPM_USE_LOCAL_PACKAGES}
-)
-option(CPM_LOCAL_PACKAGES_ONLY "Only use `find_package` to get dependencies"
-       $ENV{CPM_LOCAL_PACKAGES_ONLY}
-)
-option(CPM_DOWNLOAD_ALL "Always download dependencies from source" $ENV{CPM_DOWNLOAD_ALL})
-option(CPM_DONT_UPDATE_MODULE_PATH "Don't update the module path to allow using find_package"
-       $ENV{CPM_DONT_UPDATE_MODULE_PATH}
-)
-option(CPM_DONT_CREATE_PACKAGE_LOCK "Don't create a package lock file in the binary path"
-       $ENV{CPM_DONT_CREATE_PACKAGE_LOCK}
-)
-option(CPM_INCLUDE_ALL_IN_PACKAGE_LOCK
-       "Add all packages added through CPM.cmake to the package lock"
-       $ENV{CPM_INCLUDE_ALL_IN_PACKAGE_LOCK}
-)
-option(CPM_USE_NAMED_CACHE_DIRECTORIES
-       "Use additional directory of package name in cache on the most nested level."
-       $ENV{CPM_USE_NAMED_CACHE_DIRECTORIES}
-)
-
-set(CPM_VERSION
-    ${CURRENT_CPM_VERSION}
-    CACHE INTERNAL ""
-)
-set(CPM_DIRECTORY
-    ${CPM_CURRENT_DIRECTORY}
-    CACHE INTERNAL ""
-)
-set(CPM_FILE
-    ${CMAKE_CURRENT_LIST_FILE}
-    CACHE INTERNAL ""
-)
-set(CPM_PACKAGES
-    ""
-    CACHE INTERNAL ""
-)
-set(CPM_DRY_RUN
-    OFF
-    CACHE INTERNAL "Don't download or configure dependencies (for testing)"
-)
-
-if(DEFINED ENV{CPM_SOURCE_CACHE})
-  set(CPM_SOURCE_CACHE_DEFAULT $ENV{CPM_SOURCE_CACHE})
-else()
-  set(CPM_SOURCE_CACHE_DEFAULT OFF)
-endif()
-
-set(CPM_SOURCE_CACHE
-    ${CPM_SOURCE_CACHE_DEFAULT}
-    CACHE PATH "Directory to download CPM dependencies"
-)
-
-if(NOT CPM_DONT_UPDATE_MODULE_PATH AND NOT DEFINED CMAKE_FIND_PACKAGE_REDIRECTS_DIR)
-  set(CPM_MODULE_PATH
-      "${CMAKE_BINARY_DIR}/CPM_modules"
-      CACHE INTERNAL ""
-  )
-  # remove old modules
-  file(REMOVE_RECURSE ${CPM_MODULE_PATH})
-  file(MAKE_DIRECTORY ${CPM_MODULE_PATH})
-  # locally added CPM modules should override global packages
-  set(CMAKE_MODULE_PATH "${CPM_MODULE_PATH};${CMAKE_MODULE_PATH}")
-endif()
-
-if(NOT CPM_DONT_CREATE_PACKAGE_LOCK)
-  set(CPM_PACKAGE_LOCK_FILE
-      "${CMAKE_BINARY_DIR}/cpm-package-lock.cmake"
-      CACHE INTERNAL ""
-  )
-  file(WRITE ${CPM_PACKAGE_LOCK_FILE}
-       "# CPM Package Lock\n# This file should be committed to version control\n\n"
-  )
-endif()
-
-include(FetchContent)
-
-# Try to infer package name from git repository uri (path or url)
-function(cpm_package_name_from_git_uri URI RESULT)
-  if("${URI}" MATCHES "([^/:]+)/?.git/?$")
-    set(${RESULT}
-        ${CMAKE_MATCH_1}
-        PARENT_SCOPE
-    )
-  else()
-    unset(${RESULT} PARENT_SCOPE)
-  endif()
-endfunction()
-
-# Try to infer package name and version from a url
-function(cpm_package_name_and_ver_from_url url outName outVer)
-  if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)")
-    # We matched an archive
-    set(filename "${CMAKE_MATCH_1}")
-
-    if(filename MATCHES "([a-zA-Z0-9_\\.-]+)[_-]v?(([0-9]+\\.)*[0-9]+[a-zA-Z0-9]*)")
-      # We matched <name>-<version> (ie foo-1.2.3)
-      set(${outName}
-          "${CMAKE_MATCH_1}"
-          PARENT_SCOPE
-      )
-      set(${outVer}
-          "${CMAKE_MATCH_2}"
-          PARENT_SCOPE
-      )
-    elseif(filename MATCHES "(([0-9]+\\.)+[0-9]+[a-zA-Z0-9]*)")
-      # We couldn't find a name, but we found a version
-      #
-      # In many cases (which we don't handle here) the url would look something like
-      # `irrelevant/ACTUAL_PACKAGE_NAME/irrelevant/1.2.3.zip`. In such a case we can't possibly
-      # distinguish the package name from the irrelevant bits. Moreover if we try to match the
-      # package name from the filename, we'd get bogus at best.
-      unset(${outName} PARENT_SCOPE)
-      set(${outVer}
-          "${CMAKE_MATCH_1}"
-          PARENT_SCOPE
-      )
-    else()
-      # Boldly assume that the file name is the package name.
-      #
-      # Yes, something like `irrelevant/ACTUAL_NAME/irrelevant/download.zip` will ruin our day, but
-      # such cases should be quite rare. No popular service does this... we think.
-      set(${outName}
-          "${filename}"
-          PARENT_SCOPE
-      )
-      unset(${outVer} PARENT_SCOPE)
-    endif()
-  else()
-    # No ideas yet what to do with non-archives
-    unset(${outName} PARENT_SCOPE)
-    unset(${outVer} PARENT_SCOPE)
-  endif()
-endfunction()
-
-function(cpm_find_package NAME VERSION)
-  string(REPLACE " " ";" EXTRA_ARGS "${ARGN}")
-  find_package(${NAME} ${VERSION} ${EXTRA_ARGS} QUIET)
-  if(${CPM_ARGS_NAME}_FOUND)
-    if(DEFINED ${CPM_ARGS_NAME}_VERSION)
-      set(VERSION ${${CPM_ARGS_NAME}_VERSION})
-    endif()
-    cpm_message(STATUS "${CPM_INDENT} Using local package ${CPM_ARGS_NAME}@${VERSION}")
-    CPMRegisterPackage(${CPM_ARGS_NAME} "${VERSION}")
-    set(CPM_PACKAGE_FOUND
-        YES
-        PARENT_SCOPE
-    )
-  else()
-    set(CPM_PACKAGE_FOUND
-        NO
-        PARENT_SCOPE
-    )
-  endif()
-endfunction()
-
-# Create a custom FindXXX.cmake module for a CPM package This prevents `find_package(NAME)` from
-# finding the system library
-function(cpm_create_module_file Name)
-  if(NOT CPM_DONT_UPDATE_MODULE_PATH)
-    if(DEFINED CMAKE_FIND_PACKAGE_REDIRECTS_DIR)
-      # Redirect find_package calls to the CPM package. This is what FetchContent does when you set
-      # OVERRIDE_FIND_PACKAGE. The CMAKE_FIND_PACKAGE_REDIRECTS_DIR works for find_package in CONFIG
-      # mode, unlike the Find${Name}.cmake fallback. CMAKE_FIND_PACKAGE_REDIRECTS_DIR is not defined
-      # in script mode, or in CMake < 3.24.
-      # https://cmake.org/cmake/help/latest/module/FetchContent.html#fetchcontent-find-package-integration-examples
-      string(TOLOWER ${Name} NameLower)
-      file(WRITE ${CMAKE_FIND_PACKAGE_REDIRECTS_DIR}/${NameLower}-config.cmake
-           "include(\"${CMAKE_CURRENT_LIST_DIR}/${NameLower}-extra.cmake\" OPTIONAL)\n"
-           "include(\"${CMAKE_CURRENT_LIST_DIR}/${Name}Extra.cmake\" OPTIONAL)\n"
-      )
-      file(WRITE ${CMAKE_FIND_PACKAGE_REDIRECTS_DIR}/${NameLower}-version.cmake
-           "set(PACKAGE_VERSION_COMPATIBLE TRUE)\n" "set(PACKAGE_VERSION_EXACT TRUE)\n"
-      )
-    else()
-      file(WRITE ${CPM_MODULE_PATH}/Find${Name}.cmake
-           "include(\"${CPM_FILE}\")\n${ARGN}\nset(${Name}_FOUND TRUE)"
-      )
-    endif()
-  endif()
-endfunction()
-
-# Find a package locally or fallback to CPMAddPackage
-function(CPMFindPackage)
-  set(oneValueArgs NAME VERSION GIT_TAG FIND_PACKAGE_ARGUMENTS)
-
-  cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "" ${ARGN})
-
-  if(NOT DEFINED CPM_ARGS_VERSION)
-    if(DEFINED CPM_ARGS_GIT_TAG)
-      cpm_get_version_from_git_tag("${CPM_ARGS_GIT_TAG}" CPM_ARGS_VERSION)
-    endif()
-  endif()
-
-  set(downloadPackage ${CPM_DOWNLOAD_ALL})
-  if(DEFINED CPM_DOWNLOAD_${CPM_ARGS_NAME})
-    set(downloadPackage ${CPM_DOWNLOAD_${CPM_ARGS_NAME}})
-  elseif(DEFINED ENV{CPM_DOWNLOAD_${CPM_ARGS_NAME}})
-    set(downloadPackage $ENV{CPM_DOWNLOAD_${CPM_ARGS_NAME}})
-  endif()
-  if(downloadPackage)
-    CPMAddPackage(${ARGN})
-    cpm_export_variables(${CPM_ARGS_NAME})
-    return()
-  endif()
-
-  cpm_find_package(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" ${CPM_ARGS_FIND_PACKAGE_ARGUMENTS})
-
-  if(NOT CPM_PACKAGE_FOUND)
-    CPMAddPackage(${ARGN})
-    cpm_export_variables(${CPM_ARGS_NAME})
-  endif()
-
-endfunction()
-
-# checks if a package has been added before
-function(cpm_check_if_package_already_added CPM_ARGS_NAME CPM_ARGS_VERSION)
-  if("${CPM_ARGS_NAME}" IN_LIST CPM_PACKAGES)
-    CPMGetPackageVersion(${CPM_ARGS_NAME} CPM_PACKAGE_VERSION)
-    if("${CPM_PACKAGE_VERSION}" VERSION_LESS "${CPM_ARGS_VERSION}")
-      message(
-        WARNING
-          "${CPM_INDENT} Requires a newer version of ${CPM_ARGS_NAME} (${CPM_ARGS_VERSION}) than currently included (${CPM_PACKAGE_VERSION})."
-      )
-    endif()
-    cpm_get_fetch_properties(${CPM_ARGS_NAME})
-    set(${CPM_ARGS_NAME}_ADDED NO)
-    set(CPM_PACKAGE_ALREADY_ADDED
-        YES
-        PARENT_SCOPE
-    )
-    cpm_export_variables(${CPM_ARGS_NAME})
-  else()
-    set(CPM_PACKAGE_ALREADY_ADDED
-        NO
-        PARENT_SCOPE
-    )
-  endif()
-endfunction()
-
-# Parse the argument of CPMAddPackage in case a single one was provided and convert it to a list of
-# arguments which can then be parsed idiomatically. For example gh:foo/bar@1.2.3 will be converted
-# to: GITHUB_REPOSITORY;foo/bar;VERSION;1.2.3
-function(cpm_parse_add_package_single_arg arg outArgs)
-  # Look for a scheme
-  if("${arg}" MATCHES "^([a-zA-Z]+):(.+)$")
-    string(TOLOWER "${CMAKE_MATCH_1}" scheme)
-    set(uri "${CMAKE_MATCH_2}")
-
-    # Check for CPM-specific schemes
-    if(scheme STREQUAL "gh")
-      set(out "GITHUB_REPOSITORY;${uri}")
-      set(packageType "git")
-    elseif(scheme STREQUAL "gl")
-      set(out "GITLAB_REPOSITORY;${uri}")
-      set(packageType "git")
-    elseif(scheme STREQUAL "bb")
-      set(out "BITBUCKET_REPOSITORY;${uri}")
-      set(packageType "git")
-      # A CPM-specific scheme was not found. Looks like this is a generic URL so try to determine
-      # type
-    elseif(arg MATCHES ".git/?(@|#|$)")
-      set(out "GIT_REPOSITORY;${arg}")
-      set(packageType "git")
-    else()
-      # Fall back to a URL
-      set(out "URL;${arg}")
-      set(packageType "archive")
-
-      # We could also check for SVN since FetchContent supports it, but SVN is so rare these days.
-      # We just won't bother with the additional complexity it will induce in this function. SVN is
-      # done by multi-arg
-    endif()
-  else()
-    if(arg MATCHES ".git/?(@|#|$)")
-      set(out "GIT_REPOSITORY;${arg}")
-      set(packageType "git")
-    else()
-      # Give up
-      message(FATAL_ERROR "${CPM_INDENT} Can't determine package type of '${arg}'")
-    endif()
-  endif()
-
-  # For all packages we interpret @... as version. Only replace the last occurrence. Thus URIs
-  # containing '@' can be used
-  string(REGEX REPLACE "@([^@]+)$" ";VERSION;\\1" out "${out}")
-
-  # Parse the rest according to package type
-  if(packageType STREQUAL "git")
-    # For git repos we interpret #... as a tag or branch or commit hash
-    string(REGEX REPLACE "#([^#]+)$" ";GIT_TAG;\\1" out "${out}")
-  elseif(packageType STREQUAL "archive")
-    # For archives we interpret #... as a URL hash.
-    string(REGEX REPLACE "#([^#]+)$" ";URL_HASH;\\1" out "${out}")
-    # We don't try to parse the version if it's not provided explicitly. cpm_get_version_from_url
-    # should do this at a later point
-  else()
-    # We should never get here. This is an assertion and hitting it means there's a problem with the
-    # code above. A packageType was set, but not handled by this if-else.
-    message(FATAL_ERROR "${CPM_INDENT} Unsupported package type '${packageType}' of '${arg}'")
-  endif()
-
-  set(${outArgs}
-      ${out}
-      PARENT_SCOPE
-  )
-endfunction()
-
-# Check that the working directory for a git repo is clean
-function(cpm_check_git_working_dir_is_clean repoPath gitTag isClean)
-
-  find_package(Git REQUIRED)
-
-  if(NOT GIT_EXECUTABLE)
-    # No git executable, assume directory is clean
-    set(${isClean}
-        TRUE
-        PARENT_SCOPE
-    )
-    return()
-  endif()
-
-  # check for uncommitted changes
-  execute_process(
-    COMMAND ${GIT_EXECUTABLE} status --porcelain
-    RESULT_VARIABLE resultGitStatus
-    OUTPUT_VARIABLE repoStatus
-    OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET
-    WORKING_DIRECTORY ${repoPath}
-  )
-  if(resultGitStatus)
-    # not supposed to happen, assume clean anyway
-    message(WARNING "${CPM_INDENT} Calling git status on folder ${repoPath} failed")
-    set(${isClean}
-        TRUE
-        PARENT_SCOPE
-    )
-    return()
-  endif()
-
-  if(NOT "${repoStatus}" STREQUAL "")
-    set(${isClean}
-        FALSE
-        PARENT_SCOPE
-    )
-    return()
-  endif()
-
-  # check for committed changes
-  execute_process(
-    COMMAND ${GIT_EXECUTABLE} diff -s --exit-code ${gitTag}
-    RESULT_VARIABLE resultGitDiff
-    OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_QUIET
-    WORKING_DIRECTORY ${repoPath}
-  )
-
-  if(${resultGitDiff} EQUAL 0)
-    set(${isClean}
-        TRUE
-        PARENT_SCOPE
-    )
-  else()
-    set(${isClean}
-        FALSE
-        PARENT_SCOPE
-    )
-  endif()
-
-endfunction()
-
-# Add PATCH_COMMAND to CPM_ARGS_UNPARSED_ARGUMENTS. This method consumes a list of files in ARGN
-# then generates a `PATCH_COMMAND` appropriate for `ExternalProject_Add()`. This command is appended
-# to the parent scope's `CPM_ARGS_UNPARSED_ARGUMENTS`.
-function(cpm_add_patches)
-  # Return if no patch files are supplied.
-  if(NOT ARGN)
-    return()
-  endif()
-
-  # Find the patch program.
-  find_program(PATCH_EXECUTABLE patch)
-  if(CMAKE_HOST_WIN32 AND NOT PATCH_EXECUTABLE)
-    # The Windows git executable is distributed with patch.exe. Find the path to the executable, if
-    # it exists, then search `../usr/bin` and `../../usr/bin` for patch.exe.
-    find_package(Git QUIET)
-    if(GIT_EXECUTABLE)
-      get_filename_component(extra_search_path ${GIT_EXECUTABLE} DIRECTORY)
-      get_filename_component(extra_search_path_1up ${extra_search_path} DIRECTORY)
-      get_filename_component(extra_search_path_2up ${extra_search_path_1up} DIRECTORY)
-      find_program(
-        PATCH_EXECUTABLE patch HINTS "${extra_search_path_1up}/usr/bin"
-                                     "${extra_search_path_2up}/usr/bin"
-      )
-    endif()
-  endif()
-  if(NOT PATCH_EXECUTABLE)
-    message(FATAL_ERROR "Couldn't find `patch` executable to use with PATCHES keyword.")
-  endif()
-
-  # Create a temporary
-  set(temp_list ${CPM_ARGS_UNPARSED_ARGUMENTS})
-
-  # Ensure each file exists (or error out) and add it to the list.
-  set(first_item True)
-  foreach(PATCH_FILE ${ARGN})
-    # Make sure the patch file exists, if we can't find it, try again in the current directory.
-    if(NOT EXISTS "${PATCH_FILE}")
-      if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/${PATCH_FILE}")
-        message(FATAL_ERROR "Couldn't find patch file: '${PATCH_FILE}'")
-      endif()
-      set(PATCH_FILE "${CMAKE_CURRENT_LIST_DIR}/${PATCH_FILE}")
-    endif()
-
-    # Convert to absolute path for use with patch file command.
-    get_filename_component(PATCH_FILE "${PATCH_FILE}" ABSOLUTE)
-
-    # The first patch entry must be preceded by "PATCH_COMMAND" while the following items are
-    # preceded by "&&".
-    if(first_item)
-      set(first_item False)
-      list(APPEND temp_list "PATCH_COMMAND")
-    else()
-      list(APPEND temp_list "&&")
-    endif()
-    # Add the patch command to the list
-    list(APPEND temp_list "${PATCH_EXECUTABLE}" "-p1" "<" "${PATCH_FILE}")
-  endforeach()
-
-  # Move temp out into parent scope.
-  set(CPM_ARGS_UNPARSED_ARGUMENTS
-      ${temp_list}
-      PARENT_SCOPE
-  )
-
-endfunction()
-
-# method to overwrite internal FetchContent properties, to allow using CPM.cmake to overload
-# FetchContent calls. As these are internal cmake properties, this method should be used carefully
-# and may need modification in future CMake versions. Source:
-# https://github.com/Kitware/CMake/blob/dc3d0b5a0a7d26d43d6cfeb511e224533b5d188f/Modules/FetchContent.cmake#L1152
-function(cpm_override_fetchcontent contentName)
-  cmake_parse_arguments(PARSE_ARGV 1 arg "" "SOURCE_DIR;BINARY_DIR" "")
-  if(NOT "${arg_UNPARSED_ARGUMENTS}" STREQUAL "")
-    message(FATAL_ERROR "${CPM_INDENT} Unsupported arguments: ${arg_UNPARSED_ARGUMENTS}")
-  endif()
-
-  string(TOLOWER ${contentName} contentNameLower)
-  set(prefix "_FetchContent_${contentNameLower}")
-
-  set(propertyName "${prefix}_sourceDir")
-  define_property(
-    GLOBAL
-    PROPERTY ${propertyName}
-    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
-    FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
-  )
-  set_property(GLOBAL PROPERTY ${propertyName} "${arg_SOURCE_DIR}")
-
-  set(propertyName "${prefix}_binaryDir")
-  define_property(
-    GLOBAL
-    PROPERTY ${propertyName}
-    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
-    FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
-  )
-  set_property(GLOBAL PROPERTY ${propertyName} "${arg_BINARY_DIR}")
-
-  set(propertyName "${prefix}_populated")
-  define_property(
-    GLOBAL
-    PROPERTY ${propertyName}
-    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
-    FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
-  )
-  set_property(GLOBAL PROPERTY ${propertyName} TRUE)
-endfunction()
-
-# Download and add a package from source
-function(CPMAddPackage)
-  cpm_set_policies()
-
-  list(LENGTH ARGN argnLength)
-  if(argnLength EQUAL 1)
-    cpm_parse_add_package_single_arg("${ARGN}" ARGN)
-
-    # The shorthand syntax implies EXCLUDE_FROM_ALL and SYSTEM
-    set(ARGN "${ARGN};EXCLUDE_FROM_ALL;YES;SYSTEM;YES;")
-  endif()
-
-  set(oneValueArgs
-      NAME
-      FORCE
-      VERSION
-      GIT_TAG
-      DOWNLOAD_ONLY
-      GITHUB_REPOSITORY
-      GITLAB_REPOSITORY
-      BITBUCKET_REPOSITORY
-      GIT_REPOSITORY
-      SOURCE_DIR
-      FIND_PACKAGE_ARGUMENTS
-      NO_CACHE
-      SYSTEM
-      GIT_SHALLOW
-      EXCLUDE_FROM_ALL
-      SOURCE_SUBDIR
-      CUSTOM_CACHE_KEY
-  )
-
-  set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND PATCHES)
-
-  cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
-
-  # Set default values for arguments
-
-  if(NOT DEFINED CPM_ARGS_VERSION)
-    if(DEFINED CPM_ARGS_GIT_TAG)
-      cpm_get_version_from_git_tag("${CPM_ARGS_GIT_TAG}" CPM_ARGS_VERSION)
-    endif()
-  endif()
-
-  if(CPM_ARGS_DOWNLOAD_ONLY)
-    set(DOWNLOAD_ONLY ${CPM_ARGS_DOWNLOAD_ONLY})
-  else()
-    set(DOWNLOAD_ONLY NO)
-  endif()
-
-  if(DEFINED CPM_ARGS_GITHUB_REPOSITORY)
-    set(CPM_ARGS_GIT_REPOSITORY "https://github.com/${CPM_ARGS_GITHUB_REPOSITORY}.git")
-  elseif(DEFINED CPM_ARGS_GITLAB_REPOSITORY)
-    set(CPM_ARGS_GIT_REPOSITORY "https://gitlab.com/${CPM_ARGS_GITLAB_REPOSITORY}.git")
-  elseif(DEFINED CPM_ARGS_BITBUCKET_REPOSITORY)
-    set(CPM_ARGS_GIT_REPOSITORY "https://bitbucket.org/${CPM_ARGS_BITBUCKET_REPOSITORY}.git")
-  endif()
-
-  if(DEFINED CPM_ARGS_GIT_REPOSITORY)
-    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_REPOSITORY ${CPM_ARGS_GIT_REPOSITORY})
-    if(NOT DEFINED CPM_ARGS_GIT_TAG)
-      set(CPM_ARGS_GIT_TAG v${CPM_ARGS_VERSION})
-    endif()
-
-    # If a name wasn't provided, try to infer it from the git repo
-    if(NOT DEFINED CPM_ARGS_NAME)
-      cpm_package_name_from_git_uri(${CPM_ARGS_GIT_REPOSITORY} CPM_ARGS_NAME)
-    endif()
-  endif()
-
-  set(CPM_SKIP_FETCH FALSE)
-
-  if(DEFINED CPM_ARGS_GIT_TAG)
-    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_TAG ${CPM_ARGS_GIT_TAG})
-    # If GIT_SHALLOW is explicitly specified, honor the value.
-    if(DEFINED CPM_ARGS_GIT_SHALLOW)
-      list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_SHALLOW ${CPM_ARGS_GIT_SHALLOW})
-    endif()
-  endif()
-
-  if(DEFINED CPM_ARGS_URL)
-    # If a name or version aren't provided, try to infer them from the URL
-    list(GET CPM_ARGS_URL 0 firstUrl)
-    cpm_package_name_and_ver_from_url(${firstUrl} nameFromUrl verFromUrl)
-    # If we fail to obtain name and version from the first URL, we could try other URLs if any.
-    # However multiple URLs are expected to be quite rare, so for now we won't bother.
-
-    # If the caller provided their own name and version, they trump the inferred ones.
-    if(NOT DEFINED CPM_ARGS_NAME)
-      set(CPM_ARGS_NAME ${nameFromUrl})
-    endif()
-    if(NOT DEFINED CPM_ARGS_VERSION)
-      set(CPM_ARGS_VERSION ${verFromUrl})
-    endif()
-
-    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS URL "${CPM_ARGS_URL}")
-  endif()
-
-  # Check for required arguments
-
-  if(NOT DEFINED CPM_ARGS_NAME)
-    message(
-      FATAL_ERROR
-        "${CPM_INDENT} 'NAME' was not provided and couldn't be automatically inferred for package added with arguments: '${ARGN}'"
-    )
-  endif()
-
-  # Check if package has been added before
-  cpm_check_if_package_already_added(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}")
-  if(CPM_PACKAGE_ALREADY_ADDED)
-    cpm_export_variables(${CPM_ARGS_NAME})
-    return()
-  endif()
-
-  # Check for manual overrides
-  if(NOT CPM_ARGS_FORCE AND NOT "${CPM_${CPM_ARGS_NAME}_SOURCE}" STREQUAL "")
-    set(PACKAGE_SOURCE ${CPM_${CPM_ARGS_NAME}_SOURCE})
-    set(CPM_${CPM_ARGS_NAME}_SOURCE "")
-    CPMAddPackage(
-      NAME "${CPM_ARGS_NAME}"
-      SOURCE_DIR "${PACKAGE_SOURCE}"
-      EXCLUDE_FROM_ALL "${CPM_ARGS_EXCLUDE_FROM_ALL}"
-      SYSTEM "${CPM_ARGS_SYSTEM}"
-      PATCHES "${CPM_ARGS_PATCHES}"
-      OPTIONS "${CPM_ARGS_OPTIONS}"
-      SOURCE_SUBDIR "${CPM_ARGS_SOURCE_SUBDIR}"
-      DOWNLOAD_ONLY "${DOWNLOAD_ONLY}"
-      FORCE True
-    )
-    cpm_export_variables(${CPM_ARGS_NAME})
-    return()
-  endif()
-
-  # Check for available declaration
-  if(NOT CPM_ARGS_FORCE AND NOT "${CPM_DECLARATION_${CPM_ARGS_NAME}}" STREQUAL "")
-    set(declaration ${CPM_DECLARATION_${CPM_ARGS_NAME}})
-    set(CPM_DECLARATION_${CPM_ARGS_NAME} "")
-    CPMAddPackage(${declaration})
-    cpm_export_variables(${CPM_ARGS_NAME})
-    # checking again to ensure version and option compatibility
-    cpm_check_if_package_already_added(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}")
-    return()
-  endif()
-
-  if(NOT CPM_ARGS_FORCE)
-    if(CPM_USE_LOCAL_PACKAGES OR CPM_LOCAL_PACKAGES_ONLY)
-      cpm_find_package(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" ${CPM_ARGS_FIND_PACKAGE_ARGUMENTS})
-
-      if(CPM_PACKAGE_FOUND)
-        cpm_export_variables(${CPM_ARGS_NAME})
-        return()
-      endif()
-
-      if(CPM_LOCAL_PACKAGES_ONLY)
-        message(
-          SEND_ERROR
-            "${CPM_INDENT} ${CPM_ARGS_NAME} not found via find_package(${CPM_ARGS_NAME} ${CPM_ARGS_VERSION})"
-        )
-      endif()
-    endif()
-  endif()
-
-  CPMRegisterPackage("${CPM_ARGS_NAME}" "${CPM_ARGS_VERSION}")
-
-  if(DEFINED CPM_ARGS_GIT_TAG)
-    set(PACKAGE_INFO "${CPM_ARGS_GIT_TAG}")
-  elseif(DEFINED CPM_ARGS_SOURCE_DIR)
-    set(PACKAGE_INFO "${CPM_ARGS_SOURCE_DIR}")
-  else()
-    set(PACKAGE_INFO "${CPM_ARGS_VERSION}")
-  endif()
-
-  if(DEFINED FETCHCONTENT_BASE_DIR)
-    # respect user's FETCHCONTENT_BASE_DIR if set
-    set(CPM_FETCHCONTENT_BASE_DIR ${FETCHCONTENT_BASE_DIR})
-  else()
-    set(CPM_FETCHCONTENT_BASE_DIR ${CMAKE_BINARY_DIR}/_deps)
-  endif()
-
-  cpm_add_patches(${CPM_ARGS_PATCHES})
-
-  if(DEFINED CPM_ARGS_DOWNLOAD_COMMAND)
-    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS DOWNLOAD_COMMAND ${CPM_ARGS_DOWNLOAD_COMMAND})
-  elseif(DEFINED CPM_ARGS_SOURCE_DIR)
-    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${CPM_ARGS_SOURCE_DIR})
-    if(NOT IS_ABSOLUTE ${CPM_ARGS_SOURCE_DIR})
-      # Expand `CPM_ARGS_SOURCE_DIR` relative path. This is important because EXISTS doesn't work
-      # for relative paths.
-      get_filename_component(
-        source_directory ${CPM_ARGS_SOURCE_DIR} REALPATH BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}
-      )
-    else()
-      set(source_directory ${CPM_ARGS_SOURCE_DIR})
-    endif()
-    if(NOT EXISTS ${source_directory})
-      string(TOLOWER ${CPM_ARGS_NAME} lower_case_name)
-      # remove timestamps so CMake will re-download the dependency
-      file(REMOVE_RECURSE "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-subbuild")
-    endif()
-  elseif(CPM_SOURCE_CACHE AND NOT CPM_ARGS_NO_CACHE)
-    string(TOLOWER ${CPM_ARGS_NAME} lower_case_name)
-    set(origin_parameters ${CPM_ARGS_UNPARSED_ARGUMENTS})
-    list(SORT origin_parameters)
-    if(CPM_ARGS_CUSTOM_CACHE_KEY)
-      # Application set a custom unique directory name
-      set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY})
-    elseif(CPM_USE_NAMED_CACHE_DIRECTORIES)
-      string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG")
-      set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME})
-    else()
-      string(SHA1 origin_hash "${origin_parameters}")
-      set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash})
-    endif()
-    # Expand `download_directory` relative path. This is important because EXISTS doesn't work for
-    # relative paths.
-    get_filename_component(download_directory ${download_directory} ABSOLUTE)
-    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${download_directory})
-
-    if(CPM_SOURCE_CACHE)
-      file(LOCK ${download_directory}/../cmake.lock)
-    endif()
-
-    if(EXISTS ${download_directory})
-      if(CPM_SOURCE_CACHE)
-        file(LOCK ${download_directory}/../cmake.lock RELEASE)
-      endif()
-
-      cpm_store_fetch_properties(
-        ${CPM_ARGS_NAME} "${download_directory}"
-        "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-build"
-      )
-      cpm_get_fetch_properties("${CPM_ARGS_NAME}")
-
-      if(DEFINED CPM_ARGS_GIT_TAG AND NOT (PATCH_COMMAND IN_LIST CPM_ARGS_UNPARSED_ARGUMENTS))
-        # warn if cache has been changed since checkout
-        cpm_check_git_working_dir_is_clean(${download_directory} ${CPM_ARGS_GIT_TAG} IS_CLEAN)
-        if(NOT ${IS_CLEAN})
-          message(
-            WARNING "${CPM_INDENT} Cache for ${CPM_ARGS_NAME} (${download_directory}) is dirty"
-          )
-        endif()
-      endif()
-
-      cpm_add_subdirectory(
-        "${CPM_ARGS_NAME}"
-        "${DOWNLOAD_ONLY}"
-        "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}"
-        "${${CPM_ARGS_NAME}_BINARY_DIR}"
-        "${CPM_ARGS_EXCLUDE_FROM_ALL}"
-        "${CPM_ARGS_SYSTEM}"
-        "${CPM_ARGS_OPTIONS}"
-      )
-      set(PACKAGE_INFO "${PACKAGE_INFO} at ${download_directory}")
-
-      # As the source dir is already cached/populated, we override the call to FetchContent.
-      set(CPM_SKIP_FETCH TRUE)
-      cpm_override_fetchcontent(
-        "${lower_case_name}" SOURCE_DIR "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}"
-        BINARY_DIR "${${CPM_ARGS_NAME}_BINARY_DIR}"
-      )
-
-    else()
-      # Enable shallow clone when GIT_TAG is not a commit hash. Our guess may not be accurate, but
-      # it should guarantee no commit hash get mis-detected.
-      if(NOT DEFINED CPM_ARGS_GIT_SHALLOW)
-        cpm_is_git_tag_commit_hash("${CPM_ARGS_GIT_TAG}" IS_HASH)
-        if(NOT ${IS_HASH})
-          list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_SHALLOW TRUE)
-        endif()
-      endif()
-
-      # remove timestamps so CMake will re-download the dependency
-      file(REMOVE_RECURSE ${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-subbuild)
-      set(PACKAGE_INFO "${PACKAGE_INFO} to ${download_directory}")
-    endif()
-  endif()
-
-  cpm_create_module_file(${CPM_ARGS_NAME} "CPMAddPackage(\"${ARGN}\")")
-
-  if(CPM_PACKAGE_LOCK_ENABLED)
-    if((CPM_ARGS_VERSION AND NOT CPM_ARGS_SOURCE_DIR) OR CPM_INCLUDE_ALL_IN_PACKAGE_LOCK)
-      cpm_add_to_package_lock(${CPM_ARGS_NAME} "${ARGN}")
-    elseif(CPM_ARGS_SOURCE_DIR)
-      cpm_add_comment_to_package_lock(${CPM_ARGS_NAME} "local directory")
-    else()
-      cpm_add_comment_to_package_lock(${CPM_ARGS_NAME} "${ARGN}")
-    endif()
-  endif()
-
-  cpm_message(
-    STATUS "${CPM_INDENT} Adding package ${CPM_ARGS_NAME}@${CPM_ARGS_VERSION} (${PACKAGE_INFO})"
-  )
-
-  if(NOT CPM_SKIP_FETCH)
-    # CMake 3.28 added EXCLUDE, SYSTEM (3.25), and SOURCE_SUBDIR (3.18) to FetchContent_Declare.
-    # Calling FetchContent_MakeAvailable will then internally forward these options to
-    # add_subdirectory. Up until these changes, we had to call FetchContent_Populate and
-    # add_subdirectory separately, which is no longer necessary and has been deprecated as of 3.30.
-    # A Bug in CMake prevents us to use the non-deprecated functions until 3.30.3.
-    set(fetchContentDeclareExtraArgs "")
-    if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.30.3")
-      if(${CPM_ARGS_EXCLUDE_FROM_ALL})
-        list(APPEND fetchContentDeclareExtraArgs EXCLUDE_FROM_ALL)
-      endif()
-      if(${CPM_ARGS_SYSTEM})
-        list(APPEND fetchContentDeclareExtraArgs SYSTEM)
-      endif()
-      if(DEFINED CPM_ARGS_SOURCE_SUBDIR)
-        list(APPEND fetchContentDeclareExtraArgs SOURCE_SUBDIR ${CPM_ARGS_SOURCE_SUBDIR})
-      endif()
-      # For CMake version <3.28 OPTIONS are parsed in cpm_add_subdirectory
-      if(CPM_ARGS_OPTIONS AND NOT DOWNLOAD_ONLY)
-        foreach(OPTION ${CPM_ARGS_OPTIONS})
-          cpm_parse_option("${OPTION}")
-          set(${OPTION_KEY} "${OPTION_VALUE}")
-        endforeach()
-      endif()
-    endif()
-    cpm_declare_fetch(
-      "${CPM_ARGS_NAME}" ${fetchContentDeclareExtraArgs} "${CPM_ARGS_UNPARSED_ARGUMENTS}"
-    )
-
-    cpm_fetch_package("${CPM_ARGS_NAME}" ${DOWNLOAD_ONLY} populated ${CPM_ARGS_UNPARSED_ARGUMENTS})
-    if(CPM_SOURCE_CACHE AND download_directory)
-      file(LOCK ${download_directory}/../cmake.lock RELEASE)
-    endif()
-    if(${populated} AND ${CMAKE_VERSION} VERSION_LESS "3.30.3")
-      cpm_add_subdirectory(
-        "${CPM_ARGS_NAME}"
-        "${DOWNLOAD_ONLY}"
-        "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}"
-        "${${CPM_ARGS_NAME}_BINARY_DIR}"
-        "${CPM_ARGS_EXCLUDE_FROM_ALL}"
-        "${CPM_ARGS_SYSTEM}"
-        "${CPM_ARGS_OPTIONS}"
-      )
-    endif()
-    cpm_get_fetch_properties("${CPM_ARGS_NAME}")
-  endif()
-
-  set(${CPM_ARGS_NAME}_ADDED YES)
-  cpm_export_variables("${CPM_ARGS_NAME}")
-endfunction()
-
-# Fetch a previously declared package
-macro(CPMGetPackage Name)
-  if(DEFINED "CPM_DECLARATION_${Name}")
-    CPMAddPackage(NAME ${Name})
-  else()
-    message(SEND_ERROR "${CPM_INDENT} Cannot retrieve package ${Name}: no declaration available")
-  endif()
-endmacro()
-
-# export variables available to the caller to the parent scope expects ${CPM_ARGS_NAME} to be set
-macro(cpm_export_variables name)
-  set(${name}_SOURCE_DIR
-      "${${name}_SOURCE_DIR}"
-      PARENT_SCOPE
-  )
-  set(${name}_BINARY_DIR
-      "${${name}_BINARY_DIR}"
-      PARENT_SCOPE
-  )
-  set(${name}_ADDED
-      "${${name}_ADDED}"
-      PARENT_SCOPE
-  )
-  set(CPM_LAST_PACKAGE_NAME
-      "${name}"
-      PARENT_SCOPE
-  )
-endmacro()
-
-# declares a package, so that any call to CPMAddPackage for the package name will use these
-# arguments instead. Previous declarations will not be overridden.
-macro(CPMDeclarePackage Name)
-  if(NOT DEFINED "CPM_DECLARATION_${Name}")
-    set("CPM_DECLARATION_${Name}" "${ARGN}")
-  endif()
-endmacro()
-
-function(cpm_add_to_package_lock Name)
-  if(NOT CPM_DONT_CREATE_PACKAGE_LOCK)
-    cpm_prettify_package_arguments(PRETTY_ARGN false ${ARGN})
-    file(APPEND ${CPM_PACKAGE_LOCK_FILE} "# ${Name}\nCPMDeclarePackage(${Name}\n${PRETTY_ARGN})\n")
-  endif()
-endfunction()
-
-function(cpm_add_comment_to_package_lock Name)
-  if(NOT CPM_DONT_CREATE_PACKAGE_LOCK)
-    cpm_prettify_package_arguments(PRETTY_ARGN true ${ARGN})
-    file(APPEND ${CPM_PACKAGE_LOCK_FILE}
-         "# ${Name} (unversioned)\n# CPMDeclarePackage(${Name}\n${PRETTY_ARGN}#)\n"
-    )
-  endif()
-endfunction()
-
-# includes the package lock file if it exists and creates a target `cpm-update-package-lock` to
-# update it
-macro(CPMUsePackageLock file)
-  if(NOT CPM_DONT_CREATE_PACKAGE_LOCK)
-    get_filename_component(CPM_ABSOLUTE_PACKAGE_LOCK_PATH ${file} ABSOLUTE)
-    if(EXISTS ${CPM_ABSOLUTE_PACKAGE_LOCK_PATH})
-      include(${CPM_ABSOLUTE_PACKAGE_LOCK_PATH})
-    endif()
-    if(NOT TARGET cpm-update-package-lock)
-      add_custom_target(
-        cpm-update-package-lock COMMAND ${CMAKE_COMMAND} -E copy ${CPM_PACKAGE_LOCK_FILE}
-                                        ${CPM_ABSOLUTE_PACKAGE_LOCK_PATH}
-      )
-    endif()
-    set(CPM_PACKAGE_LOCK_ENABLED true)
-  endif()
-endmacro()
-
-# registers a package that has been added to CPM
-function(CPMRegisterPackage PACKAGE VERSION)
-  list(APPEND CPM_PACKAGES ${PACKAGE})
-  set(CPM_PACKAGES
-      ${CPM_PACKAGES}
-      CACHE INTERNAL ""
-  )
-  set("CPM_PACKAGE_${PACKAGE}_VERSION"
-      ${VERSION}
-      CACHE INTERNAL ""
-  )
-endfunction()
-
-# retrieve the current version of the package to ${OUTPUT}
-function(CPMGetPackageVersion PACKAGE OUTPUT)
-  set(${OUTPUT}
-      "${CPM_PACKAGE_${PACKAGE}_VERSION}"
-      PARENT_SCOPE
-  )
-endfunction()
-
-# declares a package in FetchContent_Declare
-function(cpm_declare_fetch PACKAGE)
-  if(${CPM_DRY_RUN})
-    cpm_message(STATUS "${CPM_INDENT} Package not declared (dry run)")
-    return()
-  endif()
-
-  FetchContent_Declare(${PACKAGE} ${ARGN})
-endfunction()
-
-# returns properties for a package previously defined by cpm_declare_fetch
-function(cpm_get_fetch_properties PACKAGE)
-  if(${CPM_DRY_RUN})
-    return()
-  endif()
-
-  set(${PACKAGE}_SOURCE_DIR
-      "${CPM_PACKAGE_${PACKAGE}_SOURCE_DIR}"
-      PARENT_SCOPE
-  )
-  set(${PACKAGE}_BINARY_DIR
-      "${CPM_PACKAGE_${PACKAGE}_BINARY_DIR}"
-      PARENT_SCOPE
-  )
-endfunction()
-
-function(cpm_store_fetch_properties PACKAGE source_dir binary_dir)
-  if(${CPM_DRY_RUN})
-    return()
-  endif()
-
-  set(CPM_PACKAGE_${PACKAGE}_SOURCE_DIR
-      "${source_dir}"
-      CACHE INTERNAL ""
-  )
-  set(CPM_PACKAGE_${PACKAGE}_BINARY_DIR
-      "${binary_dir}"
-      CACHE INTERNAL ""
-  )
-endfunction()
-
-# adds a package as a subdirectory if viable, according to provided options
-function(
-  cpm_add_subdirectory
-  PACKAGE
-  DOWNLOAD_ONLY
-  SOURCE_DIR
-  BINARY_DIR
-  EXCLUDE
-  SYSTEM
-  OPTIONS
-)
-
-  if(NOT DOWNLOAD_ONLY AND EXISTS ${SOURCE_DIR}/CMakeLists.txt)
-    set(addSubdirectoryExtraArgs "")
-    if(EXCLUDE)
-      list(APPEND addSubdirectoryExtraArgs EXCLUDE_FROM_ALL)
-    endif()
-    if("${SYSTEM}" AND "${CMAKE_VERSION}" VERSION_GREATER_EQUAL "3.25")
-      # https://cmake.org/cmake/help/latest/prop_dir/SYSTEM.html#prop_dir:SYSTEM
-      list(APPEND addSubdirectoryExtraArgs SYSTEM)
-    endif()
-    if(OPTIONS)
-      foreach(OPTION ${OPTIONS})
-        cpm_parse_option("${OPTION}")
-        set(${OPTION_KEY} "${OPTION_VALUE}")
-      endforeach()
-    endif()
-    set(CPM_OLD_INDENT "${CPM_INDENT}")
-    set(CPM_INDENT "${CPM_INDENT} ${PACKAGE}:")
-    add_subdirectory(${SOURCE_DIR} ${BINARY_DIR} ${addSubdirectoryExtraArgs})
-    set(CPM_INDENT "${CPM_OLD_INDENT}")
-  endif()
-endfunction()
-
-# downloads a previously declared package via FetchContent and exports the variables
-# `${PACKAGE}_SOURCE_DIR` and `${PACKAGE}_BINARY_DIR` to the parent scope
-function(cpm_fetch_package PACKAGE DOWNLOAD_ONLY populated)
-  set(${populated}
-      FALSE
-      PARENT_SCOPE
-  )
-  if(${CPM_DRY_RUN})
-    cpm_message(STATUS "${CPM_INDENT} Package ${PACKAGE} not fetched (dry run)")
-    return()
-  endif()
-
-  FetchContent_GetProperties(${PACKAGE})
-
-  string(TOLOWER "${PACKAGE}" lower_case_name)
-
-  if(NOT ${lower_case_name}_POPULATED)
-    if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.30.3")
-      if(DOWNLOAD_ONLY)
-        # MakeAvailable will call add_subdirectory internally which is not what we want when
-        # DOWNLOAD_ONLY is set. Populate will only download the dependency without adding it to the
-        # build
-        FetchContent_Populate(
-          ${PACKAGE}
-          SOURCE_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-src"
-          BINARY_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-build"
-          SUBBUILD_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-subbuild"
-          ${ARGN}
-        )
-      else()
-        FetchContent_MakeAvailable(${PACKAGE})
-      endif()
-    else()
-      FetchContent_Populate(${PACKAGE})
-    endif()
-    set(${populated}
-        TRUE
-        PARENT_SCOPE
-    )
-  endif()
-
-  cpm_store_fetch_properties(
-    ${CPM_ARGS_NAME} ${${lower_case_name}_SOURCE_DIR} ${${lower_case_name}_BINARY_DIR}
-  )
-
-  set(${PACKAGE}_SOURCE_DIR
-      ${${lower_case_name}_SOURCE_DIR}
-      PARENT_SCOPE
-  )
-  set(${PACKAGE}_BINARY_DIR
-      ${${lower_case_name}_BINARY_DIR}
-      PARENT_SCOPE
-  )
-endfunction()
-
-# splits a package option
-function(cpm_parse_option OPTION)
-  string(REGEX MATCH "^[^ ]+" OPTION_KEY "${OPTION}")
-  string(LENGTH "${OPTION}" OPTION_LENGTH)
-  string(LENGTH "${OPTION_KEY}" OPTION_KEY_LENGTH)
-  if(OPTION_KEY_LENGTH STREQUAL OPTION_LENGTH)
-    # no value for key provided, assume user wants to set option to "ON"
-    set(OPTION_VALUE "ON")
-  else()
-    math(EXPR OPTION_KEY_LENGTH "${OPTION_KEY_LENGTH}+1")
-    string(SUBSTRING "${OPTION}" "${OPTION_KEY_LENGTH}" "-1" OPTION_VALUE)
-  endif()
-  set(OPTION_KEY
-      "${OPTION_KEY}"
-      PARENT_SCOPE
-  )
-  set(OPTION_VALUE
-      "${OPTION_VALUE}"
-      PARENT_SCOPE
-  )
-endfunction()
-
-# guesses the package version from a git tag
-function(cpm_get_version_from_git_tag GIT_TAG RESULT)
-  string(LENGTH ${GIT_TAG} length)
-  if(length EQUAL 40)
-    # GIT_TAG is probably a git hash
-    set(${RESULT}
-        0
-        PARENT_SCOPE
-    )
-  else()
-    string(REGEX MATCH "v?([0123456789.]*).*" _ ${GIT_TAG})
-    set(${RESULT}
-        ${CMAKE_MATCH_1}
-        PARENT_SCOPE
-    )
-  endif()
-endfunction()
-
-# guesses if the git tag is a commit hash or an actual tag or a branch name.
-function(cpm_is_git_tag_commit_hash GIT_TAG RESULT)
-  string(LENGTH "${GIT_TAG}" length)
-  # full hash has 40 characters, and short hash has at least 7 characters.
-  if(length LESS 7 OR length GREATER 40)
-    set(${RESULT}
-        0
-        PARENT_SCOPE
-    )
-  else()
-    if(${GIT_TAG} MATCHES "^[a-fA-F0-9]+$")
-      set(${RESULT}
-          1
-          PARENT_SCOPE
-      )
-    else()
-      set(${RESULT}
-          0
-          PARENT_SCOPE
-      )
-    endif()
-  endif()
-endfunction()
-
-function(cpm_prettify_package_arguments OUT_VAR IS_IN_COMMENT)
-  set(oneValueArgs
-      NAME
-      FORCE
-      VERSION
-      GIT_TAG
-      DOWNLOAD_ONLY
-      GITHUB_REPOSITORY
-      GITLAB_REPOSITORY
-      BITBUCKET_REPOSITORY
-      GIT_REPOSITORY
-      SOURCE_DIR
-      FIND_PACKAGE_ARGUMENTS
-      NO_CACHE
-      SYSTEM
-      GIT_SHALLOW
-      EXCLUDE_FROM_ALL
-      SOURCE_SUBDIR
-  )
-  set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND)
-  cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-
-  foreach(oneArgName ${oneValueArgs})
-    if(DEFINED CPM_ARGS_${oneArgName})
-      if(${IS_IN_COMMENT})
-        string(APPEND PRETTY_OUT_VAR "#")
-      endif()
-      if(${oneArgName} STREQUAL "SOURCE_DIR")
-        string(REPLACE ${CMAKE_SOURCE_DIR} "\${CMAKE_SOURCE_DIR}" CPM_ARGS_${oneArgName}
-                       ${CPM_ARGS_${oneArgName}}
-        )
-      endif()
-      string(APPEND PRETTY_OUT_VAR "  ${oneArgName} ${CPM_ARGS_${oneArgName}}\n")
-    endif()
-  endforeach()
-  foreach(multiArgName ${multiValueArgs})
-    if(DEFINED CPM_ARGS_${multiArgName})
-      if(${IS_IN_COMMENT})
-        string(APPEND PRETTY_OUT_VAR "#")
-      endif()
-      string(APPEND PRETTY_OUT_VAR "  ${multiArgName}\n")
-      foreach(singleOption ${CPM_ARGS_${multiArgName}})
-        if(${IS_IN_COMMENT})
-          string(APPEND PRETTY_OUT_VAR "#")
-        endif()
-        string(APPEND PRETTY_OUT_VAR "    \"${singleOption}\"\n")
-      endforeach()
-    endif()
-  endforeach()
-
-  if(NOT "${CPM_ARGS_UNPARSED_ARGUMENTS}" STREQUAL "")
-    if(${IS_IN_COMMENT})
-      string(APPEND PRETTY_OUT_VAR "#")
-    endif()
-    string(APPEND PRETTY_OUT_VAR " ")
-    foreach(CPM_ARGS_UNPARSED_ARGUMENT ${CPM_ARGS_UNPARSED_ARGUMENTS})
-      string(APPEND PRETTY_OUT_VAR " ${CPM_ARGS_UNPARSED_ARGUMENT}")
-    endforeach()
-    string(APPEND PRETTY_OUT_VAR "\n")
-  endif()
-
-  set(${OUT_VAR}
-      ${PRETTY_OUT_VAR}
-      PARENT_SCOPE
-  )
-
-endfunction()
diff --git a/story-editor/create_docker_image_win.sh b/story-editor/create_docker_image_win.sh
index 04efd06..d23418f 100755
--- a/story-editor/create_docker_image_win.sh
+++ b/story-editor/create_docker_image_win.sh
@@ -1 +1 @@
-docker build -t cpp-dev . --load
+docker build -t cpp-dev -f Dockerfile.mingw64 --load
diff --git a/story-editor/externals/SDL b/story-editor/externals/SDL
new file mode 160000
index 0000000..11dbff2
--- /dev/null
+++ b/story-editor/externals/SDL
@@ -0,0 +1 @@
+Subproject commit 11dbff246f9015bb79a8f5c761b5b7099a9b579b
diff --git a/story-editor/externals/SDL_image b/story-editor/externals/SDL_image
new file mode 160000
index 0000000..0e9b6b6
--- /dev/null
+++ b/story-editor/externals/SDL_image
@@ -0,0 +1 @@
+Subproject commit 0e9b6b675e804f0a03f9d3ae6e871471f9505d3c
diff --git a/story-editor/externals/SDL_mixer b/story-editor/externals/SDL_mixer
new file mode 160000
index 0000000..af6a29d
--- /dev/null
+++ b/story-editor/externals/SDL_mixer
@@ -0,0 +1 @@
+Subproject commit af6a29df4e14c6ce72608b3ccd49cf35e1014255
diff --git a/story-editor/externals/civetweb b/story-editor/externals/civetweb
new file mode 160000
index 0000000..7f95a26
--- /dev/null
+++ b/story-editor/externals/civetweb
@@ -0,0 +1 @@
+Subproject commit 7f95a2632ef651402c15c39b72c4620382dd82bf
diff --git a/story-editor/externals/curl b/story-editor/externals/curl
new file mode 160000
index 0000000..34c1c65
--- /dev/null
+++ b/story-editor/externals/curl
@@ -0,0 +1 @@
+Subproject commit 34c1c653fc475efb828658f900979596905c688e
diff --git a/story-editor/imgui.ini b/story-editor/imgui.ini
index fb65035..6f8e4fd 100644
--- a/story-editor/imgui.ini
+++ b/story-editor/imgui.ini
@@ -4,60 +4,60 @@ Size=1220,694
 Collapsed=0
 
 [Window][Debug##Default]
-Pos=260,575
+Pos=260,280
 Size=32,42
 Collapsed=0
 
 [Window][Library Manager]
-Pos=552,26
-Size=728,827
+Pos=630,346
+Size=650,374
 Collapsed=0
-DockId=0x00000003,3
+DockId=0x0000000B,0
 
 [Window][Console]
-Pos=60,855
-Size=1220,289
+Pos=60,460
+Size=568,260
 Collapsed=0
-DockId=0x00000006,0
+DockId=0x00000009,0
 
 [Window][Emulator]
-Pos=552,26
-Size=728,827
+Pos=630,26
+Size=453,318
 Collapsed=0
-DockId=0x00000003,2
+DockId=0x00000003,0
 
 [Window][Code editor]
 Pos=466,290
 Size=814,331
 Collapsed=0
-DockId=0x00000004,0
+DockId=0x00000001,0
 
 [Window][Resources]
-Pos=552,26
-Size=728,827
+Pos=630,346
+Size=650,374
 Collapsed=0
-DockId=0x00000003,0
+DockId=0x0000000B,1
 
 [Window][Properties]
-Pos=552,26
-Size=728,827
+Pos=630,26
+Size=453,318
 Collapsed=0
 DockId=0x00000003,1
 
 [Window][Node editor]
 Pos=60,26
-Size=490,827
+Size=568,432
 Collapsed=0
-DockId=0x00000002,0
+DockId=0x00000008,0
 
 [Window][QuitConfirm]
-Pos=479,524
+Pos=479,312
 Size=321,96
 Collapsed=0
 
 [Window][ToolBar]
 Pos=0,26
-Size=60,1118
+Size=60,694
 Collapsed=0
 
 [Window][ProjectPropertiesPopup]
@@ -86,10 +86,10 @@ Size=951,564
 Collapsed=0
 
 [Window][CPU]
-Pos=552,26
-Size=728,827
+Pos=60,460
+Size=568,260
 Collapsed=0
-DockId=0x00000003,4
+DockId=0x00000009,1
 
 [Window][Choose File##ChooseFileDlgKey]
 Pos=122,114
@@ -97,20 +97,20 @@ Size=1020,496
 Collapsed=0
 
 [Window][TOOLBAR]
-Pos=73,64
+Pos=85,72
 Size=79,42
 Collapsed=0
 
 [Window][WindowOverViewport_11111111]
 Pos=60,26
-Size=1220,1118
+Size=1220,694
 Collapsed=0
 
 [Window][Code viewer]
-Pos=552,26
-Size=728,827
+Pos=1085,26
+Size=195,318
 Collapsed=0
-DockId=0x00000003,5
+DockId=0x00000006,0
 
 [Window][Import story##ImportStoryDlgKey]
 Pos=256,33
@@ -122,6 +122,23 @@ Pos=216,85
 Size=874,542
 Collapsed=0
 
+[Window][Choose a binary story##SetSourceScriptDlgKey]
+Pos=189,66
+Size=879,517
+Collapsed=0
+
+[Window][RAM view]
+Pos=60,460
+Size=568,260
+Collapsed=0
+DockId=0x00000009,2
+
+[Window][Variables]
+Pos=60,460
+Size=568,260
+Collapsed=0
+DockId=0x00000009,3
+
 [Table][0x54B1A511,5]
 RefScale=20
 Column 0  Width=197 Sort=0v
@@ -192,7 +209,7 @@ Column 2  Width=124
 
 [Table][0x7728942D,5]
 RefScale=20
-Column 0  Width=207 Sort=0v
+Column 0  Width=249 Sort=0v
 Column 1  Width=119
 Column 2  Width=104
 Column 3  Width=108
@@ -262,12 +279,42 @@ Column 0  Sort=0v
 RefScale=20
 Column 0  Sort=0v
 
-[Docking][Data]
-DockSpace       ID=0x08BD597D Window=0x1BBC0F80 Pos=60,26 Size=1220,1118 Split=Y
-  DockNode      ID=0x00000005 Parent=0x08BD597D SizeRef=1220,403 Split=X
-    DockNode    ID=0x00000002 Parent=0x00000005 SizeRef=490,694 CentralNode=1 Selected=0xBB79A587
-    DockNode    ID=0x00000001 Parent=0x00000005 SizeRef=728,694 Split=Y Selected=0x63869CAF
-      DockNode  ID=0x00000003 Parent=0x00000001 SizeRef=543,294 Selected=0x52EB28B5
-      DockNode  ID=0x00000004 Parent=0x00000001 SizeRef=543,372 Selected=0x7563A968
-  DockNode      ID=0x00000006 Parent=0x08BD597D SizeRef=1220,289 Selected=0xEA83D666
+[Table][0xFD320A1F,3]
+RefScale=20
+Column 0  Width=4 Sort=0v
+Column 1  Width=4
+Column 2  Width=4
+
+[Table][0x965BCEFA,4]
+RefScale=20
+Column 0  Sort=0v
+
+[Table][0xB3490CB0,4]
+RefScale=20
+Column 0  Sort=0v
+
+[Table][0x63137D12,4]
+RefScale=20
+Column 0  Sort=0v
+
+[Table][0x1A9CDA45,4]
+RefScale=20
+Column 0  Sort=0v
+
+[Table][0xCEE1704D,4]
+RefScale=20
+Column 0  Sort=0v
+
+[Docking][Data]
+DockSpace         ID=0x08BD597D Window=0x1BBC0F80 Pos=60,26 Size=1220,694 Split=X
+  DockNode        ID=0x00000004 Parent=0x08BD597D SizeRef=568,694 Split=Y Selected=0xBB79A587
+    DockNode      ID=0x00000008 Parent=0x00000004 SizeRef=607,432 Selected=0xBB79A587
+    DockNode      ID=0x00000009 Parent=0x00000004 SizeRef=607,260 Selected=0x6DE9B20C
+  DockNode        ID=0x00000005 Parent=0x08BD597D SizeRef=769,694 Split=X
+    DockNode      ID=0x00000002 Parent=0x00000005 SizeRef=316,694 Split=Y Selected=0xBB79A587
+      DockNode    ID=0x0000000A Parent=0x00000002 SizeRef=306,462 Split=X Selected=0x8C72BEA8
+        DockNode  ID=0x00000003 Parent=0x0000000A SizeRef=453,363 CentralNode=1 Selected=0x8C72BEA8
+        DockNode  ID=0x00000006 Parent=0x0000000A SizeRef=195,363 Selected=0x52EB28B5
+      DockNode    ID=0x0000000B Parent=0x00000002 SizeRef=306,374 Selected=0x63869CAF
+    DockNode      ID=0x00000001 Parent=0x00000005 SizeRef=902,694 Selected=0x63869CAF
 
diff --git a/story-editor/src/IconsFontAwesome5_c.h b/story-editor/libs/IconsFontAwesome5_c.h
similarity index 100%
rename from story-editor/src/IconsFontAwesome5_c.h
rename to story-editor/libs/IconsFontAwesome5_c.h
diff --git a/story-editor/src/IconsMaterialDesignIcons.h b/story-editor/libs/IconsMaterialDesignIcons.h
similarity index 100%
rename from story-editor/src/IconsMaterialDesignIcons.h
rename to story-editor/libs/IconsMaterialDesignIcons.h
diff --git a/story-editor/libs/ImGuiFileDialog/ImGuiFileDialog.cpp b/story-editor/libs/ImGuiFileDialog/ImGuiFileDialog.cpp
index ae2ca83..ba7af8a 100644
--- a/story-editor/libs/ImGuiFileDialog/ImGuiFileDialog.cpp
+++ b/story-editor/libs/ImGuiFileDialog/ImGuiFileDialog.cpp
@@ -3994,7 +3994,9 @@ void IGFD::FileDialog::m_DisplayPathPopup(ImVec2 vSize) {
 
                         if (ImGui::TableNextColumn())  // file name
                         {
-                            if (ImGui::Selectable(infos_ptr->fileNameExt.c_str(), &selected, ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_SpanAvailWidth)) {
+                            static int f = ImGuiSelectableFlags_SpanAllColumns;
+                            f |= ImGuiSelectableFlags_SpanAvailWidth;
+                            if (ImGui::Selectable(infos_ptr->fileNameExt.c_str(), &selected,  f )) {
                                 fdi.SetCurrentPath(fdi.ComposeNewPath(fdi.GetCurrentPopupComposedPath()));
                                 fdi.pathClicked = fdi.SelectDirectory(infos_ptr);
                                 ImGui::CloseCurrentPopup();
diff --git a/story-editor/src/imgui_memory_editor.h b/story-editor/libs/imgui_memory_editor.h
similarity index 97%
rename from story-editor/src/imgui_memory_editor.h
rename to story-editor/libs/imgui_memory_editor.h
index ec2beef..cb3545a 100644
--- a/story-editor/src/imgui_memory_editor.h
+++ b/story-editor/libs/imgui_memory_editor.h
@@ -249,10 +249,10 @@ struct MemoryEditor
         if (DataEditingAddr != (size_t)-1)
         {
             // Move cursor but only apply on next frame so scrolling with be synchronized (because currently we can't change the scrolling while the window is being rendered)
-            if (ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_UpArrow)) && (ptrdiff_t)DataEditingAddr >= (ptrdiff_t)Cols)                     { data_editing_addr_next = DataEditingAddr - Cols; }
-            else if (ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_DownArrow)) && (ptrdiff_t)DataEditingAddr < (ptrdiff_t)mem_size - Cols)    { data_editing_addr_next = DataEditingAddr + Cols; }
-            else if (ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_LeftArrow)) && (ptrdiff_t)DataEditingAddr > (ptrdiff_t)0)                  { data_editing_addr_next = DataEditingAddr - 1; }
-            else if (ImGui::IsKeyPressed(ImGui::GetKeyIndex(ImGuiKey_RightArrow)) && (ptrdiff_t)DataEditingAddr < (ptrdiff_t)mem_size - 1)      { data_editing_addr_next = DataEditingAddr + 1; }
+            if (ImGui::IsKeyPressed(ImGuiKey_UpArrow) && (ptrdiff_t)DataEditingAddr >= (ptrdiff_t)Cols)                     { data_editing_addr_next = DataEditingAddr - Cols; }
+            else if (ImGui::IsKeyPressed(ImGuiKey_DownArrow) && (ptrdiff_t)DataEditingAddr < (ptrdiff_t)mem_size - Cols)    { data_editing_addr_next = DataEditingAddr + Cols; }
+            else if (ImGui::IsKeyPressed(ImGuiKey_LeftArrow) && (ptrdiff_t)DataEditingAddr > (ptrdiff_t)0)                  { data_editing_addr_next = DataEditingAddr - 1; }
+            else if (ImGui::IsKeyPressed(ImGuiKey_RightArrow) && (ptrdiff_t)DataEditingAddr < (ptrdiff_t)mem_size - 1)      { data_editing_addr_next = DataEditingAddr + 1; }
         }
 
         // Draw vertical separator
diff --git a/story-editor/src/stb_image.h b/story-editor/libs/stb_image.h
similarity index 100%
rename from story-editor/src/stb_image.h
rename to story-editor/libs/stb_image.h
diff --git a/story-editor/scripts/test_compiler.chip32 b/story-editor/scripts/test_compiler.chip32
new file mode 100644
index 0000000..593695a
--- /dev/null
+++ b/story-editor/scripts/test_compiler.chip32
@@ -0,0 +1,36 @@
+; jump over the data, to our entry label
+    jump         .entry
+
+$imageBird          DC8  "example.bmp", 8  ; data
+$someConstant       DC32  12456789
+
+; DSxx to declare a variable in RAM, followed by the number of elements
+$RamData1           DV32    1 ; one 32-bit integer
+$RamData2           DV32    1 ; one 32-bit integer
+$MyArray            DV8    10 ; array of 10 bytes
+
+; label definition
+.entry:   ;; comment here should work
+; We create a stupid loop just for RAM variable testing
+
+    ; Fill the second ram data with pattern, to be sure that there is no memory corrumptions
+    lcons r0, 0xFFFFFFFF
+    lcons r2, $RamData2
+    store @r2, r0, 4 ; save R0 in RAM
+
+    lcons r0, 4 ; prepare loop: 4 iterations
+    lcons r2, $RamData1 ; save in R2 a ram address
+    store @r2, r0, 4 ; save R0 in RAM
+    lcons r1, 1
+.loop:
+    load r0, @r2, 4  ; load this variable
+    sub r0, r1
+    store @r2, r0, 4 ; save R0 in RAM
+    skipz r0   ; skip loop if R0 == 0
+    jump .loop
+
+
+    mov      r0, r2  ; copy R2 into R0 (blank space between , and R2)
+mov R0,R2  ; copy R2 into R0 (NO blank space between , and R2)
+
+    halt
\ No newline at end of file
diff --git a/story-editor/scripts/test_media.chip32 b/story-editor/scripts/test_media.chip32
new file mode 100644
index 0000000..0943d7d
--- /dev/null
+++ b/story-editor/scripts/test_media.chip32
@@ -0,0 +1,140 @@
+	jump    .nodeEntry920e33a5-99c2-429c-9214-3776f169051d
+$cover.png DC8 "cover.png", 8
+$quelle_destination.mp3 DC8 "quelle_destination.mp3", 8
+$mediaChoice920e33a5-99c2-429c-9214-3776f169051d DC32, 2, .nodeEntryea561fb0-994b-4777-80f3-58860eda97d6, .nodeEntry45fb3875-38aa-4683-abba-06868c8e109c
+
+$saturne.png DC8 "saturne.png", 8
+$saturne.mp3 DC8 "saturne.mp3", 8
+$mediaChoice45fb3875-38aa-4683-abba-06868c8e109c DC32, 1, .nodeEntry42abccda-d683-409d-aed9-7727df3d6974
+
+$mars.png DC8 "mars.png", 8
+$mars.mp3 DC8 "mars.mp3", 8
+$mediaChoiceea561fb0-994b-4777-80f3-58860eda97d6 DC32, 1, .nodeEntryca6a2e13-f4f0-4d1e-a64c-11acacfb97c0
+
+$sature_story.mp3 DC8 "sature_story.mp3", 8
+$mediaChoice42abccda-d683-409d-aed9-7727df3d6974 DC32, 0, 
+$mars_story.mp3 DC8 "mars_story.mp3", 8
+$mediaChoiceca6a2e13-f4f0-4d1e-a64c-11acacfb97c0 DC32, 0, 
+; ---------------------------- Default node Type: Choice
+.nodeEntry920e33a5-99c2-429c-9214-3776f169051d:
+lcons r0, $cover.png
+lcons r1, $quelle_destination.mp3
+syscall 1
+lcons r0, 0b10000000000
+syscall 2
+lcons r0, $mediaChoice920e33a5-99c2-429c-9214-3776f169051d
+jump .media ; no return possible, so a jump is enough
+; ---------------------------- Default node Type: Transition
+.nodeEntry45fb3875-38aa-4683-abba-06868c8e109c:
+lcons r0, $saturne.png
+lcons r1, $saturne.mp3
+syscall 1
+lcons r0, .nodeEntry42abccda-d683-409d-aed9-7727df3d6974
+ret
+
+; ---------------------------- Default node Type: Transition
+.nodeEntryea561fb0-994b-4777-80f3-58860eda97d6:
+lcons r0, $mars.png
+lcons r1, $mars.mp3
+syscall 1
+lcons r0, .nodeEntryca6a2e13-f4f0-4d1e-a64c-11acacfb97c0
+ret
+
+; ---------------------------- Default node Type: End
+.nodeEntry42abccda-d683-409d-aed9-7727df3d6974:
+lcons r0, 0
+lcons r1, $sature_story.mp3
+syscall 1
+ret
+
+; ---------------------------- Default node Type: End
+.nodeEntryca6a2e13-f4f0-4d1e-a64c-11acacfb97c0:
+lcons r0, 0
+lcons r1, $mars_story.mp3
+syscall 1
+ret
+
+; Generic media choice manager
+.media:
+    ; Les adresses des différents medias sont dans la stack
+; Arguments:
+    ; r0: address d'une structure de type "media choice"
+; Local:
+    ; t0: current media address
+    ; t1: increment 4
+    ; t3: address of the first element in the choice array
+    ; t4: address of the last element in the choice array
+    ; t5: where to jump when OK button is pressed
+    
+    mov t3, r0  ; copie de R0 pour travailler dessus
+    mov t2, r0  ; sauvegarde de R0
+    load r0, @t3, 4 ; Le premier élément est le nombre de choix possibles, ex: r0 = 12
+    lcons t1, 4
+    mul r0, t1  ; on calcule l'offset: r0 = nb_elements * 4 = 48
+    mov t4, t3  ; t4 = t3
+    add t4, r0  ; t4 pointe maintenant sur le dernier élément de la structure
+    add t3, t1  ; t3 pointe maintenant sur le premier élément
+    mov t0, t3      ; on commence sur le premier élément
+    
+.media_loop:
+
+    ; --------- We call a media transition node
+    load r0, @t0, 4 ; Get the address located at memory T0
+    call r0 ; call subroutine   
+    ; Return argument in R0: the address of the node to call whe OK is pressed
+    mov t5, r0  ; save it
+    
+    ; wait for event
+    lcons r0, 0b100111 ; mask for OK, previous and next buttons, home button
+    syscall 2
+    ; Event is stored in R0
+
+    ; -----  Test if event is OK button
+    lcons r1, 1 ; mask for OK button
+    and r1, r0 ; r1 = r1 AND r0
+    skipz r1 ; not OK, skip jump
+    jump .media_wait_event
+    
+    ; test previous event
+    lcons r1, 2 ; mask for previous button
+    and r1, r0 ; r1 = r1 AND r0
+    skipz r1 ; not OK, skip jump
+    jump .media_previous
+
+    lcons r1, 0b100000 ; mask for home button
+    and r1, r0 ; r1 = r1 AND r0
+    skipz r1 ; not Home, skip jump
+    jump .media_exit
+
+    ; all other events mean: next node
+    eq  r0, t0, t4   ;  t4 est le dernier élément
+    skipz r0   ;  zéro, on peut incrémenter l'adresse
+    jump .media_set_first
+    add  t0, t1 ; t0 += 4
+    jump .media_loop
+ 
+ .media_set_first:  ; on reboucle sur le premier élément de la structure
+    mov t0, t3
+    jump .media_loop
+
+.media_previous:
+    eq r0, t0, t3   ; on teste si on est au premier élément
+    skipz r0   ;  zéro, on peut décrémenter l'adresse
+    jump .media_set_last
+    sub  t0, t1 ; t0 += 4
+    jump .media_loop
+ 
+ .media_set_last:  ; on reboucle sur le dernier élément de la structure
+    mov t0, t4
+    jump .media_loop
+
+.media_wait_event:
+    call t5 ; jump to the node
+    lcons r0, 0b10000100001 ; mask for end of audio, Ok, and home buttons
+    syscall 2 ; wait for event (OK, home or end of audio), return to choice loop
+    jump .media_loop
+
+.media_exit:
+    lcons r0, 1 ; Home button pressed, send signal to exit story
+    syscall 3 ; exit story, we should never return from this call
+    halt ; just in case
diff --git a/story-editor/src/graph.cpp b/story-editor/src/graph.cpp
deleted file mode 100644
index 1a42cf7..0000000
--- a/story-editor/src/graph.cpp
+++ /dev/null
@@ -1,355 +0,0 @@
-
-
-
-#include <iostream>
-#include <queue>
-#include "graph.h"
-
-using namespace std;
-
-
-
-Graph::Graph(int newMaxSize)
-    : mInvalid()
-{
-    maxSize = newMaxSize;
-//    vertexList = new Vertex [maxSize];
-}
-
-
-Graph::Graph(const Graph& other)
-{
-    *this = other;
-}
-
-
-Graph& Graph::operator=(const Graph& other)
-{
-    if (this == &other)
-    {
-        return *this;
-    }
-
-    if (!empty())
-    {
-        vertexList.clear();
-    }
-
-    maxSize = other.maxSize;
-    vertexList.assign(other.vertexList.begin(), other.vertexList.end());
-
-    return *this;
-}
-
-
-Graph::~Graph()
-{
-//    delete[] vertexList;
-    vertexList.clear();
-}
-
-
-bool Graph::addVertex(const string& id)
-{
-    if (full())
-    {
-        return false;
-    }
-
-    Vertex newVertex;
-    newVertex.setId(id);
-    vertexList.push_back(newVertex);
-
-    return true;
-}
-
-
-void Graph::insertEdge(const string& v1, const string& v2, int wt)
-{
-    std::list<Vertex>::iterator it_v1 = vertexList.end();
-    std::list<Vertex>::iterator it_v2 = vertexList.end();
-    for (auto it = vertexList.begin(); it != vertexList.end(); ++it)
-    {
-        if (it->getId() == v1)
-        {
-            it_v1 = it;
-        }
-        if (it->getId() == v2)
-        {
-            it_v2 = it;
-        }
-    }
-
-    if ((it_v1 != vertexList.end()) && (it_v2 != vertexList.end()))
-    {
-        if (wt == -1)
-        {
-            wt = it_v1->first_edge_gap();
-        }
-        it_v1->edges.push_back(make_pair(wt, *it_v2));
-    }
-    else
-    {
-        std::cout << "Cannot create edge, one vertex is not found" << std::endl;
-    }
-}
-
-Graph::Vertex Graph::first() const
-{
-    if (vertexList.size() > 0) {
-        return vertexList.front();
-    } else {
-        return Vertex();
-    }
-}
-
-Graph::Vertex Graph::getVertex(const string &v) const
-{
-    for (auto it = vertexList.begin(); it != vertexList.end(); ++it)
-    {
-        if (it->getId() == v)
-        {
-            return *it;
-        }
-    }
-    return mInvalid;
-}
-
-Graph::Vertex Graph::getPreviousVertex(const string &id)
-{
-    for (auto it = vertexList.begin(); it != vertexList.end(); ++it)
-    {
-        if (it->edges.size() > 0)
-        {
-            Vertex v = it->edges.front().second;
-            if (v.getId() == id)
-            {
-                return *it;
-            }
-        }
-    }
-    return mInvalid;
-}
-
-Graph::Vertex Graph::getNextVertex(const std::string &id) const
-{
-    for (auto it = vertexList.begin(); it != vertexList.end(); ++it)
-    {
-        if ((it->getId() == id))
-        {
-            if (it->edges.size() > 0)
-            {
-                return it->edges.front().second;
-            }
-        }
-    }
-    return mInvalid;
-}
-
-
-void Graph::Vertex::removeEdge(const std::string &id)
-{
-    edges.remove_if([id](const vPair &n){ return (n.second.getId() == id); });
-}
-
-bool Graph::removeVertex(const string& v)
-{
-    std::list<Vertex>::iterator it_v = vertexList.end();
-
-    // On cherche le vertex
-    for (auto it = vertexList.begin(); it != vertexList.end(); ++it)
-    {
-        if (it->getId() == v)
-        {
-            it_v = it;
-        }
-    }
-
-    // vertex non trouvé
-    if (it_v == vertexList.end())
-    {
-        return false;
-    }
-
-    std::cout << "RM: " << it_v->getId()<< std::endl;
-    vertexList.erase(it_v);
-
-    // On supprime toutes les références à ce vertex dans les autres edges
-    // == On supprime les relations éventuelles
-    for (auto it = vertexList.begin(); it != vertexList.end(); ++it)
-    {
-        it->removeEdge(v);
-    }
-
-    return true;
-}
-
-
-bool Graph::removeEdge(const string& v1, const string& v2)
-{
-    bool success = false;
-    for (auto it = vertexList.begin(); it != vertexList.end(); ++it)
-    {
-        if (it->getId() == v1)
-        {
-            success = true;
-            it->removeEdge(v2);
-            break;
-        }
-    }
-
-    return success;
-}
-
-bool Graph::clearExtraEdges(const std::string &v1)
-{
-    bool success = false;
-    for (auto it = vertexList.begin(); it != vertexList.end(); ++it)
-    {
-        if (it->getId() == v1)
-        {
-            success = true;
-            it->edges.remove_if([](const Vertex::vPair &n){ return (n.first != 0); });
-            break;
-        }
-    }
-
-    return success;
-}
-
-
-void Graph::clear()
-{
-    vertexList.clear();
-}
-
-
-bool Graph::empty() const
-{
-    return (vertexList.size() == 0);
-}
-
-
-bool Graph::full() const
-{
-    return (vertexList.size() == maxSize);
-}
-
-
-void Graph::showGraph() const
-{
-    if (empty())
-    {
-        cout << "Empty Graph" << endl;
-        return;
-    }
-
-    cout << "Vertex and their adjacency list:" << endl;
-    for (auto it = vertexList.begin(); it != vertexList.end(); ++it)
-    {
-        cout << '\t' << it->getId();
-
-        for (const auto &j : it->edges)
-        {
-            cout << " --> [" << j.second.getId()
-                 << ", " << j.first << ']';
-        }
-        cout << endl;
-    }
-}
-/*
-
-void Graph::bfs(const string& src, const string& dest) const
-{
-    int src_idx = getIndex(src),
-        dest_idx = getIndex(dest);
-
-    if (src_idx == -1 ||
-        dest_idx == -1)
-        return;
-
-    queue<int> q;
-    vector<bool> visited(size, false);
-
-    q.push(src_idx);
-    visited[src_idx] = true;
-
-    while (!q.empty())
-    {
-        int u = q.front();
-        q.pop();
-
-        cout << ' ' << vertexList[u].getId() << endl;
-
-        if (u == dest_idx)
-            return;
-
-        for (auto i : vertexList[u].edges)
-        {
-            int v = getIndex(i.second.getId());
-
-            if (!visited[v])
-            {
-                visited[v] = true;
-                q.push(v);
-            }
-        }
-    }
-}
-
-
-void Graph::shortestPaths(const string& src) const
-{
-    typedef pair<int,int> vtx;
-    priority_queue<vtx,
-                   vector<vtx>,
-                   greater<vtx> > pq;
-
-    vector<int> dist(size, INF);
-
-    int src_idx = getIndex(src);
-    if (src_idx == -1) return;
-
-    dist[src_idx] = 0;
-    pq.push(make_pair(0, src_idx));
-
-    while (!pq.empty())
-    {
-        int u = pq.top().second;
-        pq.pop();
-
-        for (auto i : vertexList[u].edges)
-        {
-            int v = getIndex(i.second.getId());
-            int wt = i.first;
-
-            if (dist[v] > dist[u] + wt)
-            {
-                dist[v] = dist[u] + wt;
-                pq.push(make_pair(dist[v], v));
-            }
-        }
-    }
-
-    for (int i = 0; i < size; i++)
-    {
-        cout << '\t' << vertexList[i].getId();
-        if (dist[i] == INF)
-            cout << '\t' << '-' << endl;
-        else
-            cout << '\t' << dist[i] << endl;
-    }
-    cout << endl;
-}
-
-
-int Graph::getIndex(const string& v) const
-{
-    for (int i = 0; i < size; i++)
-        if (v == vertexList[i].getId())
-            return i;
-
-    return -1;
-}
-*/
-
diff --git a/story-editor/src/graph.h b/story-editor/src/graph.h
deleted file mode 100644
index 68b15a5..0000000
--- a/story-editor/src/graph.h
+++ /dev/null
@@ -1,134 +0,0 @@
-#ifndef GRAPH_H
-#define GRAPH_H
-
-
-#include <utility>
-#include <climits>
-#include <string>
-#include <vector>
-#include <list>
-#include <algorithm>
-
-class  Graph
-{
-static const int DEFAULT_MAX_SIZE = 1000;
-static const int INF = INT_MAX;
-
-public:
-    class Vertex
-    {
-    public:
-        Vertex()
-        {
-            mValid = false;
-        }
-
-        ~Vertex()
-        {
-
-        }
-
-        int first_edge_gap()
-        {
-            std::vector<int> vec;
-
-            for (auto &p : edges)
-            {
-                vec.push_back(p.first);
-            }
-
-            // Handle the special case of an empty vector.  Return 1.
-            if( vec.empty() )
-            {
-                return 1;
-            }
-
-            // Sort the vector
-            std::sort( vec.begin(), vec.end() );
-
-            // Find the first adjacent pair that differ by more than 1.
-            auto i = std::adjacent_find( vec.begin(), vec.end(), [](int l, int r){return l+1<r;} );
-
-            // Handle the special case of no gaps.  Return the last value + 1.
-            if ( i == vec.end() )
-            {
-                --i;
-            }
-
-            return 1 + *i;
-        }
-
-        bool isValid() const { return mValid; }
-        void setId(const std::string& newId)
-            { mId = newId; mValid = true; }
-        std::string getId() const
-            { return mId; }
-
-
-        void removeEdge(const std::string &id);
-
-        // poids / vertex
-        typedef std::pair<int, Vertex> vPair;
-        std::list<vPair> edges;
-        Vertex& operator=(const Vertex& other)
-        {
-            mId = other.mId;
-            edges = other.edges;
-            mValid = other.mValid;
-            return *this;
-        }
-
-    private:
-        std::string mId{"invalid"};
-        bool mValid{false};
-    };
-
-    Graph(int newMaxSize = DEFAULT_MAX_SIZE);
-    Graph(const Graph& other);
-    Graph& operator=(const Graph& other);
-    ~Graph();
-
-    typedef std::list<Vertex>::iterator Iter;
-
-    std::list<Vertex>::iterator begin() { return vertexList.begin(); }
-    std::list<Vertex>::const_iterator begin() const { return vertexList.begin(); }
-    std::list<Vertex>::iterator end() { return vertexList.end(); }
-    std::list<Vertex>::const_iterator end() const { return vertexList.end(); }
-
-    bool addVertex(const std::string& id);
-
-    // If defaut weight is -1, then auto attribute the weight
-    void insertEdge(const std::string& v1, const std::string& v2, int wt = -1);
-
-    Vertex first() const;
-    Vertex getVertex(const std::string& id) const;
-    Vertex getPreviousVertex(const std::string &id);
-    Vertex getNextVertex(const std::string &id) const;
-
-    bool searchVertex(const std::string& v, Vertex& returnVertex) const;
-//    bool searchEdge(const std::string& v1, const std::string& v2,
-//                    int& wt) const;
-    bool removeVertex(const std::string& v1);
-    bool removeEdge(const std::string& v1, const std::string& v2);
-    bool clearExtraEdges(const std::string& v1);
-
-    void clear();
-
-    bool empty() const;
-    bool full() const;
-
-    void showGraph() const;
-
-//    void bfs(const std::string& src,
-//             const std::string& dest) const;
-//    void shortestPaths(const std::string& src) const;
-
-private:
-    int getIndex(const std::string& v) const;
-    Vertex mInvalid;
-    std::list<Vertex> vertexList;
-    int maxSize;
-};
-
-
-#endif // GRAPH_H
diff --git a/story-editor/src/media_converter.cpp b/story-editor/src/media_converter.cpp
index cf055a1..5dfd94f 100644
--- a/story-editor/src/media_converter.cpp
+++ b/story-editor/src/media_converter.cpp
@@ -17,30 +17,60 @@
 #define DR_MP3_IMPLEMENTATION
 #include "dr_mp3.h"
 
-#define QOI_IMPLEMENTATION
-#include "qoi.h"
+#include <filesystem>
+#include <vector>
+#include <cstddef>
+#include <fstream>
+#include <iostream>
+#include <cstdlib>
+#include <memory>
+#include <variant>
+#include <string>
+#include <string_view>
+#include "qoixx.hpp"
+
+struct QoiDescription {
+	unsigned int width;
+	unsigned int height;
+	unsigned char channels;
+	unsigned char colorspace;
+    void * pixels;
+};
+
+static inline void save_file(const std::filesystem::path& path, const std::vector<std::byte>& bytes){
+  std::ofstream ofs{path, std::ios::binary};
+  ofs.write(reinterpret_cast<const char*>(bytes.data()), bytes.size());
+}
+
+using image = std::variant<QoiDescription, std::pair<std::vector<std::byte>, qoixx::qoi::desc>>;
 
 
-static int qoi_encode_and_write(const char *filename, const void *data, const qoi_desc *desc) {
-	FILE *f = fopen(filename, "wb");
-	int size;
-	void *encoded;
+template<typename... Fs>
+struct overloaded : Fs...{
+  using Fs::operator()...;
+};
+template<typename... Fs> overloaded(Fs...) -> overloaded<Fs...>;
 
-	if (!f) {
-		return 0;
-	}
-
-	encoded = qoi_encode(data, desc, &size);
-	if (!encoded) {
-		fclose(f);
-		return 0;
-	}
-
-	fwrite(encoded, 1, size, f);
-	fclose(f);
-
-	free(encoded);
-	return size;
+static inline void write_qoi(const std::filesystem::path& file_path, const image& image){
+  auto [ptr, size, desc] = std::visit(overloaded(
+    [](const QoiDescription& image){
+      return std::make_tuple(
+        reinterpret_cast<const std::byte*>(image.pixels),
+        static_cast<std::size_t>(image.width) * image.height * image.channels,
+        qoixx::qoi::desc{
+          .width = static_cast<std::uint32_t>(image.width),
+          .height = static_cast<std::uint32_t>(image.height),
+          .channels = static_cast<std::uint8_t>(image.channels),
+          .colorspace = qoixx::qoi::colorspace::srgb
+        }
+      );
+    },
+    [](const std::pair<std::vector<std::byte>, qoixx::qoi::desc>& image){
+      return std::make_tuple(image.first.data(), image.first.size(), image.second);
+    }
+  ), image);
+  const auto encoded = qoixx::qoi::encode<std::vector<std::byte>>(ptr, size, desc);
+  save_file(file_path, encoded);
 }
 
 MediaConverter::MediaConverter()
@@ -69,26 +99,28 @@ int MediaConverter::ImageToQoi(const std::string &inputFileName, const std::stri
 
     if (pixels != NULL)
     {
-        qoi_desc desc;
+        QoiDescription desc;
         desc.channels = channels;
-        desc.colorspace = QOI_SRGB;
+        desc.colorspace = 0; // #define QOI_SRGB   0
         desc.width = w;
         desc.height = h;
+        desc.pixels = pixels;
 
-
-        int encoded = qoi_encode_and_write(outputFileName.c_str(), pixels, &desc);
+        write_qoi(outputFileName.c_str(), desc);
         free(pixels);
-
+/*
         if (!encoded)
         {
             return cErrorCannotWriteOrEncodeOutputFile;
         }
+        */
     }
     else
     {
         return cErrorCannotDecodeInputFile;
     }
 
+
     return cSuccess;
 }
 
diff --git a/story-editor/src/node_editor/media_node_widget.cpp b/story-editor/src/node_editor/media_node_widget.cpp
index 9252dec..03ec2e1 100644
--- a/story-editor/src/node_editor/media_node_widget.cpp
+++ b/story-editor/src/node_editor/media_node_widget.cpp
@@ -25,24 +25,6 @@ void MediaNodeWidget::Draw()
 {
     BaseNodeWidget::FrameStart();
 
-/*
-    static ImGuiTableFlags flags = ImGuiTableFlags_Borders |
-                                   ImGuiTableFlags_NoHostExtendX | ImGuiTableFlags_SizingFixedFit;
-
-
-    ImGui::PushStyleVar(ImGuiStyleVar_CellPadding, ImVec2(10.0f, 10.0f));
-    if (ImGui::BeginTable("table1", 1, flags))
-    {
-        ImGui::TableNextRow();
-        ImU32 bg_color = ImGui::GetColorU32(ImVec4(0.3f, 0.3f, 0.7f, 1.0f));
-        ImGui::TableSetBgColor(ImGuiTableBgTarget_RowBg0, bg_color);
-        ImGui::TableSetColumnIndex(0);
-        ImGui::TextUnformatted("Media node");
-
-        ImGui::EndTable();
-    }
-    ImGui::PopStyleVar();
-*/
     const char * text = "Media node";
     // Obtenir la position courante du curseur
     ImVec2 pos = ImGui::GetCursorScreenPos();
diff --git a/story-editor/src/node_editor/node_editor_window.cpp b/story-editor/src/node_editor/node_editor_window.cpp
index c438753..92b6244 100644
--- a/story-editor/src/node_editor/node_editor_window.cpp
+++ b/story-editor/src/node_editor/node_editor_window.cpp
@@ -11,6 +11,7 @@
 
 #include "media_node_widget.h"
 #include "function_node_widget.h"
+#include "variable_node_widget.h"
 #include "gui.h"
 #include "uuid.h"
 
@@ -29,6 +30,7 @@ NodeEditorWindow::NodeEditorWindow(IStoryManager &manager)
 
     registerNode<MediaNodeWidget>("media-node");
     registerNode<FunctionNodeWidget>("function-node");
+    registerNode<VariableNodeWidget>("variable-node");
 }
 
 NodeEditorWindow::~NodeEditorWindow()
diff --git a/story-editor/src/node_editor/variable_node_widget.cpp b/story-editor/src/node_editor/variable_node_widget.cpp
new file mode 100644
index 0000000..b41acae
--- /dev/null
+++ b/story-editor/src/node_editor/variable_node_widget.cpp
@@ -0,0 +1,66 @@
+
+#include <sstream>
+#include "variable_node_widget.h"
+
+namespace ed = ax::NodeEditor;
+#include "IconsMaterialDesignIcons.h"
+#include "story_project.h"
+#include "uuid.h"
+
+VariableNodeWidget::VariableNodeWidget(IStoryManager &manager, std::shared_ptr<BaseNode> node)
+    : BaseNodeWidget(manager, node)
+    , m_manager(manager)
+{
+    // Create defaut one input and one output
+    //AddInput();
+    AddOutputs(1);
+    SetOutPinName(0, "");
+}
+
+void VariableNodeWidget::Initialize()
+{
+    BaseNodeWidget::Initialize();
+}
+
+
+
+void VariableNodeWidget::DrawProperties()
+{
+    ImGui::AlignTextToFramePadding();
+    static ImGuiComboFlags flags = 0;
+
+    if (ImGui::BeginCombo("Variables list", m_selectedVariable.c_str(), flags))
+    {
+        int i = 0;
+        m_manager.ScanVariable([&i, this] (Variable &var) {
+
+            // ImGui::PushID(static_cast<int>(i)); // Assure l'unicité des widgets
+
+            const bool is_selected = (m_selectedIndex == i);
+            if (ImGui::Selectable(var.name.c_str(), is_selected))
+            {
+                m_selectedIndex = i;
+                m_selectedVariable = var.name;
+            }
+
+            // Set the initial focus when opening the combo (scrolling + keyboard navigation focus)
+            if (is_selected)
+                ImGui::SetItemDefaultFocus();
+        });
+        ImGui::EndCombo();
+    }
+
+}
+
+void VariableNodeWidget::Draw()
+{
+    BaseNodeWidget::FrameStart();
+
+    ImGui::TextUnformatted(m_selectedVariable.c_str());
+
+    DrawPins();
+
+    BaseNodeWidget::FrameEnd();
+
+}
+
diff --git a/story-editor/src/node_editor/variable_node_widget.h b/story-editor/src/node_editor/variable_node_widget.h
new file mode 100644
index 0000000..79b2c06
--- /dev/null
+++ b/story-editor/src/node_editor/variable_node_widget.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <vector>
+#include <map>
+#include <mutex>
+#include <set>
+
+#include "base_node_widget.h"
+#include "i_story_manager.h"
+#include "i_story_project.h"
+#include "gui.h"
+#include "variable_node.h"
+#include <imgui_node_editor.h>
+#include "media_node.h"
+
+class VariableNodeWidget : public BaseNodeWidget
+{
+public:
+    VariableNodeWidget(IStoryManager &manager, std::shared_ptr<BaseNode> node);
+
+    void Draw() override;
+
+    virtual void DrawProperties() override;
+    virtual void Initialize() override;
+
+private:
+    IStoryManager &m_manager;
+    std::shared_ptr<VariableNode> m_variableNode;
+    int m_selectedIndex{-1};
+    std::string m_selectedVariable;
+};
diff --git a/story-editor/src/ost_wrapper.cpp b/story-editor/src/ost_wrapper.cpp
deleted file mode 100644
index 02bef30..0000000
--- a/story-editor/src/ost_wrapper.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-#include <iostream>
-#include <memory>
-#include "imgui.h"
-#include "IconsFontAwesome5.h"
-#include "SDL.h"
-#include "imgui-knobs.h"
-
-#include "ost_common.h"
-#include "ost_wrapper.h"
-#include "picture.h"
-
-static SDL_Texture *display_texture{nullptr};
-
-
-// OST stubs
-extern "C" void system_putc(char ch)
-{
-    std::cout << ch;
-    std::flush(std::cout);
-}
-
-
-static color_t image_buffer[320][240];
-
-extern "C" void ost_display_draw_h_line(uint16_t y, uint8_t *pixels, uint8_t *palette)
-{
-    color_t color;
-
-    for(uint16_t i = 0; i < 320; i++)
-    {
-        uint8_t val = pixels[i];
-
-        if (val > 15)
-        {
-            val = 0;
-        }
-
-        const uint8_t *palettePtr = &palette[val * 4];
-        color.r = palettePtr[0];
-        color.g = palettePtr[1];
-        color.b = palettePtr[2];
-
-        image_buffer[i][y] = color;
-    }
-}
-
-
-
-static void ost_impl_show_image(SDL_Renderer *renderer, const char *fileName)
-{
-    decompress(fileName);
-
-    display_texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_RGB565, SDL_TEXTUREACCESS_TARGET, 320, 240);
-
-    // Switch the renderer to the texture
-    SDL_SetRenderTarget(renderer, display_texture);
-    SDL_RenderClear(renderer);
-
-    for (uint16_t y = 0; y < 240; y++)
-    {
-        for(uint16_t x = 0; x < 320; x++)
-        {
-            SDL_SetRenderDrawColor(renderer, image_buffer[x][y].r, image_buffer[x][y].g, image_buffer[x][y].b, 255);
-            SDL_RenderDrawPoint(renderer, x, 239 - y);
-        }
-    }
-
-    SDL_RenderPresent(renderer);
-
-    // Switch back to main screen renderer
-    SDL_SetRenderTarget(renderer, NULL);
-}
-
-
-// Emulated hardware
-void draw_ost_device(SDL_Renderer *renderer, double deltaTime)
-{
-    static int load = 0;
-
-    if (load == 0)
-    {
-        load = 1;
-        ost_impl_show_image(renderer, "assets/images/example.bmp");
-    }
-
-
-    ImGui::Begin("OST device");
-
-    ImDrawList* draw_list = ImGui::GetWindowDrawList();
-
-    if (ImGui::Button("OK"))
-    {
-        std::cout << "OK button clicked" << std::endl;
-    }
-    ImGui::SameLine();
-    if (ImGui::Button("Home"))
-    {
-        std::cout << "Choose RIGHT" << std::endl;
-    }
-    ImGui::SameLine();
-    if (ImGui::Button("Pause"))
-    {
-        std::cout << "Choose RIGHT" << std::endl;
-    }
-    ImGui::SameLine();
-    if (ImGui::Button("Rotate LEFT"))
-    {
-        std::cout << "Choose LEFT" << std::endl;
-    }
-    ImGui::SameLine();
-    if (ImGui::Button("Rotate RIGHT"))
-    {
-        std::cout << "Choose RIGHT" << std::endl;
-    }
-
-    ImGui::NewLine();
-
-    // Contenu de l'écran
-
-    if (display_texture != nullptr)
-    {
-        ImGui::Image(display_texture, ImVec2((float)320, (float)240));
-    }
-    else
-    {
-        ImVec2 canvas_pos = ImGui::GetCursorScreenPos();            // ImDrawList API uses screen coordinates!
-        ImVec2 canvas_size(320, 240);
-
-        // Rectangle vide
-        draw_list->AddRectFilledMultiColor(canvas_pos, ImVec2(canvas_pos.x + canvas_size.x, canvas_pos.y + canvas_size.y), IM_COL32(50, 50, 50, 255), IM_COL32(50, 50, 60, 255), IM_COL32(60, 60, 70, 255), IM_COL32(50, 50, 60, 255));
-        // contour blanc
-        draw_list->AddRect(canvas_pos, ImVec2(canvas_pos.x + canvas_size.x, canvas_pos.y + canvas_size.y), IM_COL32(255, 255, 255, 255));
-    }
-
-    ImGui::End();
-}
-
-
diff --git a/story-editor/src/ost_wrapper.h b/story-editor/src/ost_wrapper.h
deleted file mode 100644
index cb92736..0000000
--- a/story-editor/src/ost_wrapper.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef OST_WRAPPER_H
-#define OST_WRAPPER_H
-
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#include <SDL.h>
-
-void draw_ost_device(SDL_Renderer *renderer, double deltaTime);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // OST_WRAPPER_H
diff --git a/story-editor/src/console_window.cpp b/story-editor/src/windows/console_window.cpp
similarity index 100%
rename from story-editor/src/console_window.cpp
rename to story-editor/src/windows/console_window.cpp
diff --git a/story-editor/src/console_window.h b/story-editor/src/windows/console_window.h
similarity index 100%
rename from story-editor/src/console_window.h
rename to story-editor/src/windows/console_window.h
diff --git a/story-editor/src/cpu_window.cpp b/story-editor/src/windows/cpu_window.cpp
similarity index 100%
rename from story-editor/src/cpu_window.cpp
rename to story-editor/src/windows/cpu_window.cpp
diff --git a/story-editor/src/cpu_window.h b/story-editor/src/windows/cpu_window.h
similarity index 100%
rename from story-editor/src/cpu_window.h
rename to story-editor/src/windows/cpu_window.h
diff --git a/story-editor/src/code_editor.cpp b/story-editor/src/windows/debugger_window.cpp
similarity index 86%
rename from story-editor/src/code_editor.cpp
rename to story-editor/src/windows/debugger_window.cpp
index 898ec1d..b53244a 100644
--- a/story-editor/src/code_editor.cpp
+++ b/story-editor/src/windows/debugger_window.cpp
@@ -1,4 +1,4 @@
-#include "code_editor.h"
+#include "debugger_window.h"
 #include "IconsMaterialDesignIcons.h"
 #include <fstream>
 #include <memory>
@@ -9,7 +9,7 @@
 #include "imgui.h"
 
 
-void CodeEditor::TextViewDraw()
+void DebuggerWindow::TextViewDraw()
 {
     const ImVec2 childSize = ImVec2(0, 0);
     // Début de la scrollview (Child)
@@ -33,11 +33,14 @@ void CodeEditor::TextViewDraw()
         std::string line;
         std::istringstream is(m_text);
 
+        if ((m_focusOnLine != -1) && (m_focusOnLine != m_currentLine))
+        {
+            m_currentLine = m_focusOnLine;
             // Calcul de la position Y pour centrer la ligne cible
-        float scrollY = m_currentLine * lineHeight - ImGui::GetWindowHeight() * 0.5f + lineHeight * 0.5f;
-
-        // Début de la scrollview
-        ImGui::SetScrollY(scrollY);
+            float scrollY = m_currentLine * lineHeight - ImGui::GetWindowHeight() * 0.5f + lineHeight * 0.5f;
+            // Début de la scrollview
+            ImGui::SetScrollY(scrollY);
+        }
 
         while (std::getline(is, line)) 
         {
@@ -90,7 +93,7 @@ void CodeEditor::TextViewDraw()
 }
 
 
-CodeEditor::CodeEditor(IStoryManager &project)
+DebuggerWindow::DebuggerWindow(IStoryManager &project)
     : WindowBase("Code viewer")
     , m_storyManager(project)
 {
@@ -98,7 +101,7 @@ CodeEditor::CodeEditor(IStoryManager &project)
     // SetFlags(ImGuiWindowFlags_MenuBar);
 }
 
-void CodeEditor::Initialize()
+void DebuggerWindow::Initialize()
 {
     // error markers
 
@@ -112,19 +115,19 @@ void CodeEditor::Initialize()
 
 }
 
-void CodeEditor::ClearErrors()
+void DebuggerWindow::ClearErrors()
 {
     // m_markers.clear();
 }
 
-void CodeEditor::AddError(int line, const std::string &text)
+void DebuggerWindow::AddError(int line, const std::string &text)
 {
     // m_markers.insert(std::make_pair(line, text));
     // mEditor.SetErrorMarkers(m_markers);
 }
 
 
-void CodeEditor::SetScript(const std::string &txt)
+void DebuggerWindow::SetScript(const std::string &txt)
 {
     m_text = txt;
 
@@ -143,14 +146,14 @@ void CodeEditor::SetScript(const std::string &txt)
 //     HighlightLine(2);
 }
 
-std::string CodeEditor::GetScript() const
+std::string DebuggerWindow::GetScript() const
 {
     // return mEditor.GetText();
     return "";
 }
 
 
-void CodeEditor::Draw()
+void DebuggerWindow::Draw()
 {
     WindowBase::BeginDraw();
 
diff --git a/story-editor/src/code_editor.h b/story-editor/src/windows/debugger_window.h
similarity index 85%
rename from story-editor/src/code_editor.h
rename to story-editor/src/windows/debugger_window.h
index 83d3fb5..f37bd29 100644
--- a/story-editor/src/code_editor.h
+++ b/story-editor/src/windows/debugger_window.h
@@ -5,10 +5,10 @@
 #include "i_story_manager.h"
 #include <unordered_set>
 
-class CodeEditor : public WindowBase
+class DebuggerWindow : public WindowBase
 {
 public:
-    CodeEditor(IStoryManager &project);
+    DebuggerWindow(IStoryManager &project);
 
     virtual void Draw() override;
 
@@ -21,7 +21,7 @@ public:
 
     void HighlightLine(int line)
     {
-        m_currentLine = line;
+        m_focusOnLine = line;
         // mEditor.SetExecutionMarker(line);
         // m_highlights[line] = ImVec4(0.5f, 0.5f, 1.0f, 0.5f);
     }
@@ -31,6 +31,7 @@ private:
     std::string m_text;
     std::unordered_set<int> m_breakpoints;
     // std::map<int, ImVec4> m_highlights;
+    int m_focusOnLine{-1};
     int m_currentLine{-1};
     // TextEditor mEditor;
     // TextEditor::Breakpoints m_breakPoints;
diff --git a/story-editor/src/emulator_window.cpp b/story-editor/src/windows/emulator_window.cpp
similarity index 74%
rename from story-editor/src/emulator_window.cpp
rename to story-editor/src/windows/emulator_window.cpp
index 2e1aca5..91b3a1a 100644
--- a/story-editor/src/emulator_window.cpp
+++ b/story-editor/src/windows/emulator_window.cpp
@@ -115,15 +115,29 @@ void EmulatorWindow::Draw()
         config.path = ".";
         config.countSelectionMax = 1;
         config.flags = ImGuiFileDialogFlags_Modal;
-        ImGuiFileDialog::Instance()->OpenDialog("LoadBinarySoryDlgKey", 
+        ImGuiFileDialog::Instance()->OpenDialog("LoadBinaryStoryDlgKey", 
             "Choose a binary story", 
             ".c32",
             config
         );
     }
 
-     // ---------------- Load Binary story
-    if (ImGuiFileDialog::Instance()->Display("LoadBinarySoryDlgKey"))
+    ImGui::SameLine();
+    if (ImGui::Button("Set script source file (.chip32)"))
+    {
+        IGFD::FileDialogConfig config;
+        config.path = ".";
+        config.countSelectionMax = 1;
+        config.flags = ImGuiFileDialogFlags_Modal;
+        ImGuiFileDialog::Instance()->OpenDialog("SetSourceScriptDlgKey", 
+            "Choose a binary story", 
+            ".chip32",
+            config
+        );
+    }
+
+    // ---------------- Load Binary story
+    if (ImGuiFileDialog::Instance()->Display("LoadBinaryStoryDlgKey"))
     {
         if (ImGuiFileDialog::Instance()->IsOk())
         {
@@ -137,6 +151,20 @@ void EmulatorWindow::Draw()
         ImGuiFileDialog::Instance()->Close();
     }
 
+    // ---------------- External source file
+    if (ImGuiFileDialog::Instance()->Display("SetSourceScriptDlgKey"))
+    {
+        if (ImGuiFileDialog::Instance()->IsOk())
+        {
+            std::string filePathName = ImGuiFileDialog::Instance()->GetFilePathName();
+            std::string filePath = ImGuiFileDialog::Instance()->GetCurrentPath();
+            std::string filter = ImGuiFileDialog::Instance()->GetCurrentFilter();
+            
+            m_story.SetExternalSourceFile(filePathName);
+        }
+        // close
+        ImGuiFileDialog::Instance()->Close();
+    }
 
 
     WindowBase::EndDraw();
diff --git a/story-editor/src/emulator_window.h b/story-editor/src/windows/emulator_window.h
similarity index 100%
rename from story-editor/src/emulator_window.h
rename to story-editor/src/windows/emulator_window.h
diff --git a/story-editor/src/library_window.cpp b/story-editor/src/windows/library_window.cpp
similarity index 100%
rename from story-editor/src/library_window.cpp
rename to story-editor/src/windows/library_window.cpp
diff --git a/story-editor/src/library_window.h b/story-editor/src/windows/library_window.h
similarity index 100%
rename from story-editor/src/library_window.h
rename to story-editor/src/windows/library_window.h
diff --git a/story-editor/src/main_window.cpp b/story-editor/src/windows/main_window.cpp
similarity index 95%
rename from story-editor/src/main_window.cpp
rename to story-editor/src/windows/main_window.cpp
index 06fd905..ec3e340 100644
--- a/story-editor/src/main_window.cpp
+++ b/story-editor/src/windows/main_window.cpp
@@ -20,16 +20,18 @@
 #endif
 #include "IconsMaterialDesignIcons.h"
 #include "ImGuiFileDialog.h"
+#include "imgui_memory_editor.h"
 
 MainWindow::MainWindow()
     : m_resources(*this)
     , m_libraryManager(*this)
     , m_emulatorWindow(*this)
-    , m_codeEditorWindow(*this)
+    , m_debuggerWindow(*this)
     , m_cpuWindow(*this)
     , m_resourcesWindow(*this)
     , m_nodeEditorWindow(*this)
     , m_libraryWindow(*this, m_libraryManager)
+    , m_variablesWindow(*this)
     , m_player(*this)
     , m_webServer(m_libraryManager)
 {
@@ -497,7 +499,7 @@ bool MainWindow::Initialize()
         m_player.Initialize(); // Initialize audio after GUI (uses SDL)
     //  gui.ApplyTheme();
 
-        m_codeEditorWindow.Initialize();
+        m_debuggerWindow.Initialize();
         m_emulatorWindow.Initialize();
         m_nodeEditorWindow.Initialize();
         m_PropertiesWindow.Initialize();
@@ -795,9 +797,10 @@ void MainWindow::OpenProject(const std::string &uuid)
         m_nodeEditorWindow.Enable();
         m_emulatorWindow.Enable();
         m_consoleWindow.Enable();
-        m_codeEditorWindow.Enable();
+        m_debuggerWindow.Enable();
         m_resourcesWindow.Enable();
         m_PropertiesWindow.Enable();
+        m_variablesWindow.Enable();
         m_cpuWindow.Enable();
     }
     else
@@ -857,14 +860,15 @@ void MainWindow::CloseProject()
     m_nodeEditorWindow.Initialize();
     m_emulatorWindow.ClearImage();
     m_consoleWindow.ClearLog();
-    m_codeEditorWindow.ClearErrors();
-    m_codeEditorWindow.SetScript("");
+    m_debuggerWindow.ClearErrors();
+    m_debuggerWindow.SetScript("");
 
     m_nodeEditorWindow.Disable();
     m_emulatorWindow.Disable();
-    m_codeEditorWindow.Disable();
+    m_debuggerWindow.Disable();
     m_resourcesWindow.Disable();
     m_PropertiesWindow.Disable();
+    m_variablesWindow.Disable();
     m_cpuWindow.Disable();
 
     RefreshProjectInformation();
@@ -942,11 +946,14 @@ void MainWindow::Loop()
         {
             m_consoleWindow.Draw();
             m_emulatorWindow.Draw();
-            m_codeEditorWindow.Draw();
+            m_debuggerWindow.Draw();
             m_resourcesWindow.Draw();
             m_nodeEditorWindow.Draw();
+            m_variablesWindow.Draw();
             m_cpuWindow.Draw();
 
+            static MemoryEditor mem_edit_1;
+            mem_edit_1.DrawWindow("RAM view", m_chip32_ctx.ram.mem, m_chip32_ctx.ram.size);
 
             m_PropertiesWindow.SetSelectedNode(m_nodeEditorWindow.GetSelectedNode());
             m_PropertiesWindow.Draw();
@@ -1048,10 +1055,18 @@ void MainWindow::LoadBinaryStory(const std::string &filename)
 
         if (sz <= m_chip32_ctx.rom.size)
         {
-            fread(m_chip32_ctx.rom.mem, sz, 1, fp);
-            m_dbg.run_result = VM_READY;
-            chip32_initialize(&m_chip32_ctx);
-            Log("Loaded binary file: " + filename);
+            size_t sizeRead = fread(m_chip32_ctx.rom.mem, sz, 1, fp);
+            if (sizeRead == sz)
+            {
+                m_dbg.run_result = VM_READY;
+                chip32_initialize(&m_chip32_ctx);
+                Log("Loaded binary file: " + filename);
+            }
+            else
+            {
+                Log("Failed to load binary file", true);
+            }
+            
         }
         fclose(fp);
     }
@@ -1081,11 +1096,29 @@ uint32_t MainWindow::GetRegister(int reg)
     return regVal;
 }
 
+void MainWindow::ScanVariable(const std::function<void(Variable& element)>& operation)
+{
+    if (m_story)
+    {
+        m_story->ScanVariable(operation);
+    }
+}
+
+void MainWindow::AddVariable() 
+{
+    m_story->AddVariable();
+}
+
+void MainWindow::DeleteVariable(int i)
+{
+    m_story->DeleteVariable(i);
+}
+
 void MainWindow::BuildNodes(bool compileonly)
 {
     if (m_story->GenerateScript(m_currentCode))
     {
-        m_codeEditorWindow.SetScript(m_currentCode);
+        m_debuggerWindow.SetScript(m_currentCode);
         Build(compileonly);
     }
 }
@@ -1104,7 +1137,7 @@ void MainWindow::Build(bool compileonly)
     }
 
     Chip32::Assembler::Error err;
-    m_codeEditorWindow.ClearErrors();
+    m_debuggerWindow.ClearErrors();
     if (m_story->GenerateBinary(m_currentCode, err))
     {
         m_result.Print();
@@ -1126,17 +1159,23 @@ void MainWindow::Build(bool compileonly)
     else
     {
         Log(err.ToString(), true);
-        m_codeEditorWindow.AddError(err.line, err.message); // show also the error in the code editor
+        m_debuggerWindow.AddError(err.line, err.message); // show also the error in the code editor
     }
 }
 
 
 void MainWindow::BuildCode(bool compileonly)
 {
-    m_currentCode = m_codeEditorWindow.GetScript();
+    m_currentCode = SysLib::ReadFile(m_externalSourceFileName);
+    m_debuggerWindow.SetScript(m_currentCode);
     Build(compileonly);
 }
 
+void MainWindow::SetExternalSourceFile(const std::string &filename)
+{
+    m_externalSourceFileName = filename;
+}
+
 
 void MainWindow::UpdateVmView()
 {
@@ -1148,7 +1187,7 @@ void MainWindow::UpdateVmView()
 
     if (m_story->GetAssemblyLine(pcVal, m_dbg.line))
     {
-        m_codeEditorWindow.HighlightLine(m_dbg.line);
+        m_debuggerWindow.HighlightLine(m_dbg.line);
         std::cout << "Executing line: " << m_dbg.line << std::endl;
     }
     else
diff --git a/story-editor/src/main_window.h b/story-editor/src/windows/main_window.h
similarity index 91%
rename from story-editor/src/main_window.h
rename to story-editor/src/windows/main_window.h
index 217212e..adee0d9 100644
--- a/story-editor/src/main_window.h
+++ b/story-editor/src/windows/main_window.h
@@ -6,12 +6,13 @@
 
 #include "gui.h"
 #include "console_window.h"
-#include "code_editor.h"
+#include "debugger_window.h"
 
 #include "emulator_window.h"
 #include "resources_window.h"
 #include "node_editor_window.h"
 #include "properties_window.h"
+#include "variables_window.h"
 
 #include "chip32_assembler.h"
 #include "chip32_vm.h"
@@ -92,6 +93,7 @@ private:
     Chip32::Result m_result;
     DebugContext m_dbg;
     std::string m_currentCode;
+    std::string m_externalSourceFileName; // path of an external script to be used as compilation input
 
     std::vector<std::string> m_recentProjects;
 
@@ -102,7 +104,7 @@ private:
     Gui m_gui;
     EmulatorWindow m_emulatorWindow;
     ConsoleWindow m_consoleWindow;
-    CodeEditor m_codeEditorWindow;
+    DebuggerWindow m_debuggerWindow;
     CpuWindow m_cpuWindow;
 
     char m_project_name[256] = "";
@@ -115,6 +117,8 @@ private:
 
     LibraryWindow m_libraryWindow;
 
+    VariablesWindow m_variablesWindow;
+
     AudioPlayer m_player;
 
     struct VmEvent
@@ -143,10 +147,16 @@ private:
 
     virtual void BuildNodes(bool compileonly) override;
     virtual void BuildCode(bool compileonly) override;
+    virtual void SetExternalSourceFile(const std::string &filename) override;
     virtual void LoadBinaryStory(const std::string &filename) override;
     virtual void ToggleBreakpoint(int line) override;
     virtual uint32_t GetRegister(int reg) override;
 
+    // Variable
+    virtual void ScanVariable(const std::function<void(Variable& element)>& operation) override;
+    virtual void AddVariable() override;
+    virtual void DeleteVariable(int i);
+
     virtual void Play() override;
     virtual void Step() override;
     virtual void Run() override;
diff --git a/story-editor/src/properties_window.cpp b/story-editor/src/windows/properties_window.cpp
similarity index 100%
rename from story-editor/src/properties_window.cpp
rename to story-editor/src/windows/properties_window.cpp
diff --git a/story-editor/src/properties_window.h b/story-editor/src/windows/properties_window.h
similarity index 100%
rename from story-editor/src/properties_window.h
rename to story-editor/src/windows/properties_window.h
diff --git a/story-editor/src/resources_window.cpp b/story-editor/src/windows/resources_window.cpp
similarity index 100%
rename from story-editor/src/resources_window.cpp
rename to story-editor/src/windows/resources_window.cpp
diff --git a/story-editor/src/resources_window.h b/story-editor/src/windows/resources_window.h
similarity index 100%
rename from story-editor/src/resources_window.h
rename to story-editor/src/windows/resources_window.h
diff --git a/story-editor/src/windows/variables_window.cpp b/story-editor/src/windows/variables_window.cpp
new file mode 100644
index 0000000..47512e3
--- /dev/null
+++ b/story-editor/src/windows/variables_window.cpp
@@ -0,0 +1,113 @@
+#include "variables_window.h"
+#include "gui.h"
+#include "ImGuiFileDialog.h"
+#include "IconsMaterialDesignIcons.h"
+#include "chip32_vm.h"
+#include "variable.h"
+
+VariablesWindow::VariablesWindow(IStoryManager &proj)
+    : WindowBase("Variables")
+    , m_story(proj)
+{
+
+}
+
+void VariablesWindow::Initialize()
+{
+
+
+
+}
+
+
+// Fonction pour convertir un entier en float selon l'échelle
+float ScaledToFloat(int64_t value, int scalePower) {
+    return static_cast<float>(value) * std::pow(10.0f, scalePower);
+}
+
+// Fonction pour convertir un float en entier selon l'échelle
+int64_t FloatToScaled(float floatValue, int scalePower) {
+    return static_cast<int64_t>(floatValue / std::pow(10.0f, scalePower));
+}
+
+
+void VariablesWindow::ShowRAMEditor()
+{
+
+    if (ImGui::Button("Add Variable")) {
+        // Ajouter une nouvelle variable par défaut
+        m_story.AddVariable();
+    }
+
+    ImGui::Separator();
+    int i = 0;
+
+    m_story.ScanVariable([&i, this] (Variable &var) {
+
+        ImGui::PushID(static_cast<int>(i)); // Assure l'unicité des widgets
+        if (ImGui::TreeNode((var.name + "###variable").c_str()))
+        {
+            // Modifier le nom de la variable
+            static char buffer[Variable::NameMaxSize];
+            std::strncpy(buffer, var.name.c_str(), sizeof(buffer));
+            buffer[sizeof(buffer) - 1] = '\0'; // Assure la terminaison
+            if (ImGui::InputText("Name", buffer, sizeof(buffer))) {
+                var.name = buffer;
+            }
+
+             // Choisir le type de la variable
+            const char* types[] = {"Integer", "String"};
+            static int selectedType = (var.type == "Integer") ? 0 : 1;
+            if (ImGui::Combo("Type", &selectedType, types, IM_ARRAYSIZE(types))) {
+                var.type = types[selectedType];
+            }
+
+            if (var.type == "Integer")
+            {
+                // Modifier l'échelle
+                ImGui::InputInt("Scale Power (10^x)", &var.scalePower);
+
+                // Modifier la valeur entière
+                int intValue = static_cast<int>(var.value);
+                if (ImGui::InputInt("Integer Value", &intValue)) {
+                    var.value = static_cast<int64_t>(intValue);
+                }
+
+                // Afficher la valeur flottante calculée
+                float floatValue = ScaledToFloat(var.value, var.scalePower);
+                ImGui::Text("Float Value: %.6f", floatValue);
+            }
+            else
+            {
+                std::strncpy(buffer, var.valueText.c_str(), sizeof(buffer));
+                if (ImGui::InputText("Text value", buffer, sizeof(buffer)))
+                {
+                    var.valueText = buffer;
+                }
+            }
+
+            // Bouton pour supprimer la variable
+            if (ImGui::Button("Delete")) {
+                m_story.DeleteVariable(i);
+                ImGui::TreePop();
+                ImGui::PopID();
+
+            }
+
+            ImGui::TreePop();
+        }
+        ImGui::PopID();
+        i++;
+
+    });
+}
+
+void VariablesWindow::Draw()
+{  
+    WindowBase::BeginDraw();
+
+
+    ShowRAMEditor();
+
+    WindowBase::EndDraw();
+}
diff --git a/story-editor/src/windows/variables_window.h b/story-editor/src/windows/variables_window.h
new file mode 100644
index 0000000..347503d
--- /dev/null
+++ b/story-editor/src/windows/variables_window.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "window_base.h"
+#include "i_story_manager.h"
+#include "gui.h"
+
+class VariablesWindow : public WindowBase
+{
+public:
+    VariablesWindow(IStoryManager &proj);
+
+    void Initialize();
+    virtual void Draw() override;
+
+private:
+    IStoryManager &m_story;
+
+
+    void ShowRAMEditor();
+
+};
+
diff --git a/story-editor/src/window_base.cpp b/story-editor/src/windows/window_base.cpp
similarity index 100%
rename from story-editor/src/window_base.cpp
rename to story-editor/src/windows/window_base.cpp
diff --git a/story-editor/src/window_base.h b/story-editor/src/windows/window_base.h
similarity index 100%
rename from story-editor/src/window_base.h
rename to story-editor/src/windows/window_base.h
diff --git a/story-editor/story-editor b/story-editor/story-editor
deleted file mode 100755
index 89fb204..0000000
Binary files a/story-editor/story-editor and /dev/null differ
diff --git a/story-editor/story-editor-logo-256x256.png b/story-editor/story-editor-logo-256x256.png
new file mode 100644
index 0000000..3f06094
Binary files /dev/null and b/story-editor/story-editor-logo-256x256.png differ
diff --git a/story-editor/story-editor-logo-512x512.png b/story-editor/story-editor-logo-512x512.png
new file mode 100644
index 0000000..1bcc0db
Binary files /dev/null and b/story-editor/story-editor-logo-512x512.png differ
diff --git a/story-editor/story-editor-logo.png b/story-editor/story-editor-logo.png
deleted file mode 100644
index f50d31d..0000000
Binary files a/story-editor/story-editor-logo.png and /dev/null differ
diff --git a/story-editor/story-editor.desktop b/story-editor/story-editor.desktop
index b31dc74..ee38e5f 100644
--- a/story-editor/story-editor.desktop
+++ b/story-editor/story-editor.desktop
@@ -1,8 +1,8 @@
 [Desktop Entry]
 Type=Application
-Name=StoryEditor
+Name=OpenStoryEditor
 Comment=A tool to create stories with graphical nodes (OpenStoryTeller project)
 Exec=story-editor
-Icon=story-editor-logo
+Icon=eu.d8s.OpenStoryEditor.png
 Categories=Graphics;2DGraphics;