diff --git a/GigiCompilerLib/Backends/DX12/Backend_DX12.cpp b/GigiCompilerLib/Backends/DX12/Backend_DX12.cpp
index 824525a8..70b0dbb7 100644
--- a/GigiCompilerLib/Backends/DX12/Backend_DX12.cpp
+++ b/GigiCompilerLib/Backends/DX12/Backend_DX12.cpp
@@ -35,6 +35,8 @@ struct BackendDX12 : public BackendBase
             case DataFieldType::Float3: return "DXGI_FORMAT_R32G32B32_FLOAT";
             case DataFieldType::Uint: return "DXGI_FORMAT_R32_UINT";
             case DataFieldType::Uint_16: return "DXGI_FORMAT_R16_UINT";
+            case DataFieldType::Int_64: return "DXGI_FORMAT_R32G32_SINT"; // 8 bytes exposed as two signed 32 bit channels; DXGI_FORMAT has no "RG32" spelling
+            case DataFieldType::Uint_64: return "DXGI_FORMAT_R32G32_UINT"; // 8 bytes exposed as two unsigned 32 bit channels
             default:
             {
                 Assert(false, "Unhandled DataFieldType: %i", (int)type);
@@ -438,6 +440,8 @@ struct BackendDX12 : public BackendBase
             case DataFieldType::Float4: return "float4";
             case DataFieldType::Bool: return "bool";
             case DataFieldType::Float4x4: return "float4x4";
+            case DataFieldType::Int_64: return "int64_t";
+            case DataFieldType::Uint_64: return "uint64_t";
             default:
             {
                 Assert(false, "Unhandled data field type: %s (%i)", EnumToString(type), type);
@@ -802,6 +806,8 @@ struct BackendDX12 : public BackendBase
                     case DataFieldType::Bool: varSymbols = "b"; break;
                     case DataFieldType::Float4x4: varSymbols = "ffffffffffffffff"; varRefs = "&value[0], &value[1], &value[2], &value[3], &value[4], &value[5], &value[6], &value[7], &value[8], &value[9], &value[10], &value[11], &value[12], &value[13], &value[14], &value[15]"; break;
                     case DataFieldType::Uint_16: varSymbols = "I"; break;
+                    case DataFieldType::Int_64: varSymbols = "I"; break; // NOTE(review): same symbol as the 16 bit case; confirm the consumer of varSymbols reads a full signed 64 bit value with "I"
+                    case DataFieldType::Uint_64: varSymbols = "I"; break; // NOTE(review): same symbol as the 16 bit case; confirm the consumer of varSymbols reads a full unsigned 64 bit value with "I"
                     default: Assert(false, "Unhandled Variable Type: %i", variable.type); break;
                 }
 
@@ -2055,7 +2061,7 @@ void CopyShaderFileDX12(const Shader& shader, const std::unordered_map<std::stri
 
         std::string workingDirectory = (std::filesystem::path(outFolder) / "shaders" / "").string();
         std::string slangErrorMessage;
-        if (!ProcessWithSlang(shaderFileContents, shader.fileName.c_str(), stage, shader.entryPoint.c_str(), shaderModel, slangErrorMessage, workingDirectory.c_str()))
+        if (!ProcessWithSlang(shaderFileContents, shader.fileName.c_str(), stage, shader.entryPoint.c_str(), shaderModel, slangErrorMessage, workingDirectory.c_str(), shader.slangOptions))
         {
             ShowErrorMessage("Slang:%s\n%s\n", shader.fileName.c_str(), slangErrorMessage.c_str());
         }
diff --git a/GigiCompilerLib/Backends/Shared.h b/GigiCompilerLib/Backends/Shared.h
index 940da405..6a7609f4 100644
--- a/GigiCompilerLib/Backends/Shared.h
+++ b/GigiCompilerLib/Backends/Shared.h
@@ -543,6 +543,8 @@ inline size_t DataFieldTypeToSize(DataFieldType type)
         case DataFieldType::Bool: return 4;
         case DataFieldType::Float4x4: return 4 * 4 * 4;
         case DataFieldType::Uint_16: return 2;
+        case DataFieldType::Int_64: return 8;
+        case DataFieldType::Uint_64: return 8;
         default:
         {
             Assert(false, "Unknown data field type: %i", type);
@@ -570,6 +572,8 @@ inline size_t DataFieldTypeComponentCount(DataFieldType type)
         case DataFieldType::Bool: return 1;
         case DataFieldType::Float4x4: return 16;
         case DataFieldType::Uint_16: return 1;
+        case DataFieldType::Int_64: return 1;
+        case DataFieldType::Uint_64: return 1;
         default:
         {
             Assert(false, "Unknown data field type: %i (%s)", type, EnumToString(type));
@@ -685,6 +689,8 @@ static std::string DataFieldTypeToShaderType(DataFieldType type)
         case DataFieldType::Bool: return "uint";
         case DataFieldType::Float4x4: return "float4x4";
         case DataFieldType::Uint_16: return "uint";
+        case DataFieldType::Int_64: return "int64_t"; // 64 bit signed shader integer type carries the _t suffix
+        case DataFieldType::Uint_64: return "uint64_t"; // 64 bit unsigned shader integer type carries the _t suffix
         case DataFieldType::Count:
         {
             Assert(false, "Invalid data field type: Count");
@@ -791,6 +797,8 @@ enum class DataFieldComponentType
 	_int,
 	_uint16_t,
 	_uint32_t,
+    _int64_t,
+    _uint64_t,
 	_float,
 };
 
@@ -835,6 +843,8 @@ inline DataFieldTypeInfoStruct DataFieldTypeInfo(DataFieldType type)
         case DataFieldType::Bool: return DATA_FIELD_TYPE_INFO(uint32_t, 1, DataFieldType::Bool);
         case DataFieldType::Float4x4: return DATA_FIELD_TYPE_INFO(float, 16, DataFieldType::Float);
         case DataFieldType::Uint_16: return DATA_FIELD_TYPE_INFO(uint16_t, 1, DataFieldType::Uint_16);
+        case DataFieldType::Int_64: return DATA_FIELD_TYPE_INFO(int64_t, 1, DataFieldType::Int_64);
+        case DataFieldType::Uint_64: return DATA_FIELD_TYPE_INFO(uint64_t, 1, DataFieldType::Uint_64);
         default:
         {
             Assert(false, "Unknown data field type: %i (%s)", type, EnumToString(type));
diff --git a/GigiCompilerLib/GigiCompilerLib.vcxproj b/GigiCompilerLib/GigiCompilerLib.vcxproj
index 35574ac4..c2f56a2d 100644
--- a/GigiCompilerLib/GigiCompilerLib.vcxproj
+++ b/GigiCompilerLib/GigiCompilerLib.vcxproj
@@ -67,7 +67,7 @@
       <AdditionalDependencies>slang.lib</AdditionalDependencies>
     </Lib>
     <Lib>
-      <AdditionalLibraryDirectories>$(SolutionDir)External\slang\bin\windows-x64\release\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>$(SolutionDir)External\slang\lib\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
     </Lib>
     <PreBuildEvent>
       <Command>
@@ -98,7 +98,7 @@
       <AdditionalDependencies>slang.lib</AdditionalDependencies>
     </Lib>
     <Lib>
-      <AdditionalLibraryDirectories>$(SolutionDir)External\slang\bin\windows-x64\release\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalLibraryDirectories>$(SolutionDir)External\slang\lib\;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
     </Lib>
     <PreBuildEvent>
       <Command>
diff --git a/GigiCompilerLib/ProcessSlang.cpp b/GigiCompilerLib/ProcessSlang.cpp
index a4885b96..e15b77b6 100644
--- a/GigiCompilerLib/ProcessSlang.cpp
+++ b/GigiCompilerLib/ProcessSlang.cpp
@@ -5,8 +5,8 @@
 
 #include "ProcessSlang.h"
 
-#include "external/slang/slang.h"
-#include "external/slang/slang-com-helper.h"
+#include "external/slang/include/slang.h"
+#include "external/slang/include/slang-com-helper.h"
 
 #include <filesystem>
 #include <vector>
@@ -156,8 +156,8 @@ class SlangIncludeHandler : public ISlangFileSystem
 };
 #endif
 
-// https://github.com/shader-slang/slang/blob/master/docs/api-users-guide.md
-bool ProcessWithSlang(std::string& source, const char* fileName, const char* stage, const char* entryPoint, const char* profile, std::string& errorMessage, const char* workingDirectory)
+// https://github.com/shader-slang/slang/tree/master/docs#readme
+bool ProcessWithSlang(std::string& source, const char* fileName, const char* stage, const char* entryPoint, const char* profile, std::string& errorMessage, const char* workingDirectory, const SlangOptions& options)
 {
     bool ret = true;
     char errorBuffer[1024];
@@ -166,9 +166,38 @@ bool ProcessWithSlang(std::string& source, const char* fileName, const char* sta
     SlangSession* session = spCreateSession(NULL);
     SlangCompileRequest* request = spCreateCompileRequest(session);
 
+    if (options.noNameMangling)
+        request->setCompileFlags(SLANG_COMPILE_FLAG_NO_MANGLING);
+
+    switch (options.optimizationLevel)
+    {
+        case GigiSlangOptimizationLevel::None: request->setOptimizationLevel(SLANG_OPTIMIZATION_LEVEL_NONE); break;
+        case GigiSlangOptimizationLevel::Default: request->setOptimizationLevel(SLANG_OPTIMIZATION_LEVEL_DEFAULT); break;
+        case GigiSlangOptimizationLevel::High: request->setOptimizationLevel(SLANG_OPTIMIZATION_LEVEL_HIGH); break;
+        case GigiSlangOptimizationLevel::Maximum: request->setOptimizationLevel(SLANG_OPTIMIZATION_LEVEL_MAXIMAL); break;
+    }
+
+    request->setLineDirectiveMode(options.lineDirectives ? SLANG_LINE_DIRECTIVE_MODE_STANDARD : SLANG_LINE_DIRECTIVE_MODE_NONE);
+
+    {
+        unsigned int diagnosticsFlags = 0;
+        if (options.warningsAsErrors)
+            diagnosticsFlags |= SLANG_DIAGNOSTIC_FLAG_TREAT_WARNINGS_AS_ERRORS;
+        if (options.verbosePaths)
+            diagnosticsFlags |= SLANG_DIAGNOSTIC_FLAG_VERBOSE_PATHS;
+        request->setDiagnosticFlags(diagnosticsFlags);
+    }
+
     // Set what type of thing we want to come out of the slang compiler
     spSetCodeGenTarget(request, SlangCompileTarget::SLANG_HLSL);
 
+    switch (options.floatingPointMode)
+    {
+        case GigiSlangFloatingPointMode::Default: request->setTargetFloatingPointMode(0, SLANG_FLOATING_POINT_MODE_DEFAULT); break;
+        case GigiSlangFloatingPointMode::Fast: request->setTargetFloatingPointMode(0, SLANG_FLOATING_POINT_MODE_FAST); break;
+        case GigiSlangFloatingPointMode::Precise: request->setTargetFloatingPointMode(0, SLANG_FLOATING_POINT_MODE_PRECISE); break;
+    }
+
     spAddSearchPath(request, workingDirectory);
 
     int translationUnitIndex = spAddTranslationUnit(request, SLANG_SOURCE_LANGUAGE_SLANG, "");
diff --git a/GigiCompilerLib/ProcessSlang.h b/GigiCompilerLib/ProcessSlang.h
index 5537e664..63974950 100644
--- a/GigiCompilerLib/ProcessSlang.h
+++ b/GigiCompilerLib/ProcessSlang.h
@@ -7,4 +7,6 @@
 
 #include <string>
 
-bool ProcessWithSlang(std::string& source, const char* fileName, const char* stage, const char* entryPoint, const char* profile, std::string& errorMessage, const char* workingDirectory);
+struct SlangOptions;
+
+bool ProcessWithSlang(std::string& source, const char* fileName, const char* stage, const char* entryPoint, const char* profile, std::string& errorMessage, const char* workingDirectory, const SlangOptions& options);
diff --git a/GigiCompilerLib/SubGraphs.cpp b/GigiCompilerLib/SubGraphs.cpp
index 9920749e..7f63f1a1 100644
--- a/GigiCompilerLib/SubGraphs.cpp
+++ b/GigiCompilerLib/SubGraphs.cpp
@@ -231,6 +231,9 @@ struct RenameReferencesVisitor
 
     bool Visit(SetVariable& setVariable, const std::string& path)
     {
+        m_renameData.UpdateNodeName(setVariable.ANode.name);
+        m_renameData.UpdateNodeName(setVariable.BNode.name);
+
         m_renameData.UpdateVariableName(setVariable.destination.name);
         m_renameData.UpdateVariableName(setVariable.AVar.name);
         m_renameData.UpdateVariableName(setVariable.BVar.name);
@@ -522,7 +525,8 @@ struct RenameChildVisitor
         // Need to handle the subgraph possibly being in a parent directory etc.
         if (s.destFileName.empty())
             s.destFileName = s.fileName;
-        s.destFileName = (std::filesystem::path(m_subGraphNode.fileName).filename().replace_extension() / s.destFileName).string();
+        std::string destFolder = std::filesystem::path(m_subGraphNode.fileName).filename().replace_extension().string() + "_" + m_subGraphNode.name;
+        s.destFileName = (std::filesystem::path(destFolder) / s.destFileName).string();
         StringReplaceAll(s.destFileName, "\\", "/");
 
         // Update where the file lives on disk
diff --git a/GigiCompilerLib/Utils.h b/GigiCompilerLib/Utils.h
index 0f3c4858..9709648f 100644
--- a/GigiCompilerLib/Utils.h
+++ b/GigiCompilerLib/Utils.h
@@ -95,6 +95,8 @@ inline std::string DataFieldTypeToHLSLType(DataFieldType type)
         case DataFieldType::Bool: return "uint";
         case DataFieldType::Float4x4: return "float4x4";
         case DataFieldType::Uint_16: return "uint";
+        case DataFieldType::Int_64: return "int64_t"; // signed 64 bit type; must not share the unsigned spelling
+        case DataFieldType::Uint_64: return "uint64_t";
         case DataFieldType::Count:
         {
             Assert(false, "Invalid data field type: Count");
diff --git a/GigiCompilerLib/gigiinterpreter.cpp b/GigiCompilerLib/gigiinterpreter.cpp
index 98417580..d2d08f57 100644
--- a/GigiCompilerLib/gigiinterpreter.cpp
+++ b/GigiCompilerLib/gigiinterpreter.cpp
@@ -77,6 +77,38 @@ void VariableStorage::SetFromString(const char* text, size_t count, uint16_t* va
 	);
 }
 
+void VariableStorage::SetFromString(const char* text, size_t count, int64_t* value) // Parses each token ParseCSV::ForEachValue reports for `text` as a signed 64 bit integer into value[0..count)
+{
+	ParseCSV::ForEachValue(text, false,
+		[&](int tokenIndex, const char* token)
+		{
+			if (tokenIndex >= 0 && static_cast<size_t>(tokenIndex) < count) // tokens beyond `count` are ignored
+			{
+				int64_t temp = 0; // a token that fails to parse stores 0 rather than an indeterminate value
+				sscanf_s(token, "%lli", &temp);
+				value[tokenIndex] = temp;
+			}
+			return true;
+		}
+	);
+}
+
+void VariableStorage::SetFromString(const char* text, size_t count, uint64_t* value) // Parses each token ParseCSV::ForEachValue reports for `text` as an unsigned 64 bit integer into value[0..count)
+{
+	ParseCSV::ForEachValue(text, false,
+		[&](int tokenIndex, const char* token)
+		{
+			if (tokenIndex >= 0 && static_cast<size_t>(tokenIndex) < count) // tokens beyond `count` are ignored
+			{
+				uint64_t temp = 0; // a token that fails to parse stores 0 rather than an indeterminate value
+				sscanf_s(token, "%llu", &temp);
+				value[tokenIndex] = temp;
+			}
+			return true;
+		}
+	);
+}
+
 std::string VariableStorage::GetAsString(size_t count, int* value)
 {
 	std::string ret;
@@ -145,4 +177,33 @@ std::string VariableStorage::GetAsString(size_t count, uint16_t* value)
 	}
 
 	return ret;
-}
\ No newline at end of file
+}
+
+std::string VariableStorage::GetAsString(size_t count, int64_t* value)
+{
+	// Formats the first `count` entries of `value` as signed decimals joined by ","
+	std::string joined;
+	char formatted[256];
+	for (size_t index = 0; index != count; ++index)
+	{
+		const char* separator = (index == 0) ? "" : ",";
+		sprintf_s(formatted, "%s%lli", separator, (int64_t)value[index]);
+		joined.append(formatted);
+	}
+	return joined;
+}
+
+std::string VariableStorage::GetAsString(size_t count, uint64_t* value)
+{
+	// Formats the first `count` entries of `value` as unsigned decimals joined by ","
+	std::string joined;
+	char formatted[256];
+	for (size_t index = 0; index != count; ++index)
+	{
+		const char* separator = (index == 0) ? "" : ",";
+		sprintf_s(formatted, "%s%llu", separator, (uint64_t)value[index]);
+		joined.append(formatted);
+	}
+	return joined;
+}
+
diff --git a/GigiCompilerLib/gigiinterpreter.h b/GigiCompilerLib/gigiinterpreter.h
index e5fc8c46..107ce873 100644
--- a/GigiCompilerLib/gigiinterpreter.h
+++ b/GigiCompilerLib/gigiinterpreter.h
@@ -32,6 +32,15 @@ struct VariableStorage
 		void* dflt = nullptr;
 
 		size_t size = 0;
+
+		// If true, the variable was changed in the viewer and should not use the default from the editor
+		bool overrideValue = false;
+		bool systemValue = false;
+
+		bool isDefault() const // True when the `size` bytes at `value` equal the `size` bytes at `dflt`
+		{
+			return size == 0 || memcmp(value, dflt, size) == 0; // empty storage (value/dflt still nullptr) counts as default; memcmp must not be handed null pointers
+		}
 	};
 
 	void Clear() { m_storage.Clear(); }
@@ -41,10 +50,12 @@ struct VariableStorage
 	static void SetFromString(const char* text, size_t count, float* value);
 	static void SetFromString(const char* text, size_t count, bool* value);
 	static void SetFromString(const char* text, size_t count, uint16_t* value);
+	static void SetFromString(const char* text, size_t count, int64_t* value);
+	static void SetFromString(const char* text, size_t count, uint64_t* value);
 
 private:
 	template<typename T>
-	Storage Get(const Variable& variable, size_t count, T* dummy)
+	Storage Get(const RenderGraph& renderGraph, const Variable& variable, size_t count, T* dummy)
 	{
 		// Get or create variable storage.
 		// Allocate double the memory needed because we need to store both the value and the default value
@@ -57,10 +68,35 @@ struct VariableStorage
 		Storage ret;
 		ret.size  = sizeof(T) * count;
 		ret.value = storage.data();
-		ret.dflt  = &((char*)ret.value)[ret.size];
+		ret.dflt = &((char*)ret.value)[ret.size];
 
 		// parse the dflt and set the value to the dflt if it's new storage
-		SetFromString(variable.dflt.c_str(), count, (T*)ret.dflt);
+		// If it's an enum, find the numerical value and set the default to that
+		if (variable.enumIndex >= 0)
+		{
+			const Enum& e = renderGraph.enums[variable.enumIndex];
+			size_t foundIndex = 0;
+
+			for (size_t itemIndex = 0; itemIndex < e.items.size(); ++itemIndex)
+			{
+				std::string scopedLabel = e.name + "::" + e.items[itemIndex].label;
+
+				if (!strcmp(variable.dflt.c_str(), e.items[itemIndex].label.c_str()) ||
+					!strcmp(variable.dflt.c_str(), scopedLabel.c_str()))
+				{
+					foundIndex = itemIndex;
+					break;
+				}
+			}
+
+			char buffer[64];
+			sprintf_s(buffer, "%i", (int)foundIndex);
+			SetFromString(buffer, count, (T*)ret.dflt);
+		}
+		else
+		{
+			SetFromString(variable.dflt.c_str(), count, (T*)ret.dflt);
+		}
 		if (newStorage)
 			memcpy(ret.value, ret.dflt, ret.size);
 
@@ -73,170 +109,193 @@ struct VariableStorage
 	static std::string GetAsString(size_t count, float* value);
 	static std::string GetAsString(size_t count, bool* value);
 	static std::string GetAsString(size_t count, uint16_t* value);
+	static std::string GetAsString(size_t count, int64_t* value);
+	static std::string GetAsString(size_t count, uint64_t* value);
 
 	template<typename LAMBDA>
-	void CallFor_Int(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Int(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = int;
 		static const size_t TheCount = 1;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Int2(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Int2(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = int;
 		static const size_t TheCount = 2;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Int3(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Int3(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = int;
 		static const size_t TheCount = 3;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Int4(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Int4(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = int;
 		static const size_t TheCount = 4;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Uint(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Uint(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = unsigned int;
 		static const size_t TheCount = 1;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Uint2(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Uint2(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = unsigned int;
 		static const size_t TheCount = 2;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Uint3(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Uint3(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = unsigned int;
 		static const size_t TheCount = 3;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Uint4(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Uint4(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = unsigned int;
 		static const size_t TheCount = 4;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Float(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Float(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = float;
 		static const size_t TheCount = 1;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Float2(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Float2(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = float;
 		static const size_t TheCount = 2;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Float3(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Float3(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = float;
 		static const size_t TheCount = 3;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Float4(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Float4(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = float;
 		static const size_t TheCount = 4;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Bool(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Bool(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = bool;
 		static const size_t TheCount = 1;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Float4x4(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Float4x4(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = float;
 		static const size_t TheCount = 16;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Uint_16(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Uint_16(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		using TheType				 = uint16_t;
 		static const size_t TheCount = 1;
 
-		Storage storage = Get(variable, TheCount, (TheType*)nullptr);
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
+		lambda(TheCount, (TheType*)storage.value);
+	}
+
+
+	template<typename LAMBDA>
+	void CallFor_Int_64(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
+	{
+		using TheType = int64_t;
+		static const size_t TheCount = 1;
+
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
 		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallFor_Count(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Uint_64(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
+		using TheType = uint64_t;
+		static const size_t TheCount = 1;
+
+		Storage storage = Get(renderGraph, variable, TheCount, (TheType*)nullptr);
+		lambda(TheCount, (TheType*)storage.value);
 	}
 
 	template<typename LAMBDA>
-	void CallForVariable(const Variable& variable, const LAMBDA& lambda)
+	void CallFor_Count(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
+	{
+	}
+
+	template<typename LAMBDA>
+	void CallForVariable(const RenderGraph& renderGraph, const Variable& variable, const LAMBDA& lambda)
 	{
 		switch (variable.type)
 		{
 #include "external/df_serialize/_common.h"
 #define ENUM_ITEM(_NAME, _DESCRIPTION) \
-	case DataFieldType::_NAME: CallFor_##_NAME(variable, lambda); break;
+	case DataFieldType::_NAME: CallFor_##_NAME(renderGraph, variable, lambda); break;
 // clang-format off
 #include "external/df_serialize/_fillunsetdefines.h"
 #include "Schemas/DataFieldTypes.h"
@@ -245,19 +304,19 @@ struct VariableStorage
 	}
 
 public:
-	void SetValueFromString(const Variable& variable, const char* textValue)
+	void SetValueFromString(const RenderGraph& renderGraph, const Variable& variable, const char* textValue)
 	{
-		CallForVariable(variable,
+		CallForVariable(renderGraph, variable,
 			[&](size_t count, auto* value)
 			{
 				SetFromString(textValue, count, value);
 			});
 	}
 
-	std::string GetValueAsString(const Variable& variable)
+	std::string GetValueAsString(const RenderGraph& renderGraph, const Variable& variable)
 	{
 		std::string ret;
-		CallForVariable(variable,
+		CallForVariable(renderGraph, variable,
 			[&](size_t count, auto* value)
 			{
 				ret = GetAsString(count, value);
@@ -265,13 +324,13 @@ struct VariableStorage
 		return ret;
 	}
 
-	Storage Get(const Variable& variable)
+	Storage Get(const RenderGraph& renderGraph, const Variable& variable)
 	{
 		Storage ret;
-		CallForVariable(variable,
+		CallForVariable(renderGraph, variable,
 			[&](size_t count, auto* value)
 			{
-				ret = Get(variable, count, value);
+				ret = Get(renderGraph, variable, count, value);
 			});
 		return ret;
 	}
@@ -577,6 +636,16 @@ class IGigiInterpreter
 				DoSetVarOperation<uint16_t>(setVar, ABytes, BBytes, destBytes, typeInfo.componentCount);
 				break;
 			}
+            case DataFieldType::Int_64:
+            {
+                DoSetVarOperation<int64_t>(setVar, ABytes, BBytes, destBytes, typeInfo.componentCount);
+                break;
+            }
+			case DataFieldType::Uint_64:
+			{
+				DoSetVarOperation<uint64_t>(setVar, ABytes, BBytes, destBytes, typeInfo.componentCount);
+				break;
+			}
 			case DataFieldType::Uint:
 			{
 				DoSetVarOperation<uint32_t>(setVar, ABytes, BBytes, destBytes, typeInfo.componentCount);
@@ -815,6 +884,10 @@ class IGigiInterpreter
 	{
 		return m_runtimeVariables[index];
 	}
+	RuntimeVariable& GetRuntimeVariable(int index) // Non-const overload: returns a mutable reference to the runtime variable stored at `index`
+	{
+		return m_runtimeVariables[index]; // NOTE(review): index is used as-is; presumably callers pass a valid index - confirm
+	}
 
 	int GetRuntimeVariableIndex(const char* name) const
 	{
@@ -836,7 +909,7 @@ class IGigiInterpreter
 
 	std::string GetRuntimeVariableValueAsString(int index)
 	{
-		return m_variableStorage.GetValueAsString(*m_runtimeVariables[index].variable);
+		return m_variableStorage.GetValueAsString(m_renderGraph, *m_runtimeVariables[index].variable);
 	}
 
 	void SetRuntimeVariableFromString(int index, const char* textValue)
@@ -851,12 +924,15 @@ class IGigiInterpreter
 			{
 				char valueIntString[256];
 				sprintf_s(valueIntString, "%i", valueInt);
-				m_variableStorage.SetValueFromString(*m_runtimeVariables[index].variable, valueIntString);
+				m_variableStorage.SetValueFromString(m_renderGraph, *m_runtimeVariables[index].variable, valueIntString);
 				return;
 			}
 		}
 
-		m_variableStorage.SetValueFromString(*m_runtimeVariables[index].variable, textValue);
+		m_variableStorage.SetValueFromString(m_renderGraph, *m_runtimeVariables[index].variable, textValue);
+
+		// The variable was in the file means the user overrides the setting
+		m_runtimeVariables[index].storage.overrideValue = true;
 	}
 
 	void SetRuntimeVariableToDflt(int index)
@@ -1010,7 +1086,7 @@ class IGigiInterpreter
 		for (size_t i = 0; i < renderGraph.variables.size(); ++i)
 		{
 			m_runtimeVariables[i].variable = &renderGraph.variables[i];
-			m_runtimeVariables[i].storage  = m_variableStorage.Get(*m_runtimeVariables[i].variable);
+			m_runtimeVariables[i].storage  = m_variableStorage.Get(m_renderGraph, *m_runtimeVariables[i].variable);
 		}
 	}
 
diff --git a/GigiCompilerLib/structParser.cpp b/GigiCompilerLib/structParser.cpp
index 9335ccb4..42520d33 100644
--- a/GigiCompilerLib/structParser.cpp
+++ b/GigiCompilerLib/structParser.cpp
@@ -32,6 +32,8 @@ DataFieldType getDataFieldType(const std::string& value)
 	if (value == "bool") return DataFieldType::Bool;
 	if (value == "float4x4") return DataFieldType::Float4x4;
 	if (value == "uint16") return DataFieldType::Uint_16;
+	if (value == "int64_t") return DataFieldType::Int_64;
+	if (value == "uint64_t") return DataFieldType::Uint_64;
 
 	// not recognized
 	return DataFieldType::Count;
diff --git a/GigiEdit/EditorNodes.h b/GigiEdit/EditorNodes.h
index 707be95e..29378fc0 100644
--- a/GigiEdit/EditorNodes.h
+++ b/GigiEdit/EditorNodes.h
@@ -335,58 +335,68 @@ inline std::vector<NodePinInfo> GetNodePins(const RenderGraph& renderGraph, Rend
     return ret;
 }
 
-inline std::vector<NodePinInfo> GetNodePins(const RenderGraph& renderGraph, RenderGraphNode_Action_ComputeShader& node)
+template <typename NODE_ACTION_SHADER> // Works for any shader action node type that exposes `connections` and `linkProperties`
+inline size_t RebuildShaderNodePins(const RenderGraph& renderGraph, int shaderIndex, NODE_ACTION_SHADER& node, size_t pinOffset, std::vector<NodePinInfo>& ret) // Orders the node's connections to follow the shader's resources (starting at pinOffset), appends one pin per resource to ret, returns the number of resources processed
 {
-    std::vector<NodePinInfo> ret;
-
-    // Get the shader the shader reference
-    int shaderIndex = GetShaderIndexByName(renderGraph, ShaderType::Compute, node.shader.name.c_str());
+    size_t numNewConnections = 0; // stays 0 when the shader was not found
 
     // It's ok if the shader wasn't found. it means the user isn't done editing
     if (shaderIndex != -1)
     {
-        // make sure there is a connection on the node for each resource
         const Shader& shader = renderGraph.shaders[shaderIndex];
-        for (const ShaderResource& resource : shader.resources)
+        node.connections.reserve(node.connections.size() + shader.resources.size()); // pins created below keep pointers into node.connections, so the inserts in this loop must not reallocate it. NOTE(review): callers invoking this several times per node should confirm pins from earlier calls stay valid
+        for (size_t dstIdx = 0; dstIdx < shader.resources.size(); dstIdx++)
         {
+            size_t dstPinIdx = pinOffset + dstIdx; // slot where this resource's connection must end up
+            const ShaderResource& resource = shader.resources[dstIdx];
+
             bool found = false;
-            for (NodePinConnection& connection : node.connections)
+            for (size_t srcPinIdx = pinOffset; srcPinIdx < node.connections.size(); srcPinIdx++)
             {
-                if (connection.srcPin == resource.name)
+                if (node.connections[srcPinIdx].srcPin == resource.name)
                 {
                     found = true;
+                    std::swap(node.connections[dstPinIdx], node.connections[srcPinIdx]); // move the existing connection into its resource-ordered slot
+                    std::swap(node.linkProperties[dstPinIdx], node.linkProperties[srcPinIdx]); // NOTE(review): assumes linkProperties is at least as long as connections - confirm
                     break;
                 }
             }
 
             if (!found)
             {
-                node.connections.resize(node.connections.size() + 1);
-                node.connections.rbegin()->srcPin = resource.name;
+                NodePinConnection connection{};
+                connection.srcPin = resource.name;
+                LinkProperties link{};
+
+                node.connections.insert(node.connections.begin() + dstPinIdx, connection); // new, unconnected entry for a resource the node did not know yet
+                node.linkProperties.insert(node.linkProperties.begin() + dstPinIdx, link);
             }
-        }
 
-        // make a pin for each resource
-        for (const ShaderResource& resource : shader.resources)
-        {
+            // create pin:
             NodePinInfo pin;
             pin.name = resource.name;
-
-            for (NodePinConnection& connection : node.connections)
-            {
-                if (connection.srcPin == resource.name)
-                {
-                    pin.inputNode = &connection.dstNode;
-                    pin.inputNodePin = &connection.dstPin;
-                    pin.accessLabel = ShaderResourceTypeIsReadOnly(resource.access) ? " (R)" : " (RW)";
-                    break;
-                }
-            }
-
+            pin.inputNode = &node.connections[dstPinIdx].dstNode; // points into node.connections; only valid while that vector is not reallocated
+            pin.inputNodePin = &node.connections[dstPinIdx].dstPin;
+            pin.accessLabel = ShaderResourceTypeIsReadOnly(resource.access) ? " (R)" : " (RW)";
             ret.push_back(pin);
+
+            numNewConnections++;
         }
     }
 
+    return numNewConnections;
+}
+
+inline std::vector<NodePinInfo> GetNodePins(const RenderGraph& renderGraph, RenderGraphNode_Action_ComputeShader& node)
+{
+    std::vector<NodePinInfo> ret;
+
+    // Get the shader the shader reference
+    int shaderIndex = GetShaderIndexByName(renderGraph, ShaderType::Compute, node.shader.name.c_str());
+
+    size_t numNewConnections = RebuildShaderNodePins<RenderGraphNode_Action_ComputeShader>(renderGraph, shaderIndex, node, 0, ret);
+    node.connections.resize(numNewConnections);
+
     // make a pin for indirect dispatch
     NodePinInfo pin;
     pin.name = "indirectBuffer";
@@ -404,51 +414,8 @@ inline std::vector<NodePinInfo> GetNodePins(const RenderGraph& renderGraph, Rend
 
     // Get the shader the shader reference
     int shaderIndex = GetShaderIndexByName(renderGraph, ShaderType::RTRayGen, node.shader.name.c_str());
-
-    // It's ok if the shader wasn't found. it means the user isn't done editing
-    if (shaderIndex != -1)
-    {
-        // make sure there is a connection on the node for each resource
-        const Shader& shader = renderGraph.shaders[shaderIndex];
-        for (const ShaderResource& resource : shader.resources)
-        {
-            bool found = false;
-            for (NodePinConnection& connection : node.connections)
-            {
-                if (connection.srcPin == resource.name)
-                {
-                    found = true;
-                    break;
-                }
-            }
-
-            if (!found)
-            {
-                node.connections.resize(node.connections.size() + 1);
-                node.connections.rbegin()->srcPin = resource.name;
-            }
-        }
-
-        // make a pin for each resource
-        for (const ShaderResource& resource : shader.resources)
-        {
-            NodePinInfo pin;
-            pin.name = resource.name;
-
-            for (NodePinConnection& connection : node.connections)
-            {
-                if (connection.srcPin == resource.name)
-                {
-                    pin.inputNode = &connection.dstNode;
-                    pin.inputNodePin = &connection.dstPin;
-                    pin.accessLabel = ShaderResourceTypeIsReadOnly(resource.access) ? " (R)" : " (RW)";
-                    break;
-                }
-            }
-
-            ret.push_back(pin);
-        }
-    }
+    size_t numNewConnections = RebuildShaderNodePins<RenderGraphNode_Action_RayShader>(renderGraph, shaderIndex, node, 0, ret);
+    node.connections.resize(numNewConnections);
 
     return ret;
 }
@@ -477,210 +444,37 @@ inline std::vector<NodePinInfo> GetNodePins(const RenderGraph& renderGraph, Rend
 {
     std::vector<NodePinInfo> ret;
 
+    size_t numNewConnections = 0;
     // Vertex Shader Pins
     {
         // Get the shader the shader reference
         int shaderIndex = GetShaderIndexByName(renderGraph, ShaderType::Vertex, node.vertexShader.name.c_str());
-
-        // It's ok if the shader wasn't found. it means the user isn't done editing
-        if (shaderIndex != -1)
-        {
-            // make sure there is a connection on the node for each resource
-            const Shader& shader = renderGraph.shaders[shaderIndex];
-            for (const ShaderResource& resource : shader.resources)
-            {
-                bool found = false;
-                for (NodePinConnection& connection : node.connections)
-                {
-                    if (connection.srcPin == resource.name)
-                    {
-                        found = true;
-                        break;
-                    }
-                }
-
-                if (!found)
-                {
-                    node.connections.resize(node.connections.size() + 1);
-                    node.connections.rbegin()->srcPin = resource.name;
-                }
-            }
-
-            // make a pin for each resource
-            for (const ShaderResource& resource : shader.resources)
-            {
-                NodePinInfo pin;
-                pin.name = resource.name;
-
-                for (NodePinConnection& connection : node.connections)
-                {
-                    if (connection.srcPin == resource.name)
-                    {
-                        pin.inputNode = &connection.dstNode;
-                        pin.inputNodePin = &connection.dstPin;
-                        pin.accessLabel = ShaderResourceTypeIsReadOnly(resource.access) ? " (R)" : " (RW)";
-                        break;
-                    }
-                }
-
-                ret.push_back(pin);
-            }
-        }
+        numNewConnections += RebuildShaderNodePins<RenderGraphNode_Action_DrawCall>(renderGraph, shaderIndex, node, numNewConnections, ret);
     }
 
     // Pixel Shader Pins
     {
         // Get the shader the shader reference
         int shaderIndex = GetShaderIndexByName(renderGraph, ShaderType::Pixel, node.pixelShader.name.c_str());
-
-        // It's ok if the shader wasn't found. it means the user isn't done editing
-        if (shaderIndex != -1)
-        {
-            // make sure there is a connection on the node for each resource
-            const Shader& shader = renderGraph.shaders[shaderIndex];
-            for (const ShaderResource& resource : shader.resources)
-            {
-                bool found = false;
-                for (NodePinConnection& connection : node.connections)
-                {
-                    if (connection.srcPin == resource.name)
-                    {
-                        found = true;
-                        break;
-                    }
-                }
-
-                if (!found)
-                {
-                    node.connections.resize(node.connections.size() + 1);
-                    node.connections.rbegin()->srcPin = resource.name;
-                }
-            }
-
-            // make a pin for each resource
-            for (const ShaderResource& resource : shader.resources)
-            {
-                NodePinInfo pin;
-                pin.name = resource.name;
-
-                for (NodePinConnection& connection : node.connections)
-                {
-                    if (connection.srcPin == resource.name)
-                    {
-                        pin.inputNode = &connection.dstNode;
-                        pin.inputNodePin = &connection.dstPin;
-                        pin.accessLabel = ShaderResourceTypeIsReadOnly(resource.access) ? " (R)" : " (RW)";
-                        break;
-                    }
-                }
-
-                ret.push_back(pin);
-            }
-        }
+        numNewConnections += RebuildShaderNodePins<RenderGraphNode_Action_DrawCall>(renderGraph, shaderIndex, node, numNewConnections, ret);
     }
 
     // Amplification Shader Pins
     {
         // Get the shader the shader reference
         int shaderIndex = GetShaderIndexByName(renderGraph, ShaderType::Amplification, node.amplificationShader.name.c_str());
-
-        // It's ok if the shader wasn't found. it means the user isn't done editing
-        if (shaderIndex != -1)
-        {
-            // make sure there is a connection on the node for each resource
-            const Shader& shader = renderGraph.shaders[shaderIndex];
-            for (const ShaderResource& resource : shader.resources)
-            {
-                bool found = false;
-                for (NodePinConnection& connection : node.connections)
-                {
-                    if (connection.srcPin == resource.name)
-                    {
-                        found = true;
-                        break;
-                    }
-                }
-
-                if (!found)
-                {
-                    node.connections.resize(node.connections.size() + 1);
-                    node.connections.rbegin()->srcPin = resource.name;
-                }
-            }
-
-            // make a pin for each resource
-            for (const ShaderResource& resource : shader.resources)
-            {
-                NodePinInfo pin;
-                pin.name = resource.name;
-
-                for (NodePinConnection& connection : node.connections)
-                {
-                    if (connection.srcPin == resource.name)
-                    {
-                        pin.inputNode = &connection.dstNode;
-                        pin.inputNodePin = &connection.dstPin;
-                        pin.accessLabel = ShaderResourceTypeIsReadOnly(resource.access) ? " (R)" : " (RW)";
-                        break;
-                    }
-                }
-
-                ret.push_back(pin);
-            }
-        }
+        numNewConnections += RebuildShaderNodePins<RenderGraphNode_Action_DrawCall>(renderGraph, shaderIndex, node, numNewConnections, ret);
     }
 
     // Mesh Shader Pins
     {
         // Get the shader the shader reference
         int shaderIndex = GetShaderIndexByName(renderGraph, ShaderType::Mesh, node.meshShader.name.c_str());
-
-        // It's ok if the shader wasn't found. it means the user isn't done editing
-        if (shaderIndex != -1)
-        {
-            // make sure there is a connection on the node for each resource
-            const Shader& shader = renderGraph.shaders[shaderIndex];
-            for (const ShaderResource& resource : shader.resources)
-            {
-                bool found = false;
-                for (NodePinConnection& connection : node.connections)
-                {
-                    if (connection.srcPin == resource.name)
-                    {
-                        found = true;
-                        break;
-                    }
-                }
-
-                if (!found)
-                {
-                    node.connections.resize(node.connections.size() + 1);
-                    node.connections.rbegin()->srcPin = resource.name;
-                }
-            }
-
-            // make a pin for each resource
-            for (const ShaderResource& resource : shader.resources)
-            {
-                NodePinInfo pin;
-                pin.name = resource.name;
-
-                for (NodePinConnection& connection : node.connections)
-                {
-                    if (connection.srcPin == resource.name)
-                    {
-                        pin.inputNode = &connection.dstNode;
-                        pin.inputNodePin = &connection.dstPin;
-                        pin.accessLabel = ShaderResourceTypeIsReadOnly(resource.access) ? " (R)" : " (RW)";
-                        break;
-                    }
-                }
-
-                ret.push_back(pin);
-            }
-        }
+        numNewConnections += RebuildShaderNodePins<RenderGraphNode_Action_DrawCall>(renderGraph, shaderIndex, node, numNewConnections, ret);
     }
 
+    node.connections.resize(numNewConnections);
+
     // Shading Rate Image
     NodePinInfo pin;
     pin.name = "shadingRateImage";
diff --git a/GigiEdit/MakeUI.h b/GigiEdit/MakeUI.h
index b6610d9b..fb30eb1d 100644
--- a/GigiEdit/MakeUI.h
+++ b/GigiEdit/MakeUI.h
@@ -2164,7 +2164,31 @@ inline UIOverrideResult ShowUIOverride<Shader>(RenderGraph& renderGraph, uint64_
         // If a resource was added, let it react
         if (value.resources.size() > oldResources.size())
         {
-            OnShaderResourceAdd(value, value.resources.rbegin()->name);
+            // find which index was added from the oldResources array. Note that it may have been the last one.
+            int index = 0;
+
+            while (index < value.resources.size()
+                && index < oldResources.size()
+                && value.resources[index].name == oldResources[index].name)
+                index++;
+
+            // make sure name is unique
+            bool isUnique = false;
+            while (!isUnique)
+            {
+                isUnique = true;
+                for (const ShaderResource& old : oldResources)
+                {
+                    if (old.name == value.resources[index].name)
+                    {
+                        isUnique = false; // wasn't unique, check the version with the suffix
+                        value.resources[index].name += " Copy";
+                        break;
+                    }
+                }
+            }
+
+            OnShaderResourceAdd(value, value.resources[index].name);
         }
         // If a resource was deleted, we need to unhook everything that was plugged into that pin
         else if (value.resources.size() < oldResources.size())
@@ -2189,8 +2213,20 @@ inline UIOverrideResult ShowUIOverride<Shader>(RenderGraph& renderGraph, uint64_
                     if (index + 1 < value.resources.size() && value.resources[index + 1].name != oldResources[index + 1].name)
                         break;
 
-                    // otherwise it's a rename
-                    OnShaderResourceRename(value, oldResources[index].name, value.resources[index].name);
+                    // otherwise it's a rename -- don't allow duplicates:
+                    // a clashing name is reverted to the old one and the rename callback is not called
+                    bool isUnique = true;
+                    for (const ShaderResource& old : oldResources)
+                    {
+                        if (old.name == value.resources[index].name)
+                        {
+                            isUnique = false; // wasn't unique, check the version with the suffix
+                            value.resources[index].name = oldResources[index].name;
+                            break;
+                        }
+                    }
+                    if (isUnique)
+                        OnShaderResourceRename(value, oldResources[index].name, value.resources[index].name);
                     break;
                 }
             }
diff --git a/GigiEdit/main.cpp b/GigiEdit/main.cpp
index 7e1be8d6..144da886 100644
--- a/GigiEdit/main.cpp
+++ b/GigiEdit/main.cpp
@@ -161,14 +161,6 @@ void OnShaderResourceDelete(const Shader& shader, const std::string& resourceNam
                 if (node.actionComputeShader.shader.name != shader.name)
                     continue;
 
-                node.actionComputeShader.connections.erase(
-                    std::remove_if(
-                        node.actionComputeShader.connections.begin(),
-                        node.actionComputeShader.connections.end(),
-                        [&](const NodePinConnection& connection) { return connection.srcPin == resourceName; }),
-                    node.actionComputeShader.connections.end()
-                );
-
                 shaderNodes.push_back(node.actionComputeShader.name);
                 break;
             }
@@ -176,18 +168,20 @@ void OnShaderResourceDelete(const Shader& shader, const std::string& resourceNam
             {
                 if (node.actionRayShader.shader.name != shader.name)
                     continue;
-
-                node.actionRayShader.connections.erase(
-                    std::remove_if(
-                        node.actionRayShader.connections.begin(),
-                        node.actionRayShader.connections.end(),
-                        [&](const NodePinConnection& connection) { return connection.srcPin == resourceName; }),
-                    node.actionRayShader.connections.end()
-                );
-
                 shaderNodes.push_back(node.actionRayShader.name);
                 break;
             }
+            case RenderGraphNode::c_index_actionDrawCall:
+            {
+                if (node.actionDrawCall.pixelShader.name != shader.name
+                    && node.actionDrawCall.vertexShader.name != shader.name
+                    && node.actionDrawCall.amplificationShader.name != shader.name
+                    && node.actionDrawCall.meshShader.name != shader.name)
+                    continue;
+
+                shaderNodes.push_back(node.actionDrawCall.name);
+                break;
+            }
         }
     }
 
@@ -253,7 +247,10 @@ void OnShaderResourceRename(const Shader& shader, const std::string& oldName, co
             }
             case RenderGraphNode::c_index_actionDrawCall:
             {
-                if (node.actionDrawCall.vertexShader.name != shader.name && node.actionDrawCall.pixelShader.name != shader.name)
+                if (node.actionDrawCall.vertexShader.name != shader.name
+                    && node.actionDrawCall.pixelShader.name != shader.name
+                    && node.actionDrawCall.meshShader.name != shader.name
+                    && node.actionDrawCall.amplificationShader.name != shader.name)
                     continue;
                 shaderNodes.push_back(shader.name);
 
@@ -660,8 +657,12 @@ struct Example :
     {
 		if (!g_renderGraphDirty || AskForConfirmation("You have unsaved changes, are you sure you want to proceed?"))
 		{
+			// e.g. "C:\\gitlab\\gigi"
+			std::filesystem::path defaultPath = std::filesystem::current_path();
+			std::string exploreLocation = (defaultPath / "Techniques").u8string();
+
 			nfdchar_t* outPath = nullptr;
-			if (NFD_OpenDialog("gg", "Techniques", &outPath) == NFD_OKAY)
+			if (NFD_OpenDialog("gg", exploreLocation.c_str(), &outPath) == NFD_OKAY)
 				LoadJSONFile(outPath);
 		}
     }
@@ -1493,55 +1494,6 @@ struct Example :
 					SetNodeName(node, newNodeName);
 				}
 
-				// custom UI for node types
-				switch (node._index)
-				{
-					// an edit and explore button for the shader
-				case RenderGraphNode::c_index_actionComputeShader:
-				case RenderGraphNode::c_index_actionRayShader:
-				{
-					int shaderIndex;
-					if (node._index == RenderGraphNode::c_index_actionComputeShader)
-						shaderIndex = GetShaderIndexByName(g_renderGraph, ShaderType::Compute, node.actionComputeShader.shader.name.c_str());
-					else
-						shaderIndex = GetShaderIndexByName(g_renderGraph, ShaderType::RTRayGen, node.actionRayShader.shader.name.c_str());
-					if (shaderIndex < 0)
-						break;
-
-					if (g_renderGraph.shaders[shaderIndex].fileName.empty())
-						break;
-
-					ImGui::Text("Shader:");
-					ImGui::SameLine();
-					ImGui::InputText("##ShaderFileName", (char*)g_renderGraph.shaders[shaderIndex].fileName.c_str(), g_renderGraph.shaders[shaderIndex].fileName.length(), ImGuiInputTextFlags_ReadOnly);
-
-					std::filesystem::path defaultPath = std::filesystem::path(g_renderGraphFileName).remove_filename();
-
-					std::string exploreLocation;
-
-					if (g_renderGraph.shaders[shaderIndex].fileName.empty())
-						exploreLocation = defaultPath.u8string();
-					else
-						exploreLocation = (defaultPath / std::filesystem::path(g_renderGraph.shaders[shaderIndex].fileName)).remove_filename().u8string();
-					exploreLocation = std::filesystem::absolute(std::filesystem::path(exploreLocation)).u8string();
-
-					if (ImGui::Button("Edit"))
-					{
-						std::string fullFileName = (defaultPath / std::filesystem::path(g_renderGraph.shaders[shaderIndex].fileName)).u8string();
-						ShellExecuteA(NULL, "open", fullFileName.c_str(), NULL, NULL, SW_SHOWDEFAULT);
-					}
-
-					ImGui::SameLine();
-
-					if (ImGui::Button("Explore"))
-					{
-						ShellExecuteA(NULL, "explore", exploreLocation.c_str(), NULL, NULL, SW_SHOWDEFAULT);
-					}
-
-					break;
-				}
-				}
-
 				ImGui::Unindent();
             }
         }
diff --git a/GigiViewerDX12/Interpreter/GigiInterpreterPreviewWindowDX12.h b/GigiViewerDX12/Interpreter/GigiInterpreterPreviewWindowDX12.h
index c5bffff8..a8925edd 100644
--- a/GigiViewerDX12/Interpreter/GigiInterpreterPreviewWindowDX12.h
+++ b/GigiViewerDX12/Interpreter/GigiInterpreterPreviewWindowDX12.h
@@ -584,6 +584,11 @@ class GigiInterpreterPreviewWindowDX12 : public IGigiInterpreter<RuntimeTypes>
 		return m_dxrDevice != nullptr;
 	}
 
+	D3D12_VARIABLE_SHADING_RATE_TIER VRSSupportLevel() const // variable rate shading tier, read from m_dx12_options6
+	{
+		return m_dx12_options6.VariableShadingRateTier; // NOTE(review): presumably filled once via CheckFeatureSupport - confirm
+	}
+
 	const D3D12_FEATURE_DATA_D3D12_OPTIONS5& GetOptions5() const
 	{
 		return m_dx12_options5;
diff --git a/GigiViewerDX12/Interpreter/GigiInterpreterPreviewWindowDX12_UI.cpp b/GigiViewerDX12/Interpreter/GigiInterpreterPreviewWindowDX12_UI.cpp
index f5cafc37..ee6a7249 100644
--- a/GigiViewerDX12/Interpreter/GigiInterpreterPreviewWindowDX12_UI.cpp
+++ b/GigiViewerDX12/Interpreter/GigiInterpreterPreviewWindowDX12_UI.cpp
@@ -11,6 +11,12 @@
 #include <unordered_set>
 // clang-format on
 
+// Moves the cursor down by FramePadding.y so a following ImGui::Text() aligns with UI that is taller because of FramePadding
+void ShowUI_StartGap()
+{
+	ImGui::SetCursorPosY(ImGui::GetCursorPosY() + ImGui::GetStyle().FramePadding.y);
+}
+
 static void ShowToolTip(const char* tooltip)
 {
 	if (!tooltip || !tooltip[0])
@@ -519,6 +525,7 @@ static void ShowUI_Bool(const RenderGraph& renderGraph, const bool paused, const
 
 static void ShowUI_Float4x4(const RenderGraph& renderGraph, const bool paused, const Variable& variable, void* storage)
 {
+	ShowUI_StartGap();
 	ImGui::Text(variable.originalName.c_str());
 	ImGui::InputFloat4("row 0", (float*)storage + 0);
 	ImGui::InputFloat4("row 1", (float*)storage + 4);
@@ -567,6 +574,89 @@ static void ShowUI_Uint_16(const RenderGraph& renderGraph, const bool paused, co
 	v2[0] = v[0];
 }
 
+static void ShowUI_Int_64(const RenderGraph& renderGraph, const bool paused, const Variable& variable, void* storage)
+{
+	// Shows a drag / slider / input widget (chosen by the UI hint) that edits the int64_t in storage in place.
+	// Going through a 32-bit int and writing it back would truncate large values on every frame, even unedited.
+	int64_t* value = (int64_t*)storage;
+	const int64_t inputSteps[2] = { 1, 100 }; // same step / fast step that ImGui::InputInt uses by default
+
+	if (variable.UISettings.UIHint == VariableUIHint::Drag)
+	{
+		float	  speed = 1.0f;
+		long long min = 0; // min == max leaves the drag unclamped, as it did with DragInt
+		long long max = 0;
+
+		if (!variable.UISettings.step.empty())
+			sscanf_s(variable.UISettings.step.c_str(), "%f", &speed);
+
+		if (!variable.UISettings.min.empty())
+			sscanf_s(variable.UISettings.min.c_str(), "%lli", &min);
+
+		if (!variable.UISettings.max.empty())
+			sscanf_s(variable.UISettings.max.c_str(), "%lli", &max);
+
+		ImGui::DragScalar(variable.originalName.c_str(), ImGuiDataType_S64, value, speed, &min, &max);
+	}
+	else if (variable.UISettings.UIHint == VariableUIHint::Slider)
+	{
+		long long min = 0;
+		long long max = 65535;
+
+		if (!variable.UISettings.min.empty())
+			sscanf_s(variable.UISettings.min.c_str(), "%lli", &min);
+
+		if (!variable.UISettings.max.empty())
+			sscanf_s(variable.UISettings.max.c_str(), "%lli", &max);
+
+		ImGui::SliderScalar(variable.originalName.c_str(), ImGuiDataType_S64, value, &min, &max);
+	}
+	else
+		ImGui::InputScalar(variable.originalName.c_str(), ImGuiDataType_S64, value, &inputSteps[0], &inputSteps[1]);
+}
+
+static void ShowUI_Uint_64(const RenderGraph& renderGraph, const bool paused, const Variable& variable, void* storage)
+{
+	// Shows a drag / slider / input widget (chosen by the UI hint) that edits the uint64_t in storage in place.
+	// Going through a 32-bit int and writing it back would truncate (or sign-extend) the value on every frame.
+	uint64_t* value = (uint64_t*)storage;
+	const uint64_t inputSteps[2] = { 1, 100 }; // same step / fast step that ImGui::InputInt uses by default
+
+	if (variable.UISettings.UIHint == VariableUIHint::Drag)
+	{
+		float			   speed = 1.0f;
+		unsigned long long min = 0; // min == max leaves the drag unclamped, as it did with DragInt
+		unsigned long long max = 0;
+
+		if (!variable.UISettings.step.empty())
+			sscanf_s(variable.UISettings.step.c_str(), "%f", &speed);
+
+		if (!variable.UISettings.min.empty())
+			sscanf_s(variable.UISettings.min.c_str(), "%llu", &min);
+
+		if (!variable.UISettings.max.empty())
+			sscanf_s(variable.UISettings.max.c_str(), "%llu", &max);
+
+		ImGui::DragScalar(variable.originalName.c_str(), ImGuiDataType_U64, value, speed, &min, &max);
+	}
+	else if (variable.UISettings.UIHint == VariableUIHint::Slider)
+	{
+		unsigned long long min = 0;
+		unsigned long long max = 65535;
+
+		if (!variable.UISettings.min.empty())
+			sscanf_s(variable.UISettings.min.c_str(), "%llu", &min);
+
+		if (!variable.UISettings.max.empty())
+			sscanf_s(variable.UISettings.max.c_str(), "%llu", &max);
+
+		ImGui::SliderScalar(variable.originalName.c_str(), ImGuiDataType_U64, value, &min, &max);
+	}
+	else
+		ImGui::InputScalar(variable.originalName.c_str(), ImGuiDataType_U64, value, &inputSteps[0], &inputSteps[1]);
+}
+
+
 static void ShowUI_Count(const RenderGraph& renderGraph, const bool paused, const Variable& variable, void* storage)
 {
 	// No-op. Shouldn't ever happen
@@ -588,6 +678,132 @@ static std::string VariableUIScope(const Variable& variable)
 	return ret;
 }
 
+
+// Draws a filled circle of diameter FontSize * scale at the cursor and submits it as an item (adapted from ImGui::Bullet())
+void LargeBullet(ImVec4 color, float scale = 1.0f)
+{
+	ImGuiWindow* window = ImGui::GetCurrentWindow();
+	if (window->SkipItems)
+		return;
+
+	ImGuiContext& g = *GImGui;
+	const ImGuiStyle& style = g.Style;
+	ImVec2 size(g.FontSize * scale, g.FontSize * scale); // square: x and y are equal
+
+	const ImRect bb(window->DC.CursorPos, ImVec2(window->DC.CursorPos.x + size.y, window->DC.CursorPos.y + size.y));
+	ImGui::ItemSize(bb);
+	if (!ImGui::ItemAdd(bb, 0)) // the item is registered with id 0
+	{
+		ImGui::SameLine(0, style.FramePadding.x * 2);
+		return;
+	}
+	
+	ImDrawList* draw_list = window->DrawList;
+
+	// The circle center is offset from the item rect by FramePadding; only the early-out above calls SameLine()
+	ImVec2 pos;
+	pos.x = bb.Min.x + style.FramePadding.x + size.x * 0.5f;
+	pos.y = bb.Min.y + style.FramePadding.y + size.y * 0.5f;
+	draw_list->AddCircleFilled(pos, size.x * 0.50f, ImGui::GetColorU32(color), 24);	// was *0.2f for small bullet
+}
+
+void setToDefault(GigiInterpreterPreviewWindowDX12::RuntimeVariable& variable) // restores the default value and clears the override flag
+{
+	memcpy(variable.storage.value, variable.storage.dflt, variable.storage.size); // copy the default bytes over the current value
+	variable.storage.overrideValue = false;
+}
+
+// Draws a colored status bullet one indent step left of the cursor, with a tooltip and click-to-reset. Assumes we are in an Indent block.
+void VariableUIStart(GigiInterpreterPreviewWindowDX12::RuntimeVariable& variable)
+{
+	static std::string deferredReset; // name of the variable whose reset was requested by a click during an earlier call
+
+	if (variable.variable->name == deferredReset) // apply the pending reset once this variable comes around again
+	{
+		setToDefault(variable);
+		deferredReset.clear();
+	}
+
+	const ImVec4 oldTextColor = ImVec4(0.1f, 0.1f, 0.8f, 1);; // used when none of the states below apply ("default")
+	const ImVec4 constColor = ImVec4(0.4f, 0.4f, 0.4f, 1);
+	const ImVec4 overrrideColor = ImVec4(1, 1, 0.2f, 1);
+	const ImVec4 transientColor = ImVec4(1, 1, 0.4f, 1);
+	const ImVec4 systemColor = ImVec4(1, 0.4f, 1, 1);
+
+	ImVec4 color = oldTextColor;
+	const char* txt = "default (value comes from .gg file)";
+
+	bool canReset = true;
+
+	if (variable.storage.overrideValue) // later checks overwrite earlier ones: system > transient > const > override
+	{
+		color = overrrideColor;
+		txt = "override (stored in .gguser file)";
+	}
+	if (variable.variable->Const)
+	{
+		color = constColor;
+		txt = "constant (cannot be changed, will not be saved)";
+		canReset = false;
+	}
+	if (variable.variable->transient) // only changes color and text; canReset is left as set above
+	{
+		color = transientColor;
+		txt = "transient (will not be saved)";
+	}
+	if (variable.storage.systemValue)
+	{
+		color = systemColor;
+		txt = "system (set by the viewer, will not be saved)";
+		canReset = false;
+	}
+
+	if (variable.storage.isDefault()) // nothing to reset when the value already is the default
+		canReset = false;
+
+	float space = ImGui::GetStyle().IndentSpacing;
+
+	float oldX = ImGui::GetCursorPosX();
+	ImGui::SetCursorPosX(ImGui::GetCursorPosX() - space); // step back one indent so the bullet sits left of the variable
+	ImGui::PushStyleVar(ImGuiStyleVar_FrameRounding, 100);
+	LargeBullet(color);
+	ImGui::SameLine();
+	ImGui::PopStyleVar();
+	ImGui::SetCursorPosX(oldX); // the variable's own UI continues at the original x position
+
+	if (ImGui::IsItemHovered(ImGuiHoveredFlags_AllowWhenDisabled)) // last submitted item is the bullet from LargeBullet above
+	{
+		if (ImGui::BeginTooltipEx(ImGuiTooltipFlags_OverridePreviousTooltip, ImGuiWindowFlags_None))
+		{
+			ImVec2 pos = ImGui::GetCursorPos();
+			LargeBullet(color, 2.0f);
+			float gap = ImGui::GetItemRectSize().x + 2 * ImGui::GetStyle().FramePadding.x + ImGui::GetStyle().WindowPadding.x;
+			ImGui::SetCursorPos(pos); // rewind so the text starts on the bullet's row, indented by gap
+
+			ImGui::Indent(gap);
+			ImGui::Text("Variable: %s", variable.variable->name.c_str());
+			ImGui::Text(" Comment: %s", variable.variable->comment.c_str());
+			ImGui::Unindent(gap);
+			ImGui::Text("");
+			ImGui::Text("Default%s: %s", 
+				canReset ? " (left click to reset)" : "", variable.variable->dflt.c_str());
+
+			ImGui::PushStyleVar(ImGuiStyleVar_Alpha, ImGui::GetStyle().Alpha * 0.5f);
+			ImGui::Text("\nColor code meaning: %s", txt);
+			ImGui::PopStyleVar();
+
+			ImGui::EndTooltip();
+		}
+	}
+	// we could put this in a context menu
+	if (canReset && ImGui::IsItemClicked()) // NOTE(review): assumes the last item is still the bullet after EndTooltip() - confirm
+	{
+		// Sadly with ImGui we have to defer the operation a frame, even ClearActiveID() does not fix it.
+		// This is to fix the case where the element is currently in edit mode.
+		deferredReset = variable.variable->name;
+	}
+}
+
 void GigiInterpreterPreviewWindowDX12::ShowUI(bool minimalUI, bool paused)
 {
 	// Minimal UI only shows public variables.
@@ -615,10 +831,29 @@ void GigiInterpreterPreviewWindowDX12::ShowUI(bool minimalUI, bool paused)
 		// Reset variables to default if we should
 		if (ImGui::Button("Reset To Defaults"))
 		{
-			for (const RuntimeVariable& variable : m_runtimeVariables)
-				memcpy(variable.storage.value, variable.storage.dflt, variable.storage.size);
+			for (RuntimeVariable& variable : m_runtimeVariables)
+			{
+				setToDefault(variable);
+			}
 		}
 
+		/*
+		// Color Legend
+		{
+//			ImGui::Text("Colors: ");
+			ImGui::SameLine();
+			ImGui::TextColored(overrrideColor, "default");
+			ImGui::SameLine();
+			ImGui::TextColored(oldTextColor, "override");
+			ImGui::SameLine();
+			ImGui::TextColored(constColor, "const");
+			ImGui::SameLine();
+			ImGui::TextColored(transientColor, "transient");
+			ImGui::SameLine();
+			ImGui::TextColored(systemColor, "system");
+		}
+		*/
+
 		struct VariableGroup
 		{
 			VariableVisibility visibility;
@@ -691,6 +926,7 @@ void GigiInterpreterPreviewWindowDX12::ShowUI(bool minimalUI, bool paused)
 						}
 					}
 					visibilityHeaderShown = true;
+
 					ImGui::Indent();
 				}
 
@@ -708,13 +944,23 @@ void GigiInterpreterPreviewWindowDX12::ShowUI(bool minimalUI, bool paused)
 				// Show variable labels and value
 				for (const RuntimeVariable* var : runtimeVariablesSorted)
 				{
-					const RuntimeVariable& variable = *var;
-
+					RuntimeVariable& variable = (RuntimeVariable&)*var;
+					
 					if (variable.variable->Const)
 					{
 						ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true);
 					}
 
+					// Snapshot variable.storage.value so a change can be detected after the type-specific UI ran,
+					// without touching that UI code. Static to avoid reallocations, which makes this non-reentrant.
+					static std::vector<char> valueCopy; // raw byte buffer; a pointer element type would over-allocate
+					valueCopy.clear();
+					valueCopy.resize(variable.storage.size);
+					memcpy(valueCopy.data(), variable.storage.value, variable.storage.size);
+
+					VariableUIStart(variable);
+//					ImGui::PushStyleColor(ImGuiCol_Text, color);
+
 					switch (variable.variable->type)
 					{
 #include "external/df_serialize/_common.h"
@@ -726,8 +972,18 @@ void GigiInterpreterPreviewWindowDX12::ShowUI(bool minimalUI, bool paused)
 						// clang-format on
 					}
 
+//					ImGui::PopStyleColor(1);
+
 					ShowToolTip(variable.variable->comment.c_str());
 
+					bool hasChanged = valueCopy.size() != variable.storage.size
+						|| memcmp(variable.storage.value, valueCopy.data(), valueCopy.size()) != 0;
+
+					if (hasChanged)
+					{
+						variable.storage.overrideValue = true;
+					}
+
 					if (variable.variable->Const)
 					{
 						ImGui::PopItemFlag();
diff --git a/GigiViewerDX12/Interpreter/NodesShared.h b/GigiViewerDX12/Interpreter/NodesShared.h
index af076e40..4e4f4f66 100644
--- a/GigiViewerDX12/Interpreter/NodesShared.h
+++ b/GigiViewerDX12/Interpreter/NodesShared.h
@@ -266,6 +266,8 @@ inline DataFieldTypeInfoStructDX12 DataFieldTypeInfoDX12(DataFieldType type)
         case DataFieldType::Bool: return DATA_FIELD_TYPE_INFO_DX12(uint32_t, 1, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_UINT, 1);
         case DataFieldType::Float4x4: return DATA_FIELD_TYPE_INFO_DX12(float, 16, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, 16);
         case DataFieldType::Uint_16: return DATA_FIELD_TYPE_INFO_DX12(uint16_t, 1, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT, 1);
+        case DataFieldType::Int_64: return DATA_FIELD_TYPE_INFO_DX12(int64_t, 1, DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R32_SINT, 1);
+        case DataFieldType::Uint_64: return DATA_FIELD_TYPE_INFO_DX12(uint64_t, 1, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_UINT, 1);
         default:
         {
             Assert(false, "Unknown data field type: %i", type);
diff --git a/GigiViewerDX12/Interpreter/RenderGraphNode_Action_DrawCall.cpp b/GigiViewerDX12/Interpreter/RenderGraphNode_Action_DrawCall.cpp
index 77d0b17e..dbd6fbfe 100644
--- a/GigiViewerDX12/Interpreter/RenderGraphNode_Action_DrawCall.cpp
+++ b/GigiViewerDX12/Interpreter/RenderGraphNode_Action_DrawCall.cpp
@@ -1054,28 +1054,7 @@ bool GigiInterpreterPreviewWindowDX12::OnNodeAction(const RenderGraphNode_Action
 					// transition
 					queuedTransitions.push_back({ TRANSITION_DEBUG_INFO_NAMED(textureInfo.m_resource, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE, GetNodeName(resourceNode).c_str()) });
 
-					bool sparseSampling = false;
-					D3D12_FEATURE_DATA_D3D12_OPTIONS6 options;
-					if (S_OK == m_device->CheckFeatureSupport(
-						D3D12_FEATURE_D3D12_OPTIONS6,
-						&options,
-						sizeof(options)))
-					{
-						switch (options.VariableShadingRateTier) {
-						case D3D12_VARIABLE_SHADING_RATE_TIER_1:
-						case D3D12_VARIABLE_SHADING_RATE_TIER_2:
-						{
-							sparseSampling = true;
-							break;
-						}
-						default: {
-							m_logFn(LogLevel::Error, "Draw call node \"%s\" could not enable sparse shading because it is not supported", node.name.c_str());
-							break;
-						}
-						}
-					}
-
-					if (sparseSampling)
+					if (VRSSupportLevel() == D3D12_VARIABLE_SHADING_RATE_TIER_2)
 					{
 						ID3D12GraphicsCommandList5* VRSCommandList = nullptr;
 						if (FAILED(m_commandList->QueryInterface(IID_PPV_ARGS(&VRSCommandList))))
@@ -1089,6 +1068,10 @@ bool GigiInterpreterPreviewWindowDX12::OnNodeAction(const RenderGraphNode_Action
 
 						VRSCommandList->Release();
 					}
+					else
+					{
+						m_logFn(LogLevel::Error, "Draw call node \"%s\" could not enable sparse shading because it is not supported", node.name.c_str());
+					}
 				}
 			}
 		}
@@ -1608,28 +1591,8 @@ bool GigiInterpreterPreviewWindowDX12::OnNodeAction(const RenderGraphNode_Action
 		m_commandList->OMSetRenderTargets((UINT)colorTargetHandles.size(), colorTargetHandles.data(), false, depthTargetHandlePtr);
 		m_commandList->OMSetStencilRef(node.stencilRef);
 
-		bool sparseSampling = false;
-		D3D12_FEATURE_DATA_D3D12_OPTIONS6 options;
-		if (S_OK == m_device->CheckFeatureSupport(
-			D3D12_FEATURE_D3D12_OPTIONS6,
-			&options,
-			sizeof(options)))
-		{
-			switch (options.VariableShadingRateTier) {
-			case D3D12_VARIABLE_SHADING_RATE_TIER_1:
-			case D3D12_VARIABLE_SHADING_RATE_TIER_2: {
-				sparseSampling = true;
-				break;
-			}
-			default: {
-				m_logFn(LogLevel::Error, "Draw call node \"%s\" could not enable sparse shading because it is not supported", node.name.c_str());
-				break;
-			}
-			}
-		}
-
 		// variable rate shading - set sparse sampling
-		if (sparseSampling)
+		if (VRSSupportLevel() > D3D12_VARIABLE_SHADING_RATE_TIER_NOT_SUPPORTED)
 		{
 			ID3D12GraphicsCommandList5* VRSCommandList = nullptr;
 			if (FAILED(m_commandList->QueryInterface(IID_PPV_ARGS(&VRSCommandList))))
@@ -1774,7 +1737,7 @@ bool GigiInterpreterPreviewWindowDX12::OnNodeAction(const RenderGraphNode_Action
 		}
 
 		// variable rate shading - set it back to dense sampling
-		if (sparseSampling)
+		if (VRSSupportLevel() > D3D12_VARIABLE_SHADING_RATE_TIER_NOT_SUPPORTED)
 		{
 			ID3D12GraphicsCommandList5* VRSCommandList = nullptr;
 			if (FAILED(m_commandList->QueryInterface(IID_PPV_ARGS(&VRSCommandList))))
@@ -1784,7 +1747,9 @@ bool GigiInterpreterPreviewWindowDX12::OnNodeAction(const RenderGraphNode_Action
 			}
 
 			VRSCommandList->RSSetShadingRate(D3D12_SHADING_RATE_1X1, nullptr);
-			VRSCommandList->RSSetShadingRateImage(nullptr);
+
+			if (VRSSupportLevel() == D3D12_VARIABLE_SHADING_RATE_TIER_2)
+				VRSCommandList->RSSetShadingRateImage(nullptr);
 
 			VRSCommandList->Release();
 		}
diff --git a/GigiViewerDX12/ViewerPython.cpp b/GigiViewerDX12/ViewerPython.cpp
index f4f6921f..b3812cb8 100644
--- a/GigiViewerDX12/ViewerPython.cpp
+++ b/GigiViewerDX12/ViewerPython.cpp
@@ -703,6 +703,18 @@ static PyObject* Python_SetCameraPos(PyObject* self, PyObject* args)
     return Py_None;
 }
 
+static PyObject* Python_SetCameraFOV(PyObject* self, PyObject* args)
+{ // Python binding (registered as "SetCameraFOV"): parses one float and forwards it to g_interface.
+    float fov = 0.0f;
+    if (!PyArg_ParseTuple(args, "f:Python_SetCameraFOV", &fov)) // "f" = a single float argument
+        return PyErr_Format(PyExc_TypeError, "type error in " __FUNCTION__ "()");
+
+    g_interface->SetCameraFOV(fov); // units are not established here -- TODO confirm degrees vs radians
+
+    Py_INCREF(Py_None); // take a reference on None before handing it back to the caller
+    return Py_None;
+}
+
 static PyObject* Python_SetCameraAltitudeAzimuth(PyObject* self, PyObject* args)
 {
     float altitude = 0.0f;
@@ -1088,6 +1100,7 @@ void PythonInit(PythonInterface* interface)
         {"SetImportedTextureBinaryFormat", Python_SetImportedTextureBinaryFormat, METH_VARARGS, "Sets the format of the binary file."},
         {"SetFrameDeltaTime", Python_SetFrameDeltaTime, METH_VARARGS, "Set the frame delta time, in seconds. Useful for recording videos by setting a fixed frame rate. Clear by setting to 0."},
         {"SetCameraPos", Python_SetCameraPos, METH_VARARGS, "Set the camera position"},
+        {"SetCameraFOV", Python_SetCameraFOV, METH_VARARGS, "Set the camera field of view"},
         {"SetCameraAltitudeAzimuth", Python_SetCameraAltitudeAzimuth, METH_VARARGS, "Set the camera altitude azimuth"},
         {"SetCameraNearFarZ", Python_SetCameraNearFarZ, METH_VARARGS, "Set the near and far plane"},
         {"SetCameraFlySpeed", Python_SetCameraFlySpeed, METH_VARARGS, "Set the fly speed of the camera"},
diff --git a/GigiViewerDX12/ViewerPython.h b/GigiViewerDX12/ViewerPython.h
index 1841b7c9..9fbcecbe 100644
--- a/GigiViewerDX12/ViewerPython.h
+++ b/GigiViewerDX12/ViewerPython.h
@@ -70,6 +70,7 @@ class PythonInterface
 	virtual void SetImportedTextureBinaryFormat(const char* textureName, int textureFormat) = 0;
 	virtual void SetFrameDeltaTime(float seconds) = 0;
 	virtual void SetCameraPos(float X, float Y, float Z) = 0;
+	virtual void SetCameraFOV(float fov) = 0;
 	virtual void SetCameraAltitudeAzimuth(float altitude, float azimuth) = 0;
 	virtual void SetCameraNearFarZ(float nearZ, float farZ) = 0;
 	virtual void SetCameraFlySpeed(float speed) = 0;
diff --git a/GigiViewerDX12/main.cpp b/GigiViewerDX12/main.cpp
index e6c6c4af..29f8ebe0 100644
--- a/GigiViewerDX12/main.cpp
+++ b/GigiViewerDX12/main.cpp
@@ -116,6 +116,8 @@ static int const                    NUM_FRAMES_IN_FLIGHT = 3;
 static FrameContext                 g_frameContext[NUM_FRAMES_IN_FLIGHT] = {};
 static UINT                         g_frameIndex = 0;
 
+static bool g_useWarpAdapter = false;
+
 HWND g_hwnd = NULL;
 static int const                    NUM_BACK_BUFFERS = 3;
 static ID3D12Device2* g_pd3dDevice = NULL;
@@ -173,7 +175,7 @@ static RENDERDOC_API_1_6_0* g_renderDocAPI = nullptr;
 static bool g_renderDocCaptureNextFrame = false;
 static bool g_renderDocIsCapturing = false;
 static bool g_renderDocLaunchUI = false;
-static bool g_renderDocEnabled = false;
+static bool g_renderDocEnabled = true;
 static bool g_pixCaptureEnabled = true;
 static int g_renderDocFrameCaptureCount = 1;
 
@@ -838,6 +840,9 @@ void GatherSnapshotData(GGUserFileV2Snapshot& snapshot)
         if (rtVar.variable->transient)
             continue;
 
+        if (!rtVar.storage.overrideValue)
+            continue;
+
         // don't save system variables
         if (rtVar.variable->name == g_systemVariables.iResolution_varName ||
             rtVar.variable->name == g_systemVariables.iTime_varName ||
@@ -1479,7 +1484,7 @@ void HandleMainMenu()
             static int captureFrames = 1;
             static std::string waitingToOpenFileName = "";
             static bool openCapture = true;
-            if (g_pixCaptureEnabled && ImGui::Button("Pix Capture"))
+            if (g_pixCaptureEnabled && ImGui::Button("Pix"))
             {
                 wchar_t fileName[1024];
                 int i = 0;
@@ -1514,7 +1519,7 @@ void HandleMainMenu()
                 }
             }
 
-			if (g_renderDocEnabled && ImGui::Button("RenderDoc Capture"))
+			if (g_renderDocEnabled && ImGui::Button("RenderDoc"))
 			{
 				g_renderDocCaptureNextFrame = true;
                 g_renderDocFrameCaptureCount = captureFrames;
@@ -1968,7 +1973,12 @@ bool AssignVariable(const char* name, DataFieldType type, T value)
 
         if (variable.name == name && variable.type == type)
         {
-            auto rtVar = g_interpreter.GetRuntimeVariable(varIndex);
+            auto& rtVar = g_interpreter.GetRuntimeVariable(varIndex);
+
+//            Variable& nonConstVar = (Variable&)(rtVar.variable);
+//            nonConstVar.system = true;
+            rtVar.storage.systemValue = true;
+
             memcpy(rtVar.storage.value, &value, rtVar.storage.size);
             return true;
         }
@@ -5845,13 +5855,28 @@ void ShowProfilerWindow()
 
     static std::vector<StableSample> stableSamples;
     static bool stableProfiling = true;
-    if (ImGui::BeginTable("profiling", 3, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg))
+    static int accumFrames = 0;
+    static bool pauseAccum = false;
+    static int accumLimit = 0;
+
+    if (!pauseAccum)
+        accumFrames++;
+
+    static std::unordered_map<std::string, float> accumCPU;
+    static std::unordered_map<std::string, float> accumGPU;
+    static std::unordered_map<std::string, bool> sumCheckBoxes;
+    bool wantsSum = false;
+    float sums[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
+    if (ImGui::BeginTable("profiling", 6, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg))
     {
         ImGuiIO& io = ImGui::GetIO();
 
         ImGui::TableSetupColumn("Label", ImGuiTableColumnFlags_WidthStretch);
         ImGui::TableSetupColumn("CPU ms", ImGuiTableColumnFlags_WidthFixed);
         ImGui::TableSetupColumn("GPU ms", ImGuiTableColumnFlags_WidthFixed);
+        ImGui::TableSetupColumn("A. CPU", ImGuiTableColumnFlags_WidthFixed);
+        ImGui::TableSetupColumn("A. GPU", ImGuiTableColumnFlags_WidthFixed);
+        ImGui::TableSetupColumn("Sum", ImGuiTableColumnFlags_WidthFixed);
         ImGui::TableHeadersRow();
 
         int entryIndex = -1;
@@ -5878,11 +5903,96 @@ void ShowProfilerWindow()
             ImGui::Text("%0.3f", stableProfiling ? stableSampleCPU.getStableAverage() : stableSampleCPU.getCurrentValue());
             ImGui::TableNextColumn();
             ImGui::Text("%0.3f", stableProfiling ? stableSampleGPU.getStableAverage() : stableSampleGPU.getCurrentValue());
+
+            // Accumulated CPU
+            {
+                ImGui::TableNextColumn();
+                float value = accumCPU[entry.label];
+                if (!pauseAccum)
+                {
+                    value = Lerp(value, entry.CPUDurationSeconds * 1000.0f, 1.0f / float(accumFrames));
+                    accumCPU[entry.label] = value;
+                }
+                ImGui::Text("%0.3f", value);
+            }
+
+            // Accumulated GPU
+            {
+                ImGui::TableNextColumn();
+                float value = accumGPU[entry.label];
+                if (!pauseAccum)
+                {
+                    value = Lerp(value, entry.GPUDurationSeconds * 1000.0f, 1.0f / float(accumFrames));
+                    accumGPU[entry.label] = value;
+                }
+                ImGui::Text("%0.3f", value);
+            }
+
+            // Sum checkbox
+            {
+                ImGui::TableNextColumn();
+
+                if (entry.label != "Total")
+                {
+                    ImGui::PushID(entry.label.c_str());
+                    ImGui::PushID("sumcb");
+                    bool includeInSum = sumCheckBoxes[entry.label];
+                    ImGui::Checkbox("", &includeInSum);
+                    sumCheckBoxes[entry.label] = includeInSum;
+                    ImGui::PopID();
+                    ImGui::PopID();
+
+                    if (includeInSum)
+                    {
+                        wantsSum = true;
+                        sums[0] += stableProfiling ? stableSampleCPU.getStableAverage() : stableSampleCPU.getCurrentValue();
+                        sums[1] += stableProfiling ? stableSampleGPU.getStableAverage() : stableSampleGPU.getCurrentValue();
+                        sums[2] += accumCPU[entry.label];
+                        sums[3] += accumGPU[entry.label];
+                    }
+                }
+            }
+        }
+        // sum row
+        if (wantsSum)
+        {
+            ImGui::TableNextRow();
+            ImGui::TableNextColumn();
+            ImGui::TextUnformatted("Selection Sum");
+
+            ImGui::TableNextColumn();
+            ImGui::Text("%0.3f", sums[0]);
+
+            ImGui::TableNextColumn();
+            ImGui::Text("%0.3f", sums[1]);
+
+            ImGui::TableNextColumn();
+            ImGui::Text("%0.3f", sums[2]);
+
+            ImGui::TableNextColumn();
+            ImGui::Text("%0.3f", sums[3]);
+
+            ImGui::TableNextColumn();
         }
         ImGui::EndTable();
 
         ImGui::Checkbox("Stabilize", &stableProfiling);
 
+        if (ImGui::Button((pauseAccum ? "Unpause Accum" : "Pause Accum")))
+            pauseAccum = !pauseAccum;
+        ImGui::SameLine();
+        char buttonText[1024];
+        sprintf_s(buttonText, "Reset Accum (%i frames)###ResetAccum", accumFrames);
+        if (ImGui::Button(buttonText) || g_techniqueFrameIndex < accumFrames)
+        {
+            pauseAccum = false;
+            accumFrames = 0;
+        }
+        if (ImGui::InputInt("Accum Limit", &accumLimit))
+            pauseAccum = false;
+        if (accumLimit > 0 && accumFrames >= accumLimit)
+            pauseAccum = true;
+
         ImGui::PushStyleColor(ImGuiCol_Text, IM_COL32(255, 255, 0, 255));
         ImGui::TextWrapped(
             "Warning: Profiling is affected by features in this viewer. "
@@ -6844,6 +6954,11 @@ class Python : public PythonInterface
         g_systemVariables.camera.cameraPos[2] = Z;
     }
 
+    void SetCameraFOV(float fov) override final
+    { // PythonInterface hook: stores the requested field of view in the system camera settings.
+        g_systemVariables.camera.FOV = fov; // written as-is; no validation or clamping happens here
+    }
+
     void SetCameraAltitudeAzimuth(float altitude, float azimuth) override final
     {
         g_systemVariables.camera.cameraAltitudeAzimuth[0] = altitude;
@@ -7286,9 +7401,9 @@ int main(int argc, char** argv)
             g_GPUValidation = true;
             argIndex++;
         }
-        else if (!_stricmp(argv[argIndex], "-renderdoc"))
+        else if (!_stricmp(argv[argIndex], "-norenderdoc"))
         {
-            g_renderDocEnabled = true;
+            g_renderDocEnabled = false;
             argIndex++;
         }
         else if (!_stricmp(argv[argIndex], "-nopixcapture"))
@@ -7301,6 +7416,11 @@ int main(int argc, char** argv)
             g_interpreter.m_compileShadersForDebug = true;
             argIndex++;
         }
+        else if (!_stricmp(argv[argIndex], "-warpadapter"))
+        {
+            g_useWarpAdapter = true;
+            argIndex++;
+        }
         else
         {
             argIndex++;
@@ -7384,7 +7504,7 @@ int main(int argc, char** argv)
     // Setup Dear ImGui style
     ImGui::StyleColorsDark();
 
-    // adjustment to make log scroll regision more recognizable
+    // adjustment to make log scroll region more recognizable
     {
         ImGuiStyle* style = &ImGui::GetStyle();
         ImVec4* colors = style->Colors;
@@ -7400,6 +7520,8 @@ int main(int argc, char** argv)
         style.WindowRounding = 0.0f;
         style.Colors[ImGuiCol_WindowBg].w = 1.0f;
     }
+    // Partially see-through menus and popups look cool but can be distracting, so make them opaque.
+    style.Colors[ImGuiCol_PopupBg].w = 1.0f;
 
     // Setup Platform/Renderer backends
     ImGui_ImplWin32_Init(g_hwnd);
@@ -7680,11 +7802,9 @@ bool CreateDeviceD3D(HWND hWnd)
     }
 
     // Gather the adapters
-    IDXGIAdapter1* adapter = nullptr; 
-    if (SUCCEEDED(dxgiFactory->EnumAdapterByGpuPreference(
-        0,
-        DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE,
-        IID_PPV_ARGS(&adapter))))
+    IDXGIAdapter1* adapter = nullptr;
+    if ((g_useWarpAdapter && SUCCEEDED(dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&adapter)))) ||
+        (!g_useWarpAdapter && SUCCEEDED(dxgiFactory->EnumAdapterByGpuPreference(0, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, IID_PPV_ARGS(&adapter)))))
     {
         DXGI_ADAPTER_DESC1 desc;
         adapter->GetDesc1(&desc);
diff --git a/Install.nsi b/Install.nsi
index d592f61b..432e8b9b 100644
--- a/Install.nsi
+++ b/Install.nsi
@@ -10,8 +10,8 @@
   !define APPNAME "Gigi"
   !define DESCRIPTION "Rapid Graphics Development Platform"
   !define VERSIONMAJOR 0
-  !define VERSIONMINOR 99
-  !define VERSIONBUILD 8
+  !define VERSIONMINOR 991
+  !define VERSIONBUILD 0
   !define SLUG "${APPNAME} v${VERSIONMAJOR}.${VERSIONMINOR}.${VERSIONBUILD}"
 
   # These will be displayed by the "Click here for support information" link in "Add/Remove Programs"
diff --git a/MakeCode_UnitTests_DX12.py b/MakeCode_UnitTests_DX12.py
index a0aa9c32..4d9f38d6 100644
--- a/MakeCode_UnitTests_DX12.py
+++ b/MakeCode_UnitTests_DX12.py
@@ -33,6 +33,7 @@
     "Textures\\Mips_Imported_Cube",
     "Textures\\Load_Tex2DArray",
     "Textures\\Load_Tex3D",
+    "SubGraph\\SetVarNode",
 
     # Just need to make work
     "RayTrace\\simpleRTDynamic",
diff --git a/RenderGraph/Visitors.h b/RenderGraph/Visitors.h
index b2029bab..b089e213 100644
--- a/RenderGraph/Visitors.h
+++ b/RenderGraph/Visitors.h
@@ -34,6 +34,8 @@ inline void ZeroDfltIfEmpty(std::string& dflt, DataFieldType type, const std::st
             case DataFieldType::Bool: dflt = "false"; break;
             case DataFieldType::Float4x4: dflt = "0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f"; break;
             case DataFieldType::Uint_16: dflt = "0"; break;
+            case DataFieldType::Int_64: dflt = "0"; break;
+            case DataFieldType::Uint_64: dflt = "0"; break;
             default:
             {
                 Assert(false, "Unhandled data field type %s (%i).\nIn %s\n", EnumToString(type), type, path.c_str());
diff --git a/Schemas/DataFieldTypes.h b/Schemas/DataFieldTypes.h
index c3d74467..91770315 100644
--- a/Schemas/DataFieldTypes.h
+++ b/Schemas/DataFieldTypes.h
@@ -20,6 +20,8 @@ ENUM_BEGIN(DataFieldType, "The type of a data field")
     ENUM_ITEM(Float4, "float[4]")
     ENUM_ITEM(Bool, "bool")
     ENUM_ITEM(Float4x4, "float[4][4]")
-    ENUM_ITEM(Uint_16, "a 16 bit uint")
+	ENUM_ITEM(Uint_16, "a 16 bit uint")
+	ENUM_ITEM(Int_64, "a 64 bit int")
+	ENUM_ITEM(Uint_64, "a 64 bit uint")
     ENUM_ITEM(Count, "")
 ENUM_END()
diff --git a/Schemas/SchemasShaders.h b/Schemas/SchemasShaders.h
index ca118d32..00ce8719 100644
--- a/Schemas/SchemasShaders.h
+++ b/Schemas/SchemasShaders.h
@@ -43,6 +43,8 @@ ENUM_BEGIN(TextureViewType, "The type that a texture is actually viewed as, in a
     ENUM_ITEM(Float2, "float[2]")
     ENUM_ITEM(Float3, "float[3]")
     ENUM_ITEM(Float4, "float[4]")
+	ENUM_ITEM(Int_64, "int64_t")
+	ENUM_ITEM(Uint_64, "uint64_t")
 ENUM_END()
 
 ENUM_BEGIN(SamplerFilter, "The type of filter a sampler uses")
@@ -329,8 +331,27 @@ STRUCT_END()
 // Slang settings
 //========================================================
 
-STRUCT_BEGIN(SlangOptions, "A declaration of a shader")
+ENUM_BEGIN(GigiSlangOptimizationLevel, "The level of optimizations")
+    ENUM_ITEM(None, "Don't optimize at all.")
+    ENUM_ITEM(Default, "Default optimization level: balance code quality and compilation time.")
+    ENUM_ITEM(High, "Optimize aggressively.")
+    ENUM_ITEM(Maximum, "Include optimizations that may take a very long time, or may involve severe space-vs-speed tradeoffs.")
+ENUM_END()
+
+ENUM_BEGIN(GigiSlangFloatingPointMode, "Floating point mode")
+    ENUM_ITEM(Default, "")
+    ENUM_ITEM(Fast, "")
+    ENUM_ITEM(Precise, "")
+ENUM_END()
+
+STRUCT_BEGIN(SlangOptions, "Slang options")
     STRUCT_FIELD(bool, process, false, "if true, this shader will be processed by slang", 0)
+    STRUCT_FIELD(bool, noNameMangling, false, "Do as little mangling of names as possible, to try to preserve original names.", 0)
+    STRUCT_FIELD(bool, lineDirectives, true, "Whether to output line directives in the shader.", 0)
+    STRUCT_FIELD(bool, warningsAsErrors, false, "Warnings are errors.", 0)
+    STRUCT_FIELD(bool, verbosePaths, false, "Verbose Paths.", 0)
+    STRUCT_FIELD(GigiSlangFloatingPointMode, floatingPointMode, GigiSlangFloatingPointMode::Default, "Floating point mode", 0)
+    STRUCT_FIELD(GigiSlangOptimizationLevel, optimizationLevel, GigiSlangOptimizationLevel::Default, "Optimization level", 0)
 STRUCT_END()
 
 //========================================================
@@ -354,7 +375,7 @@ STRUCT_BEGIN(Shader, "A declaration of a shader")
     STRUCT_STATIC_ARRAY(int, NumThreads, 3, { 8 COMMA 8 COMMA 1 }, "The number of threads each dispatch has, for applicable shader types. 64,1,1 suggested for 1d. 8,8,1 for 2d. 4,4,4 for 3d.", SCHEMA_FLAG_UI_ARRAY_HIDE_INDEX)
 
     STRUCT_FIELD(bool, copyFile, true, "if false, will not copy the file over. A hackaround for when you have multiple raytracing shaders in the same file. TODO: resolve this better.", 0)
-    STRUCT_FIELD(SlangOptions, slangOptions, {}, "Settings for optionally processing shaders with slang", 0)
+    STRUCT_FIELD(SlangOptions, slangOptions, {}, "Settings for optionally processing shaders with slang", SCHEMA_FLAG_UI_COLLAPSABLE)
 
     STRUCT_FIELD(BackendRestriction, backends, {}, "The backends this file copy happens for.", SCHEMA_FLAG_UI_COLLAPSABLE)
 
diff --git a/Schemas/SchemasVariables.h b/Schemas/SchemasVariables.h
index 87d1c422..f80f4e75 100644
--- a/Schemas/SchemasVariables.h
+++ b/Schemas/SchemasVariables.h
@@ -45,6 +45,8 @@ STRUCT_BEGIN(Variable, "A variable definition")
     STRUCT_FIELD(std::string, originalName, "", "The name before renames and sanitization", SCHEMA_FLAG_NO_SERIALIZE)
     STRUCT_FIELD(std::string, scope, "", "The scope that the node lives in. A possibly nested list of subgraph node names, seperated by a dot.", SCHEMA_FLAG_NO_SERIALIZE)
 
+	STRUCT_FIELD(bool, system, false, "Is set if the runtime overrides the value", SCHEMA_FLAG_NO_SERIALIZE | SCHEMA_FLAG_NO_UI)
+
     // deprecated in 0.94b
     // replaced by UISettings.UIHint
     STRUCT_FIELD(VariableUIHint, UIHint, VariableUIHint::Count, "Any hints for UI", SCHEMA_FLAG_NO_UI)
diff --git a/Techniques/UnitTests/Compute/SlangAutoDiff.gg b/Techniques/UnitTests/Compute/SlangAutoDiff.gg
index 10716594..622e4508 100644
--- a/Techniques/UnitTests/Compute/SlangAutoDiff.gg
+++ b/Techniques/UnitTests/Compute/SlangAutoDiff.gg
@@ -1,7 +1,7 @@
 {
     "$schema": "gigischema.json",
     "name": "SlangAutoDiff",
-    "version": "0.98b",
+    "version": "0.991b",
     "variables": [
         {
             "name": "NumGaussians",
@@ -95,7 +95,8 @@
                     "type": "Buffer",
                     "access": "UAV",
                     "buffer": {
-                        "type": "Float"
+                        "type": "Float",
+                        "PODAsStructuredBuffer": false
                     }
                 }
             ]
@@ -110,13 +111,17 @@
                     "type": "Buffer",
                     "access": "SRV",
                     "buffer": {
-                        "type": "Float"
+                        "type": "Float",
+                        "PODAsStructuredBuffer": false
                     }
                 },
                 {
                     "name": "Output",
                     "type": "Texture",
-                    "access": "UAV"
+                    "access": "UAV",
+                    "buffer": {
+                        "PODAsStructuredBuffer": false
+                    }
                 }
             ]
         },
@@ -143,7 +148,8 @@
                     "type": "Buffer",
                     "access": "UAV",
                     "buffer": {
-                        "type": "Float"
+                        "type": "Float",
+                        "PODAsStructuredBuffer": false
                     }
                 }
             ]
diff --git a/Techniques/UnitTests/Compute/SlangAutoDiff_Descend.hlsl b/Techniques/UnitTests/Compute/SlangAutoDiff_Descend.hlsl
index 94432104..49167226 100644
--- a/Techniques/UnitTests/Compute/SlangAutoDiff_Descend.hlsl
+++ b/Techniques/UnitTests/Compute/SlangAutoDiff_Descend.hlsl
@@ -49,7 +49,7 @@ float GetHeightAtPos(float x, float y, no_diff float2 gaussPos, no_diff float2 g
 		float2 dFLocal = float2(0.0f, 0.0f);
 
 		// Backward mode automatic differentiation (AD) - AKA Backpropagation
-		if (/*$(Variable:UseBackwardAD)*/)
+		if ((bool)/*$(Variable:UseBackwardAD)*/)
 		{
 			// get local dFdX and dFdy
 			float height = GetHeightAtPos(pos.x, pos.y, gaussPos, gaussSigma);
diff --git a/Techniques/UnitTests/SubGraph/ConstOverride.gg b/Techniques/UnitTests/SubGraph/ConstOverride.gg
new file mode 100644
index 00000000..1505c875
--- /dev/null
+++ b/Techniques/UnitTests/SubGraph/ConstOverride.gg
@@ -0,0 +1,166 @@
+{
+    "$schema": "gigischema.json",
+    "comment": "This tests using a subgraph twice and overriding with different literal values.",
+    "version": "0.99b",
+    "shaders": [
+        {
+            "name": "ClearCS",
+            "fileName": "ConstOverride_Clear.hlsl",
+            "entryPoint": "csmain",
+            "resources": [
+                {
+                    "name": "Output",
+                    "type": "Texture",
+                    "access": "UAV"
+                }
+            ]
+        }
+    ],
+    "nodes": [
+        {
+            "resourceTexture": {
+                "name": "Output",
+                "editorPos": [
+                    -37.0,
+                    2.0
+                ],
+                "visibility": "Exported",
+                "format": {
+                    "format": "RGBA8_Unorm_sRGB"
+                },
+                "size": {
+                    "multiply": [
+                        512,
+                        512,
+                        1
+                    ]
+                }
+            }
+        },
+        {
+            "actionComputeShader": {
+                "name": "Clear",
+                "editorPos": [
+                    85.0,
+                    2.0
+                ],
+                "linkProperties": [
+                    {},
+                    {}
+                ],
+                "connections": [
+                    {
+                        "srcPin": "Output",
+                        "dstNode": "Output",
+                        "dstPin": "resource"
+                    }
+                ],
+                "shader": {
+                    "name": "ClearCS"
+                },
+                "dispatchSize": {
+                    "node": {
+                        "name": "Output"
+                    }
+                }
+            }
+        },
+        {
+            "actionSubGraph": {
+                "name": "Left",
+                "editorPos": [
+                    245.0,
+                    2.0
+                ],
+                "linkProperties": [
+                    {}
+                ],
+                "connections": [
+                    {
+                        "srcPin": "Color",
+                        "dstNode": "Clear",
+                        "dstPin": "Output"
+                    }
+                ],
+                "fileName": "ConstOverrideSubgraph.gg",
+                "subGraphData": {
+                    "importedResources": [
+                        "Color"
+                    ],
+                    "variables": [
+                        {
+                            "name": "MinX",
+                            "visibility": "User"
+                        },
+                        {
+                            "name": "MaxX",
+                            "visibility": "User"
+                        }
+                    ]
+                },
+                "variableSettings": [
+                    {
+                        "name": "MinX",
+                        "visibility": "User",
+                        "replaceWithValue": "50"
+                    },
+                    {
+                        "name": "MaxX",
+                        "visibility": "User",
+                        "replaceWithValue": "100"
+                    }
+                ]
+            }
+        },
+        {
+            "actionSubGraph": {
+                "name": "Right",
+                "editorPos": [
+                    335.0,
+                    2.0
+                ],
+                "linkProperties": [
+                    {}
+                ],
+                "connections": [
+                    {
+                        "srcPin": "Color",
+                        "dstNode": "Left",
+                        "dstPin": "Color"
+                    }
+                ],
+                "fileName": "ConstOverrideSubgraph.gg",
+                "subGraphData": {
+                    "importedResources": [
+                        "Color"
+                    ],
+                    "variables": [
+                        {
+                            "name": "MinX",
+                            "visibility": "User"
+                        },
+                        {
+                            "name": "MaxX",
+                            "visibility": "User"
+                        }
+                    ]
+                },
+                "variableSettings": [
+                    {
+                        "name": "MinX",
+                        "visibility": "User",
+                        "replaceWithValue": "150"
+                    },
+                    {
+                        "name": "MaxX",
+                        "visibility": "User",
+                        "replaceWithValue": "200"
+                    }
+                ]
+            }
+        }
+    ],
+    "PrimaryOutput": {
+        "name": "Output"
+    }
+}
\ No newline at end of file
diff --git a/Techniques/UnitTests/SubGraph/ConstOverride.gguser b/Techniques/UnitTests/SubGraph/ConstOverride.gguser
new file mode 100644
index 00000000..4196e5bb
--- /dev/null
+++ b/Techniques/UnitTests/SubGraph/ConstOverride.gguser
@@ -0,0 +1,12 @@
+{
+    "version": "2.0",
+    "snapshot": {
+        "resourceViewNodeIndex": 3,
+        "resourceViewResourceIndex": 1,
+        "cameraPos": [
+            0.0,
+            0.0,
+            -10.00960922241211
+        ]
+    }
+}
\ No newline at end of file
diff --git a/Techniques/UnitTests/SubGraph/ConstOverride.py b/Techniques/UnitTests/SubGraph/ConstOverride.py
new file mode 100644
index 00000000..cebbb68b
--- /dev/null
+++ b/Techniques/UnitTests/SubGraph/ConstOverride.py
@@ -0,0 +1,73 @@
+import Host
+import GigiArray
+import numpy
+from PIL import Image
+import os
+
+resources = [  # each entry: [resource name to read back, True = compare as PNG / False = compare as .npy]
+	[ "Right_WriteColor.Color: Output (UAV - After)", True ],
+]
+
+# don't save gguser files during this script execution
+Host.DisableGGUserSave(True)
+
+def DoTest():  # loads ConstOverride.gg, runs it twice, compares readbacks with gold files; returns True only if all match
+	TestPassed = True  # set to False on any mismatch, missing gold file, or failed readback
+
+	# make sure the output directory exists
+	outDirName = "Techniques/UnitTests/_GoldImages/SubGraph/ConstOverride/"
+	os.makedirs(outDirName, exist_ok=True)
+
+	# Load the technique; nothing else can run if this fails
+	if not Host.LoadGG("Techniques/UnitTests/SubGraph/ConstOverride.gg"):
+		return False
+
+	# Specify the resources we want to readback
+	for resource in resources:
+		Host.SetWantReadback(resource[0])
+
+	# Do one execution to ensure everything is initialized
+	Host.RunTechnique()
+
+	# Run again, wait for the GPU, then compare each readback against its gold file
+	Host.RunTechnique()
+	Host.WaitOnGPU()
+	for i, resource in enumerate(resources):
+		lastReadback, success = Host.Readback(resource[0])
+		if success:
+			lastReadbackNp = numpy.array(lastReadback)
+			if resource[1]:  # image path: gold file is <index>.png
+				lastReadbackNp = lastReadbackNp.reshape((lastReadbackNp.shape[1], lastReadbackNp.shape[2], lastReadbackNp.shape[3]))  # drops axis 0; assumes its size is 1 -- TODO confirm
+				outFileName = outDirName + str(i) + ".png"
+				if os.path.exists(outFileName):
+					img = numpy.asarray(Image.open(outFileName))
+					if not numpy.array_equal(img, lastReadbackNp):
+						Host.Log("Error", outFileName + " did not match")
+						TestPassed = False
+				else:  # no gold image yet: write one, but still report this run as failed
+					Host.Log("Error", outFileName + " didn't exist, creating")
+					Image.fromarray(lastReadbackNp, "RGBA").save(outFileName)
+					TestPassed = False
+			else:  # raw array path: gold file is <index>.npy
+				outFileName = outDirName + str(i) + ".npy"
+				if os.path.exists(outFileName):
+					img = numpy.load(outFileName)
+					if not numpy.array_equal(img, lastReadbackNp):
+						Host.Log("Error", outFileName + " did not match")
+						TestPassed = False
+				else:  # no gold array yet: write one, but still report this run as failed
+					Host.Log("Error", outFileName + " didn't exist, creating")
+					numpy.save(outFileName, lastReadbackNp)
+					TestPassed = False
+		else:
+			Host.Log("Error", "Could not readback " + resource[0])
+			TestPassed = False
+
+	return TestPassed
+
+# This is so the test can be run by itself directly
+if __name__ == "builtins":
+	if DoTest():
+		Host.Log("Info", "test Passed")
+	else:
+		Host.Log("Error", "Test Failed")
diff --git a/Techniques/UnitTests/SubGraph/ConstOverrideSubgraph.gg b/Techniques/UnitTests/SubGraph/ConstOverrideSubgraph.gg
new file mode 100644
index 00000000..ae842107
--- /dev/null
+++ b/Techniques/UnitTests/SubGraph/ConstOverrideSubgraph.gg
@@ -0,0 +1,70 @@
+{
+    "$schema": "gigischema.json",
+    "version": "0.99b",
+    "variables": [
+        {
+            "name": "MinX",
+            "type": "Uint",
+            "visibility": "User"
+        },
+        {
+            "name": "MaxX",
+            "type": "Uint",
+            "visibility": "User"
+        }
+    ],
+    "shaders": [
+        {
+            "name": "WriteColorCS",
+            "fileName": "ConstOverride_WriteColor.hlsl",
+            "entryPoint": "csmain",
+            "resources": [
+                {
+                    "name": "Color",
+                    "type": "Texture",
+                    "access": "UAV"
+                }
+            ]
+        }
+    ],
+    "nodes": [
+        {
+            "resourceTexture": {
+                "name": "Color",
+                "editorPos": [
+                    -5.0,
+                    -30.0
+                ],
+                "visibility": "Imported"
+            }
+        },
+        {
+            "actionComputeShader": {
+                "name": "WriteColor",
+                "editorPos": [
+                    133.0,
+                    -30.0
+                ],
+                "linkProperties": [
+                    {},
+                    {}
+                ],
+                "connections": [
+                    {
+                        "srcPin": "Color",
+                        "dstNode": "Color",
+                        "dstPin": "resource"
+                    }
+                ],
+                "shader": {
+                    "name": "WriteColorCS"
+                },
+                "dispatchSize": {
+                    "node": {
+                        "name": "Color"
+                    }
+                }
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/Techniques/UnitTests/SubGraph/ConstOverrideSubgraph.gguser b/Techniques/UnitTests/SubGraph/ConstOverrideSubgraph.gguser
new file mode 100644
index 00000000..52c925a9
--- /dev/null
+++ b/Techniques/UnitTests/SubGraph/ConstOverrideSubgraph.gguser
@@ -0,0 +1,30 @@
+{
+    "version": "2.0",
+    "snapshot": {
+        "resourceViewNodeIndex": 1,
+        "resourceViewResourceIndex": 2,
+        "importedResources": [
+            {
+                "nodeName": "Color",
+                "texture": {
+                    "size": [
+                        512,
+                        512,
+                        1
+                    ],
+                    "binaryFormat": "Any"
+                }
+            }
+        ],
+        "savedVariables": [
+            {
+                "name": "MinX",
+                "value": "0"
+            },
+            {
+                "name": "MaxX",
+                "value": "100"
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/Techniques/UnitTests/SubGraph/ConstOverride_Clear.hlsl b/Techniques/UnitTests/SubGraph/ConstOverride_Clear.hlsl
new file mode 100644
index 00000000..33980622
--- /dev/null
+++ b/Techniques/UnitTests/SubGraph/ConstOverride_Clear.hlsl
@@ -0,0 +1,12 @@
+// Unnamed technique, shader ClearCS: each thread writes the same constant color to Output at its DTid.xy
+/*$(ShaderResources)*/
+
+/*$(_compute:csmain)*/(uint3 DTid : SV_DispatchThreadID)
+{
+    Output[DTid.xy] = float4(0.5f, 0.5f, 0.5f, 1.0f); // constant value (0.5, 0.5, 0.5, 1.0), independent of position
+}
+
+/*
+Shader Resources:
+	Texture Output (as UAV)
+*/
diff --git a/Techniques/UnitTests/SubGraph/ConstOverride_WriteColor.hlsl b/Techniques/UnitTests/SubGraph/ConstOverride_WriteColor.hlsl
new file mode 100644
index 00000000..db747b73
--- /dev/null
+++ b/Techniques/UnitTests/SubGraph/ConstOverride_WriteColor.hlsl
@@ -0,0 +1,13 @@
+// Unnamed technique, shader WriteColorCS: writes a fixed color to Color where MinX <= DTid.x <= MaxX
+/*$(ShaderResources)*/
+
+/*$(_compute:csmain)*/(uint3 DTid : SV_DispatchThreadID)
+{
+    if (DTid.x >= /*$(Variable:MinX)*/ && DTid.x <= /*$(Variable:MaxX)*/) // inclusive on both ends; other texels are not written
+        Color[DTid.xy] = float4(0.2f, 0.8f, 0.2f, 1.0f);
+}
+
+/*
+Shader Resources:
+	Texture Color (as UAV)
+*/
diff --git a/Techniques/UnitTests/SubGraph/SetVarNode.gg b/Techniques/UnitTests/SubGraph/SetVarNode.gg
new file mode 100644
index 00000000..b09b979f
--- /dev/null
+++ b/Techniques/UnitTests/SubGraph/SetVarNode.gg
@@ -0,0 +1,78 @@
+{
+    "$schema": "gigischema.json",
+    "version": "0.99b",
+    "nodes": [
+        {
+            "actionSubGraph": {
+                "name": "Sub",
+                "editorPos": [
+                    47.0,
+                    79.0
+                ],
+                "linkProperties": [
+                    {},
+                    {}
+                ],
+                "connections": [
+                    {
+                        "srcPin": "ImportedBuffer",
+                        "dstNode": "Buf",
+                        "dstPin": "resource"
+                    },
+                    {
+                        "srcPin": "ImportedTexture",
+                        "dstNode": "Tex",
+                        "dstPin": "resource"
+                    }
+                ],
+                "fileName": "SetVarNodeInner.gg",
+                "subGraphData": {
+                    "importedResources": [
+                        "ImportedBuffer",
+                        "ImportedTexture"
+                    ],
+                    "variables": [
+                        {
+                            "name": "BufferSize",
+                            "visibility": "User"
+                        },
+                        {
+                            "name": "TextureSize",
+                            "visibility": "User"
+                        }
+                    ]
+                },
+                "variableSettings": [
+                    {
+                        "name": "BufferSize",
+                        "visibility": "User"
+                    },
+                    {
+                        "name": "TextureSize",
+                        "visibility": "User"
+                    }
+                ]
+            }
+        },
+        {
+            "resourceBuffer": {
+                "name": "Buf",
+                "editorPos": [
+                    -85.0,
+                    66.0
+                ],
+                "visibility": "Imported"
+            }
+        },
+        {
+            "resourceTexture": {
+                "name": "Tex",
+                "editorPos": [
+                    -85.0,
+                    114.0
+                ],
+                "visibility": "Imported"
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/Techniques/UnitTests/SubGraph/SetVarNode.gguser b/Techniques/UnitTests/SubGraph/SetVarNode.gguser
new file mode 100644
index 00000000..5be1e556
--- /dev/null
+++ b/Techniques/UnitTests/SubGraph/SetVarNode.gguser
@@ -0,0 +1,37 @@
+{
+    "version": "2.0",
+    "snapshot": {
+        "importedResources": [
+            {
+                "nodeName": "Buf",
+                "isATexture": false,
+                "buffer": {
+                    "type": "Float2",
+                    "count": 5,
+                    "BLASOpaque": true
+                }
+            },
+            {
+                "nodeName": "Tex",
+                "texture": {
+                    "size": [
+                        512,
+                        256,
+                        1
+                    ],
+                    "binaryFormat": "Any"
+                }
+            }
+        ],
+        "savedVariables": [
+            {
+                "name": "Sub_BufferSize",
+                "value": "5"
+            },
+            {
+                "name": "Sub_TextureSize",
+                "value": "512,256,1"
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/Techniques/UnitTests/SubGraph/SetVarNode.py b/Techniques/UnitTests/SubGraph/SetVarNode.py
new file mode 100644
index 00000000..5d0fd57a
--- /dev/null
+++ b/Techniques/UnitTests/SubGraph/SetVarNode.py
@@ -0,0 +1,50 @@
+import Host
+import GigiArray
+import numpy
+from PIL import Image
+import os
+
+resources = [
+	[ "Right_WriteColor.Color: Output (UAV - After)", True ],
+]
+
+# don't save gguser files during this script execution
+Host.DisableGGUserSave(True)
+
+def DoTest():  # Returns True only if the technique loads and both checked variables have the expected values
+	TestPassed = True
+
+	# make sure the output directory exists (nothing in this function writes into it)
+	outDirName = "Techniques/UnitTests/_GoldImages/SubGraph/SetVarNode/"
+	os.makedirs(outDirName, exist_ok=True)
+
+	# Load the technique; a failed load fails the test immediately
+	if not Host.LoadGG("Techniques/UnitTests/SubGraph/SetVarNode.gg"):
+		return False
+
+	# Specify the resources we want to read back
+	for resource in resources:
+		Host.SetWantReadback(resource[0])
+
+	# Do one execution to ensure everything is initialized
+	Host.RunTechnique()
+
+	# Do another to get the setvars
+	Host.RunTechnique()
+	# Values are compared as strings; the expected ones equal the Buf count / Tex size listed in SetVarNode.gguser
+	if Host.GetVariable("Sub_BufferSize") != "5":
+		Host.Log("Error", "Sub_BufferSize was wrong")
+		TestPassed = False
+
+	if Host.GetVariable("Sub_TextureSize") != "512,256,1":
+		Host.Log("Error", "Sub_TextureSize was wrong")
+		TestPassed = False
+
+	return TestPassed
+
+# This is so the test can be run by itself directly
+if __name__ == "builtins":
+	if DoTest():
+		Host.Log("Info", "test Passed")
+	else:
+		Host.Log("Error", "Test Failed")
diff --git a/Techniques/UnitTests/SubGraph/SetVarNodeInner.gg b/Techniques/UnitTests/SubGraph/SetVarNodeInner.gg
new file mode 100644
index 00000000..35df0cd9
--- /dev/null
+++ b/Techniques/UnitTests/SubGraph/SetVarNodeInner.gg
@@ -0,0 +1,82 @@
+{
+    "$schema": "gigischema.json",
+    "version": "0.99b",
+    "variables": [
+        {
+            "name": "BufferSize",
+            "type": "Uint",
+            "visibility": "User"
+        },
+        {
+            "name": "TextureSize",
+            "type": "Uint3",
+            "visibility": "User"
+        }
+    ],
+    "nodes": [
+        {
+            "resourceBuffer": {
+                "name": "ImportedBuffer",
+                "editorPos": [
+                    -25.0,
+                    -14.0
+                ],
+                "visibility": "Imported"
+            }
+        },
+        {
+            "resourceTexture": {
+                "name": "ImportedTexture",
+                "editorPos": [
+                    -21.0,
+                    34.0
+                ],
+                "visibility": "Imported"
+            }
+        }
+    ],
+    "setVars": [
+        {
+            "destination": {
+                "name": "BufferSize"
+            },
+            "ANode": {
+                "name": "ImportedBuffer"
+            },
+            "op": "Noop"
+        },
+        {
+            "destination": {
+                "name": "TextureSize"
+            },
+            "destinationIndex": 0,
+            "AVarIndex": 0,
+            "ANode": {
+                "name": "ImportedTexture"
+            },
+            "op": "Noop"
+        },
+        {
+            "destination": {
+                "name": "TextureSize"
+            },
+            "destinationIndex": 1,
+            "AVarIndex": 1,
+            "ANode": {
+                "name": "ImportedTexture"
+            },
+            "op": "Noop"
+        },
+        {
+            "destination": {
+                "name": "TextureSize"
+            },
+            "destinationIndex": 2,
+            "AVarIndex": 2,
+            "ANode": {
+                "name": "ImportedTexture"
+            },
+            "op": "Noop"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/Techniques/UnitTests/_GoldImages/Compute/boxblur/0.png b/Techniques/UnitTests/_GoldImages/Compute/boxblur/0.png
index e53629fe..ea7174b1 100644
Binary files a/Techniques/UnitTests/_GoldImages/Compute/boxblur/0.png and b/Techniques/UnitTests/_GoldImages/Compute/boxblur/0.png differ
diff --git a/Techniques/UnitTests/_GoldImages/Compute/simple/0.png b/Techniques/UnitTests/_GoldImages/Compute/simple/0.png
index b40c62a4..26f922ee 100644
Binary files a/Techniques/UnitTests/_GoldImages/Compute/simple/0.png and b/Techniques/UnitTests/_GoldImages/Compute/simple/0.png differ
diff --git a/Techniques/UnitTests/_GoldImages/RayTrace/TwoRayGens/0.png b/Techniques/UnitTests/_GoldImages/RayTrace/TwoRayGens/0.png
index a3405992..997413be 100644
Binary files a/Techniques/UnitTests/_GoldImages/RayTrace/TwoRayGens/0.png and b/Techniques/UnitTests/_GoldImages/RayTrace/TwoRayGens/0.png differ
diff --git a/Techniques/UnitTests/_GoldImages/RayTrace/TwoRayGensSubgraph/0.png b/Techniques/UnitTests/_GoldImages/RayTrace/TwoRayGensSubgraph/0.png
index a3405992..997413be 100644
Binary files a/Techniques/UnitTests/_GoldImages/RayTrace/TwoRayGensSubgraph/0.png and b/Techniques/UnitTests/_GoldImages/RayTrace/TwoRayGensSubgraph/0.png differ
diff --git a/Techniques/UnitTests/_GoldImages/SubGraph/ConstOverride/0.png b/Techniques/UnitTests/_GoldImages/SubGraph/ConstOverride/0.png
new file mode 100644
index 00000000..7b52f705
Binary files /dev/null and b/Techniques/UnitTests/_GoldImages/SubGraph/ConstOverride/0.png differ
diff --git a/Techniques/UnitTests/_GoldImages/SubGraph/SubGraphLoops/0.png b/Techniques/UnitTests/_GoldImages/SubGraph/SubGraphLoops/0.png
index 7a4fda5d..706648d2 100644
Binary files a/Techniques/UnitTests/_GoldImages/SubGraph/SubGraphLoops/0.png and b/Techniques/UnitTests/_GoldImages/SubGraph/SubGraphLoops/0.png differ
diff --git a/Techniques/UnitTests/_GoldImages/SubGraph/SubGraphTest/0.png b/Techniques/UnitTests/_GoldImages/SubGraph/SubGraphTest/0.png
index 0a701e66..1e8bb127 100644
Binary files a/Techniques/UnitTests/_GoldImages/SubGraph/SubGraphTest/0.png and b/Techniques/UnitTests/_GoldImages/SubGraph/SubGraphTest/0.png differ
diff --git a/Techniques/UnitTests/_GoldImages/SubGraph/SubInSub/0.png b/Techniques/UnitTests/_GoldImages/SubGraph/SubInSub/0.png
index cf28de49..367ccc64 100644
Binary files a/Techniques/UnitTests/_GoldImages/SubGraph/SubInSub/0.png and b/Techniques/UnitTests/_GoldImages/SubGraph/SubInSub/0.png differ
diff --git a/Techniques/UnitTests/_GoldImages/Textures/Mips_Imported_2D/0.png b/Techniques/UnitTests/_GoldImages/Textures/Mips_Imported_2D/0.png
index 947fce18..20013f87 100644
Binary files a/Techniques/UnitTests/_GoldImages/Textures/Mips_Imported_2D/0.png and b/Techniques/UnitTests/_GoldImages/Textures/Mips_Imported_2D/0.png differ
diff --git a/Techniques/UnitTests/_GoldImages/Textures/Mips_Imported_2DArray/0.png b/Techniques/UnitTests/_GoldImages/Textures/Mips_Imported_2DArray/0.png
index 947fce18..20013f87 100644
Binary files a/Techniques/UnitTests/_GoldImages/Textures/Mips_Imported_2DArray/0.png and b/Techniques/UnitTests/_GoldImages/Textures/Mips_Imported_2DArray/0.png differ
diff --git a/UserDocumentation/GigiViewerDX12_Documentation.docx b/UserDocumentation/GigiViewerDX12_Documentation.docx
index 640aa822..3488ab53 100644
Binary files a/UserDocumentation/GigiViewerDX12_Documentation.docx and b/UserDocumentation/GigiViewerDX12_Documentation.docx differ
diff --git a/UserDocumentation/GigiViewerDX12_Documentation.pdf b/UserDocumentation/GigiViewerDX12_Documentation.pdf
index 287cdebc..09627127 100644
Binary files a/UserDocumentation/GigiViewerDX12_Documentation.pdf and b/UserDocumentation/GigiViewerDX12_Documentation.pdf differ
diff --git a/UserDocumentation/PythonTypes.txt b/UserDocumentation/PythonTypes.txt
index e054dcec..54bf8e38 100644
--- a/UserDocumentation/PythonTypes.txt
+++ b/UserDocumentation/PythonTypes.txt
@@ -30,6 +30,12 @@
   StructFieldSemanticFromString()
   StructFieldSemanticToString()
 
+  GigiSlangOptimizationLevelFromString()
+  GigiSlangOptimizationLevelToString()
+
+  GigiSlangFloatingPointModeFromString()
+  GigiSlangFloatingPointModeToString()
+
   VariableVisibilityFromString()
   VariableVisibilityToString()
 
@@ -144,9 +150,11 @@
     Host.DataFieldType_Bool = 12
     Host.DataFieldType_Float4x4 = 13
     Host.DataFieldType_Uint_16 = 14
-    Host.DataFieldType_Count = 15
-    Host.DataFieldType_LAST = 15
-    Host.DataFieldType_COUNT = 16
+    Host.DataFieldType_Int_64 = 15
+    Host.DataFieldType_Uint_64 = 16
+    Host.DataFieldType_Count = 17
+    Host.DataFieldType_LAST = 17
+    Host.DataFieldType_COUNT = 18
 
   TextureViewType:
     Host.TextureViewType_FIRST = 0
@@ -159,8 +167,10 @@
     Host.TextureViewType_Float2 = 6
     Host.TextureViewType_Float3 = 7
     Host.TextureViewType_Float4 = 8
-    Host.TextureViewType_LAST = 8
-    Host.TextureViewType_COUNT = 9
+    Host.TextureViewType_Int_64 = 9
+    Host.TextureViewType_Uint_64 = 10
+    Host.TextureViewType_LAST = 10
+    Host.TextureViewType_COUNT = 11
 
   SamplerFilter:
     Host.SamplerFilter_FIRST = 0
@@ -218,6 +228,23 @@
     Host.StructFieldSemantic_LAST = 7
     Host.StructFieldSemantic_COUNT = 8
 
+  GigiSlangOptimizationLevel:
+    Host.GigiSlangOptimizationLevel_FIRST = 0
+    Host.GigiSlangOptimizationLevel_None = 0
+    Host.GigiSlangOptimizationLevel_Default = 1
+    Host.GigiSlangOptimizationLevel_High = 2
+    Host.GigiSlangOptimizationLevel_Maximum = 3
+    Host.GigiSlangOptimizationLevel_LAST = 3
+    Host.GigiSlangOptimizationLevel_COUNT = 4
+
+  GigiSlangFloatingPointMode:
+    Host.GigiSlangFloatingPointMode_FIRST = 0
+    Host.GigiSlangFloatingPointMode_Default = 0
+    Host.GigiSlangFloatingPointMode_Fast = 1
+    Host.GigiSlangFloatingPointMode_Precise = 2
+    Host.GigiSlangFloatingPointMode_LAST = 2
+    Host.GigiSlangFloatingPointMode_COUNT = 3
+
   VariableVisibility:
     Host.VariableVisibility_FIRST = 0
     Host.VariableVisibility_Internal = 0
diff --git a/Version.h b/Version.h
index cbccaf2a..6595c82e 100644
--- a/Version.h
+++ b/Version.h
@@ -3,6 +3,6 @@
 //        Copyright (c) 2024 Electronic Arts Inc. All rights reserved.       //
 ///////////////////////////////////////////////////////////////////////////////
 
-#define GIGI_VERSION() "0.99b"
-#define GIGI_VERSION_WITH_BUILD_NUMBER() "0.99.8"
+#define GIGI_VERSION() "0.991b"
+#define GIGI_VERSION_WITH_BUILD_NUMBER() "0.991.0"
 #define BROWSER_DB_VERSION() "1.0"
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTestLogic.h b/_GeneratedCode/UnitTests/DX12/UnitTestLogic.h
index f2ba71d0..befd9be8 100644
--- a/_GeneratedCode/UnitTests/DX12/UnitTestLogic.h
+++ b/_GeneratedCode/UnitTests/DX12/UnitTestLogic.h
@@ -1965,6 +1965,12 @@ void UnitTestImpl(UnitTestContext& testContext, ID3D12Device* device, ID3D12Grap
         testContext.VerifyReadbackPNG(device, commandList, context->m_output.texture_Texture, context->m_output.c_texture_Texture_endingState, 0, 0, "..\\..\\..\\Techniques\\UnitTests\\_GoldImages\\RayTrace\\TwoRayGensSubgraph\\0.png");
 }
 
+void UnitTestImpl(UnitTestContext& testContext, ID3D12Device* device, ID3D12GraphicsCommandList* commandList, DX12Utils::ReadbackHelper& readbackHelper, ConstOverride::Context* context, UnitTestEvent event) // ConstOverride: checks texture_Output against its gold PNG
+{
+    if (testContext.IsFirstPostExecute(event))
+        testContext.VerifyReadbackPNG(device, commandList, context->m_output.texture_Output, context->m_output.c_texture_Output_endingState, 0, 0, "..\\..\\..\\Techniques\\UnitTests\\_GoldImages\\SubGraph\\ConstOverride\\0.png"); // "SubGraph" matches the casing of the gold-image directory on disk
+}
+
 void UnitTestImpl(UnitTestContext& testContext, ID3D12Device* device, ID3D12GraphicsCommandList* commandList, DX12Utils::ReadbackHelper& readbackHelper, simpleRT::Context* context, UnitTestEvent event)
 {
     if (testContext.IsFirstPreExecute(event))
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/Compute/SlangAutoDiff/shaders/SlangAutoDiff_Descend.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/Compute/SlangAutoDiff/shaders/SlangAutoDiff_Descend.hlsl
index 8959092d..26c1bdd1 100644
--- a/_GeneratedCode/UnitTests/DX12/UnitTests/Compute/SlangAutoDiff/shaders/SlangAutoDiff_Descend.hlsl
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/Compute/SlangAutoDiff/shaders/SlangAutoDiff_Descend.hlsl
@@ -1,8 +1,12 @@
-#pragma pack_matrix(column_major)
+#pragma pack_matrix(row_major)
 #ifdef SLANG_HLSL_ENABLE_NVAPI
 #include "nvHLSLExtns.h"
 #endif
-#pragma warning(disable: 3557)
+
+#ifndef __DXC_VERSION_MAJOR
+// warning X3557: loop doesn't seem to do anything, forcing loop to unroll
+#pragma warning(disable : 3557)
+#endif
 
 
 #line 12 "SlangAutoDiff_Descend.hlsl"
@@ -24,7 +28,7 @@ cbuffer _DescendCB_0 : register(b0)
     Struct_DescendCB_0 _DescendCB_0;
 }
 
-#line 89 "core"
+#line 7818 "hlsl.meta.slang"
 struct DiffPair_float_0
 {
     float primal_0;
@@ -36,16 +40,17 @@ struct DiffPair_float_0
 void _d_exp_0(inout DiffPair_float_0 dpx_0, float dOut_0)
 {
 
-#line 886 "diff.meta.slang"
+#line 1868 "diff.meta.slang"
     float _S1 = exp(dpx_0.primal_0) * dOut_0;
 
-#line 886
+#line 1868
     dpx_0.primal_0 = dpx_0.primal_0;
 
-#line 886
+#line 1868
     dpx_0.differential_0 = _S1;
 
-#line 860
+
+
     return;
 }
 
@@ -54,32 +59,14 @@ void _d_exp_0(inout DiffPair_float_0 dpx_0, float dOut_0)
 DiffPair_float_0 _d_exp_1(DiffPair_float_0 dpx_1)
 {
 
-#line 1
-    DiffPair_float_0 _S2 = { exp(dpx_1.primal_0), exp(dpx_1.primal_0) * dpx_1.differential_0 };
+#line 1841 "diff.meta.slang"
+    float _S2 = exp(dpx_1.primal_0);
 
-#line 829 "diff.meta.slang"
-    return _S2;
-}
-
-
-#line 829
-float s_bwd_exp_0(float _S3)
-{
+#line 1841
+    DiffPair_float_0 _S3 = { _S2, _S2 * dpx_1.differential_0 };
 
-#line 829
-    return exp(_S3);
-}
-
-
-#line 829
-void s_bwd_exp_1(inout DiffPair_float_0 _S4, float _S5)
-{
-
-#line 829
-    _d_exp_0(_S4, _S5);
-
-#line 829
-    return;
+#line 1841
+    return _S3;
 }
 
 
@@ -87,20 +74,11 @@ void s_bwd_exp_1(inout DiffPair_float_0 _S4, float _S5)
 DiffPair_float_0 _d_sqrt_0(DiffPair_float_0 dpx_2)
 {
 
-#line 1
-    DiffPair_float_0 _S6 = { sqrt(dpx_2.primal_0), 0.5 / sqrt(max(0.00000010000000116861, dpx_2.primal_0)) * dpx_2.differential_0 };
+#line 1838 "diff.meta.slang"
+    DiffPair_float_0 _S4 = { sqrt(dpx_2.primal_0), 0.5f / sqrt(max(1.00000001168609742e-07f, dpx_2.primal_0)) * dpx_2.differential_0 };
 
-#line 829 "diff.meta.slang"
-    return _S6;
-}
-
-
-#line 829
-float s_bwd_sqrt_0(float _S7)
-{
 
-#line 829
-    return sqrt(_S7);
+    return _S4;
 }
 
 
@@ -109,109 +87,136 @@ float GetHeightAtPos_0(float x_0, float y_0, float2 gaussPos_0, float2 gaussSigm
 {
 
 
-    float _S8 = gaussSigma_0.x;
+    float _S5 = gaussSigma_0.x;
 
 #line 11
-    float XOverSigma_0 = x_0 / _S8;
+    float XOverSigma_0 = x_0 / _S5;
+
+    float _S6 = sqrt(6.28318548202514648f);
 
 #line 18
-    float _S9 = gaussSigma_0.y;
+    float _S7 = gaussSigma_0.y;
 
 #line 18
-    float XOverSigma_1 = y_0 / _S9;
+    float XOverSigma_1 = y_0 / _S7;
 
 #line 23
-    return exp(-0.5 * XOverSigma_0 * XOverSigma_0) / (_S8 * sqrt(6.28318548202514648438)) * (exp(-0.5 * XOverSigma_1 * XOverSigma_1) / (_S9 * sqrt(6.28318548202514648438)));
+    return exp(-0.5f * XOverSigma_0 * XOverSigma_0) / (_S5 * _S6) * (exp(-0.5f * XOverSigma_1 * XOverSigma_1) / (_S7 * _S6));
 }
 
 
 #line 57
-void s_bwd_GetHeightAtPos_0(inout DiffPair_float_0 dpx_3, inout DiffPair_float_0 dpy_0, float2 gaussPos_1, float2 gaussSigma_1, float _s_dOut_0)
+float s_primal_ctx_exp_0(float _S8)
 {
 
-#line 7
-    float _S10 = gaussSigma_1.x;
+#line 57
+    return exp(_S8);
+}
 
-#line 7
-    float _S11 = gaussSigma_1.y;
 
-#line 7
-    float XOverSigma_2 = dpx_3.primal_0 / _S10;
+#line 57
+float s_primal_ctx_sqrt_0(float _S9)
+{
 
-#line 7
-    float _S12 = -0.5 * XOverSigma_2;
+#line 57
+    return sqrt(_S9);
+}
 
-#line 7
-    float _S13 = _S12 * XOverSigma_2;
 
-#line 7
-    float _S14 = _S10 * s_bwd_sqrt_0(6.28318548202514648438);
+#line 57
+void s_bwd_prop_exp_0(inout DiffPair_float_0 _S10, float _S11)
+{
 
-#line 7
-    float _S15 = _S14 * _S14;
+#line 57
+    _d_exp_0(_S10, _S11);
 
-#line 7
-    float XOverSigma_3 = dpy_0.primal_0 / _S11;
+#line 57
+    return;
+}
 
-#line 7
-    float _S16 = -0.5 * XOverSigma_3;
 
 #line 7
-    float _S17 = _S16 * XOverSigma_3;
+void s_bwd_prop_GetHeightAtPos_0(inout DiffPair_float_0 dpx_3, inout DiffPair_float_0 dpy_0, float2 gaussPos_1, float2 gaussSigma_1, float _s_dOut_0)
+{
 
-#line 7
-    float _S18 = _S11 * s_bwd_sqrt_0(6.28318548202514648438);
 
-#line 7
-    float s_diff_gaussX_T_0 = s_bwd_exp_0(_S17) / _S18 * _s_dOut_0;
+    float _S12 = gaussSigma_1.x;
 
-#line 7
-    float _S19 = _S18 * (s_bwd_exp_0(_S13) / _S14 * _s_dOut_0 / (_S18 * _S18));
+#line 11
+    float XOverSigma_2 = dpx_3.primal_0 / _S12;
+    float _S13 = -0.5f * XOverSigma_2;
 
-#line 7
-    DiffPair_float_0 _S20;
+#line 12
+    float _S14 = _S13 * XOverSigma_2;
 
-#line 7
-    _S20.primal_0 = _S17;
+#line 12
+    float _S15 = s_primal_ctx_sqrt_0(6.28318548202514648f);
+    float _S16 = _S12 * _S15;
 
-#line 7
-    _S20.differential_0 = 0.0;
+#line 13
+    float _S17 = _S16 * _S16;
 
-#line 7
-    s_bwd_exp_1(_S20, _S19);
+#line 18
+    float _S18 = gaussSigma_1.y;
 
-#line 7
-    float _S21 = (_S16 * _S20.differential_0 + -0.5 * (XOverSigma_3 * _S20.differential_0)) / _S11;
+#line 18
+    float XOverSigma_3 = dpy_0.primal_0 / _S18;
+    float _S19 = -0.5f * XOverSigma_3;
 
-#line 7
-    float _S22 = _S14 * (s_diff_gaussX_T_0 / _S15);
+#line 19
+    float _S20 = _S19 * XOverSigma_3;
+    float _S21 = _S18 * _S15;
 
-#line 7
+
+    float s_diff_gaussX_T_0 = s_primal_ctx_exp_0(_S20) / _S21 * _s_dOut_0;
+
+#line 20
+    float _S22 = _S21 * (s_primal_ctx_exp_0(_S14) / _S16 * _s_dOut_0 / (_S21 * _S21));
+
+#line 19
     DiffPair_float_0 _S23;
 
-#line 7
-    _S23.primal_0 = _S13;
+#line 19
+    _S23.primal_0 = _S20;
 
-#line 7
-    _S23.differential_0 = 0.0;
+#line 19
+    _S23.differential_0 = 0.0f;
 
-#line 7
-    s_bwd_exp_1(_S23, _S22);
+#line 19
+    s_bwd_prop_exp_0(_S23, _S22);
 
-#line 7
-    float _S24 = (_S12 * _S23.differential_0 + -0.5 * (XOverSigma_2 * _S23.differential_0)) / _S10;
+#line 18
+    float _S24 = (_S19 * _S23.differential_0 + -0.5f * (XOverSigma_3 * _S23.differential_0)) / _S18;
 
-#line 7
+#line 13
+    float _S25 = _S16 * (s_diff_gaussX_T_0 / _S17);
+
+#line 12
+    DiffPair_float_0 _S26;
+
+#line 12
+    _S26.primal_0 = _S14;
+
+#line 12
+    _S26.differential_0 = 0.0f;
+
+#line 12
+    s_bwd_prop_exp_0(_S26, _S25);
+
+#line 11
+    float _S27 = (_S13 * _S26.differential_0 + -0.5f * (XOverSigma_2 * _S26.differential_0)) / _S12;
+
+#line 11
     dpy_0.primal_0 = dpy_0.primal_0;
 
-#line 7
-    dpy_0.differential_0 = _S21;
+#line 11
+    dpy_0.differential_0 = _S24;
 
-#line 7
+#line 11
     dpx_3.primal_0 = dpx_3.primal_0;
 
-#line 7
-    dpx_3.differential_0 = _S24;
+#line 11
+    dpx_3.differential_0 = _S27;
 
 #line 7
     return;
@@ -219,11 +224,11 @@ void s_bwd_GetHeightAtPos_0(inout DiffPair_float_0 dpx_3, inout DiffPair_float_0
 
 
 #line 7
-void s_bwd_GetHeightAtPos_1(inout DiffPair_float_0 _S25, inout DiffPair_float_0 _S26, float2 _S27, float2 _S28, float _S29)
+void s_bwd_GetHeightAtPos_0(inout DiffPair_float_0 _S28, inout DiffPair_float_0 _S29, float2 _S30, float2 _S31, float _S32)
 {
 
 #line 7
-    s_bwd_GetHeightAtPos_0(_S25, _S26, _S27, _S28, _S29);
+    s_bwd_prop_GetHeightAtPos_0(_S28, _S29, _S30, _S31, _S32);
 
 #line 7
     return;
@@ -234,104 +239,85 @@ void s_bwd_GetHeightAtPos_1(inout DiffPair_float_0 _S25, inout DiffPair_float_0
 DiffPair_float_0 s_fwd_GetHeightAtPos_0(DiffPair_float_0 dpx_4, DiffPair_float_0 dpy_1, float2 gaussPos_2, float2 gaussSigma_2)
 {
 
-#line 7
-    float _S30 = gaussSigma_2.x;
-
-#line 7
-    float XOverSigma_4 = dpx_4.primal_0 / _S30;
-
-#line 7
-    float s_diff_XOverSigma_0 = dpx_4.differential_0 / _S30;
 
-#line 7
-    float _S31 = -0.5 * XOverSigma_4;
+    float _S33 = gaussSigma_2.x;
 
-#line 7
-    DiffPair_float_0 _S32 = { _S31 * XOverSigma_4, s_diff_XOverSigma_0 * -0.5 * XOverSigma_4 + s_diff_XOverSigma_0 * _S31 };
-
-#line 7
-    DiffPair_float_0 _S33 = _d_exp_1(_S32);
+#line 11
+    float XOverSigma_4 = dpx_4.primal_0 / _S33;
 
-#line 7
-    DiffPair_float_0 _S34 = { 6.28318548202514648438, 0.0 };
+#line 11
+    float s_diff_XOverSigma_0 = dpx_4.differential_0 / _S33;
+    float _S34 = -0.5f * XOverSigma_4;
 
-#line 7
-    DiffPair_float_0 _S35 = _d_sqrt_0(_S34);
+#line 12
+    DiffPair_float_0 _S35 = { _S34 * XOverSigma_4, s_diff_XOverSigma_0 * -0.5f * XOverSigma_4 + s_diff_XOverSigma_0 * _S34 };
 
-#line 7
-    float _S36 = _S30 * _S35.primal_0;
+#line 12
+    DiffPair_float_0 _S36 = _d_exp_1(_S35);
 
-#line 7
-    float gaussX_0 = _S33.primal_0 / _S36;
+#line 12
+    DiffPair_float_0 _S37 = { 6.28318548202514648f, 0.0f };
+    DiffPair_float_0 _S38 = _d_sqrt_0(_S37);
 
-#line 7
-    float _S37 = gaussSigma_2.y;
+#line 13
+    float _S39 = _S33 * _S38.primal_0;
 
-#line 7
-    float XOverSigma_5 = dpy_1.primal_0 / _S37;
+#line 13
+    float gaussX_0 = _S36.primal_0 / _S39;
 
-#line 7
-    float s_diff_XOverSigma_1 = dpy_1.differential_0 / _S37;
+#line 18
+    float _S40 = gaussSigma_2.y;
 
-#line 7
-    float _S38 = -0.5 * XOverSigma_5;
+#line 18
+    float XOverSigma_5 = dpy_1.primal_0 / _S40;
 
-#line 7
-    DiffPair_float_0 _S39 = { _S38 * XOverSigma_5, s_diff_XOverSigma_1 * -0.5 * XOverSigma_5 + s_diff_XOverSigma_1 * _S38 };
+#line 18
+    float s_diff_XOverSigma_1 = dpy_1.differential_0 / _S40;
+    float _S41 = -0.5f * XOverSigma_5;
 
-#line 7
-    DiffPair_float_0 _S40 = _d_exp_1(_S39);
+#line 19
+    DiffPair_float_0 _S42 = { _S41 * XOverSigma_5, s_diff_XOverSigma_1 * -0.5f * XOverSigma_5 + s_diff_XOverSigma_1 * _S41 };
 
-#line 7
-    DiffPair_float_0 _S41 = _d_sqrt_0(_S34);
+#line 19
+    DiffPair_float_0 _S43 = _d_exp_1(_S42);
+    float _S44 = _S40 * _S38.primal_0;
 
-#line 7
-    float _S42 = _S37 * _S41.primal_0;
+#line 20
+    float gaussY_0 = _S43.primal_0 / _S44;
 
-#line 7
-    float gaussY_0 = _S40.primal_0 / _S42;
+#line 20
+    DiffPair_float_0 _S45 = { gaussX_0 * gaussY_0, (_S36.differential_0 * _S39 - _S36.primal_0 * (_S38.differential_0 * _S33)) / (_S39 * _S39) * gaussY_0 + (_S43.differential_0 * _S44 - _S43.primal_0 * (_S38.differential_0 * _S40)) / (_S44 * _S44) * gaussX_0 };
 
-#line 7
-    DiffPair_float_0 _S43 = { gaussX_0 * gaussY_0, (_S33.differential_0 * _S36 - _S33.primal_0 * (_S35.differential_0 * _S30)) / (_S36 * _S36) * gaussY_0 + (_S40.differential_0 * _S42 - _S40.primal_0 * (_S41.differential_0 * _S37)) / (_S42 * _S42) * gaussX_0 };
 
-#line 7
-    return _S43;
+    return _S45;
 }
 
 
-#line 27
 [shader("compute")][numthreads(1, 1, 1)]
-void csmain(uint3 DTid_0 : SV_DISPATCHTHREADID)
+void csmain(uint3 DTid_0 : SV_DispatchThreadID)
 {
 
 #line 29
-    float _S44 = Data_0[0U];
+    float _S46 = Data_0.Load(int(0));
 
 #line 29
-    float _S45 = Data_0[1U];
+    float _S47 = Data_0.Load(int(1));
 
 #line 29
-    float2 ballPos_0 = float2(_S44, _S45);
-    float2 _S46 = float2(0.0, 0.0);
-
-#line 100
-    float2 _S47 = float2(0.00100000004749745131, 0.00100000004749745131);
+    float2 ballPos_0 = float2(_S46, _S47);
+    float2 _S48 = float2(0.0f, 0.0f);
 
-#line 100
-    float2 _S48 = float2(0.99000000953674316406, 0.99000000953674316406);
-
-#line 100
+#line 30
     int i_0 = int(0);
 
-#line 100
-    float2 ballPosGradient_0 = _S46;
+#line 30
+    float2 ballPosGradient_0 = _S48;
 
-#line 100
     for(;;)
     {
 
 #line 32
-        if(i_0 < _DescendCB_0.NumGaussians_0)
+        if(i_0 < (_DescendCB_0.NumGaussians_0))
         {
         }
         else
@@ -341,28 +327,25 @@ void csmain(uint3 DTid_0 : SV_DISPATCHTHREADID)
             break;
         }
 
-        int _S49 = int(4) + i_0 * int(5);
-
-#line 35
-        float _S50 = Data_0[uint(_S49)];
+        float _S49 = Data_0.Load(int(uint(int(4) + i_0 * int(5))));
 
 #line 35
-        float _S51 = Data_0[uint(_S49 + int(1))];
+        float _S50 = Data_0.Load(int(uint(int(4) + i_0 * int(5) + int(1))));
 
 #line 35
-        float2 gaussPos_3 = float2(_S50, _S51);
-        float gaussAngle_0 = Data_0[uint(_S49 + int(2))];
-        float _S52 = Data_0[uint(_S49 + int(3))];
+        float2 gaussPos_3 = float2(_S49, _S50);
+        float _S51 = Data_0.Load(int(uint(int(4) + i_0 * int(5) + int(2))));
+        float _S52 = Data_0.Load(int(uint(int(4) + i_0 * int(5) + int(3))));
 
 #line 37
-        float _S53 = Data_0[uint(_S49 + int(4))];
+        float _S53 = Data_0.Load(int(uint(int(4) + i_0 * int(5) + int(4))));
 
 #line 37
         float2 gaussSigma_3 = float2(_S52, _S53);
 
 
         float2 relativePos_0 = ballPos_0 - gaussPos_3;
-        float _S54 = - gaussAngle_0;
+        float _S54 = - _S51;
 
 #line 41
         float cosTheta_0 = cos(_S54);
@@ -379,7 +362,7 @@ void csmain(uint3 DTid_0 : SV_DISPATCHTHREADID)
 
 
 
-        float2 dFLocal_0 = _S46;
+        float2 dFLocal_0 = _S48;
 
 
         if(bool(_DescendCB_0.UseBackwardAD_0))
@@ -393,15 +376,15 @@ void csmain(uint3 DTid_0 : SV_DISPATCHTHREADID)
             ballPosX_0.primal_0 = _S57;
 
 #line 57
-            ballPosX_0.differential_0 = 0.0;
+            ballPosX_0.differential_0 = 0.0f;
             DiffPair_float_0 ballPosY_0;
 
 #line 58
             ballPosY_0.primal_0 = _S58;
 
 #line 58
-            ballPosY_0.differential_0 = 0.0;
-            s_bwd_GetHeightAtPos_1(ballPosX_0, ballPosY_0, gaussPos_3, gaussSigma_3, height_0);
+            ballPosY_0.differential_0 = 0.0f;
+            s_bwd_GetHeightAtPos_0(ballPosX_0, ballPosY_0, gaussPos_3, gaussSigma_3, height_0);
 
             dFLocal_0 = float2(ballPosX_0.differential_0, ballPosY_0.differential_0);
 
@@ -411,29 +394,29 @@ void csmain(uint3 DTid_0 : SV_DISPATCHTHREADID)
         {
 
 #line 52
-            DiffPair_float_0 _S59 = { _S57, 1.0 };
+            DiffPair_float_0 _S59 = { _S57, 1.0f };
 
 #line 52
-            DiffPair_float_0 _S60 = { _S58, 0.0 };
+            DiffPair_float_0 _S60 = { _S58, 0.0f };
 
 #line 71
             dFLocal_0[int(0)] = s_fwd_GetHeightAtPos_0(_S59, _S60, gaussPos_3, gaussSigma_3).differential_0;
 
 #line 71
-            DiffPair_float_0 _S61 = { _S57, 0.0 };
+            DiffPair_float_0 _S61 = { _S57, 0.0f };
 
 #line 71
-            DiffPair_float_0 _S62 = { _S58, 1.0 };
+            DiffPair_float_0 _S62 = { _S58, 1.0f };
 
 #line 79
-            dFLocal_0[int(1)] = dFLocal_0.y + s_fwd_GetHeightAtPos_0(_S61, _S62, gaussPos_3, gaussSigma_3).differential_0;
+            dFLocal_0[int(1)] = dFLocal_0[int(1)] + s_fwd_GetHeightAtPos_0(_S61, _S62, gaussPos_3, gaussSigma_3).differential_0;
 
 #line 52
         }
 
 #line 84
-        float cosNegTheta_0 = cos(gaussAngle_0);
-        float sinNegTheta_0 = sin(gaussAngle_0);
+        float cosNegTheta_0 = cos(_S51);
+        float sinNegTheta_0 = sin(_S51);
 
 #line 90
         float2 ballPosGradient_1 = ballPosGradient_0 + float2(dFLocal_0.x * cosNegTheta_0 - dFLocal_0.y * sinNegTheta_0, dFLocal_0.x * sinNegTheta_0 + dFLocal_0.y * cosNegTheta_0);
@@ -452,7 +435,7 @@ void csmain(uint3 DTid_0 : SV_DISPATCHTHREADID)
 
 #line 94
     float2 adjust_1;
-    if(length(adjust_0) > _DescendCB_0.MaximumStepSize_0)
+    if((length(adjust_0)) > (_DescendCB_0.MaximumStepSize_0))
     {
 
 #line 95
@@ -470,15 +453,15 @@ void csmain(uint3 DTid_0 : SV_DISPATCHTHREADID)
     }
 
 #line 100
-    float2 ballPos_1 = clamp(ballPos_0 + adjust_1, _S47, _S48);
+    float2 ballPos_1 = clamp(ballPos_0 + adjust_1, float2(0.00100000004749745f, 0.00100000004749745f), float2(0.99000000953674316f, 0.99000000953674316f));
 
 
-    Data_0[0U] = ballPos_1.x;
-    Data_0[1U] = ballPos_1.y;
+    ((Data_0))[(0U)] = (ballPos_1.x);
+    ((Data_0))[(1U)] = (ballPos_1.y);
 
 
-    Data_0[2U] = ballPosGradient_0.x;
-    Data_0[3U] = ballPosGradient_0.y;
+    ((Data_0))[(2U)] = (ballPosGradient_0.x);
+    ((Data_0))[(3U)] = (ballPosGradient_0.y);
     return;
 }
 
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/Raster/simpleRasterInSubgraph/private/technique.cpp b/_GeneratedCode/UnitTests/DX12/UnitTests/Raster/simpleRasterInSubgraph/private/technique.cpp
index 177ef070..1396d00e 100644
--- a/_GeneratedCode/UnitTests/DX12/UnitTests/Raster/simpleRasterInSubgraph/private/technique.cpp
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/Raster/simpleRasterInSubgraph/private/technique.cpp
@@ -1123,7 +1123,7 @@ namespace simpleRasterInSubgraph
                 m_internal.drawCall_DoSimpleRaster_Rasterize_rootSig->SetName(L"DoSimpleRaster_Rasterize");
 
             ShaderCompilationInfo shaderCompilationInfoVS;
-            shaderCompilationInfoVS.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "simpleRaster/simpleRaster_VS.hlsl";
+            shaderCompilationInfoVS.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "simpleRaster_DoSimpleRaster/simpleRaster_VS.hlsl";
             shaderCompilationInfoVS.entryPoint = "VSMain";
             shaderCompilationInfoVS.shaderModel = "vs_6_1";
             shaderCompilationInfoVS.debugName = (c_debugNames ? "DoSimpleRaster_Rasterize" : "");
@@ -1134,7 +1134,7 @@ namespace simpleRasterInSubgraph
                 return false;
 
             ShaderCompilationInfo shaderCompilationInfoPS;
-            shaderCompilationInfoPS.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "simpleRaster/simpleRaster_PS.hlsl";
+            shaderCompilationInfoPS.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "simpleRaster_DoSimpleRaster/simpleRaster_PS.hlsl";
             shaderCompilationInfoPS.entryPoint = "PSMain";
             shaderCompilationInfoPS.shaderModel = "ps_6_1";
             shaderCompilationInfoPS.debugName = (c_debugNames ? "DoSimpleRaster_Rasterize" : "");
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/Raster/simpleRasterInSubgraph/shaders/simpleRaster_DoSimpleRaster/simpleRaster_PS.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/Raster/simpleRasterInSubgraph/shaders/simpleRaster_DoSimpleRaster/simpleRaster_PS.hlsl
new file mode 100644
index 00000000..0f125015
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/Raster/simpleRasterInSubgraph/shaders/simpleRaster_DoSimpleRaster/simpleRaster_PS.hlsl
@@ -0,0 +1,24 @@
+// SimpleRaster technique, shader PixelShader
+
+
+
+#line 2
+
+
+struct PSInput // AKA VSOutput
+{
+	float4 position   : SV_POSITION; // system-value position; not read by PSMain
+	float3 normal     : TEXCOORD1; // normal received through TEXCOORD1; PSMain turns it into a color
+};
+
+struct PSOutput
+{
+	float4 colorTarget : SV_Target0; // color written to render target 0
+};
+
+PSOutput PSMain(PSInput input) // pixel shader entry point: outputs the input normal as a color
+{
+	PSOutput ret = (PSOutput)0; // zero-initialize every output field
+	ret.colorTarget = float4(input.normal * 0.5f + 0.5f, 1.0f); // rgb = normal * 0.5 + 0.5 per component, alpha = 1
+	return ret;
+}
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/Raster/simpleRasterInSubgraph/shaders/simpleRaster_DoSimpleRaster/simpleRaster_VS.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/Raster/simpleRasterInSubgraph/shaders/simpleRaster_DoSimpleRaster/simpleRaster_VS.hlsl
new file mode 100644
index 00000000..c7143bdd
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/Raster/simpleRasterInSubgraph/shaders/simpleRaster_DoSimpleRaster/simpleRaster_VS.hlsl
@@ -0,0 +1,32 @@
+// SimpleRaster technique, shader VertexShader
+
+
+struct Struct__DoSimpleRaster_VertexShaderCB // layout of the constant buffer read by VSMain
+{
+    float4x4 DoSimpleRaster_ViewProjMtx; // matrix VSMain multiplies the vertex position by
+};
+
+ConstantBuffer<Struct__DoSimpleRaster_VertexShaderCB> _DoSimpleRaster_VertexShaderCB : register(b0); // bound at constant-buffer register b0
+
+#line 2
+
+
+struct VSInput // per-vertex attributes consumed by VSMain
+{
+	float3 position   : POSITION; // vertex position; VSMain extends it with w = 1 before transforming
+	float3 normal     : NORMAL; // vertex normal; VSMain forwards it unchanged
+};
+
+struct VSOutput // AKA PSInput
+{
+	float4 position   : SV_POSITION; // transformed position produced by VSMain
+	float3 normal     : TEXCOORD1; // copy of the input normal, passed on through TEXCOORD1
+};
+
+VSOutput VSMain(VSInput input) // vertex shader entry point
+{
+	VSOutput ret = (VSOutput)0; // zero-initialize every output field
+	ret.position = mul(float4(input.position, 1.0f), _DoSimpleRaster_VertexShaderCB.DoSimpleRaster_ViewProjMtx); // position (w = 1) is the left/vector operand of mul, the view-projection matrix the right
+	ret.normal = input.normal; // normal is forwarded without any transform
+	return ret;
+}
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/RayTrace/TwoRayGensSubgraph/private/technique.cpp b/_GeneratedCode/UnitTests/DX12/UnitTests/RayTrace/TwoRayGensSubgraph/private/technique.cpp
index 077c48a1..fbff6583 100644
--- a/_GeneratedCode/UnitTests/DX12/UnitTests/RayTrace/TwoRayGensSubgraph/private/technique.cpp
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/RayTrace/TwoRayGensSubgraph/private/technique.cpp
@@ -99,20 +99,20 @@ namespace TwoRayGensSubgraph
             // Compile shaders
             std::vector<unsigned char> shaderCode[3];
 
-            // Compile RTMiss : TwoRayGensSubgraphA/TwoRayGens1.hlsl Miss1()
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphA/TwoRayGens1.hlsl";
+            // Compile RTMiss : TwoRayGensSubgraphA_A/TwoRayGens1.hlsl Miss1()
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphA_A/TwoRayGens1.hlsl";
             shaderCode[0] = DX12Utils::CompileShaderToByteCode_DXC(shaderCompilationInfo, Context::LogFn);
             if (shaderCode[0].empty())
                 return false;
 
-            // Compile RTClosestHit : TwoRayGensSubgraphA/TwoRayGens1.hlsl ClosestHit1()
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphA/TwoRayGens1.hlsl";
+            // Compile RTClosestHit : TwoRayGensSubgraphA_A/TwoRayGens1.hlsl ClosestHit1()
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphA_A/TwoRayGens1.hlsl";
             shaderCode[1] = DX12Utils::CompileShaderToByteCode_DXC(shaderCompilationInfo, Context::LogFn);
             if (shaderCode[1].empty())
                 return false;
 
-            // Compile RTRayGen : TwoRayGensSubgraphA/TwoRayGens1.hlsl RayGen1()
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphA/TwoRayGens1.hlsl";
+            // Compile RTRayGen : TwoRayGensSubgraphA_A/TwoRayGens1.hlsl RayGen1()
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphA_A/TwoRayGens1.hlsl";
             shaderCode[2] = DX12Utils::CompileShaderToByteCode_DXC(shaderCompilationInfo, Context::LogFn);
             if (shaderCode[2].empty())
                 return false;
@@ -125,7 +125,7 @@ namespace TwoRayGensSubgraph
             soDesc.NumSubobjects = 8;
             soDesc.pSubobjects = subObjects;
 
-            // DXIL Library for RTMiss : TwoRayGensSubgraphA/TwoRayGens1.hlsl Miss1()
+            // DXIL Library for RTMiss : TwoRayGensSubgraphA_A/TwoRayGens1.hlsl Miss1()
             {
                 static D3D12_EXPORT_DESC exportDesc;
                 exportDesc.Name = L"Miss1_0";
@@ -142,7 +142,7 @@ namespace TwoRayGensSubgraph
                 subObjects[0].pDesc = &libDesc;
             }
 
-            // DXIL Library for RTClosestHit : TwoRayGensSubgraphA/TwoRayGens1.hlsl ClosestHit1()
+            // DXIL Library for RTClosestHit : TwoRayGensSubgraphA_A/TwoRayGens1.hlsl ClosestHit1()
             {
                 static D3D12_EXPORT_DESC exportDesc;
                 exportDesc.Name = L"ClosestHit1_1";
@@ -159,7 +159,7 @@ namespace TwoRayGensSubgraph
                 subObjects[1].pDesc = &libDesc;
             }
 
-            // DXIL Library for RTRayGen : TwoRayGensSubgraphA/TwoRayGens1.hlsl RayGen1()
+            // DXIL Library for RTRayGen : TwoRayGensSubgraphA_A/TwoRayGens1.hlsl RayGen1()
             {
                 static D3D12_EXPORT_DESC exportDesc;
                 exportDesc.Name = L"RayGen1_2";
@@ -327,26 +327,26 @@ namespace TwoRayGensSubgraph
             // Compile shaders
             std::vector<unsigned char> shaderCode[4];
 
-            // Compile RTMiss : TwoRayGensSubgraphB/TwoRayGens2.hlsl Miss2A()
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphB/TwoRayGens2.hlsl";
+            // Compile RTMiss : TwoRayGensSubgraphB_B/TwoRayGens2.hlsl Miss2A()
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphB_B/TwoRayGens2.hlsl";
             shaderCode[0] = DX12Utils::CompileShaderToByteCode_DXC(shaderCompilationInfo, Context::LogFn);
             if (shaderCode[0].empty())
                 return false;
 
-            // Compile RTMiss : TwoRayGensSubgraphB/TwoRayGens2.hlsl Miss2B()
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphB/TwoRayGens2.hlsl";
+            // Compile RTMiss : TwoRayGensSubgraphB_B/TwoRayGens2.hlsl Miss2B()
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphB_B/TwoRayGens2.hlsl";
             shaderCode[1] = DX12Utils::CompileShaderToByteCode_DXC(shaderCompilationInfo, Context::LogFn);
             if (shaderCode[1].empty())
                 return false;
 
-            // Compile RTClosestHit : TwoRayGensSubgraphB/TwoRayGens2.hlsl ClosestHit2()
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphB/TwoRayGens2.hlsl";
+            // Compile RTClosestHit : TwoRayGensSubgraphB_B/TwoRayGens2.hlsl ClosestHit2()
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphB_B/TwoRayGens2.hlsl";
             shaderCode[2] = DX12Utils::CompileShaderToByteCode_DXC(shaderCompilationInfo, Context::LogFn);
             if (shaderCode[2].empty())
                 return false;
 
-            // Compile RTRayGen : TwoRayGensSubgraphB/TwoRayGens2.hlsl RayGen2()
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphB/TwoRayGens2.hlsl";
+            // Compile RTRayGen : TwoRayGensSubgraphB_B/TwoRayGens2.hlsl RayGen2()
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "TwoRayGensSubgraphB_B/TwoRayGens2.hlsl";
             shaderCode[3] = DX12Utils::CompileShaderToByteCode_DXC(shaderCompilationInfo, Context::LogFn);
             if (shaderCode[3].empty())
                 return false;
@@ -359,7 +359,7 @@ namespace TwoRayGensSubgraph
             soDesc.NumSubobjects = 9;
             soDesc.pSubobjects = subObjects;
 
-            // DXIL Library for RTMiss : TwoRayGensSubgraphB/TwoRayGens2.hlsl Miss2A()
+            // DXIL Library for RTMiss : TwoRayGensSubgraphB_B/TwoRayGens2.hlsl Miss2A()
             {
                 static D3D12_EXPORT_DESC exportDesc;
                 exportDesc.Name = L"Miss2A_0";
@@ -376,7 +376,7 @@ namespace TwoRayGensSubgraph
                 subObjects[0].pDesc = &libDesc;
             }
 
-            // DXIL Library for RTMiss : TwoRayGensSubgraphB/TwoRayGens2.hlsl Miss2B()
+            // DXIL Library for RTMiss : TwoRayGensSubgraphB_B/TwoRayGens2.hlsl Miss2B()
             {
                 static D3D12_EXPORT_DESC exportDesc;
                 exportDesc.Name = L"Miss2B_1";
@@ -393,7 +393,7 @@ namespace TwoRayGensSubgraph
                 subObjects[1].pDesc = &libDesc;
             }
 
-            // DXIL Library for RTClosestHit : TwoRayGensSubgraphB/TwoRayGens2.hlsl ClosestHit2()
+            // DXIL Library for RTClosestHit : TwoRayGensSubgraphB_B/TwoRayGens2.hlsl ClosestHit2()
             {
                 static D3D12_EXPORT_DESC exportDesc;
                 exportDesc.Name = L"ClosestHit2_2";
@@ -410,7 +410,7 @@ namespace TwoRayGensSubgraph
                 subObjects[2].pDesc = &libDesc;
             }
 
-            // DXIL Library for RTRayGen : TwoRayGensSubgraphB/TwoRayGens2.hlsl RayGen2()
+            // DXIL Library for RTRayGen : TwoRayGensSubgraphB_B/TwoRayGens2.hlsl RayGen2()
             {
                 static D3D12_EXPORT_DESC exportDesc;
                 exportDesc.Name = L"RayGen2_3";
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphA_A/TwoRayGens1.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphA_A/TwoRayGens1.hlsl
new file mode 100644
index 00000000..d22d5620
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphA_A/TwoRayGens1.hlsl
@@ -0,0 +1,79 @@
+// TwoRayGens technique
+
+
+struct Struct__A_TwoRayGens1CB // layout of the constant buffer read by RayGen1
+{
+    float3 cameraPos; // used as the ray origin in RayGen1
+    float _padding0; // never read by the shader; presumably alignment padding - confirm against the CPU-side struct
+    float4x4 clipToWorld; // matrix RayGen1 applies to the clip-space point of each pixel
+    float depthNearPlane; // z value RayGen1 gives the clip-space point before unprojecting it
+    float3 _padding1; // never read by the shader; presumably alignment padding - confirm against the CPU-side struct
+};
+
+RWTexture2D<float4> g_texture : register(u0); // UAV that RayGen1 reads and writes per pixel
+RaytracingAccelerationStructure g_scene : register(t0); // acceleration structure passed to TraceRay
+ConstantBuffer<Struct__A_TwoRayGens1CB> _A_TwoRayGens1CB : register(b0); // camera constants for RayGen1
+
+#line 2
+
+
+struct Payload // ray payload shared by RayGen1, Miss1 and ClosestHit1
+{
+	bool hit; // set to true by ClosestHit1 and to false by Miss1
+};
+
+[shader("raygeneration")] // ray generation entry point: traces one ray per dispatch index and stores the hit flag in the red channel
+#line 9
+void RayGen1()
+{
+	uint2 px = DispatchRaysIndex().xy; // pixel handled by this invocation
+	float2 dimensions = float2(DispatchRaysDimensions().xy); // dispatch size, used to normalize px
+
+	float2 screenPos = (float2(px)+0.5f) / dimensions * 2.0 - 1.0; // pixel center remapped from [0, dimensions] to [-1, 1]
+	screenPos.y = -screenPos.y; // flip vertically
+
+	float4 world = mul(float4(screenPos, _A_TwoRayGens1CB.depthNearPlane, 1), _A_TwoRayGens1CB.clipToWorld); // clip-space point multiplied by clipToWorld
+	world.xyz /= world.w; // homogeneous divide
+
+	RayDesc ray;
+	ray.Origin = _A_TwoRayGens1CB.cameraPos;
+	ray.Direction = normalize(world.xyz - ray.Origin); // unit vector from the camera toward the unprojected point
+	ray.TMin = 0;
+	ray.TMax = 1000.0f;
+
+	Payload payload = (Payload)0; // hit starts as false
+
+	TraceRay(g_scene, // Scene (TLAS) buffer
+		RAY_FLAG_FORCE_OPAQUE, // Ray flags
+		0xFF, // Ray mask
+		0, // RayContributionToHitGroupIndex
+		0, // MultiplierForGeometryContributionToHitGroupIndex
+		0, // MissShaderIndex
+		ray,
+		payload);
+
+	float4 color = g_texture[px]; // read-modify-write: green and blue keep their existing values
+	color.a = 1.0f;
+	color.r = payload.hit ? 1.0f : 0.0f; // red = 1 when the closest-hit shader ran, 0 when the miss shader ran
+	g_texture[px] = color;
+}
+
+[shader("miss")] // miss entry point: records that the ray hit nothing
+#line 43
+void Miss1(inout Payload payload : SV_RayPayload)
+{
+	payload.hit = false; // RayGen1 turns this into red = 0
+}
+
+[shader("closesthit")] // closest-hit entry point: records that the ray hit geometry; the intersection attributes are unused
+#line 48
+void ClosestHit1(inout Payload payload : SV_RayPayload, in BuiltInTriangleIntersectionAttributes intersection : SV_IntersectionAttributes)
+{
+	payload.hit = true; // RayGen1 turns this into red = 1
+}
+
+/*
+Shader Resources:
+	Texture g_texture (as UAV)
+	Buffer g_scene (as RTScene)
+*/
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphB_B/TwoRayGens2.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphB_B/TwoRayGens2.hlsl
new file mode 100644
index 00000000..9f4a4b1b
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphB_B/TwoRayGens2.hlsl
@@ -0,0 +1,99 @@
+// TwoRayGens technique
+
+
+struct Struct__B_TwoRayGens2CB // layout of the constant buffer read by RayGen2
+{
+    float3 cameraPos; // used as the ray origin in RayGen2
+    float _padding0; // never read by the shader; presumably alignment padding - confirm against the CPU-side struct
+    float4x4 clipToWorld; // matrix RayGen2 applies to the clip-space point of each pixel
+    float depthNearPlane; // z value RayGen2 gives the clip-space point before unprojecting it
+    float3 _padding1; // never read by the shader; presumably alignment padding - confirm against the CPU-side struct
+};
+
+RWTexture2D<float4> g_texture : register(u0); // UAV that RayGen2 reads and writes per pixel
+RaytracingAccelerationStructure g_scene : register(t0); // acceleration structure passed to TraceRay
+Texture2D<float4> g_blueChannel : register(t1); // read-only texture RayGen2 derives the blue channel from
+ConstantBuffer<Struct__B_TwoRayGens2CB> _B_TwoRayGens2CB : register(b0); // camera constants for RayGen2
+
+#line 2
+
+
+struct Payload // ray payload shared by RayGen2, Miss2A, Miss2B and ClosestHit2
+{
+	bool hit; // true from ClosestHit2, false from either miss shader
+	float blueChannelMultiplier; // scale RayGen2 applies to the blue value: 0.25 (Miss2A), 1.0 (Miss2B) or 0.0 (ClosestHit2)
+};
+
+[shader("raygeneration")] // ray generation entry point: writes the hit flag to green and a scaled texture luminance to blue
+#line 10
+void RayGen2()
+{
+	uint2 px = DispatchRaysIndex().xy; // pixel handled by this invocation
+	float2 dimensions = float2(DispatchRaysDimensions().xy); // dispatch size, used to normalize px
+
+	float2 screenPos = (float2(px)+0.5f) / dimensions * 2.0 - 1.0; // pixel center remapped from [0, dimensions] to [-1, 1]
+	screenPos.y = -screenPos.y; // flip vertically
+
+	float4 world = mul(float4(screenPos, _B_TwoRayGens2CB.depthNearPlane, 1), _B_TwoRayGens2CB.clipToWorld); // clip-space point multiplied by clipToWorld
+	world.xyz /= world.w; // homogeneous divide
+
+	RayDesc ray;
+	ray.Origin = _B_TwoRayGens2CB.cameraPos;
+	ray.Direction = normalize(world.xyz - ray.Origin); // unit vector from the camera toward the unprojected point
+	ray.TMin = 0;
+	ray.TMax = 1000.0f;
+
+	Payload payload = (Payload)0; // hit = false, blueChannelMultiplier = 0 until a shader overwrites them
+
+	int missShaderIndex = (px.y < dimensions.y / 2) ? 0 : 1; // rows above half height use miss index 0, the rest index 1 (presumably Miss2A / Miss2B - confirm against the shader table)
+
+	TraceRay(g_scene, // Scene (TLAS) buffer
+		RAY_FLAG_FORCE_OPAQUE, // Ray flags
+		0xFF, // Ray mask
+		0, // RayContributionToHitGroupIndex
+		0, // MultiplierForGeometryContributionToHitGroupIndex
+		missShaderIndex, // MissShaderIndex
+		ray,
+		payload);
+
+	float4 color = g_texture[px]; // read-modify-write: red keeps its existing value
+	color.a = 1.0f;
+	color.g = payload.hit ? 1.0f : 0.0f; // green = 1 when the closest-hit shader ran, 0 when a miss shader ran
+
+	uint2 blueChannelDims;
+	g_blueChannel.GetDimensions(blueChannelDims.x, blueChannelDims.y);
+	color.b = dot(g_blueChannel[px % blueChannelDims].rgb, float3(0.3f, 0.59f, 0.11f)) * payload.blueChannelMultiplier; // texel index wraps via modulo; weighted rgb sum scaled by the payload multiplier
+
+	g_texture[px] = color;
+}
+
+[shader("miss")] // miss entry point: no hit, blue value scaled to a quarter
+#line 51
+void Miss2A(inout Payload payload : SV_RayPayload)
+{
+	payload.hit = false;
+	payload.blueChannelMultiplier = 0.25f; // RayGen2 multiplies the blue value by this
+}
+
+[shader("miss")] // miss entry point: no hit, blue value left at full strength
+#line 57
+void Miss2B(inout Payload payload : SV_RayPayload)
+{
+	payload.hit = false;
+	payload.blueChannelMultiplier = 1.0f; // RayGen2 multiplies the blue value by this
+}
+
+[shader("closesthit")] // closest-hit entry point: marks the hit and zeroes the blue multiplier; the intersection attributes are unused
+#line 63
+void ClosestHit2(inout Payload payload : SV_RayPayload, in BuiltInTriangleIntersectionAttributes intersection : SV_IntersectionAttributes)
+{
+	payload.hit = true;
+	payload.blueChannelMultiplier = 0.0f; // RayGen2 multiplies the blue value by this, so hit pixels get blue = 0
+}
+
+/*
+Shader Resources:
+	Texture g_texture (as UAV)
+	Buffer g_scene (as RTScene)
+	Texture g_blueChannel (as SRV)
+*/
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/private/technique.cpp b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/private/technique.cpp
new file mode 100644
index 00000000..3f8e2bdb
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/private/technique.cpp
@@ -0,0 +1,1049 @@
+#include "../public/technique.h"
+#include "DX12Utils/dxutils.h"
+#include "DX12Utils/DelayedReleaseTracker.h"
+#include "DX12Utils/HeapAllocationTracker.h"
+#include "DX12Utils/TextureCache.h"
+
+#include <vector>
+#include <chrono>
+
+namespace ConstOverride
+{
+    static std::vector<Context*> s_allContexts; // every live Context: CreateContext appends, DestroyContext removes
+
+    static DX12Utils::Heap                  s_srvHeap; // created in CreateShared as a shader-visible CBV/SRV/UAV heap
+    static DX12Utils::Heap                  s_rtvHeap; // created in CreateShared as an RTV heap
+    static DX12Utils::Heap                  s_dsvHeap; // created in CreateShared as a DSV heap
+    static DX12Utils::UploadBufferTracker   s_ubTracker; // advanced in OnNewFrame, released in DestroyShared
+    static DX12Utils::DelayedReleaseTracker s_delayedRelease; // receives the shared PSOs / root signatures in DestroyShared
+    static DX12Utils::HeapAllocationTracker s_heapAllocationTrackerRTV; // initialized over s_rtvHeap in CreateShared
+    static DX12Utils::HeapAllocationTracker s_heapAllocationTrackerDSV; // initialized over s_dsvHeap in CreateShared
+
+    TLogFn Context::LogFn = [] (LogLevel level, const char* msg, ...) {}; // default logger does nothing
+    TPerfEventBeginFn Context::PerfEventBeginFn = [] (const char* name, ID3D12GraphicsCommandList* commandList, int index) {}; // default perf-event begin hook does nothing
+    TPerfEventEndFn Context::PerfEventEndFn = [] (ID3D12GraphicsCommandList* commandList) {}; // default perf-event end hook does nothing
+
+    std::wstring Context::s_techniqueLocation = L"./"; // base directory CreateShared joins with "shaders/..." to find shader files
+    static unsigned int s_timerIndex = 0;
+
+    ID3D12CommandSignature* ContextInternal::s_commandSignatureDispatch = nullptr; // indirect-dispatch signature, created in CreateShared
+
+    ID3D12PipelineState* ContextInternal::computeShader_Clear_pso = nullptr; // the PSO / root signature pairs below are filled in by CreateShared
+    ID3D12RootSignature* ContextInternal::computeShader_Clear_rootSig = nullptr;
+
+    ID3D12PipelineState* ContextInternal::computeShader_Left_WriteColor_pso = nullptr;
+    ID3D12RootSignature* ContextInternal::computeShader_Left_WriteColor_rootSig = nullptr;
+
+    ID3D12PipelineState* ContextInternal::computeShader_Right_WriteColor_pso = nullptr;
+    ID3D12RootSignature* ContextInternal::computeShader_Right_WriteColor_rootSig = nullptr;
+
+    template <typename T>
+    T Pow2GE(const T& A) // returns 2 raised to ceil(log2(A)), converted back to T
+    {
+        float f = std::log2(float(A)); // NOTE(review): A goes through float first, so large integer inputs (above 2^24) may be rounded here - confirm callers stay small
+        f = std::ceilf(f); // round the exponent up to the next whole number
+        return (T)std::pow(2.0f, f);
+    }
+
+    bool CreateShared(ID3D12Device* device) // device: D3D12 device every shared object is created on
+    {
+        // Builds the state shared by all Contexts: root signature + PSO for each of the three compute shaders, the descriptor heaps, and the indirect-dispatch command signature. Returns false at the first failure.
+        // Compute Shader: Clear
+        {
+            D3D12_STATIC_SAMPLER_DESC* samplers = nullptr; // no static samplers (a count of 0 is passed to MakeRootSig)
+
+            D3D12_DESCRIPTOR_RANGE ranges[1];
+
+            // Output
+            ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
+            ranges[0].NumDescriptors = 1;
+            ranges[0].BaseShaderRegister = 0;
+            ranges[0].RegisterSpace = 0;
+            ranges[0].OffsetInDescriptorsFromTableStart = 0;
+
+            if(!DX12Utils::MakeRootSig(device, ranges, 1, samplers, 0, &ContextInternal::computeShader_Clear_rootSig, (c_debugNames ? L"Clear" : nullptr), Context::LogFn))
+                return false;
+
+            ShaderCompilationInfo shaderCompilationInfo;
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "ConstOverride_Clear.hlsl";
+            shaderCompilationInfo.entryPoint = "csmain";
+            shaderCompilationInfo.shaderModel = "cs_6_1";
+            shaderCompilationInfo.debugName = (c_debugNames ? "Clear" : "");
+            if (c_debugShaders) shaderCompilationInfo.flags |= ShaderCompilationFlags::Debug;
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchMultiply","uint3(1,1,1)");
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchDivide","uint3(1,1,1)");
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchPreAdd","uint3(0,0,0)");
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchPostAdd","uint3(0,0,0)");
+
+            if(!DX12Utils::MakeComputePSO_DXC(device, shaderCompilationInfo,
+               ContextInternal::computeShader_Clear_rootSig, &ContextInternal::computeShader_Clear_pso, Context::LogFn))
+                return false;
+        }
+
+        // Compute Shader: Left_WriteColor
+        {
+            D3D12_STATIC_SAMPLER_DESC* samplers = nullptr; // no static samplers (a count of 0 is passed to MakeRootSig)
+
+            D3D12_DESCRIPTOR_RANGE ranges[1];
+
+            // Color
+            ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
+            ranges[0].NumDescriptors = 1;
+            ranges[0].BaseShaderRegister = 0;
+            ranges[0].RegisterSpace = 0;
+            ranges[0].OffsetInDescriptorsFromTableStart = 0;
+
+            if(!DX12Utils::MakeRootSig(device, ranges, 1, samplers, 0, &ContextInternal::computeShader_Left_WriteColor_rootSig, (c_debugNames ? L"Left_WriteColor" : nullptr), Context::LogFn))
+                return false;
+
+            ShaderCompilationInfo shaderCompilationInfo;
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "ConstOverrideSubgraph_Left/ConstOverride_WriteColor.hlsl";
+            shaderCompilationInfo.entryPoint = "csmain";
+            shaderCompilationInfo.shaderModel = "cs_6_1";
+            shaderCompilationInfo.debugName = (c_debugNames ? "Left_WriteColor" : "");
+            if (c_debugShaders) shaderCompilationInfo.flags |= ShaderCompilationFlags::Debug;
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchMultiply","uint3(1,1,1)");
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchDivide","uint3(1,1,1)");
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchPreAdd","uint3(0,0,0)");
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchPostAdd","uint3(0,0,0)");
+
+            if(!DX12Utils::MakeComputePSO_DXC(device, shaderCompilationInfo,
+               ContextInternal::computeShader_Left_WriteColor_rootSig, &ContextInternal::computeShader_Left_WriteColor_pso, Context::LogFn))
+                return false;
+        }
+
+        // Compute Shader: Right_WriteColor
+        {
+            D3D12_STATIC_SAMPLER_DESC* samplers = nullptr; // no static samplers (a count of 0 is passed to MakeRootSig)
+
+            D3D12_DESCRIPTOR_RANGE ranges[1];
+
+            // Color
+            ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
+            ranges[0].NumDescriptors = 1;
+            ranges[0].BaseShaderRegister = 0;
+            ranges[0].RegisterSpace = 0;
+            ranges[0].OffsetInDescriptorsFromTableStart = 0;
+
+            if(!DX12Utils::MakeRootSig(device, ranges, 1, samplers, 0, &ContextInternal::computeShader_Right_WriteColor_rootSig, (c_debugNames ? L"Right_WriteColor" : nullptr), Context::LogFn))
+                return false;
+
+            ShaderCompilationInfo shaderCompilationInfo;
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "ConstOverrideSubgraph_Right/ConstOverride_WriteColor.hlsl";
+            shaderCompilationInfo.entryPoint = "csmain";
+            shaderCompilationInfo.shaderModel = "cs_6_1";
+            shaderCompilationInfo.debugName = (c_debugNames ? "Right_WriteColor" : "");
+            if (c_debugShaders) shaderCompilationInfo.flags |= ShaderCompilationFlags::Debug;
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchMultiply","uint3(1,1,1)");
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchDivide","uint3(1,1,1)");
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchPreAdd","uint3(0,0,0)");
+            shaderCompilationInfo.defines.emplace_back("__GigiDispatchPostAdd","uint3(0,0,0)");
+
+            if(!DX12Utils::MakeComputePSO_DXC(device, shaderCompilationInfo,
+               ContextInternal::computeShader_Right_WriteColor_rootSig, &ContextInternal::computeShader_Right_WriteColor_pso, Context::LogFn))
+                return false;
+        }
+
+        // Create heaps
+        if (c_numSRVDescriptors > 0 && !CreateHeap(s_srvHeap, device, c_numSRVDescriptors, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, Context::LogFn))
+            return false;
+
+        if (c_numRTVDescriptors > 0 && !CreateHeap(s_rtvHeap, device, c_numRTVDescriptors, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, Context::LogFn))
+            return false;
+
+        if (c_numDSVDescriptors > 0 && !CreateHeap(s_dsvHeap, device, c_numDSVDescriptors, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, Context::LogFn))
+            return false;
+
+        s_heapAllocationTrackerRTV.Init(s_rtvHeap.m_heap, c_numRTVDescriptors, (int)device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV));
+        s_heapAllocationTrackerDSV.Init(s_dsvHeap.m_heap, c_numDSVDescriptors, (int)device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV));
+
+        // create indirect dispatch command
+        {
+            D3D12_INDIRECT_ARGUMENT_DESC dispatchArg = {};
+            dispatchArg.Type						 = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH;
+
+            D3D12_COMMAND_SIGNATURE_DESC dispatchDesc = {};
+            dispatchDesc.ByteStride					  = sizeof(uint32_t) * 3; // three uint32 values per indirect dispatch record
+            dispatchDesc.NumArgumentDescs			  = 1;
+            dispatchDesc.pArgumentDescs				  = &dispatchArg;
+            dispatchDesc.NodeMask					  = 0x0;
+
+            const HRESULT hr = device->CreateCommandSignature(&dispatchDesc, nullptr,
+                IID_PPV_ARGS(&ContextInternal::s_commandSignatureDispatch));
+            if (FAILED(hr)) // previously ignored: a failure left the signature null while reporting success
+                return false;
+        }
+
+        return true;
+    }
+
+    void DestroyShared()
+    {
+        // Hands the shared PSOs and root signatures to the delayed-release tracker, then tears down trackers, heaps, upload buffers and the command signature.
+        if(ContextInternal::computeShader_Clear_pso)
+        {
+            s_delayedRelease.Add(ContextInternal::computeShader_Clear_pso);
+            ContextInternal::computeShader_Clear_pso = nullptr; // cleared so a later call skips this object
+        }
+
+        if(ContextInternal::computeShader_Clear_rootSig)
+        {
+            s_delayedRelease.Add(ContextInternal::computeShader_Clear_rootSig);
+            ContextInternal::computeShader_Clear_rootSig = nullptr;
+        }
+
+        if(ContextInternal::computeShader_Left_WriteColor_pso)
+        {
+            s_delayedRelease.Add(ContextInternal::computeShader_Left_WriteColor_pso);
+            ContextInternal::computeShader_Left_WriteColor_pso = nullptr;
+        }
+
+        if(ContextInternal::computeShader_Left_WriteColor_rootSig)
+        {
+            s_delayedRelease.Add(ContextInternal::computeShader_Left_WriteColor_rootSig);
+            ContextInternal::computeShader_Left_WriteColor_rootSig = nullptr;
+        }
+
+        if(ContextInternal::computeShader_Right_WriteColor_pso)
+        {
+            s_delayedRelease.Add(ContextInternal::computeShader_Right_WriteColor_pso);
+            ContextInternal::computeShader_Right_WriteColor_pso = nullptr;
+        }
+
+        if(ContextInternal::computeShader_Right_WriteColor_rootSig)
+        {
+            s_delayedRelease.Add(ContextInternal::computeShader_Right_WriteColor_rootSig);
+            ContextInternal::computeShader_Right_WriteColor_rootSig = nullptr;
+        }
+
+        // Clear out heap trackers
+        s_heapAllocationTrackerRTV.Release();
+        s_heapAllocationTrackerDSV.Release();
+
+        // Destroy Heaps
+        DestroyHeap(s_srvHeap);
+        DestroyHeap(s_rtvHeap);
+        DestroyHeap(s_dsvHeap);
+
+        // Destroy any upload buffers
+        s_ubTracker.Release();
+
+        // Finish any delayed release
+        s_delayedRelease.Release(); // called after the PSOs / root signatures above were queued on this tracker
+
+        // Destroy indirect dispatch command
+        if (ContextInternal::s_commandSignatureDispatch)
+        {
+            ContextInternal::s_commandSignatureDispatch->Release(); // released directly rather than through s_delayedRelease
+            ContextInternal::s_commandSignatureDispatch = nullptr;
+        }
+    }
+
+    Context* CreateContext(ID3D12Device* device) // returns a new Context, or nullptr when the shared state cannot be created
+    {
+        if (s_allContexts.size() == 0) // first context: the shared state has to be built first
+        {
+            if (!CreateShared(device)) // NOTE(review): on failure nothing here undoes what CreateShared already created - confirm whether that is intended
+                return nullptr;
+        }
+
+        Context* ret = new Context; // deleted in DestroyContext
+        s_allContexts.push_back(ret);
+        return ret;
+    }
+
+    // Removes the context from s_allContexts and deletes it.
+    // Once the list is empty, DestroyShared() is called.
+    void DestroyContext(Context* context)
+    {
+        // erase-remove: drop every entry that matches this context pointer
+        auto firstRemoved = std::remove(s_allContexts.begin(), s_allContexts.end(), context);
+        s_allContexts.erase(firstRemoved, s_allContexts.end());
+
+        delete context;
+
+        if (s_allContexts.size() != 0)
+            return;
+
+        DestroyShared();
+    }
+
+    // Returns the resource stored in m_output.texture_Output (may be null).
+    ID3D12Resource* Context::GetPrimaryOutputTexture()
+    {
+        ID3D12Resource* primaryOutput = m_output.texture_Output;
+        return primaryOutput;
+    }
+
+    // Returns the value of m_output.c_texture_Output_endingState.
+    D3D12_RESOURCE_STATES Context::GetPrimaryOutputTextureState()
+    {
+        const D3D12_RESOURCE_STATES endingState = m_output.c_texture_Output_endingState;
+        return endingState;
+    }
+
+    // Forwards the new-frame notification, with the frames-in-flight count, to each
+    // shared tracker: delayed release, upload buffers, then the RTV and DSV heap trackers.
+    void OnNewFrame(int framesInFlight)
+    {
+        const int frameCount = framesInFlight;
+
+        // Resource lifetime trackers
+        s_delayedRelease.OnNewFrame(frameCount);
+        s_ubTracker.OnNewFrame(frameCount);
+
+        // Descriptor heap allocation trackers
+        s_heapAllocationTrackerRTV.OnNewFrame(frameCount);
+        s_heapAllocationTrackerDSV.OnNewFrame(frameCount);
+    }
+
+    // Returns the number of entries in s_allContexts.
+    int Context::GetContextCount()
+    {
+        const int contextCount = (int)s_allContexts.size();
+        return contextCount;
+    }
+
+    // Returns the context at the given index in s_allContexts,
+    // or nullptr when the index is outside [0, GetContextCount()).
+    Context* Context::GetContext(int index)
+    {
+        const bool outOfRange = (index < 0) || (index >= GetContextCount());
+        if (outOfRange)
+            return nullptr;
+
+        return s_allContexts[index];
+    }
+
+    // Creates a default-heap buffer in the COPY_DEST state and registers it with AddManagedResource.
+    //   data / size  - optional initial contents; uploaded only when data is non-null and size > 0.
+    //   desiredState - state the buffer is transitioned to before returning (no barrier if COPY_DEST).
+    // Returns the buffer, or nullptr if the buffer could not be created.
+    ID3D12Resource* Context::CreateManagedBuffer(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, D3D12_RESOURCE_FLAGS flags, const void* data, size_t size, const wchar_t* debugName, D3D12_RESOURCE_STATES desiredState)
+    {
+        // Make a buffer and have the context manage it
+        ID3D12Resource* ret = DX12Utils::CreateBuffer(
+            device,
+            (unsigned int)size,
+            flags,
+            D3D12_RESOURCE_STATE_COPY_DEST,
+            D3D12_HEAP_TYPE_DEFAULT,
+            debugName,
+            Context::LogFn
+        );
+
+        // Creation failed: do not track, upload to, or transition a null resource
+        if (ret == nullptr)
+            return nullptr;
+
+        AddManagedResource(ret);
+
+        // Copy the data to the buffer if we should
+        if (data != nullptr && size > 0)
+            UploadBufferData(device, commandList, ret, D3D12_RESOURCE_STATE_COPY_DEST, data, (unsigned int)size);
+
+        // Do a resource transition if we should
+        if (desiredState != D3D12_RESOURCE_STATE_COPY_DEST)
+        {
+            D3D12_RESOURCE_BARRIER barrier = {};
+
+            barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+            barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+            barrier.Transition.pResource = ret;
+            barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
+            barrier.Transition.StateAfter = desiredState;
+            barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+
+            commandList->ResourceBarrier(1, &barrier);
+        }
+
+        // return the resource
+        return ret;
+    }
+
+    // Creates a texture in the COPY_DEST state and registers it with AddManagedResource.
+    //   initialData  - optional tightly packed pixel data (row pitch = size[0] * bytes per pixel),
+    //                  uploaded through UploadTextureData when non-null.
+    //   desiredState - state the texture is transitioned to before returning (no barrier if COPY_DEST).
+    // Returns the texture, or nullptr if the texture could not be created.
+    ID3D12Resource* Context::CreateManagedTexture(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, D3D12_RESOURCE_FLAGS flags, DXGI_FORMAT format, const unsigned int size[3], unsigned int numMips, DX12Utils::ResourceType resourceType, const void* initialData, const wchar_t* debugName, D3D12_RESOURCE_STATES desiredState)
+    {
+        // Create a texture
+        ID3D12Resource* ret = DX12Utils::CreateTexture(device, size, numMips, format, flags, D3D12_RESOURCE_STATE_COPY_DEST, resourceType, debugName, Context::LogFn);
+
+        // Creation failed: do not track, upload to, or transition a null resource
+        if (ret == nullptr)
+            return nullptr;
+
+        AddManagedResource(ret);
+
+        // copy initial data in, if we should
+        if (initialData != nullptr)
+        {
+            DX12Utils::DXGI_FORMAT_Info formatInfo = DX12Utils::Get_DXGI_FORMAT_Info(format, Context::LogFn);
+            UploadTextureData(device, commandList, ret, D3D12_RESOURCE_STATE_COPY_DEST, initialData, size[0] * formatInfo.bytesPerPixel);
+        }
+
+        // Put the resource into the desired state
+        if (desiredState != D3D12_RESOURCE_STATE_COPY_DEST)
+        {
+            D3D12_RESOURCE_BARRIER barrier = {};
+
+            barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+            barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+            barrier.Transition.pResource = ret;
+            barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
+            barrier.Transition.StateAfter = desiredState;
+            barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+
+            commandList->ResourceBarrier(1, &barrier);
+        }
+
+        return ret;
+    }
+
+    // Creates a managed texture whose pixels are all initialized to a single clear value.
+    //   clearValue / clearValueSize - one pixel's worth of data; when provided, clearValueSize must
+    //                                 equal the format's bytes per pixel or nullptr is returned.
+    //                                 When not provided, the texture is created without initial data.
+    // Returns the result of CreateManagedTexture.
+    ID3D12Resource* Context::CreateManagedTextureAndClear(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, D3D12_RESOURCE_FLAGS flags, DXGI_FORMAT format, const unsigned int size[3], unsigned int numMips, DX12Utils::ResourceType resourceType, void* clearValue, size_t clearValueSize, const wchar_t* debugName, D3D12_RESOURCE_STATES desiredState)
+    {
+        // Make sure the clear value is the correct size
+        DX12Utils::DXGI_FORMAT_Info formatInfo = DX12Utils::Get_DXGI_FORMAT_Info(format, Context::LogFn);
+        if (clearValue != nullptr && clearValueSize > 0 && clearValueSize != formatInfo.bytesPerPixel)
+            return nullptr;
+
+        // Copy data into the resource
+        std::vector<unsigned char> expandedClearValue;
+        void* initialData = nullptr;
+        if (clearValue != nullptr && clearValueSize > 0)
+        {
+            // Do the size math in size_t so large textures don't wrap a 32 bit product
+            const size_t bytesPerPixel = (size_t)formatInfo.bytesPerPixel;
+            const size_t pixelCount = (size_t)size[0] * (size_t)size[1] * (size_t)size[2];
+
+            expandedClearValue.resize(pixelCount * bytesPerPixel);
+            unsigned char* dest = expandedClearValue.data();
+            for (size_t i = 0; i < pixelCount; ++i)
+            {
+                memcpy(dest, clearValue, bytesPerPixel);
+                dest += bytesPerPixel;
+            }
+            initialData = expandedClearValue.data();
+        }
+
+        // make and return the texture
+        return CreateManagedTexture(device, commandList, flags, format, size, numMips, resourceType, initialData, debugName, desiredState);
+    }
+
+    // Loads one or more image files and creates a managed texture from them.
+    //   format       - only formats whose channel type is uint8 or float are supported.
+    //   resourceType - Texture2D loads fileName directly. Texture2DArray / Texture3D / TextureCube load a
+    //                  sequence of slices: fileName must contain "%i" (slice index, starting at 0), or for
+    //                  cube maps may contain "%s" (replaced by Right, Left, Up, Down, Front, Back).
+    //                  Slices are loaded until one fails to load; all slices must have the same dimensions.
+    //   size         - output: receives width, height and slice count of what was loaded.
+    // Returns the texture, or nullptr on an unsupported format/type, a bad file name pattern,
+    // a missing first image, or mismatched slice sizes.
+    ID3D12Resource* Context::CreateManagedTextureFromFile(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, D3D12_RESOURCE_FLAGS flags, DXGI_FORMAT format, DX12Utils::ResourceType resourceType, const char* fileName, bool sourceIsSRGB, unsigned int size[3], const wchar_t* debugName, D3D12_RESOURCE_STATES desiredState)
+    {
+        // Get the desired channel type
+        DX12Utils::DXGI_FORMAT_Info formatInfo = DX12Utils::Get_DXGI_FORMAT_Info(format, Context::LogFn);
+        DX12Utils::TextureCache::Type desiredChannelType = DX12Utils::TextureCache::Type::U8;
+        if (formatInfo.channelType == DX12Utils::DXGI_FORMAT_Info::ChannelType::_uint8_t)
+            desiredChannelType = DX12Utils::TextureCache::Type::U8;
+        else if (formatInfo.channelType == DX12Utils::DXGI_FORMAT_Info::ChannelType::_float)
+            desiredChannelType = DX12Utils::TextureCache::Type::F32;
+        else
+            return nullptr;
+
+        if (resourceType == DX12Utils::ResourceType::Texture2D)
+        {
+            // Load the texture and convert as necessary
+            DX12Utils::TextureCache::Texture texture = DX12Utils::TextureCache::GetAs(fileName, sourceIsSRGB, desiredChannelType, formatInfo.sRGB, formatInfo.channelCount);
+            if (!texture.Valid())
+                return nullptr;
+
+            // store off image properties
+            size[0] = texture.width;
+            size[1] = texture.height;
+            size[2] = 1;
+
+            // make and return the texture
+            return CreateManagedTexture(device, commandList, flags, format, size, 1, resourceType, texture.pixels.data(), debugName, desiredState);
+        }
+        else if (resourceType == DX12Utils::ResourceType::Texture2DArray ||
+                 resourceType == DX12Utils::ResourceType::Texture3D ||
+                 resourceType == DX12Utils::ResourceType::TextureCube)
+        {
+            static const char* c_cubeMapNames[] =
+            {
+                "Right",
+                "Left",
+                "Up",
+                "Down",
+                "Front",
+                "Back"
+            };
+            static const int c_cubeMapNameCount = (int)(sizeof(c_cubeMapNames) / sizeof(c_cubeMapNames[0]));
+
+            bool useCubeMapNames = (resourceType == DX12Utils::ResourceType::TextureCube && strstr(fileName, "%s") != nullptr);
+            bool hasPercentI = strstr(fileName, "%i") != nullptr;
+            if (!useCubeMapNames && !hasPercentI)
+                return nullptr;
+
+            std::vector<DX12Utils::TextureCache::Texture> loadedTextureSlices;
+
+            // Load multiple textures
+            int textureIndex = -1;
+            while (1)
+            {
+                textureIndex++;
+
+                // There are only as many named cube faces as entries in c_cubeMapNames.
+                // Stop once they have all been loaded instead of indexing past the end of the table.
+                if (useCubeMapNames && textureIndex >= c_cubeMapNameCount)
+                    break;
+
+                char indexedFileName[1024];
+
+                if (useCubeMapNames)
+                    sprintf_s(indexedFileName, fileName, c_cubeMapNames[textureIndex]);
+                else
+                    sprintf_s(indexedFileName, fileName, textureIndex);
+
+                // Load the texture and convert as necessary
+                DX12Utils::TextureCache::Texture loadedTextureSlice = DX12Utils::TextureCache::GetAs(indexedFileName, sourceIsSRGB, desiredChannelType, formatInfo.sRGB, formatInfo.channelCount);
+                if (!loadedTextureSlice.Valid())
+                {
+                    // No first slice means there is nothing to make a texture from.
+                    // Otherwise a failed load marks the end of the sequence.
+                    if (textureIndex == 0)
+                        return nullptr;
+                    break;
+                }
+
+                // make sure the textures are the same size
+                if (textureIndex > 0 && (loadedTextureSlice.width != loadedTextureSlices[0].width || loadedTextureSlice.height != loadedTextureSlices[0].height))
+                    return nullptr;
+
+                loadedTextureSlices.push_back(loadedTextureSlice);
+            }
+
+            // store the texture size
+            size[0] = loadedTextureSlices[0].width;
+            size[1] = loadedTextureSlices[0].height;
+            size[2] = (unsigned int)loadedTextureSlices.size();
+
+            // gather up all pixels into a contiguous chunk of memory
+            std::vector<unsigned char> allPixels;
+            for (const DX12Utils::TextureCache::Texture& texture : loadedTextureSlices)
+                allPixels.insert(allPixels.end(), texture.pixels.begin(), texture.pixels.end());
+
+            // make and return the texture
+            return CreateManagedTexture(device, commandList, flags, format, size, 1, resourceType, allPixels.data(), debugName, desiredState);
+        }
+        else
+            return nullptr;
+    }
+
+    // Records commands that copy tightly packed CPU pixel data into mip 0 of a texture.
+    //   textureState   - current state of the texture; it is transitioned to COPY_DEST for the copy
+    //                    and back again afterwards if it is not already COPY_DEST.
+    //   data           - source pixels, rows packed at unalignedPitch bytes, slices packed back to back.
+    //   unalignedPitch - size in bytes of one source row. Rows are re-spaced in the upload buffer to
+    //                    D3D12_TEXTURE_DATA_PITCH_ALIGNMENT.
+    // 3D textures are uploaded with a single copy; 2D textures and arrays get one copy per array slice.
+    void Context::UploadTextureData(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, ID3D12Resource* texture, D3D12_RESOURCE_STATES textureState, const void* data, unsigned int unalignedPitch)
+    {
+        // Get information about the texture
+        int alignedPitch = ALIGN(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT, unalignedPitch);
+        D3D12_RESOURCE_DESC textureDesc = texture->GetDesc();
+
+        // transition the resource to copy dest if it isn't already
+        if (textureState != D3D12_RESOURCE_STATE_COPY_DEST)
+        {
+            D3D12_RESOURCE_BARRIER barrier = {};
+
+            barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+            barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+            barrier.Transition.pResource = texture;
+            barrier.Transition.StateBefore = textureState;
+            barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
+            barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+
+            commandList->ResourceBarrier(1, &barrier);
+        }
+
+        // 3d textures do a single copy because it's a single sub resource.
+        if (textureDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D)
+        {
+            // Get the upload buffer
+            DX12Utils::UploadBufferTracker::Buffer* uploadBuffer = s_ubTracker.GetBuffer(device, alignedPitch * textureDesc.Height * textureDesc.DepthOrArraySize, Context::LogFn, false);
+
+            // Map, copy, unmap
+            {
+                unsigned char* dest = nullptr;
+                D3D12_RANGE readRange = { 0, 0 };
+                HRESULT hr = uploadBuffer->buffer->Map(0, &readRange, (void**)&dest);
+                if (FAILED(hr))
+                {
+                    Context::LogFn(LogLevel::Error, "Could not map upload buffer.");
+                }
+                else
+                {
+                    const unsigned char* src = (const unsigned char*)data;
+                    for (int iz = 0; iz < textureDesc.DepthOrArraySize; ++iz)
+                    {
+                        for (int iy = 0; iy < (int)textureDesc.Height; ++iy)
+                        {
+                            memcpy(dest, src, unalignedPitch);
+                            src += unalignedPitch;
+                            dest += alignedPitch;
+                        }
+                    }
+
+                    uploadBuffer->buffer->Unmap(0, nullptr);
+                }
+            }
+
+            // copy the upload buffer into the texture
+            {
+                D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout = {};
+                device->GetCopyableFootprints(&textureDesc, 0, 1, 0, &layout, nullptr, nullptr, nullptr);
+
+                D3D12_TEXTURE_COPY_LOCATION src = {};
+                src.pResource = uploadBuffer->buffer;
+                src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+                src.PlacedFootprint = layout;
+
+                D3D12_TEXTURE_COPY_LOCATION dest = {};
+                dest.pResource = texture;
+                dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+                dest.SubresourceIndex = 0;
+
+                commandList->CopyTextureRegion(&dest, 0, 0, 0, &src, nullptr);
+            }
+        }
+        // 2d array textures do a copy for each slice
+        else if (textureDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D)
+        {
+            for (int iz = 0; iz < textureDesc.DepthOrArraySize; ++iz)
+            {
+                // Subresources are ordered mip-major within each array slice, so mip 0 of slice iz
+                // is at iz * MipLevels. Using iz directly is only right when there is a single mip.
+                const UINT subresourceIndex = (UINT)iz * (UINT)textureDesc.MipLevels;
+
+                // Get the upload buffer
+                DX12Utils::UploadBufferTracker::Buffer* uploadBuffer = s_ubTracker.GetBuffer(device, alignedPitch * textureDesc.Height, Context::LogFn, false);
+
+                // Map, copy, unmap
+                {
+                    unsigned char* dest = nullptr;
+                    D3D12_RANGE readRange = { 0, 0 };
+                    HRESULT hr = uploadBuffer->buffer->Map(0, &readRange, (void**)&dest);
+                    if (FAILED(hr))
+                    {
+                        Context::LogFn(LogLevel::Error, "Could not map upload buffer.");
+                    }
+                    else
+                    {
+                        const unsigned char* src = &((const unsigned char*)data)[unalignedPitch * textureDesc.Height * iz];
+                        for (int iy = 0; iy < (int)textureDesc.Height; ++iy)
+                        {
+                            memcpy(dest, src, unalignedPitch);
+                            src += unalignedPitch;
+                            dest += alignedPitch;
+                        }
+
+                        uploadBuffer->buffer->Unmap(0, nullptr);
+                    }
+                }
+
+                // copy the upload buffer into the texture
+                {
+                    D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout = {};
+                    device->GetCopyableFootprints(&textureDesc, subresourceIndex, 1, 0, &layout, nullptr, nullptr, nullptr);
+
+                    D3D12_TEXTURE_COPY_LOCATION src = {};
+                    src.pResource = uploadBuffer->buffer;
+                    src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+                    src.PlacedFootprint = layout;
+
+                    D3D12_TEXTURE_COPY_LOCATION dest = {};
+                    dest.pResource = texture;
+                    dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+                    dest.SubresourceIndex = subresourceIndex;
+
+                    commandList->CopyTextureRegion(&dest, 0, 0, 0, &src, nullptr);
+                }
+            }
+        }
+        else
+        {
+            Context::LogFn(LogLevel::Error, "Unhandled texture dimension.");
+        }
+
+        // transition the resource back to what it was
+        if (textureState != D3D12_RESOURCE_STATE_COPY_DEST)
+        {
+            D3D12_RESOURCE_BARRIER barrier = {};
+
+            barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+            barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+            barrier.Transition.pResource = texture;
+            barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
+            barrier.Transition.StateAfter = textureState;
+            barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+
+            commandList->ResourceBarrier(1, &barrier);
+        }
+    }
+
+    // Records commands that copy CPU data into a buffer through an upload buffer from s_ubTracker.
+    //   bufferState - current state of the buffer; it is transitioned to COPY_DEST for the copy and
+    //                 back again afterwards if it is not already COPY_DEST.
+    //   data        - source bytes; dataSize bytes are copied into the upload buffer.
+    // If the upload buffer cannot be mapped, an error is logged and no commands are recorded.
+    void Context::UploadBufferData(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, ID3D12Resource* buffer, D3D12_RESOURCE_STATES bufferState, const void* data, unsigned int dataSize)
+    {
+        // Get the upload buffer
+        DX12Utils::UploadBufferTracker::Buffer* uploadBuffer = s_ubTracker.GetBuffer(device, dataSize, Context::LogFn, false);
+
+        // copy cpu data to the upload buffer
+        {
+            void* start = nullptr;
+            HRESULT hr = uploadBuffer->buffer->Map(0, nullptr, &start);
+
+            // Use FAILED() so that only error codes count as failure; success codes other than S_OK are non-zero.
+            if (FAILED(hr))
+            {
+                Context::LogFn(LogLevel::Error, "Could not map upload buffer");
+                return;
+            }
+
+            memcpy(start, data, dataSize);
+
+            uploadBuffer->buffer->Unmap(0, nullptr);
+        }
+
+        // transition the resource to copy dest if it isn't already
+        if (bufferState != D3D12_RESOURCE_STATE_COPY_DEST)
+        {
+            D3D12_RESOURCE_BARRIER barrier = {};
+
+            barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+            barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+            barrier.Transition.pResource = buffer;
+            barrier.Transition.StateBefore = bufferState;
+            barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
+            barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+
+            commandList->ResourceBarrier(1, &barrier);
+        }
+
+        // copy the resource
+        commandList->CopyResource(buffer, uploadBuffer->buffer);
+
+        // transition the resource back to what it was
+        if (bufferState != D3D12_RESOURCE_STATE_COPY_DEST)
+        {
+            D3D12_RESOURCE_BARRIER barrier = {};
+
+            barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+            barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+            barrier.Transition.pResource = buffer;
+            barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
+            barrier.Transition.StateAfter = bufferState;
+            barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+
+            commandList->ResourceBarrier(1, &barrier);
+        }
+    }
+
+    // Returns the RTV heap index for (resource, arrayIndex, mipIndex), creating and caching
+    // the view in m_internal.m_RTVCache on first use. Returns -1 if a heap slot could not be
+    // allocated or the view could not be created.
+    int Context::GetRTV(ID3D12Device* device, ID3D12Resource* resource, DXGI_FORMAT format, D3D12_RTV_DIMENSION dimension, int arrayIndex, int mipIndex, const char* debugName)
+    {
+        // Build the cache lookup key
+        DX12Utils::SubResourceHeapAllocationInfo cacheKey;
+        cacheKey.resource = resource;
+        cacheKey.arrayIndex = arrayIndex;
+        cacheKey.mipIndex = mipIndex;
+
+        // Cache hit: hand back the existing index
+        auto cached = m_internal.m_RTVCache.find(cacheKey);
+        if (cached != m_internal.m_RTVCache.end())
+            return cached->second;
+
+        // Cache miss: reserve a slot in the RTV heap
+        int newIndex = -1;
+        const bool allocated = s_heapAllocationTrackerRTV.Allocate(newIndex, debugName);
+        if (!allocated)
+            return -1;
+
+        // Create the view in that slot, giving the slot back on failure
+        const bool created = DX12Utils::CreateRTV(device, resource, s_heapAllocationTrackerRTV.GetCPUHandle(newIndex), format, dimension, arrayIndex, mipIndex);
+        if (!created)
+        {
+            s_heapAllocationTrackerRTV.Free(newIndex);
+            return -1;
+        }
+
+        // Remember it for next time
+        m_internal.m_RTVCache[cacheKey] = newIndex;
+        return newIndex;
+    }
+
+    // Returns the DSV heap index for (resource, arrayIndex, mipIndex), creating and caching
+    // the view in m_internal.m_DSVCache on first use. Returns -1 if a heap slot could not be
+    // allocated or the view could not be created.
+    int Context::GetDSV(ID3D12Device* device, ID3D12Resource* resource, DXGI_FORMAT format, D3D12_DSV_DIMENSION dimension, int arrayIndex, int mipIndex, const char* debugName)
+    {
+        // Build the cache lookup key
+        DX12Utils::SubResourceHeapAllocationInfo cacheKey;
+        cacheKey.resource = resource;
+        cacheKey.arrayIndex = arrayIndex;
+        cacheKey.mipIndex = mipIndex;
+
+        // Cache hit: hand back the existing index
+        auto cached = m_internal.m_DSVCache.find(cacheKey);
+        if (cached != m_internal.m_DSVCache.end())
+            return cached->second;
+
+        // Cache miss: reserve a slot in the DSV heap
+        int newIndex = -1;
+        const bool allocated = s_heapAllocationTrackerDSV.Allocate(newIndex, debugName);
+        if (!allocated)
+            return -1;
+
+        // Create the view in that slot, giving the slot back on failure
+        const bool created = DX12Utils::CreateDSV(device, resource, s_heapAllocationTrackerDSV.GetCPUHandle(newIndex), format, dimension, arrayIndex, mipIndex);
+        if (!created)
+        {
+            s_heapAllocationTrackerDSV.Free(newIndex);
+            return -1;
+        }
+
+        // Remember it for next time
+        m_internal.m_DSVCache[cacheKey] = newIndex;
+        return newIndex;
+    }
+
+    // Reads the GPU timestamps from the readback buffer and converts them into per-pass GPU
+    // durations stored in m_profileData[].m_gpu (tick difference divided by the queue's timestamp frequency).
+    //   numItems - output: number of profile entries filled in (0 when nothing is returned).
+    // Returns m_profileData, or nullptr when profiling is off, there is no readback buffer yet,
+    // or the timestamp frequency / readback buffer could not be obtained.
+    const ProfileEntry* Context::ReadbackProfileData(ID3D12CommandQueue* commandQueue, int& numItems)
+    {
+        numItems = 0;
+
+        if (!m_profile || !m_internal.m_TimestampReadbackBuffer)
+            return nullptr;
+
+        // Without a valid frequency the tick deltas cannot be converted
+        uint64_t GPUFrequency = 0;
+        if (FAILED(commandQueue->GetTimestampFrequency(&GPUFrequency)) || GPUFrequency == 0)
+        {
+            Context::LogFn(LogLevel::Error, "Could not get GPU timestamp frequency.");
+            return nullptr;
+        }
+        double GPUTickDelta = 1.0 / static_cast<double>(GPUFrequency);
+
+        // Two timestamps per entry: 3 compute shaders plus the technique total
+        D3D12_RANGE range;
+        range.Begin = 0;
+        range.End = ((3 + 1) * 2) * sizeof(uint64_t);
+
+        uint64_t* timeStampBuffer = nullptr;
+        if (FAILED(m_internal.m_TimestampReadbackBuffer->Map(0, &range, (void**)&timeStampBuffer)) || timeStampBuffer == nullptr)
+        {
+            Context::LogFn(LogLevel::Error, "Could not map timestamp readback buffer.");
+            return nullptr;
+        }
+
+        // Timestamp 0 is the technique start; each pass then has a begin/end pair at [2*i+1, 2*i+2]
+        m_profileData[numItems].m_gpu = float(GPUTickDelta * double(timeStampBuffer[numItems*2+2] - timeStampBuffer[numItems*2+1])); numItems++; // compute shader: Clear
+        m_profileData[numItems].m_gpu = float(GPUTickDelta * double(timeStampBuffer[numItems*2+2] - timeStampBuffer[numItems*2+1])); numItems++; // compute shader: Left_WriteColor
+        m_profileData[numItems].m_gpu = float(GPUTickDelta * double(timeStampBuffer[numItems*2+2] - timeStampBuffer[numItems*2+1])); numItems++; // compute shader: Right_WriteColor
+        m_profileData[numItems].m_gpu = float(GPUTickDelta * double(timeStampBuffer[numItems*2+1] - timeStampBuffer[0])); numItems++; // GPU total
+
+        // Nothing was written by the CPU, so unmap with an empty written range
+        D3D12_RANGE emptyRange = {};
+        m_internal.m_TimestampReadbackBuffer->Unmap(0, &emptyRange);
+
+        return m_profileData;
+    }
+
+    // Releases everything this context holds: cached RTV/DSV heap slots are returned to the shared
+    // trackers, managed resources and the timestamp query objects are released immediately, and the
+    // output texture is handed to s_delayedRelease.
+    Context::~Context()
+    {
+        // Give the cached render target view slots back to the heap tracker
+        for (auto it = m_internal.m_RTVCache.begin(); it != m_internal.m_RTVCache.end(); ++it)
+            s_heapAllocationTrackerRTV.Free(it->second);
+        m_internal.m_RTVCache.clear();
+
+        // Give the cached depth stencil view slots back to the heap tracker
+        for (auto it = m_internal.m_DSVCache.begin(); it != m_internal.m_DSVCache.end(); ++it)
+            s_heapAllocationTrackerDSV.Free(it->second);
+        m_internal.m_DSVCache.clear();
+
+        // Release resources created through the CreateManaged* functions
+        for (ID3D12Resource* managed : m_internal.m_managedResources)
+            managed->Release();
+        m_internal.m_managedResources.clear();
+
+        // Profiling objects
+        if (m_internal.m_TimestampQueryHeap != nullptr)
+        {
+            m_internal.m_TimestampQueryHeap->Release();
+            m_internal.m_TimestampQueryHeap = nullptr;
+        }
+
+        if (m_internal.m_TimestampReadbackBuffer != nullptr)
+        {
+            m_internal.m_TimestampReadbackBuffer->Release();
+            m_internal.m_TimestampReadbackBuffer = nullptr;
+        }
+
+        // The output texture goes through the delayed release queue rather than being released here
+        if (m_output.texture_Output != nullptr)
+        {
+            s_delayedRelease.Add(m_output.texture_Output);
+            m_output.texture_Output = nullptr;
+        }
+    }
+
+    void Execute(Context* context, ID3D12Device* device, ID3D12GraphicsCommandList* commandList)
+    {
+        // reset the timer index
+        s_timerIndex = 0;
+
+        ScopedPerfEvent scopedPerf("ConstOverride", commandList, 4);
+
+        std::chrono::high_resolution_clock::time_point startPointCPUTechnique;
+        if(context->m_profile)
+        {
+            startPointCPUTechnique = std::chrono::high_resolution_clock::now();
+            if(context->m_internal.m_TimestampQueryHeap == nullptr)
+            {
+                D3D12_QUERY_HEAP_DESC QueryHeapDesc;
+                QueryHeapDesc.Count = (3+1) * 2;
+                QueryHeapDesc.NodeMask = 1;
+                QueryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
+                device->CreateQueryHeap(&QueryHeapDesc, IID_PPV_ARGS(&context->m_internal.m_TimestampQueryHeap));
+                if (c_debugNames)
+                    context->m_internal.m_TimestampQueryHeap->SetName(L"ConstOverride Time Stamp Query Heap");
+
+                context->m_internal.m_TimestampReadbackBuffer = DX12Utils::CreateBuffer(device, sizeof(uint64_t) * (3+1) * 2, D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_HEAP_TYPE_READBACK, (c_debugNames ? L"ConstOverride Time Stamp Query Heap" : nullptr), nullptr);
+            }
+            commandList->EndQuery(context->m_internal.m_TimestampQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, s_timerIndex++);
+        }
+
+        // Make sure internally owned resources are created and are the right size and format
+        context->EnsureResourcesCreated(device, commandList);
+
+        // set the heaps
+        ID3D12DescriptorHeap* heaps[] =
+        {
+            s_srvHeap.m_heap,
+        };
+        commandList->SetDescriptorHeaps(_countof(heaps), heaps);
+
+        // Transition resources for the next action
+        {
+            D3D12_RESOURCE_BARRIER barriers[1];
+
+            barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
+            barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+            barriers[0].UAV.pResource = context->m_output.texture_Output;
+
+            commandList->ResourceBarrier(1, barriers);
+        }
+
+        // Compute Shader: Clear
+        {
+            ScopedPerfEvent scopedPerf("Compute Shader: Clear", commandList, 1);
+            std::chrono::high_resolution_clock::time_point startPointCPU;
+            if(context->m_profile)
+            {
+                startPointCPU = std::chrono::high_resolution_clock::now();
+                commandList->EndQuery(context->m_internal.m_TimestampQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, s_timerIndex++);
+            }
+
+            commandList->SetComputeRootSignature(ContextInternal::computeShader_Clear_rootSig);
+            commandList->SetPipelineState(ContextInternal::computeShader_Clear_pso);
+
+            DX12Utils::ResourceDescriptor descriptors[] = {
+                { context->m_output.texture_Output, context->m_output.texture_Output_format, DX12Utils::AccessType::UAV, DX12Utils::ResourceType::Texture2D, false, 0, 0, 0 }
+            };
+
+            D3D12_GPU_DESCRIPTOR_HANDLE descriptorTable = GetDescriptorTable(device, s_srvHeap, descriptors, 1, Context::LogFn);
+            commandList->SetComputeRootDescriptorTable(0, descriptorTable);
+
+            unsigned int baseDispatchSize[3] = {
+                context->m_output.texture_Output_size[0],
+                context->m_output.texture_Output_size[1],
+                context->m_output.texture_Output_size[2]
+            };
+
+            unsigned int dispatchSize[3] = {
+                (((baseDispatchSize[0] + 0) * 1) / 1 + 0 + 8 - 1) / 8,
+                (((baseDispatchSize[1] + 0) * 1) / 1 + 0 + 8 - 1) / 8,
+                (((baseDispatchSize[2] + 0) * 1) / 1 + 0 + 1 - 1) / 1
+            };
+
+            commandList->Dispatch(dispatchSize[0], dispatchSize[1], dispatchSize[2]);
+
+            if(context->m_profile)
+            {
+                context->m_profileData[(s_timerIndex-1)/2].m_label = "Clear";
+                context->m_profileData[(s_timerIndex-1)/2].m_cpu = (float)std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - startPointCPU).count();
+                commandList->EndQuery(context->m_internal.m_TimestampQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, s_timerIndex++);
+            }
+        }
+
+        // Transition resources for the next action
+        {
+            D3D12_RESOURCE_BARRIER barriers[1];
+
+            barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
+            barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+            barriers[0].UAV.pResource = context->m_output.texture_Output;
+
+            commandList->ResourceBarrier(1, barriers);
+        }
+
+        // Compute Shader: Left_WriteColor
+        {
+            ScopedPerfEvent scopedPerf("Compute Shader: Left_WriteColor", commandList, 2);
+            std::chrono::high_resolution_clock::time_point startPointCPU;
+            if(context->m_profile)
+            {
+                startPointCPU = std::chrono::high_resolution_clock::now();
+                commandList->EndQuery(context->m_internal.m_TimestampQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, s_timerIndex++);
+            }
+
+            commandList->SetComputeRootSignature(ContextInternal::computeShader_Left_WriteColor_rootSig);
+            commandList->SetPipelineState(ContextInternal::computeShader_Left_WriteColor_pso);
+
+            DX12Utils::ResourceDescriptor descriptors[] = {
+                { context->m_output.texture_Output, context->m_output.texture_Output_format, DX12Utils::AccessType::UAV, DX12Utils::ResourceType::Texture2D, false, 0, 0, 0 }
+            };
+
+            D3D12_GPU_DESCRIPTOR_HANDLE descriptorTable = GetDescriptorTable(device, s_srvHeap, descriptors, 1, Context::LogFn);
+            commandList->SetComputeRootDescriptorTable(0, descriptorTable);
+
+            unsigned int baseDispatchSize[3] = {
+                context->m_output.texture_Output_size[0],
+                context->m_output.texture_Output_size[1],
+                context->m_output.texture_Output_size[2]
+            };
+
+            unsigned int dispatchSize[3] = {
+                (((baseDispatchSize[0] + 0) * 1) / 1 + 0 + 8 - 1) / 8,
+                (((baseDispatchSize[1] + 0) * 1) / 1 + 0 + 8 - 1) / 8,
+                (((baseDispatchSize[2] + 0) * 1) / 1 + 0 + 1 - 1) / 1
+            };
+
+            commandList->Dispatch(dispatchSize[0], dispatchSize[1], dispatchSize[2]);
+
+            if(context->m_profile)
+            {
+                context->m_profileData[(s_timerIndex-1)/2].m_label = "Left_WriteColor";
+                context->m_profileData[(s_timerIndex-1)/2].m_cpu = (float)std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - startPointCPU).count();
+                commandList->EndQuery(context->m_internal.m_TimestampQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, s_timerIndex++);
+            }
+        }
+
+        // Transition resources for the next action
+        {
+            D3D12_RESOURCE_BARRIER barriers[1];
+
+            barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
+            barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+            barriers[0].UAV.pResource = context->m_output.texture_Output;
+
+            commandList->ResourceBarrier(1, barriers);
+        }
+
+        // Compute Shader: Right_WriteColor
+        {
+            ScopedPerfEvent scopedPerf("Compute Shader: Right_WriteColor", commandList, 3);
+            std::chrono::high_resolution_clock::time_point startPointCPU;
+            if(context->m_profile)
+            {
+                startPointCPU = std::chrono::high_resolution_clock::now();
+                commandList->EndQuery(context->m_internal.m_TimestampQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, s_timerIndex++);
+            }
+
+            commandList->SetComputeRootSignature(ContextInternal::computeShader_Right_WriteColor_rootSig);
+            commandList->SetPipelineState(ContextInternal::computeShader_Right_WriteColor_pso);
+
+            DX12Utils::ResourceDescriptor descriptors[] = {
+                { context->m_output.texture_Output, context->m_output.texture_Output_format, DX12Utils::AccessType::UAV, DX12Utils::ResourceType::Texture2D, false, 0, 0, 0 }
+            };
+
+            D3D12_GPU_DESCRIPTOR_HANDLE descriptorTable = GetDescriptorTable(device, s_srvHeap, descriptors, 1, Context::LogFn);
+            commandList->SetComputeRootDescriptorTable(0, descriptorTable);
+
+            unsigned int baseDispatchSize[3] = {
+                context->m_output.texture_Output_size[0],
+                context->m_output.texture_Output_size[1],
+                context->m_output.texture_Output_size[2]
+            };
+
+            unsigned int dispatchSize[3] = {
+                (((baseDispatchSize[0] + 0) * 1) / 1 + 0 + 8 - 1) / 8,
+                (((baseDispatchSize[1] + 0) * 1) / 1 + 0 + 8 - 1) / 8,
+                (((baseDispatchSize[2] + 0) * 1) / 1 + 0 + 1 - 1) / 1
+            };
+
+            commandList->Dispatch(dispatchSize[0], dispatchSize[1], dispatchSize[2]);
+
+            if(context->m_profile)
+            {
+                context->m_profileData[(s_timerIndex-1)/2].m_label = "Right_WriteColor";
+                context->m_profileData[(s_timerIndex-1)/2].m_cpu = (float)std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - startPointCPU).count();
+                commandList->EndQuery(context->m_internal.m_TimestampQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, s_timerIndex++);
+            }
+        }
+
+        if(context->m_profile)
+        {
+            context->m_profileData[(s_timerIndex-1)/2].m_label = "Total";
+            context->m_profileData[(s_timerIndex-1)/2].m_cpu = (float)std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - startPointCPUTechnique).count();
+            commandList->EndQuery(context->m_internal.m_TimestampQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, s_timerIndex++);
+            commandList->ResolveQueryData(context->m_internal.m_TimestampQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, s_timerIndex, context->m_internal.m_TimestampReadbackBuffer, 0);
+        }
+    }
+
+    void Context::EnsureResourcesCreated(ID3D12Device* device, ID3D12GraphicsCommandList* commandList)
+    { // Creates or re-creates the "Output" texture when it is missing or stale; commandList is not referenced in this body.
+        bool dirty = false; // set to true when the texture below is (re)created, then passed to EnsureDrawCallPSOsCreated
+
+        // Output
+        {
+            unsigned int baseSize[3] = { 1, 1, 1 };
+
+            unsigned int desiredSize[3] = { // evaluates to { 512, 512, 1 }
+                ((baseSize[0] + 0) * 512) / 1 + 0,
+                ((baseSize[1] + 0) * 512) / 1 + 0,
+                ((baseSize[2] + 0) * 1) / 1 + 0
+            };
+
+            static const unsigned int desiredNumMips = 1;
+
+            DXGI_FORMAT desiredFormat = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
+            // Re-create when there is no texture yet, or its cached size, mip count or format differs from the desired values.
+            if(!m_output.texture_Output ||
+               m_output.texture_Output_size[0] != desiredSize[0] ||
+               m_output.texture_Output_size[1] != desiredSize[1] ||
+               m_output.texture_Output_size[2] != desiredSize[2] ||
+               m_output.texture_Output_numMips != desiredNumMips ||
+               m_output.texture_Output_format != desiredFormat)
+            {
+                dirty = true;
+                if(m_output.texture_Output)
+                    s_delayedRelease.Add(m_output.texture_Output); // the old texture is handed to s_delayedRelease instead of being released here
+                // NOTE(review): the CreateTexture result is not checked for nullptr before the cached size/format are updated.
+                m_output.texture_Output = DX12Utils::CreateTexture(device, desiredSize, desiredNumMips, desiredFormat, m_output.texture_Output_flags, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, DX12Utils::ResourceType::Texture2D, (c_debugNames ? L"Output" : nullptr), Context::LogFn);
+                m_output.texture_Output_size[0] = desiredSize[0];
+                m_output.texture_Output_size[1] = desiredSize[1];
+                m_output.texture_Output_size[2] = desiredSize[2];
+                m_output.texture_Output_numMips = desiredNumMips;
+                m_output.texture_Output_format = desiredFormat;
+            }
+        }
+        EnsureDrawCallPSOsCreated(device, dirty); // NOTE(review): the bool result is discarded
+    }
+
+    bool Context::EnsureDrawCallPSOsCreated(ID3D12Device* device, bool dirty)
+    { // Stub for this technique: neither parameter is used and the function always returns true.
+        return true;
+    }
+};
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/private/technique.h b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/private/technique.h
new file mode 100644
index 00000000..5de6508f
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/private/technique.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <d3d12.h>
+#include <array>
+#include <vector>
+#include <unordered_map>
+#include "DX12Utils/dxutils.h"
+
+namespace ConstOverride
+{ // Private declarations of the technique: std::array-based vector aliases and the per-context internal state.
+    using uint = unsigned int;
+    using uint2 = std::array<uint, 2>;
+    using uint3 = std::array<uint, 3>;
+    using uint4 = std::array<uint, 4>;
+
+    using int2 = std::array<int, 2>;
+    using int3 = std::array<int, 3>;
+    using int4 = std::array<int, 4>;
+    using float2 = std::array<float, 2>;
+    using float3 = std::array<float, 3>;
+    using float4 = std::array<float, 4>;
+    using float4x4 = std::array<std::array<float, 4>, 4>; // four float4-sized rows
+
+    struct ContextInternal
+    {
+        ID3D12QueryHeap* m_TimestampQueryHeap = nullptr; // Execute issues its timestamp EndQuery calls against this heap
+        ID3D12Resource* m_TimestampReadbackBuffer = nullptr; // destination of Execute's ResolveQueryData call
+
+        static ID3D12CommandSignature* s_commandSignatureDispatch; // static: one instance for the whole technique, not per context
+
+        // Variables
+        const uint variable___literal_0 = 50;  // Made to replace variable "MinX" with a constant value in subgraph node "Left"
+        const uint variable___literal_1 = 100;  // Made to replace variable "MaxX" with a constant value in subgraph node "Left"
+        const uint variable___literal_2 = 150;  // Made to replace variable "MinX" with a constant value in subgraph node "Right"
+        const uint variable___literal_3 = 200;  // Made to replace variable "MaxX" with a constant value in subgraph node "Right"
+
+        static ID3D12PipelineState* computeShader_Clear_pso; // pipeline state / root signature pair for the "Clear" compute shader
+        static ID3D12RootSignature* computeShader_Clear_rootSig;
+
+        static ID3D12PipelineState* computeShader_Left_WriteColor_pso; // pair bound by Execute for "Compute Shader: Left_WriteColor"
+        static ID3D12RootSignature* computeShader_Left_WriteColor_rootSig;
+
+        static ID3D12PipelineState* computeShader_Right_WriteColor_pso; // pair bound by Execute for "Compute Shader: Right_WriteColor"
+        static ID3D12RootSignature* computeShader_Right_WriteColor_rootSig;
+
+        std::unordered_map<DX12Utils::SubResourceHeapAllocationInfo, int, DX12Utils::SubResourceHeapAllocationInfo> m_RTVCache; // the key type also serves as the hasher; presumably maps to the heap index returned by GetRTV — confirm
+        std::unordered_map<DX12Utils::SubResourceHeapAllocationInfo, int, DX12Utils::SubResourceHeapAllocationInfo> m_DSVCache; // same layout as m_RTVCache; presumably backs GetDSV — confirm
+
+        // Freed on destruction of the context
+        std::vector<ID3D12Resource*> m_managedResources; // Context::AddManagedResource appends to this list
+    };
+};
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/all.h b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/all.h
new file mode 100644
index 00000000..53fc4255
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/all.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#include "./technique.h"
+#include "./imgui.h"
+#include "./pythoninterface.h"
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/imgui.h b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/imgui.h
new file mode 100644
index 00000000..4ab180be
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/imgui.h
@@ -0,0 +1,55 @@
+#pragma once
+
+#include "technique.h"
+
+namespace ConstOverride
+{
+    inline void ShowToolTip(const char* tooltip)
+    { // Adds a yellow "[?]" marker on the current line and shows `tooltip` while it is hovered; does nothing for a null or empty string.
+        if (tooltip != nullptr && tooltip[0] != '\0')
+        {
+            ImGui::SameLine();
+            ImGui::PushStyleColor(ImGuiCol_Text, IM_COL32(255, 255, 0, 255));
+            ImGui::Text("[?]");
+            ImGui::PopStyleColor();
+            if (ImGui::IsItemHovered(ImGuiHoveredFlags_AllowWhenDisabled))
+                ImGui::SetTooltip("%s", tooltip);
+        }
+    }
+
+    // Draws the ImGui panel for one ConstOverride context: a "Profile" checkbox and,
+    // while profiling is on, a table of the entries returned by ReadbackProfileData()
+    // with the CPU and GPU times multiplied by 1000 for the "ms" columns.
+    // Marked inline (like ShowToolTip) because it is defined in a header.
+    inline void MakeUI(Context* context, ID3D12CommandQueue* commandQueue)
+    {
+        ImGui::PushID("gigi_ConstOverride");
+
+        ImGui::Checkbox("Profile", &context->m_profile);
+        if (context->m_profile)
+        {
+            int numEntries = 0;
+            const ProfileEntry* entries = context->ReadbackProfileData(commandQueue, numEntries);
+            if (ImGui::BeginTable("profiling", 3, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg))
+            {
+                ImGui::TableSetupColumn("Label");
+                ImGui::TableSetupColumn("CPU ms");
+                ImGui::TableSetupColumn("GPU ms");
+                ImGui::TableHeadersRow();
+                for (int entryIndex = 0; entryIndex < numEntries; ++entryIndex)
+                {
+                    ImGui::TableNextRow();
+                    ImGui::TableNextColumn();
+                    ImGui::TextUnformatted(entries[entryIndex].m_label);
+                    ImGui::TableNextColumn();
+                    ImGui::Text("%0.3f", entries[entryIndex].m_cpu * 1000.0f);
+                    ImGui::TableNextColumn();
+                    ImGui::Text("%0.3f", entries[entryIndex].m_gpu * 1000.0f);
+                }
+                ImGui::EndTable();
+            }
+        }
+
+        ImGui::PopID();
+    }
+};
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/pythoninterface.h b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/pythoninterface.h
new file mode 100644
index 00000000..a52dc891
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/pythoninterface.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "technique.h"
+
+namespace ConstOverride
+{
+    // Method table for the Python module: it holds only the all-null terminator entry, so no methods are exposed.
+    static PyMethodDef pythonModuleMethods[] = {
+        {nullptr, nullptr, 0, nullptr}
+    };
+
+    static PyModuleDef pythonModule = { // module named "ConstOverride", NULL docstring, m_size of -1, methods from the table above
+        PyModuleDef_HEAD_INIT, "ConstOverride", NULL, -1, pythonModuleMethods,
+        NULL, NULL, NULL, NULL
+    };
+
+    PyObject* CreateModule() // NOTE(review): non-inline definition in a header; including it from more than one translation unit would presumably cause duplicate-symbol link errors — confirm
+    {
+        PyObject* module = PyModule_Create(&pythonModule);
+        return module;
+    }
+};
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/technique.h b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/technique.h
new file mode 100644
index 00000000..c6fe6249
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/public/technique.h
@@ -0,0 +1,182 @@
+#pragma once
+
+#include "../private/technique.h"
+#include <string>
+#include <vector>
+#include "DX12Utils/logfn.h"
+#include "DX12Utils/dxutils.h"
+
+namespace ConstOverride
+{
+    // Compile time technique settings. Feel free to modify these.
+    static const int c_numSRVDescriptors = 256;  // If 0, no heap will be created. One heap shared by all contexts of this technique.
+    static const int c_numRTVDescriptors = 256;  // If 0, no heap will be created. One heap shared by all contexts of this technique.
+    static const int c_numDSVDescriptors = 256;  // If 0, no heap will be created. One heap shared by all contexts of this technique.
+    static const bool c_debugShaders = true; // If true, will compile shaders with debug info enabled.
+    static const bool c_debugNames = true; // If true, will set debug names on objects. If false, debug names should be deadstripped from the executable.
+
+    // Information about the technique
+    static const bool c_requiresRaytracing = false; // If true, this technique will not work without raytracing support
+
+    using TPerfEventBeginFn = void (*)(const char* name, ID3D12GraphicsCommandList* commandList, int index);
+    using TPerfEventEndFn = void (*)(ID3D12GraphicsCommandList* commandList);
+
+    struct ProfileEntry
+    {
+        const char* m_label = nullptr; // name of the profiled action; Execute assigns string literals such as "Total"
+        float m_gpu = 0.0f; // GPU time; presumably seconds, since MakeUI multiplies it by 1000 for its "GPU ms" column — confirm
+        float m_cpu = 0.0f; // CPU time in seconds (Execute stores the count of a std::chrono::duration<double>)
+    };
+
+    struct Context
+    {
+        static const char* GetTechniqueName()
+        {
+            return "ConstOverride";
+        }
+
+        static const wchar_t* GetTechniqueNameW()
+        {
+            return L"ConstOverride";
+        }
+
+        // This is the input to the technique that you are expected to fill out
+        struct ContextInput
+        {
+        };
+        ContextInput m_input;
+
+        // This is the output of the technique that you can consume
+        struct ContextOutput
+        {
+
+            ID3D12Resource* texture_Output = nullptr;
+            unsigned int texture_Output_size[3] = { 0, 0, 0 };
+            unsigned int texture_Output_numMips = 0;
+            DXGI_FORMAT texture_Output_format = DXGI_FORMAT_UNKNOWN;
+            static const D3D12_RESOURCE_FLAGS texture_Output_flags =  D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+            const D3D12_RESOURCE_STATES c_texture_Output_endingState = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
+        };
+        ContextOutput m_output;
+
+        // Internal storage for the technique
+        ContextInternal m_internal;
+
+        // If true, will do both cpu and gpu profiling. Call ReadbackProfileData() on the context to get the profiling data.
+        bool m_profile = false;
+        const ProfileEntry* ReadbackProfileData(ID3D12CommandQueue* commandQueue, int& numItems);
+
+        // Set this static function pointer to your own log function if you want to receive callbacks on info, warnings and errors.
+        static TLogFn LogFn;
+
+        // These callbacks are for perf instrumentation, such as with Pix.
+        static TPerfEventBeginFn PerfEventBeginFn;
+        static TPerfEventEndFn PerfEventEndFn;
+
+        // The path to where the shader files for this technique are. Defaults to L"./"
+        static std::wstring s_techniqueLocation;
+
+        static int GetContextCount();
+        static Context* GetContext(int index);
+
+        // Buffer Creation
+        template <typename T>
+        ID3D12Resource* CreateManagedBuffer(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, D3D12_RESOURCE_FLAGS flags, const T* data, size_t count, const wchar_t* debugName, D3D12_RESOURCE_STATES desiredState = D3D12_RESOURCE_STATE_COPY_DEST)
+        {
+            return CreateManagedBuffer(device, commandList, flags, (void*)data, count * sizeof(T), debugName, desiredState);
+        }
+
+        template <typename T>
+        ID3D12Resource* CreateManagedBuffer(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, D3D12_RESOURCE_FLAGS flags, const T& data, const wchar_t* debugName, D3D12_RESOURCE_STATES desiredState = D3D12_RESOURCE_STATE_COPY_DEST)
+        {
+            return CreateManagedBuffer(device, commandList, flags, (void*)&data, sizeof(T), debugName, desiredState);
+        }
+
+        template <typename T>
+        ID3D12Resource* CreateManagedBuffer(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, D3D12_RESOURCE_FLAGS flags, const std::vector<T>& data, const wchar_t* debugName, D3D12_RESOURCE_STATES desiredState = D3D12_RESOURCE_STATE_COPY_DEST)
+        {
+            return CreateManagedBuffer(device, commandList, flags, (void*)data.data(), data.size() * sizeof(T), debugName, desiredState);
+        }
+
+        ID3D12Resource* CreateManagedBuffer(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, D3D12_RESOURCE_FLAGS flags, const void* data, size_t size, const wchar_t* debugName, D3D12_RESOURCE_STATES desiredState = D3D12_RESOURCE_STATE_COPY_DEST);
+
+        // Texture Creation
+
+        ID3D12Resource* CreateManagedTexture(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, D3D12_RESOURCE_FLAGS flags, DXGI_FORMAT format, const unsigned int size[3], unsigned int numMips, DX12Utils::ResourceType resourceType, const void* initialData, const wchar_t* debugName, D3D12_RESOURCE_STATES desiredState = D3D12_RESOURCE_STATE_COPY_DEST);
+        ID3D12Resource* CreateManagedTextureAndClear(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, D3D12_RESOURCE_FLAGS flags, DXGI_FORMAT format, const unsigned int size[3], unsigned int numMips, DX12Utils::ResourceType resourceType, void* clearValue, size_t clearValueSize, const wchar_t* debugName, D3D12_RESOURCE_STATES desiredState = D3D12_RESOURCE_STATE_COPY_DEST);
+        ID3D12Resource* CreateManagedTextureFromFile(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, D3D12_RESOURCE_FLAGS flags, DXGI_FORMAT format, DX12Utils::ResourceType resourceType, const char* fileName, bool sourceIsSRGB, unsigned int size[3], const wchar_t* debugName, D3D12_RESOURCE_STATES desiredState = D3D12_RESOURCE_STATE_COPY_DEST);
+
+        // Helpers for the host app
+        void UploadTextureData(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, ID3D12Resource* texture, D3D12_RESOURCE_STATES textureState, const void* data, unsigned int unalignedPitch);
+        void UploadBufferData(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, ID3D12Resource* buffer, D3D12_RESOURCE_STATES bufferState, const void* data, unsigned int dataSize);
+
+        // The resource will be freed when the context is destroyed
+        void AddManagedResource(ID3D12Resource* resource)
+        {
+            m_internal.m_managedResources.push_back(resource);
+        }
+
+        // Returns the allocated index within the respective heap
+        int GetRTV(ID3D12Device* device, ID3D12Resource* resource, DXGI_FORMAT format, D3D12_RTV_DIMENSION dimension, int arrayIndex, int mipIndex, const char* debugName);
+        int GetDSV(ID3D12Device* device, ID3D12Resource* resource, DXGI_FORMAT format, D3D12_DSV_DIMENSION dimension, int arrayIndex, int mipIndex, const char* debugName);
+
+        // Builds a BLAS and TLAS through DX12Utils::CreateTLAS and registers scratch, instanceDescs, blas and tlas with AddManagedResource. Returns false, registering nothing, if CreateTLAS fails.
+        bool CreateManagedTLAS(ID3D12Device* device, ID3D12GraphicsCommandList* commandList, ID3D12Resource* vertexBuffer, int vertexBufferCount, bool isAABBs, D3D12_RAYTRACING_GEOMETRY_FLAGS geometryFlags, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS buildFlags, DXGI_FORMAT vertexPositionFormat, unsigned int vertexPositionOffset, unsigned int vertexPositionStride, ID3D12Resource*& blas, unsigned int& blasSize, ID3D12Resource*& tlas, unsigned int& tlasSize, TLogFn logFn)
+        {
+            ID3D12Resource* scratch = nullptr;
+            ID3D12Resource* instanceDescs = nullptr;
+            // Log through the caller-supplied logFn; fall back to the static Context::LogFn when none is given.
+            if (!DX12Utils::CreateTLAS(device, commandList, vertexBuffer, vertexBufferCount, isAABBs, geometryFlags, buildFlags, vertexPositionFormat, vertexPositionOffset, vertexPositionStride, blas, blasSize, tlas, tlasSize, scratch, instanceDescs, logFn ? logFn : LogFn))
+                return false;
+
+            // Managed resources are freed when the context is destroyed; this includes the blas/tlas handed back to the caller.
+            AddManagedResource(scratch);
+            AddManagedResource(instanceDescs);
+            AddManagedResource(blas);
+            AddManagedResource(tlas);
+            return true;
+        }
+
+        // Get information about the primary output texture, if specified in the render graph
+        ID3D12Resource* GetPrimaryOutputTexture();
+        D3D12_RESOURCE_STATES GetPrimaryOutputTextureState();
+
+    private:
+        friend void DestroyContext(Context* context);
+        ~Context();
+
+        friend void Execute(Context* context, ID3D12Device* device, ID3D12GraphicsCommandList* commandList);
+        void EnsureResourcesCreated(ID3D12Device* device, ID3D12GraphicsCommandList* commandList);
+        bool EnsureDrawCallPSOsCreated(ID3D12Device* device, bool dirty);
+
+        ProfileEntry m_profileData[3+1]; // One for each action node, and another for the total
+    };
+
+    // Scope guard for perf markers: the constructor calls Context::PerfEventBeginFn and the destructor calls Context::PerfEventEndFn on the same command list.
+    struct ScopedPerfEvent
+    {
+        ID3D12GraphicsCommandList* m_commandList;
+
+        ScopedPerfEvent(const char* name, ID3D12GraphicsCommandList* commandList, int index)
+            : m_commandList(commandList)
+        {
+            Context::PerfEventBeginFn(name, m_commandList, index);
+        }
+        ~ScopedPerfEvent()
+        {
+            Context::PerfEventEndFn(m_commandList);
+        }
+    };
+
+    // Create 0 to N contexts at any point
+    Context* CreateContext(ID3D12Device* device);
+
+    // Call at the beginning of your frame
+    void OnNewFrame(int framesInFlight);
+
+    // Call this 0 to M times a frame on each context to execute the technique
+    void Execute(Context* context, ID3D12Device* device, ID3D12GraphicsCommandList* commandList);
+
+    // Destroy a context
+    void DestroyContext(Context* context);
+};
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/readme.txt b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/readme.txt
new file mode 100644
index 00000000..0306583e
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/readme.txt
@@ -0,0 +1,95 @@
+Here are the instructions for how to use this package in a dx12 application
+
+=====================================================================================================
+1) Add the code files to your project: private, public, DX12Utils
+=====================================================================================================
+
+=====================================================================================================
+2) Include the public/technique.h file where you want to interact with the technique from
+as well as the public/imgui.h and public/pythoninterface.h file if you want that functionality.
+=====================================================================================================
+
+=====================================================================================================
+3) Create 0 or more contexts at initialization or other times. Member variables are a good place to
+store them.
+=====================================================================================================
+
+ConstOverride::Context *m_ConstOverrideContext = nullptr;
+
+m_ConstOverrideContext = ConstOverride::CreateContext(device);
+
+=====================================================================================================
+4) Call OnNewFrame at the beginning of your frame.
+=====================================================================================================
+
+ConstOverride::OnNewFrame(FramesInFlight)
+
+FramesInFlight is the number of buffered frames in your application, which the technique uses to
+know when temporary resources are no longer used and can safely be released.
+
+=====================================================================================================
+5) Destroy each context at some point before application exit.
+=====================================================================================================
+
+ConstOverride::DestroyContext(m_ConstOverrideContext);
+
+=====================================================================================================
+6) Ensure that the technique has the right path to the root folder of the technique, to find
+assets and shaders.
+=====================================================================================================
+
+// By default this is set to L"./", so only need to set it if that is not right
+ConstOverride::Context::s_techniqueLocation = L"./"
+
+=====================================================================================================
+7) Call execute on each context 0 or more times per frame, giving inputs and using outputs.
+=====================================================================================================
+
+// You should fill out everything in this struct before calling Execute
+m_ConstOverrideContext->m_input.
+
+ConstOverride::Execute(m_ConstOverrideContext, m_device, m_commandList);
+
+=====================================================================================================
+8) Handle Assets
+=====================================================================================================
+
+The assets folder contains any assets the technique needs, such as textures.
+
+Those need to be loaded and provided to the technique via the context input structure as appropriate.
+
+Resources loaded by the technique (such as those declared in shaders) will happen within the generated
+technique code and do not need to be loaded manually.
+
+=====================================================================================================
+9) Optional: Hook up a custom log function to get notifications of internal errors and warnings
+=====================================================================================================
+
+void CustomLogFn(LogLevel level, const char* msg, ...)
+{
+}
+
+ConstOverride::Context::LogFn = &CustomLogFn;
+
+you can do similar for perf markers (like for pix) by overriding these:
+
+ConstOverride::Context::PerfEventBeginFn
+ConstOverride::Context::PerfEventEndFn
+
+=====================================================================================================
+10) Optional: Hook up imgui for automatic UI of technique inputs
+=====================================================================================================
+
+if (m_ConstOverrideContext && ImGui::CollapsingHeader("ConstOverride"))
+    ConstOverride::MakeUI(m_ConstOverrideContext, m_commandQueue);
+
+=====================================================================================================
+11) Optional: Hook up python
+=====================================================================================================
+
+PyImport_AppendInittab("ConstOverride", ConstOverride::CreateModule);
+
+In the python module, you will need to "import ConstOverride", and then you will be able to set user exposed
+variables like:
+
+ConstOverride.Set_<variable name>(<value>);
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/shaders/ConstOverrideSubgraph_Left/ConstOverride_WriteColor.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/shaders/ConstOverrideSubgraph_Left/ConstOverride_WriteColor.hlsl
new file mode 100644
index 00000000..eb5df6fc
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/shaders/ConstOverrideSubgraph_Left/ConstOverride_WriteColor.hlsl
@@ -0,0 +1,20 @@
+// Unnamed technique, shader WriteColorCS
+
+
+RWTexture2D<float4> Color : register(u0);
+
+#line 2
+
+
+[numthreads(8, 8, 1)]
+#line 4
+void csmain(uint3 DTid : SV_DispatchThreadID)
+{ // Writes a fixed colour into Color for every thread whose x coordinate lies in the inclusive range [50, 100].
+    if (DTid.x >= (50) && DTid.x <= (100))
+        Color[DTid.xy] = float4(0.2f, 0.8f, 0.2f, 1.0f);
+}
+
+/*
+Shader Resources:
+	Texture Color (as UAV)
+*/
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/shaders/ConstOverrideSubgraph_Right/ConstOverride_WriteColor.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/shaders/ConstOverrideSubgraph_Right/ConstOverride_WriteColor.hlsl
new file mode 100644
index 00000000..f9c97a03
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/shaders/ConstOverrideSubgraph_Right/ConstOverride_WriteColor.hlsl
@@ -0,0 +1,20 @@
+// Unnamed technique, shader WriteColorCS
+
+
+RWTexture2D<float4> Color : register(u0);
+
+#line 2
+
+
+[numthreads(8, 8, 1)]
+#line 4
+void csmain(uint3 DTid : SV_DispatchThreadID)
+{ // Writes a fixed colour into Color for every thread whose x coordinate lies in the inclusive range [150, 200].
+    if (DTid.x >= (150) && DTid.x <= (200))
+        Color[DTid.xy] = float4(0.2f, 0.8f, 0.2f, 1.0f);
+}
+
+/*
+Shader Resources:
+	Texture Color (as UAV)
+*/
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/shaders/ConstOverride_Clear.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/shaders/ConstOverride_Clear.hlsl
new file mode 100644
index 00000000..ce6cc846
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/ConstOverride/shaders/ConstOverride_Clear.hlsl
@@ -0,0 +1,19 @@
+// Unnamed technique, shader ClearCS
+
+
+RWTexture2D<float4> Output : register(u0);
+
+#line 2
+
+
+[numthreads(8, 8, 1)]
+#line 4
+void csmain(uint3 DTid : SV_DispatchThreadID)
+{ // Fills Output with a constant (0.5, 0.5, 0.5, 1.0) value, one pixel per thread.
+    Output[DTid.xy] = float4(0.5f, 0.5f, 0.5f, 1.0f);
+}
+
+/*
+Shader Resources:
+	Texture Output (as UAV)
+*/
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/private/technique.cpp b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/private/technique.cpp
index 908f0b95..98ed3210 100644
--- a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/private/technique.cpp
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/private/technique.cpp
@@ -85,7 +85,7 @@ namespace SubGraphLoops
                 return false;
 
             ShaderCompilationInfo shaderCompilationInfo;
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubGraphLoopsInner/SubGraphLoopsBlur.hlsl";
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubGraphLoopsInner_FilterSub Iteration 0/SubGraphLoopsBlur.hlsl";
             shaderCompilationInfo.entryPoint = "main";
             shaderCompilationInfo.shaderModel = "cs_6_1";
             shaderCompilationInfo.debugName = (c_debugNames ? "FilterSub_Iteration_0_DoBlur" : "");
@@ -131,7 +131,7 @@ namespace SubGraphLoops
                 return false;
 
             ShaderCompilationInfo shaderCompilationInfo;
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubGraphLoopsInner/SubGraphLoopsBlur.hlsl";
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubGraphLoopsInner_FilterSub Iteration 1/SubGraphLoopsBlur.hlsl";
             shaderCompilationInfo.entryPoint = "main";
             shaderCompilationInfo.shaderModel = "cs_6_1";
             shaderCompilationInfo.debugName = (c_debugNames ? "FilterSub_Iteration_1_DoBlur" : "");
@@ -177,7 +177,7 @@ namespace SubGraphLoops
                 return false;
 
             ShaderCompilationInfo shaderCompilationInfo;
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubGraphLoopsInner/SubGraphLoopsBlur.hlsl";
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubGraphLoopsInner_FilterSub Iteration 2/SubGraphLoopsBlur.hlsl";
             shaderCompilationInfo.entryPoint = "main";
             shaderCompilationInfo.shaderModel = "cs_6_1";
             shaderCompilationInfo.debugName = (c_debugNames ? "FilterSub_Iteration_2_DoBlur" : "");
@@ -223,7 +223,7 @@ namespace SubGraphLoops
                 return false;
 
             ShaderCompilationInfo shaderCompilationInfo;
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubGraphLoopsInner/SubGraphLoopsBlur.hlsl";
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubGraphLoopsInner_FilterSub Iteration 3/SubGraphLoopsBlur.hlsl";
             shaderCompilationInfo.entryPoint = "main";
             shaderCompilationInfo.shaderModel = "cs_6_1";
             shaderCompilationInfo.debugName = (c_debugNames ? "FilterSub_Iteration_3_DoBlur" : "");
@@ -269,7 +269,7 @@ namespace SubGraphLoops
                 return false;
 
             ShaderCompilationInfo shaderCompilationInfo;
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubGraphLoopsInner/SubGraphLoopsBlur.hlsl";
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubGraphLoopsInner_FilterSub Iteration 4/SubGraphLoopsBlur.hlsl";
             shaderCompilationInfo.entryPoint = "main";
             shaderCompilationInfo.shaderModel = "cs_6_1";
             shaderCompilationInfo.debugName = (c_debugNames ? "FilterSub_Iteration_4_DoBlur" : "");
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 0/SubGraphLoopsBlur.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 0/SubGraphLoopsBlur.hlsl
new file mode 100644
index 00000000..5209c7c7
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 0/SubGraphLoopsBlur.hlsl	
@@ -0,0 +1,71 @@
+// Unnamed technique, shader Blur
+
+
+struct Struct__FilterSub_Iteration_0_BlurCB
+{
+    uint FilterSub_Iteration_0_sRGB;
+    int __loopIndexValue_0;
+    float2 _padding0;
+};
+
+Texture2D<float4> Input : register(t0);
+RWTexture2D<float4> Output : register(u0);
+ConstantBuffer<Struct__FilterSub_Iteration_0_BlurCB> _FilterSub_Iteration_0_BlurCB : register(b0);
+
+#line 2
+
+
+float3 LinearToSRGB(float3 linearCol)
+{
+	float3 sRGBLo = linearCol * 12.92;
+	float3 sRGBHi = (pow(abs(linearCol), float3(1.0 / 2.4, 1.0 / 2.4, 1.0 / 2.4)) * 1.055) - 0.055;
+	float3 sRGB;
+	sRGB.r = linearCol.r <= 0.0031308 ? sRGBLo.r : sRGBHi.r;
+	sRGB.g = linearCol.g <= 0.0031308 ? sRGBLo.g : sRGBHi.g;
+	sRGB.b = linearCol.b <= 0.0031308 ? sRGBLo.b : sRGBHi.b;
+	return sRGB;
+}
+
+float3 SRGBToLinear(in float3 sRGBCol)
+{
+	float3 linearRGBLo = sRGBCol / 12.92;
+	float3 linearRGBHi = pow((sRGBCol + 0.055) / 1.055, float3(2.4, 2.4, 2.4));
+	float3 linearRGB;
+	linearRGB.r = sRGBCol.r <= 0.04045 ? linearRGBLo.r : linearRGBHi.r;
+	linearRGB.g = sRGBCol.g <= 0.04045 ? linearRGBLo.g : linearRGBHi.g;
+	linearRGB.b = sRGBCol.b <= 0.04045 ? linearRGBLo.b : linearRGBHi.b;
+	return linearRGB;
+}
+
+[numthreads(8, 8, 1)]
+#line 26
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+	int2 px = int2(DTid.xy);
+
+	int2 dims;
+	Input.GetDimensions(dims.x, dims.y);
+
+	int radius = _FilterSub_Iteration_0_BlurCB.__loopIndexValue_0 + 1;
+	float3 ret = float3(0.0f, 0.0f, 0.0f);
+	for (int iy = -1; iy <= 1; ++iy)
+	{
+		for (int ix = -1; ix <= 1; ++ix)
+		{
+			int2 readpx = (px + int2(ix, iy) * radius + dims) % dims;
+			ret += Input[readpx].rgb;
+		}
+	}
+	ret /= 9.0f;
+
+	if (_FilterSub_Iteration_0_BlurCB.FilterSub_Iteration_0_sRGB)
+		ret = LinearToSRGB(ret);
+
+	Output[px] = float4(ret, 1.0f);
+}
+
+/*
+Shader Resources:
+	Texture Input (as SRV)
+	Texture Output (as UAV)
+*/
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 1/SubGraphLoopsBlur.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 1/SubGraphLoopsBlur.hlsl
new file mode 100644
index 00000000..6d6d3214
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 1/SubGraphLoopsBlur.hlsl	
@@ -0,0 +1,71 @@
+// Unnamed technique, shader Blur
+
+
+struct Struct__FilterSub_Iteration_1_BlurCB
+{
+    uint FilterSub_Iteration_1_sRGB;
+    int __loopIndexValue_1;
+    float2 _padding0;
+};
+
+Texture2D<float4> Input : register(t0);
+RWTexture2D<float4> Output : register(u0);
+ConstantBuffer<Struct__FilterSub_Iteration_1_BlurCB> _FilterSub_Iteration_1_BlurCB : register(b0);
+
+#line 2
+
+
+float3 LinearToSRGB(float3 linearCol)
+{
+	float3 sRGBLo = linearCol * 12.92;
+	float3 sRGBHi = (pow(abs(linearCol), float3(1.0 / 2.4, 1.0 / 2.4, 1.0 / 2.4)) * 1.055) - 0.055;
+	float3 sRGB;
+	sRGB.r = linearCol.r <= 0.0031308 ? sRGBLo.r : sRGBHi.r;
+	sRGB.g = linearCol.g <= 0.0031308 ? sRGBLo.g : sRGBHi.g;
+	sRGB.b = linearCol.b <= 0.0031308 ? sRGBLo.b : sRGBHi.b;
+	return sRGB;
+}
+
+float3 SRGBToLinear(in float3 sRGBCol)
+{
+	float3 linearRGBLo = sRGBCol / 12.92;
+	float3 linearRGBHi = pow((sRGBCol + 0.055) / 1.055, float3(2.4, 2.4, 2.4));
+	float3 linearRGB;
+	linearRGB.r = sRGBCol.r <= 0.04045 ? linearRGBLo.r : linearRGBHi.r;
+	linearRGB.g = sRGBCol.g <= 0.04045 ? linearRGBLo.g : linearRGBHi.g;
+	linearRGB.b = sRGBCol.b <= 0.04045 ? linearRGBLo.b : linearRGBHi.b;
+	return linearRGB;
+}
+
+[numthreads(8, 8, 1)]
+#line 26
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+	int2 px = int2(DTid.xy);
+
+	int2 dims;
+	Input.GetDimensions(dims.x, dims.y);
+
+	int radius = _FilterSub_Iteration_1_BlurCB.__loopIndexValue_1 + 1;
+	float3 ret = float3(0.0f, 0.0f, 0.0f);
+	for (int iy = -1; iy <= 1; ++iy)
+	{
+		for (int ix = -1; ix <= 1; ++ix)
+		{
+			int2 readpx = (px + int2(ix, iy) * radius + dims) % dims;
+			ret += Input[readpx].rgb;
+		}
+	}
+	ret /= 9.0f;
+
+	if (_FilterSub_Iteration_1_BlurCB.FilterSub_Iteration_1_sRGB)
+		ret = LinearToSRGB(ret);
+
+	Output[px] = float4(ret, 1.0f);
+}
+
+/*
+Shader Resources:
+	Texture Input (as SRV)
+	Texture Output (as UAV)
+*/
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 2/SubGraphLoopsBlur.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 2/SubGraphLoopsBlur.hlsl
new file mode 100644
index 00000000..f681a7fb
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 2/SubGraphLoopsBlur.hlsl	
@@ -0,0 +1,71 @@
+// Unnamed technique, shader Blur
+
+
+struct Struct__FilterSub_Iteration_2_BlurCB
+{
+    uint FilterSub_Iteration_2_sRGB;
+    int __loopIndexValue_2;
+    float2 _padding0;
+};
+
+Texture2D<float4> Input : register(t0);
+RWTexture2D<float4> Output : register(u0);
+ConstantBuffer<Struct__FilterSub_Iteration_2_BlurCB> _FilterSub_Iteration_2_BlurCB : register(b0);
+
+#line 2
+
+
+float3 LinearToSRGB(float3 linearCol)
+{
+	float3 sRGBLo = linearCol * 12.92;
+	float3 sRGBHi = (pow(abs(linearCol), float3(1.0 / 2.4, 1.0 / 2.4, 1.0 / 2.4)) * 1.055) - 0.055;
+	float3 sRGB;
+	sRGB.r = linearCol.r <= 0.0031308 ? sRGBLo.r : sRGBHi.r;
+	sRGB.g = linearCol.g <= 0.0031308 ? sRGBLo.g : sRGBHi.g;
+	sRGB.b = linearCol.b <= 0.0031308 ? sRGBLo.b : sRGBHi.b;
+	return sRGB;
+}
+
+float3 SRGBToLinear(in float3 sRGBCol)
+{
+	float3 linearRGBLo = sRGBCol / 12.92;
+	float3 linearRGBHi = pow((sRGBCol + 0.055) / 1.055, float3(2.4, 2.4, 2.4));
+	float3 linearRGB;
+	linearRGB.r = sRGBCol.r <= 0.04045 ? linearRGBLo.r : linearRGBHi.r;
+	linearRGB.g = sRGBCol.g <= 0.04045 ? linearRGBLo.g : linearRGBHi.g;
+	linearRGB.b = sRGBCol.b <= 0.04045 ? linearRGBLo.b : linearRGBHi.b;
+	return linearRGB;
+}
+
+[numthreads(8, 8, 1)]
+#line 26
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+	int2 px = int2(DTid.xy);
+
+	int2 dims;
+	Input.GetDimensions(dims.x, dims.y);
+
+	int radius = _FilterSub_Iteration_2_BlurCB.__loopIndexValue_2 + 1;
+	float3 ret = float3(0.0f, 0.0f, 0.0f);
+	for (int iy = -1; iy <= 1; ++iy)
+	{
+		for (int ix = -1; ix <= 1; ++ix)
+		{
+			int2 readpx = (px + int2(ix, iy) * radius + dims) % dims;
+			ret += Input[readpx].rgb;
+		}
+	}
+	ret /= 9.0f;
+
+	if (_FilterSub_Iteration_2_BlurCB.FilterSub_Iteration_2_sRGB)
+		ret = LinearToSRGB(ret);
+
+	Output[px] = float4(ret, 1.0f);
+}
+
+/*
+Shader Resources:
+	Texture Input (as SRV)
+	Texture Output (as UAV)
+*/
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 3/SubGraphLoopsBlur.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 3/SubGraphLoopsBlur.hlsl
new file mode 100644
index 00000000..7f1242e4
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 3/SubGraphLoopsBlur.hlsl	
@@ -0,0 +1,71 @@
+// Unnamed technique, shader Blur
+
+
+struct Struct__FilterSub_Iteration_3_BlurCB
+{
+    uint FilterSub_Iteration_3_sRGB;
+    int __loopIndexValue_3;
+    float2 _padding0;
+};
+
+Texture2D<float4> Input : register(t0);
+RWTexture2D<float4> Output : register(u0);
+ConstantBuffer<Struct__FilterSub_Iteration_3_BlurCB> _FilterSub_Iteration_3_BlurCB : register(b0);
+
+#line 2
+
+
+float3 LinearToSRGB(float3 linearCol)
+{
+	float3 sRGBLo = linearCol * 12.92;
+	float3 sRGBHi = (pow(abs(linearCol), float3(1.0 / 2.4, 1.0 / 2.4, 1.0 / 2.4)) * 1.055) - 0.055;
+	float3 sRGB;
+	sRGB.r = linearCol.r <= 0.0031308 ? sRGBLo.r : sRGBHi.r;
+	sRGB.g = linearCol.g <= 0.0031308 ? sRGBLo.g : sRGBHi.g;
+	sRGB.b = linearCol.b <= 0.0031308 ? sRGBLo.b : sRGBHi.b;
+	return sRGB;
+}
+
+float3 SRGBToLinear(in float3 sRGBCol)
+{
+	float3 linearRGBLo = sRGBCol / 12.92;
+	float3 linearRGBHi = pow((sRGBCol + 0.055) / 1.055, float3(2.4, 2.4, 2.4));
+	float3 linearRGB;
+	linearRGB.r = sRGBCol.r <= 0.04045 ? linearRGBLo.r : linearRGBHi.r;
+	linearRGB.g = sRGBCol.g <= 0.04045 ? linearRGBLo.g : linearRGBHi.g;
+	linearRGB.b = sRGBCol.b <= 0.04045 ? linearRGBLo.b : linearRGBHi.b;
+	return linearRGB;
+}
+
+[numthreads(8, 8, 1)]
+#line 26
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+	int2 px = int2(DTid.xy);
+
+	int2 dims;
+	Input.GetDimensions(dims.x, dims.y);
+
+	int radius = _FilterSub_Iteration_3_BlurCB.__loopIndexValue_3 + 1;
+	float3 ret = float3(0.0f, 0.0f, 0.0f);
+	for (int iy = -1; iy <= 1; ++iy)
+	{
+		for (int ix = -1; ix <= 1; ++ix)
+		{
+			int2 readpx = (px + int2(ix, iy) * radius + dims) % dims;
+			ret += Input[readpx].rgb;
+		}
+	}
+	ret /= 9.0f;
+
+	if (_FilterSub_Iteration_3_BlurCB.FilterSub_Iteration_3_sRGB)
+		ret = LinearToSRGB(ret);
+
+	Output[px] = float4(ret, 1.0f);
+}
+
+/*
+Shader Resources:
+	Texture Input (as SRV)
+	Texture Output (as UAV)
+*/
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 4/SubGraphLoopsBlur.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 4/SubGraphLoopsBlur.hlsl
new file mode 100644
index 00000000..2e2f52e9
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubGraphLoops/shaders/SubGraphLoopsInner_FilterSub Iteration 4/SubGraphLoopsBlur.hlsl	
@@ -0,0 +1,71 @@
+// Unnamed technique, shader Blur
+
+
+struct Struct__FilterSub_Iteration_4_BlurCB
+{
+    uint FilterSub_Iteration_4_sRGB;
+    int __loopIndexValue_4;
+    float2 _padding0;
+};
+
+Texture2D<float4> Input : register(t0);
+RWTexture2D<float4> Output : register(u0);
+ConstantBuffer<Struct__FilterSub_Iteration_4_BlurCB> _FilterSub_Iteration_4_BlurCB : register(b0);
+
+#line 2
+
+
+float3 LinearToSRGB(float3 linearCol)
+{
+	float3 sRGBLo = linearCol * 12.92;
+	float3 sRGBHi = (pow(abs(linearCol), float3(1.0 / 2.4, 1.0 / 2.4, 1.0 / 2.4)) * 1.055) - 0.055;
+	float3 sRGB;
+	sRGB.r = linearCol.r <= 0.0031308 ? sRGBLo.r : sRGBHi.r;
+	sRGB.g = linearCol.g <= 0.0031308 ? sRGBLo.g : sRGBHi.g;
+	sRGB.b = linearCol.b <= 0.0031308 ? sRGBLo.b : sRGBHi.b;
+	return sRGB;
+}
+
+float3 SRGBToLinear(in float3 sRGBCol)
+{
+	float3 linearRGBLo = sRGBCol / 12.92;
+	float3 linearRGBHi = pow((sRGBCol + 0.055) / 1.055, float3(2.4, 2.4, 2.4));
+	float3 linearRGB;
+	linearRGB.r = sRGBCol.r <= 0.04045 ? linearRGBLo.r : linearRGBHi.r;
+	linearRGB.g = sRGBCol.g <= 0.04045 ? linearRGBLo.g : linearRGBHi.g;
+	linearRGB.b = sRGBCol.b <= 0.04045 ? linearRGBLo.b : linearRGBHi.b;
+	return linearRGB;
+}
+
+[numthreads(8, 8, 1)]
+#line 26
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+	int2 px = int2(DTid.xy);
+
+	int2 dims;
+	Input.GetDimensions(dims.x, dims.y);
+
+	int radius = _FilterSub_Iteration_4_BlurCB.__loopIndexValue_4 + 1;
+	float3 ret = float3(0.0f, 0.0f, 0.0f);
+	for (int iy = -1; iy <= 1; ++iy)
+	{
+		for (int ix = -1; ix <= 1; ++ix)
+		{
+			int2 readpx = (px + int2(ix, iy) * radius + dims) % dims;
+			ret += Input[readpx].rgb;
+		}
+	}
+	ret /= 9.0f;
+
+	if (_FilterSub_Iteration_4_BlurCB.FilterSub_Iteration_4_sRGB)
+		ret = LinearToSRGB(ret);
+
+	Output[px] = float4(ret, 1.0f);
+}
+
+/*
+Shader Resources:
+	Texture Input (as SRV)
+	Texture Output (as UAV)
+*/
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubInSub/private/technique.cpp b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubInSub/private/technique.cpp
index 29587270..7274e7d6 100644
--- a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubInSub/private/technique.cpp
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubInSub/private/technique.cpp
@@ -73,7 +73,7 @@ namespace SubInSub
                 return false;
 
             ShaderCompilationInfo shaderCompilationInfo;
-            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubInSubInner2/SubInSubInner2_CS.hlsl";
+            shaderCompilationInfo.fileName = std::filesystem::path(Context::s_techniqueLocation) / "shaders" / "SubInSubInner2_Inner1.Inner2/SubInSubInner2_CS.hlsl";
             shaderCompilationInfo.entryPoint = "main";
             shaderCompilationInfo.shaderModel = "cs_6_1";
             shaderCompilationInfo.debugName = (c_debugNames ? "Inner1_Inner2_Rotate_Colors" : "");
diff --git a/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubInSub/shaders/SubInSubInner2_Inner1.Inner2/SubInSubInner2_CS.hlsl b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubInSub/shaders/SubInSubInner2_Inner1.Inner2/SubInSubInner2_CS.hlsl
new file mode 100644
index 00000000..72d77c5c
--- /dev/null
+++ b/_GeneratedCode/UnitTests/DX12/UnitTests/SubGraph/SubInSub/shaders/SubInSubInner2_Inner1.Inner2/SubInSubInner2_CS.hlsl
@@ -0,0 +1,28 @@
+// SubInSubInner2 technique, shader Inner2CS
+
+
+struct Struct__Inner1_Inner2_Inner2CSCB
+{
+    float4 Inner1_Inner1Mult;
+};
+
+Texture2D<float4> Input : register(t0);
+RWTexture2D<float4> Output : register(u0);
+ConstantBuffer<Struct__Inner1_Inner2_Inner2CSCB> _Inner1_Inner2_Inner2CSCB : register(b0);
+
+#line 2
+
+
+[numthreads(8, 8, 1)]
+#line 4
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+	uint2 px = DTid.xy;
+	Output[px].rgba = Input[px].gbra * _Inner1_Inner2_Inner2CSCB.Inner1_Inner1Mult;
+}
+
+/*
+Shader Resources:
+	Texture Input (as SRV)
+	Texture Output (as UAV)
+*/
diff --git a/_GeneratedCode/UnitTests/DX12/main.cpp b/_GeneratedCode/UnitTests/DX12/main.cpp
index 7229f507..8cbb7fb7 100644
--- a/_GeneratedCode/UnitTests/DX12/main.cpp
+++ b/_GeneratedCode/UnitTests/DX12/main.cpp
@@ -225,6 +225,9 @@ static DX12Utils::ReadbackHelper    g_readbackHelper;
 #include "UnitTests\RayTrace\TwoRayGensSubgraph\public\technique.h"
 #include "UnitTests\RayTrace\TwoRayGensSubgraph\public\imgui.h"
 #include "UnitTests\RayTrace\TwoRayGensSubgraph\private\technique.h"
+#include "UnitTests\SubGraph\ConstOverride\public\technique.h"
+#include "UnitTests\SubGraph\ConstOverride\public\imgui.h"
+#include "UnitTests\SubGraph\ConstOverride\private\technique.h"
 #include "UnitTests\SubGraph\SubGraphLoops\public\technique.h"
 #include "UnitTests\SubGraph\SubGraphLoops\public\imgui.h"
 #include "UnitTests\SubGraph\SubGraphLoops\private\technique.h"
@@ -340,6 +343,7 @@ simpleRT::Context* m_simpleRT = nullptr;
 simpleRT_inline::Context* m_simpleRT_inline = nullptr;
 TwoRayGens::Context* m_TwoRayGens = nullptr;
 TwoRayGensSubgraph::Context* m_TwoRayGensSubgraph = nullptr;
+ConstOverride::Context* m_ConstOverride = nullptr;
 SubGraphLoops::Context* m_SubGraphLoops = nullptr;
 SubGraphTest::Context* m_SubGraphTest = nullptr;
 SubInSub::Context* m_SubInSub = nullptr;
@@ -405,6 +409,7 @@ bool g_doTest_simpleRT = false;
 bool g_doTest_simpleRT_inline = false;
 bool g_doTest_TwoRayGens = false;
 bool g_doTest_TwoRayGensSubgraph = false;
+bool g_doTest_ConstOverride = false;
 bool g_doTest_SubGraphLoops = false;
 bool g_doTest_SubGraphTest = false;
 bool g_doTest_SubInSub = false;
@@ -1023,6 +1028,20 @@ int main(int, char**)
         }
     }
 
+    if (!g_doSubsetTest || g_doTest_ConstOverride)
+    {
+        ConstOverride::Context::LogFn = &LogFunction;
+        ConstOverride::Context::PerfEventBeginFn = &PerfEventBeginFn;
+        ConstOverride::Context::PerfEventEndFn = &PerfEventEndFn;
+        ConstOverride::Context::s_techniqueLocation = L".\\UnitTests\\SubGraph\\ConstOverride\\";
+        m_ConstOverride = ConstOverride::CreateContext(g_pd3dDevice);
+        if (!m_ConstOverride)
+        {
+            printf("Could not create m_ConstOverride context");
+            return 1;
+        }
+    }
+
     if (!g_doSubsetTest || g_doTest_SubGraphLoops)
     {
         SubGraphLoops::Context::LogFn = &LogFunction;
@@ -1509,6 +1528,8 @@ int main(int, char**)
             TwoRayGens::MakeUI(m_TwoRayGens, g_pd3dCommandQueue);
         if (m_TwoRayGensSubgraph && ImGui::CollapsingHeader("TwoRayGensSubgraph"))
             TwoRayGensSubgraph::MakeUI(m_TwoRayGensSubgraph, g_pd3dCommandQueue);
+        if (m_ConstOverride && ImGui::CollapsingHeader("ConstOverride"))
+            ConstOverride::MakeUI(m_ConstOverride, g_pd3dCommandQueue);
         if (m_SubGraphLoops && ImGui::CollapsingHeader("SubGraphLoops"))
             SubGraphLoops::MakeUI(m_SubGraphLoops, g_pd3dCommandQueue);
         if (m_SubGraphTest && ImGui::CollapsingHeader("SubGraphTest"))
@@ -1697,6 +1718,8 @@ int main(int, char**)
             TwoRayGens::OnNewFrame(NUM_FRAMES_IN_FLIGHT);
         if (m_TwoRayGensSubgraph)
             TwoRayGensSubgraph::OnNewFrame(NUM_FRAMES_IN_FLIGHT);
+        if (m_ConstOverride)
+            ConstOverride::OnNewFrame(NUM_FRAMES_IN_FLIGHT);
         if (m_SubGraphLoops)
             SubGraphLoops::OnNewFrame(NUM_FRAMES_IN_FLIGHT);
         if (m_SubGraphTest)
@@ -1818,6 +1841,8 @@ int main(int, char**)
             UnitTest(g_pd3dDevice, g_pd3dCommandList, g_readbackHelper, m_TwoRayGens, UnitTestEvent::PreExecute);
         if (m_TwoRayGensSubgraph)
             UnitTest(g_pd3dDevice, g_pd3dCommandList, g_readbackHelper, m_TwoRayGensSubgraph, UnitTestEvent::PreExecute);
+        if (m_ConstOverride)
+            UnitTest(g_pd3dDevice, g_pd3dCommandList, g_readbackHelper, m_ConstOverride, UnitTestEvent::PreExecute);
         if (m_SubGraphLoops)
             UnitTest(g_pd3dDevice, g_pd3dCommandList, g_readbackHelper, m_SubGraphLoops, UnitTestEvent::PreExecute);
         if (m_SubGraphTest)
@@ -1939,6 +1964,8 @@ int main(int, char**)
             TwoRayGens::Execute(m_TwoRayGens, g_pd3dDevice, g_pd3dCommandList);
         if (m_TwoRayGensSubgraph)
             TwoRayGensSubgraph::Execute(m_TwoRayGensSubgraph, g_pd3dDevice, g_pd3dCommandList);
+        if (m_ConstOverride)
+            ConstOverride::Execute(m_ConstOverride, g_pd3dDevice, g_pd3dCommandList);
         if (m_SubGraphLoops)
             SubGraphLoops::Execute(m_SubGraphLoops, g_pd3dDevice, g_pd3dCommandList);
         if (m_SubGraphTest)
@@ -2060,6 +2087,8 @@ int main(int, char**)
             UnitTest(g_pd3dDevice, g_pd3dCommandList, g_readbackHelper, m_TwoRayGens, UnitTestEvent::PostExecute);
         if (m_TwoRayGensSubgraph)
             UnitTest(g_pd3dDevice, g_pd3dCommandList, g_readbackHelper, m_TwoRayGensSubgraph, UnitTestEvent::PostExecute);
+        if (m_ConstOverride)
+            UnitTest(g_pd3dDevice, g_pd3dCommandList, g_readbackHelper, m_ConstOverride, UnitTestEvent::PostExecute);
         if (m_SubGraphLoops)
             UnitTest(g_pd3dDevice, g_pd3dCommandList, g_readbackHelper, m_SubGraphLoops, UnitTestEvent::PostExecute);
         if (m_SubGraphTest)
@@ -2315,6 +2344,11 @@ int main(int, char**)
         TwoRayGensSubgraph::DestroyContext(m_TwoRayGensSubgraph);
         m_TwoRayGensSubgraph = nullptr;
     }
+    if (m_ConstOverride)
+    {
+        ConstOverride::DestroyContext(m_ConstOverride);
+        m_ConstOverride = nullptr;
+    }
     if (m_SubGraphLoops)
     {
         SubGraphLoops::DestroyContext(m_SubGraphLoops);
diff --git a/_GeneratedCode/UnitTests/Frostbite/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphA_A/TwoRayGens1.hlsl b/_GeneratedCode/UnitTests/Frostbite/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphA_A/TwoRayGens1.hlsl
new file mode 100644
index 00000000..769ba29f
--- /dev/null
+++ b/_GeneratedCode/UnitTests/Frostbite/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphA_A/TwoRayGens1.hlsl
@@ -0,0 +1,83 @@
+// TwoRayGens technique
+
+#include <Systems/Render/Raytrace/RaytraceBase.hlsl>
+
+// TODO: need to make ABI for PS support
+FB_ABI_BEGIN(TwoRayGensSubgraph_A_TwoRayGens1_Abi)
+FB_ABI_END
+
+struct Struct__A_TwoRayGens1CB
+{
+    float3 cameraPos;
+    float _padding0;
+    float4x4 clipToWorld;
+    float depthNearPlane;
+    float3 _padding1;
+};
+
+RWTexture2D<float4> g_texture : register(u0);
+RaytracingAccelerationStructure g_scene : FB_RT_SCENE_REGISTER(t0);
+cbuffer cb0 : register(b0)
+{
+	Struct__A_TwoRayGens1CB _A_TwoRayGens1CB;
+};
+
+
+struct Payload
+{
+	bool hit;
+};
+
+#define TraceARay(AccelerationStructure, RayFlags, InstanceInclusionMask, RayContributionToHitGroupIndex, MultiplierForGeometryContributionToHitGroupIndex, MissShaderIndex, Ray, payload) \
+	fb::rt::traceRay<TwoRayGensSubgraph_A_TwoRayGens1_Abi, FB_RT_FLAGS_UseSharedStack | FB_RT_FLAGS_DefaultStackLds, RayFlags> \
+		(AccelerationStructure, Ray, payload, InstanceInclusionMask, RayContributionToHitGroupIndex, MultiplierForGeometryContributionToHitGroupIndex, MissShaderIndex);
+
+FB_DEFINE_RAYGEN_SHADER(RayGen1, 8, 8, 1)
+{
+	uint2 px = DispatchRaysIndex().xy;
+	float2 dimensions = float2(DispatchRaysDimensions().xy);
+
+	float2 screenPos = (float2(px)+0.5f) / dimensions * 2.0 - 1.0;
+	screenPos.y = -screenPos.y;
+
+	float4 world = mul(float4(screenPos, _A_TwoRayGens1CB.depthNearPlane, 1), _A_TwoRayGens1CB.clipToWorld);
+	world.xyz /= world.w;
+
+	RayDesc ray;
+	ray.Origin = _A_TwoRayGens1CB.cameraPos;
+	ray.Direction = normalize(world.xyz - ray.Origin);
+	ray.TMin = 0;
+	ray.TMax = 1000.0f;
+
+	Payload payload = (Payload)0;
+
+	TraceARay(g_scene, // Scene (TLAS) buffer
+		RAY_FLAG_FORCE_OPAQUE, // Ray flags
+		0xFF, // Ray mask
+		0,
+		0,
+		1,
+		ray,
+		payload);
+
+	float4 color = g_texture[px];
+	color.a = 1.0f;
+	color.r = payload.hit ? 1.0f : 0.0f;
+	g_texture[px] = color;
+}
+
+FB_DEFINE_MISS_SHADER(TwoRayGensSubgraph_A_TwoRayGens1_Abi, Miss1, defaultLsrt, Payload, payload)
+{
+	payload.hit = false;
+}
+
+FB_DEFINE_CLOSEST_HIT_SHADER(TwoRayGensSubgraph_A_TwoRayGens1_Abi, ClosestHit1, defaultLsrt, Payload, payload)
+{
+	payload.hit = true;
+}
+
+/*
+Shader Resources:
+	Texture g_texture (as UAV)
+	Buffer g_scene (as RTScene)
+*/
diff --git a/_GeneratedCode/UnitTests/Frostbite/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphB_B/TwoRayGens2.hlsl b/_GeneratedCode/UnitTests/Frostbite/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphB_B/TwoRayGens2.hlsl
new file mode 100644
index 00000000..44bbf6c7
--- /dev/null
+++ b/_GeneratedCode/UnitTests/Frostbite/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphB_B/TwoRayGens2.hlsl
@@ -0,0 +1,101 @@
+// TwoRayGens technique
+
+#include <Systems/Render/Raytrace/RaytraceBase.hlsl>
+
+// TODO: need to make ABI for PS support
+FB_ABI_BEGIN(TwoRayGensSubgraph_B_TwoRayGens2_Abi)
+FB_ABI_END
+
+struct Struct__B_TwoRayGens2CB
+{
+    float3 cameraPos;
+    float _padding0;
+    float4x4 clipToWorld;
+    float depthNearPlane;
+    float3 _padding1;
+};
+
+RWTexture2D<float4> g_texture : register(u0);
+RaytracingAccelerationStructure g_scene : FB_RT_SCENE_REGISTER(t0);
+Texture2D<float4> g_blueChannel : register(t0);
+cbuffer cb0 : register(b0)
+{
+	Struct__B_TwoRayGens2CB _B_TwoRayGens2CB;
+};
+
+
+struct Payload
+{
+	bool hit;
+	float blueChannelMultiplier;
+};
+
+#define TraceARay(AccelerationStructure, RayFlags, InstanceInclusionMask, RayContributionToHitGroupIndex, MultiplierForGeometryContributionToHitGroupIndex, MissShaderIndex, Ray, payload) \
+	fb::rt::traceRay<TwoRayGensSubgraph_B_TwoRayGens2_Abi, FB_RT_FLAGS_UseSharedStack | FB_RT_FLAGS_DefaultStackLds, RayFlags> \
+		(AccelerationStructure, Ray, payload, InstanceInclusionMask, RayContributionToHitGroupIndex, MultiplierForGeometryContributionToHitGroupIndex, MissShaderIndex);
+
+FB_DEFINE_RAYGEN_SHADER(RayGen2, 8, 8, 1)
+{
+	uint2 px = DispatchRaysIndex().xy;
+	float2 dimensions = float2(DispatchRaysDimensions().xy);
+
+	float2 screenPos = (float2(px)+0.5f) / dimensions * 2.0 - 1.0;
+	screenPos.y = -screenPos.y;
+
+	float4 world = mul(float4(screenPos, _B_TwoRayGens2CB.depthNearPlane, 1), _B_TwoRayGens2CB.clipToWorld);
+	world.xyz /= world.w;
+
+	RayDesc ray;
+	ray.Origin = _B_TwoRayGens2CB.cameraPos;
+	ray.Direction = normalize(world.xyz - ray.Origin);
+	ray.TMin = 0;
+	ray.TMax = 1000.0f;
+
+	Payload payload = (Payload)0;
+
+	int missShaderIndex = (px.y < dimensions.y / 2) ? 1 : 1;
+
+	TraceARay(g_scene, // Scene (TLAS) buffer
+		RAY_FLAG_FORCE_OPAQUE, // Ray flags
+		0xFF, // Ray mask
+		0,
+		0,
+		missShaderIndex,
+		ray,
+		payload);
+
+	float4 color = g_texture[px];
+	color.a = 1.0f;
+	color.g = payload.hit ? 1.0f : 0.0f;
+
+	uint2 blueChannelDims;
+	g_blueChannel.GetDimensions(blueChannelDims.x, blueChannelDims.y);
+	color.b = dot(g_blueChannel[px % blueChannelDims].rgb, float3(0.3f, 0.59f, 0.11f)) * payload.blueChannelMultiplier;
+
+	g_texture[px] = color;
+}
+
+FB_DEFINE_MISS_SHADER(TwoRayGensSubgraph_B_TwoRayGens2_Abi, Miss2A, defaultLsrt, Payload, payload)
+{
+	payload.hit = false;
+	payload.blueChannelMultiplier = 0.25f;
+}
+
+FB_DEFINE_MISS_SHADER(TwoRayGensSubgraph_B_TwoRayGens2_Abi, Miss2B, defaultLsrt, Payload, payload)
+{
+	payload.hit = false;
+	payload.blueChannelMultiplier = 1.0f;
+}
+
+FB_DEFINE_CLOSEST_HIT_SHADER(TwoRayGensSubgraph_B_TwoRayGens2_Abi, ClosestHit2, defaultLsrt, Payload, payload)
+{
+	payload.hit = true;
+	payload.blueChannelMultiplier = 0.0f;
+}
+
+/*
+Shader Resources:
+	Texture g_texture (as UAV)
+	Buffer g_scene (as RTScene)
+	Texture g_blueChannel (as SRV)
+*/
diff --git a/_GeneratedCode/UnitTests/UE_5_3/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphA_A/TwoRayGens1.usf b/_GeneratedCode/UnitTests/UE_5_3/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphA_A/TwoRayGens1.usf
new file mode 100644
index 00000000..bc52cf53
--- /dev/null
+++ b/_GeneratedCode/UnitTests/UE_5_3/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphA_A/TwoRayGens1.usf
@@ -0,0 +1,69 @@
+#include "/Engine/Public/Platform.ush"
+#include "/Engine/Private/RayTracing/RayTracingCommon.ush"
+
+// TwoRayGens technique
+
+
+RWTexture2D<float4> g_texture;
+RaytracingAccelerationStructure g_scene;
+float3 cb_A_TwoRayGens1CB_cameraPos;
+float cb_A_TwoRayGens1CB__padding0;
+float4x4 cb_A_TwoRayGens1CB_clipToWorld;
+float cb_A_TwoRayGens1CB_depthNearPlane;
+float3 cb_A_TwoRayGens1CB__padding1;
+
+
+struct Payload
+{
+	bool hit;
+};
+
+RAY_TRACING_ENTRY_RAYGEN(RayGen1)
+{
+	uint2 px = DispatchRaysIndex().xy;
+	float2 dimensions = float2(DispatchRaysDimensions().xy);
+
+	float2 screenPos = (float2(px)+0.5f) / dimensions * 2.0 - 1.0;
+	screenPos.y = -screenPos.y;
+
+	float4 world = mul(float4(screenPos, _A_TwoRayGens1CB_depthNearPlane, 1), _A_TwoRayGens1CB_clipToWorld);
+	world.xyz /= world.w;
+
+	RayDesc ray;
+	ray.Origin = _A_TwoRayGens1CB_cameraPos;
+	ray.Direction = normalize(world.xyz - ray.Origin);
+	ray.TMin = 0;
+	ray.TMax = 1000.0f;
+
+	Payload payload = (Payload)0;
+
+	TraceRay(g_scene, // Scene (TLAS) buffer
+		RAY_FLAG_FORCE_OPAQUE, // Ray flags
+		0xFF, // Ray mask
+		0,
+		0,
+		1,
+		ray,
+		payload);
+
+	float4 color = g_texture[px];
+	color.a = 1.0f;
+	color.r = payload.hit ? 1.0f : 0.0f;
+	g_texture[px] = color;
+}
+
+RAY_TRACING_ENTRY_MISS(Miss1, Payload, payload)
+{
+	payload.hit = false;
+}
+
+RAY_TRACING_ENTRY_CLOSEST_HIT(ClosestHit1, Payload, payload, FRayTracingIntersectionAttributes, intersection)
+{
+	payload.hit = true;
+}
+
+/*
+Shader Resources:
+	Texture g_texture (as UAV)
+	Buffer g_scene (as RTScene)
+*/
diff --git a/_GeneratedCode/UnitTests/UE_5_3/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphB_B/TwoRayGens2.usf b/_GeneratedCode/UnitTests/UE_5_3/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphB_B/TwoRayGens2.usf
new file mode 100644
index 00000000..9ccf1b32
--- /dev/null
+++ b/_GeneratedCode/UnitTests/UE_5_3/UnitTests/RayTrace/TwoRayGensSubgraph/shaders/TwoRayGensSubgraphB_B/TwoRayGens2.usf
@@ -0,0 +1,87 @@
+#include "/Engine/Public/Platform.ush"
+#include "/Engine/Private/RayTracing/RayTracingCommon.ush"
+
+// TwoRayGens technique
+
+
+RWTexture2D<float4> g_texture;
+RaytracingAccelerationStructure g_scene;
+Texture2D<float4> g_blueChannel;
+float3 cb_B_TwoRayGens2CB_cameraPos;
+float cb_B_TwoRayGens2CB__padding0;
+float4x4 cb_B_TwoRayGens2CB_clipToWorld;
+float cb_B_TwoRayGens2CB_depthNearPlane;
+float3 cb_B_TwoRayGens2CB__padding1;
+
+
+struct Payload
+{
+	bool hit;
+	float blueChannelMultiplier;
+};
+
+RAY_TRACING_ENTRY_RAYGEN(RayGen2)
+{
+	uint2 px = DispatchRaysIndex().xy;
+	float2 dimensions = float2(DispatchRaysDimensions().xy);
+
+	float2 screenPos = (float2(px)+0.5f) / dimensions * 2.0 - 1.0;
+	screenPos.y = -screenPos.y;
+
+	float4 world = mul(float4(screenPos, _B_TwoRayGens2CB_depthNearPlane, 1), _B_TwoRayGens2CB_clipToWorld);
+	world.xyz /= world.w;
+
+	RayDesc ray;
+	ray.Origin = _B_TwoRayGens2CB_cameraPos;
+	ray.Direction = normalize(world.xyz - ray.Origin);
+	ray.TMin = 0;
+	ray.TMax = 1000.0f;
+
+	Payload payload = (Payload)0;
+
+	int missShaderIndex = (px.y < dimensions.y / 2) ? 1 : 1;
+
+	TraceRay(g_scene, // Scene (TLAS) buffer
+		RAY_FLAG_FORCE_OPAQUE, // Ray flags
+		0xFF, // Ray mask
+		1,
+		0,
+		missShaderIndex,
+		ray,
+		payload);
+
+	float4 color = g_texture[px];
+	color.a = 1.0f;
+	color.g = payload.hit ? 1.0f : 0.0f;
+
+	uint2 blueChannelDims;
+	g_blueChannel.GetDimensions(blueChannelDims.x, blueChannelDims.y);
+	color.b = dot(g_blueChannel[px % blueChannelDims].rgb, float3(0.3f, 0.59f, 0.11f)) * payload.blueChannelMultiplier;
+
+	g_texture[px] = color;
+}
+
+RAY_TRACING_ENTRY_MISS(Miss2A, Payload, payload)
+{
+	payload.hit = false;
+	payload.blueChannelMultiplier = 0.25f;
+}
+
+RAY_TRACING_ENTRY_MISS(Miss2B, Payload, payload)
+{
+	payload.hit = false;
+	payload.blueChannelMultiplier = 1.0f;
+}
+
+RAY_TRACING_ENTRY_CLOSEST_HIT(ClosestHit2, Payload, payload, FRayTracingIntersectionAttributes, intersection)
+{
+	payload.hit = true;
+	payload.blueChannelMultiplier = 0.0f;
+}
+
+/*
+Shader Resources:
+	Texture g_texture (as UAV)
+	Buffer g_scene (as RTScene)
+	Texture g_blueChannel (as SRV)
+*/
diff --git a/external/RenderDoc/Readme.txt b/external/RenderDoc/Readme.txt
new file mode 100644
index 00000000..9d38b94b
--- /dev/null
+++ b/external/RenderDoc/Readme.txt
@@ -0,0 +1,3 @@
+This comes from RenderDoc v1.37
+Downloaded from https://renderdoc.org/builds
+The header is in this directory, and the DLL is in the main directory.
\ No newline at end of file
diff --git a/external/df_serialize/MakeJSONReadFooter.h b/external/df_serialize/MakeJSONReadFooter.h
index 9c8e8fa7..9b6e5e82 100644
--- a/external/df_serialize/MakeJSONReadFooter.h
+++ b/external/df_serialize/MakeJSONReadFooter.h
@@ -105,6 +105,8 @@ bool ReadFromJSONFile(TROOT& root, const char* fileName, bool fileMustExist = tr
     return ReadFromJSONBuffer(root, fileData);
 }
 
+inline bool RebuildConnections_PostLoad(RenderGraph& renderGraph);
+
 // version fixup
 inline bool ReadFromJSON_PostLoad(RenderGraph& renderGraph)
 {
@@ -272,6 +274,17 @@ inline bool ReadFromJSON_PostLoad(RenderGraph& renderGraph)
             }
             renderGraph.version = "0.99b";
         }
+        else if (renderGraph.version == "0.99b")
+        {
+            renderGraph.versionUpgradedMessage +=
+                R"(
+                 reordered node connections in the code.
+                )";
+
+            RebuildConnections_PostLoad(renderGraph);
+
+            renderGraph.version = "0.991b";
+        }
         else
         {
             return false;
@@ -279,4 +292,97 @@ inline bool ReadFromJSON_PostLoad(RenderGraph& renderGraph)
     }
 
     return true;
-}
\ No newline at end of file
+}
+
+inline void BuildConnections(const RenderGraph& renderGraph, int shaderIndex, std::vector<NodePinConnection>& newConnections, const std::vector<NodePinConnection>& oldConnections);
+inline bool RebuildConnections_PostLoad(RenderGraph& renderGraph)
+{
+    // Rebuilds the connection list of every compute, ray and draw call node so that it holds one
+    // entry per resource of the shader(s) the node references, in shader resource order.
+    // Always returns true.
+
+    // Case insensitive shader lookup by name. ShaderType::Count means "any shader type".
+    // Gives back the index into renderGraph.shaders, or -1 when no shader matches.
+    auto findShaderIndex = [&renderGraph](ShaderType wantedType, const char* wantedName)
+        {
+            const int shaderCount = (int)renderGraph.shaders.size();
+            for (int i = 0; i < shaderCount; ++i)
+            {
+                const bool typeAccepted = (wantedType == ShaderType::Count) || (renderGraph.shaders[i].type == wantedType);
+                if (typeAccepted && _stricmp(renderGraph.shaders[i].name.c_str(), wantedName) == 0)
+                    return i;
+            }
+            return -1;
+        };
+
+    for (RenderGraphNode& node : renderGraph.nodes)
+    {
+        std::vector<NodePinConnection> rebuilt;
+
+        if (node._index == RenderGraphNode::c_index_actionComputeShader)
+        {
+            RenderGraphNode_Action_ComputeShader& compute = node.actionComputeShader;
+            const int computeIndex = findShaderIndex(ShaderType::Compute, compute.shader.name.c_str());
+            BuildConnections(renderGraph, computeIndex, rebuilt, compute.connections);
+
+            // install the rebuilt list; the node's previous connections move into "rebuilt" and are discarded with it
+            compute.connections.swap(rebuilt);
+        }
+        else if (node._index == RenderGraphNode::c_index_actionRayShader)
+        {
+            RenderGraphNode_Action_RayShader& ray = node.actionRayShader;
+            const int rayGenIndex = findShaderIndex(ShaderType::RTRayGen, ray.shader.name.c_str());
+            BuildConnections(renderGraph, rayGenIndex, rebuilt, ray.connections);
+
+            // install the rebuilt list; the node's previous connections move into "rebuilt" and are discarded with it
+            ray.connections.swap(rebuilt);
+        }
+        else if (node._index == RenderGraphNode::c_index_actionDrawCall)
+        {
+            RenderGraphNode_Action_DrawCall& draw = node.actionDrawCall;
+
+            // connections are appended stage by stage, in this order: vertex, pixel, amplification, mesh
+            const int vertexIndex = findShaderIndex(ShaderType::Vertex, draw.vertexShader.name.c_str());
+            BuildConnections(renderGraph, vertexIndex, rebuilt, draw.connections);
+            const int pixelIndex = findShaderIndex(ShaderType::Pixel, draw.pixelShader.name.c_str());
+            BuildConnections(renderGraph, pixelIndex, rebuilt, draw.connections);
+            const int amplificationIndex = findShaderIndex(ShaderType::Amplification, draw.amplificationShader.name.c_str());
+            BuildConnections(renderGraph, amplificationIndex, rebuilt, draw.connections);
+            const int meshIndex = findShaderIndex(ShaderType::Mesh, draw.meshShader.name.c_str());
+            BuildConnections(renderGraph, meshIndex, rebuilt, draw.connections);
+
+            // install the rebuilt list; the node's previous connections move into "rebuilt" and are discarded with it
+            draw.connections.swap(rebuilt);
+        }
+    }
+
+    return true;
+}
+
+inline void BuildConnections(const RenderGraph& renderGraph, int shaderIndex, std::vector<NodePinConnection>& newConnections, const std::vector<NodePinConnection>& oldConnections)
+{
+    // Appends one connection per resource of shader "shaderIndex" to newConnections, in resource order.
+    // A resource whose name equals the srcPin of an old connection gets a copy of the first such
+    // connection; any other resource gets a default constructed NodePinConnection.
+    // An out of range shaderIndex (such as -1 for "not found") leaves newConnections untouched.
+    if (shaderIndex < 0 || (size_t)shaderIndex >= renderGraph.shaders.size()) // explicit cast: no signed/unsigned compare
+        return;
+
+    const Shader& shader = renderGraph.shaders[shaderIndex];
+    for (const ShaderResource& resource : shader.resources)
+    {
+        // create a default connection for this resource
+        newConnections.push_back(NodePinConnection{});
+        NodePinConnection& newConnection = newConnections.back(); // stays valid: nothing is appended before the next resource
+
+        // try to find that resource's connection in the original node
+        for (const NodePinConnection& oldConnection : oldConnections)
+        {
+            if (oldConnection.srcPin == resource.name)
+            {
+                newConnection = oldConnection;
+                break;
+            }
+        }
+    }
+}
diff --git a/external/slang/LICENSE b/external/slang/LICENSE
new file mode 100644
index 00000000..b6918fff
--- /dev/null
+++ b/external/slang/LICENSE
@@ -0,0 +1,29 @@
+SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+LLVM Exceptions to the Apache 2.0 License
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
diff --git a/external/slang/README.md b/external/slang/README.md
new file mode 100644
index 00000000..95c1c217
--- /dev/null
+++ b/external/slang/README.md
@@ -0,0 +1,156 @@
+Slang
+=====
+![CI Status](https://github.com/shader-slang/slang/actions/workflows/ci.yml/badge.svg?branch=master)
+![CTS Status](https://github.com/shader-slang/slang/actions/workflows/vk-gl-cts-nightly.yml/badge.svg)
+
+Slang is a shading language that makes it easier to build and maintain large shader codebases in a modular and extensible fashion, while also maintaining the highest possible performance on modern GPUs and graphics APIs.
+Slang is based on years of collaboration between researchers at NVIDIA, Carnegie Mellon University, Stanford, MIT, UCSD and the University of Washington.
+
+
+Why Slang?
+---------------
+
+The Slang shading language is designed to enable real-time graphics developers to work with large-scale, high-performance shader code.
+
+### Write Shaders Once, Run Anywhere
+
+The Slang compiler can generate code for a wide variety of targets: D3D12, Vulkan, Metal, D3D11, OpenGL, CUDA, and even generate code to run on a CPU. For textual targets, such as Metal Shading Language (MSL) and CUDA, Slang produces readable code that preserves original identifier names, as well as the type and call structure, making it easier to debug.
+
+### Access the Latest GPU Features
+
+Slang code is highly portable, but can still leverage unique platform capabilities, including the latest features in Direct3D and Vulkan. For example, developers can make full use of [pointers](https://shader-slang.com/slang/user-guide/convenience-features.html#pointers-limited) when generating SPIR-V.
+Slang's [capability system](https://shader-slang.com/slang/user-guide/capabilities.html) helps applications manage feature set differences across target platforms by ensuring code only uses available features during the type-checking step, before generating final code. Additionally, Slang provides [flexible interop](https://shader-slang.com/slang/user-guide/a1-04-interop.html) features to enable directly embedding target code or SPIR-V into generated shaders.
+
+### Leverage Neural Graphics with Automatic Differentiation
+
+Slang can [automatically generate both forward and backward derivative propagation code](https://shader-slang.com/slang/user-guide/autodiff.html) for complex functions that involve arbitrary control flow and dynamic dispatch. This allows existing rendering codebases to easily become differentiable, or for Slang to serve as the kernel language in a PyTorch-driven machine learning framework via [`slangtorch`](https://shader-slang.com/slang/user-guide/a1-02-slangpy.html).
+
+### Scalable Software Development with Modules
+
+Slang provides a [module system](https://shader-slang.com/slang/user-guide/modules.html) that enables logical organization of code for separate compilation. Slang modules can be independently compiled offline to a custom IR (with optional obfuscation) and then linked at runtime to generate code in formats such as DXIL or SPIR-V.
+
+### Code Specialization that Works with Modules
+
+Slang supports [generics and interfaces](https://shader-slang.com/slang/user-guide/interfaces-generics.html) (a.k.a. type traits/protocols), allowing for clear expression of shader specialization without the need for preprocessor techniques or string-pasting. Unlike C++ templates, Slang's generics are pre-checked and don't produce cascading error messages that are difficult to diagnose. The same generic shader can be specialized for a variety of different types to produce specialized code ahead of time, or on the fly, entirely under application control.
+
+### Easy On-ramp for HLSL and GLSL Codebases
+
+Slang's syntax is similar to HLSL, and most existing HLSL code can be compiled with the Slang compiler out-of-the-box, or with just minor modifications. This allows existing shader codebases to immediately benefit from Slang without requiring a complete rewrite or port.
+
+Slang provides a compatibility module that enables the use of most GLSL intrinsic functions and GLSL's parameter binding syntax.
+
+### Comprehensive Tooling Support
+
+Slang comes with full support of IntelliSense editing features in Visual Studio Code and Visual Studio through the Language Server Protocol.
+Full debugging capabilities are also available through RenderDoc and SPIR-V based tools.
+
+Getting Started
+---------------
+
+The fastest way to get started using Slang in your own development is to use a pre-built binary package, available through GitHub [releases](https://github.com/shader-slang/slang/releases).
+Slang binaries are also included in the [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) since version 1.3.296.0.
+
+There are packages built for x86_64 and aarch64 Windows, Linux and macOS.
+Each binary release includes the command-line `slangc` compiler, a shared library for the compiler, and the `slang.h` header.
+
+See the user-guide for info on using the `slangc` command-line tool: [Slang Command Line Usage](
+https://shader-slang.com/slang/user-guide/compiling.html#command-line-compilation-with-slangc).
+
+If you want to try out the Slang language without installing anything, a fast and simple way is to use the [Slang Playground](https://shader-slang.com/slang-playground). The playground allows you to compile Slang code to a variety of targets, and even run some simple shaders directly within the browser. The playground loads the Slang compiler into your browser and runs all compilation locally. No data will be sent to any servers.
+
+If you would like to build Slang from source, please consult the [build instructions](docs/building.md).
+
+Documentation
+-------------
+
+The Slang project provides a variety of different [documentation](docs/), but most users would be well served starting with the [User's Guide](https://shader-slang.github.io/slang/user-guide/).
+
+For developers writing Slang code, the [Slang Core Module Reference](https://shader-slang.com/stdlib-reference/) provides detailed documentation on Slang's built-in types and functions.
+
+We also provide a few [examples](examples/) of how to integrate Slang into a rendering application.
+
+These examples use a graphics layer that we include with Slang called "GFX" which is an abstraction library of various graphics APIs (D3D11, D3D12, OpenGL, Vulkan, CUDA, and the CPU) to support cross-platform applications using GPU graphics and compute capabilities. 
+If you'd like to learn more about GFX, see the [GFX User Guide](https://shader-slang.com/slang/gfx-user-guide/index.html).
+
+Additionally, we recommend checking out [Vulkan Mini Examples](https://github.com/nvpro-samples/vk_mini_samples/) for more examples of using Slang's language features available on Vulkan, such as pointers and the ray tracing intrinsics.
+
+Contributing
+------------
+
+If you'd like to contribute to the project, we are excited to have your input.
+The following guidelines should be observed by contributors:
+
+* Please follow the contributor [Code of Conduct](CODE_OF_CONDUCT.md).
+* Bug reports and feature requests should go through the GitHub issue tracker
+* Changes should ideally come in as small pull requests on top of `master`, coming from your own personal fork of the project
+* Large features that will involve multiple contributors or a long development time should be discussed in issues, and broken down into smaller pieces that can be implemented and checked in in stages
+
+The [Contribution guide](CONTRIBUTING.md) describes the workflow for contributors in more detail.
+
+Limitations and Support
+-----------------------
+
+### Platform support
+
+The Slang compiler and libraries can be built on the following platforms:
+
+|  Windows  |   Linux   |   MacOS   |  WebAssembly |
+|:---------:|:---------:|:---------:|:------------:|
+| supported | supported | supported | experimental |
+
+Both `x86_64` and `aarch64` architectures are supported on Windows, Linux and MacOS platforms.
+
+### Target support
+
+Slang can compile shader code to the following targets:
+
+|    Target   |                                         Status                                        |                          Output Formats                          |
+|:-----------:|:-------------------------------------------------------------------------------------:|:----------------------------------------------------------------:|
+| Direct3D 11 |    [supported](https://shader-slang.com/slang/user-guide/targets.html#direct3d-11)    |                               HLSL                               |
+| Direct3D 12 |    [supported](https://shader-slang.com/slang/user-guide/targets.html#direct3d-12)    |                               HLSL                               |
+|    Vulkan   |       [supported](https://shader-slang.com/slang/user-guide/targets.html#vulkan)      |                            SPIRV, GLSL                           |
+|    Metal    |     [experimental*](https://shader-slang.com/slang/user-guide/targets.html#metal)     |                      Metal Shading Language                      |
+|    WebGPU   |                                     experimental**                                    |                               WGSL                               |
+|     CUDA    |   [supported](https://shader-slang.com/slang/user-guide/targets.html#cuda-and-optix)  |                        C++ (compute only)                        |
+|    Optix    | [experimental](https://shader-slang.com/slang/user-guide/targets.html#cuda-and-optix) |                             C++ (WIP)                            |
+|     CPU     |   [experimental](https://shader-slang.com/slang/user-guide/targets.html#cpu-compute)  | C++ (kernel), C++ (host), standalone executable, dynamic library |
+
+> *Slang currently supports generating vertex, fragment, compute, task and mesh
+> shaders for Metal.
+
+> **WGSL support is still a work in progress.
+
+For greater detail, see the [Supported Compilation
+Targets](https://shader-slang.com/slang/user-guide/targets.html) section of the
+[User Guide](https://shader-slang.github.io/slang/user-guide/)
+
+The Slang project has been used for production applications and large shader
+codebases, but it is still under active development. Support is currently
+focused on the platforms (Windows, Linux) and target APIs (Direct3D 12, Vulkan)
+where Slang is used most heavily. Users who are looking for support on other
+platforms or APIs should coordinate with the development team via the issue
+tracker to make sure that their use cases can be supported.
+
+License
+-------
+
+The Slang code itself is under the Apache 2.0 with LLVM Exception license (see [LICENSE](LICENSE)).
+
+Builds of the core Slang tools depend on the following projects, either automatically or optionally, which may have their own licenses:
+
+* [`glslang`](https://github.com/KhronosGroup/glslang) (BSD)
+* [`lz4`](https://github.com/lz4/lz4) (BSD)
+* [`miniz`](https://github.com/richgel999/miniz) (MIT)
+* [`spirv-headers`](https://github.com/KhronosGroup/SPIRV-Headers) (Modified MIT)
+* [`spirv-tools`](https://github.com/KhronosGroup/SPIRV-Tools) (Apache 2.0)
+* [`ankerl::unordered_dense::{map, set}`](https://github.com/martinus/unordered_dense) (MIT)
+
+Slang releases may include [LLVM](https://github.com/llvm/llvm-project) under the license:
+
+* [`llvm`](https://llvm.org/docs/DeveloperPolicy.html#new-llvm-project-license-framework) (Apache 2.0 License with LLVM exceptions)
+
+Some of the tests and example programs that build with Slang use the following projects, which may have their own licenses:
+
+* [`glm`](https://github.com/g-truc/glm) (MIT)
+* `stb_image` and `stb_image_write` from the [`stb`](https://github.com/nothings/stb) collection of single-file libraries (Public Domain)
+* [`tinyobjloader`](https://github.com/tinyobjloader/tinyobjloader) (MIT)
diff --git a/external/slang/_README.txt b/external/slang/_README.txt
index 0c221f45..c2b4e7b4 100644
--- a/external/slang/_README.txt
+++ b/external/slang/_README.txt
@@ -2,4 +2,4 @@ downloaded from https://github.com/shader-slang/slang/releases
 
 information about slang is at https://github.com/shader-slang/slang
 
-slang-2024.1.22-win64
\ No newline at end of file
+slang-2025.6.3-windows-x86_64.zip
\ No newline at end of file
diff --git a/external/slang/bin/gfx.dll b/external/slang/bin/gfx.dll
new file mode 100644
index 00000000..af76f78d
Binary files /dev/null and b/external/slang/bin/gfx.dll differ
diff --git a/external/slang/bin/gfx.slang b/external/slang/bin/gfx.slang
new file mode 100644
index 00000000..fded20ee
--- /dev/null
+++ b/external/slang/bin/gfx.slang
@@ -0,0 +1,1991 @@
+import slang;
+
+public namespace gfx
+{
+public typedef slang.Result Result;
+
+public typedef intptr_t Int;
+public typedef uintptr_t UInt;
+public typedef uint64_t DeviceAddress;
+public typedef int GfxIndex;
+public typedef int GfxCount;
+public typedef intptr_t Size;
+public typedef intptr_t Offset;
+
+public const uint64_t kTimeoutInfinite = 0xFFFFFFFFFFFFFFFF;
+
+public enum class StructType
+{
+    D3D12ExtendedDesc,
+};
+
+public enum class StageType
+{
+    Unknown,
+    Vertex,
+    Hull,
+    Domain,
+    Geometry,
+    Fragment,
+    Compute,
+    RayGeneration,
+    Intersection,
+    AnyHit,
+    ClosestHit,
+    Miss,
+    Callable,
+    Amplification,
+    Mesh,
+    CountOf,
+};
+
+public enum class DeviceType
+{
+    Unknown,
+    Default,
+    DirectX11,
+    DirectX12,
+    OpenGl,
+    Vulkan,
+    Metal,
+    CPU,
+    CUDA,
+    CountOf,
+};
+
+public enum class ProjectionStyle
+{
+    Unknown,
+    OpenGl,
+    DirectX,
+    Vulkan,
+    Metal,
+    CountOf,
+};
+
+public enum class BindingStyle
+{
+    Unknown,
+    DirectX,
+    OpenGl,
+    Vulkan,
+    Metal,
+    CPU,
+    CUDA,
+    CountOf,
+};
+
+public enum class AccessFlag
+{
+    None,
+    Read,
+    Write,
+};
+
+public static const GfxCount kMaxRenderTargetCount = 8;
+
+// Defines how linking should be performed for a shader program.
+public enum class LinkingStyle
+{
+    // Compose all entry-points in a single program, then compile all entry-points together with the same
+    // set of root shader arguments.
+    SingleProgram,
+
+    // Link and compile each entry-point individually, potentially with different specializations.
+    SeparateEntryPointCompilation
+};
+
+public enum class ShaderModuleSourceType
+{
+    SlangSource,           // a slang source string in memory.
+    SlangModuleBinary,     // a slang module binary code in memory.
+    SlangSourceFile,       // a slang source from file.
+    SlangModuleBinaryFile, // a slang module binary code from file.
+};
+
+public struct ShaderProgramDesc2
+{
+    public ShaderModuleSourceType sourceType = ShaderModuleSourceType::SlangSource;
+    public void *sourceData = nullptr;
+    public Size sourceDataSize = 0;
+
+    // Number of entry points to include in the shader program. 0 means include all entry points
+    // defined in the module.
+    public GfxCount entryPointCount = 0;
+    // Names of entry points to include in the shader program. The size of the array must be
+    // `entryPointCount`.
+    public NativeString* entryPointNames = nullptr;
+};
+
+[COM("9d32d0ad-915c-4ffd-91e2-508554a04a76")]
+public interface IShaderProgram
+{
+    public slang::TypeReflection* findTypeByName(NativeString name);
+};
+
+public enum class Format
+{
+    // D3D formats omitted: 19-22, 44-47, 65-66, 68-70, 73, 76, 79, 82, 88-89, 92-94, 97, 100-114
+    // These formats are omitted due to lack of a corresponding Vulkan format. D24_UNORM_S8_UINT (DXGI_FORMAT 45)
+    // has a matching Vulkan format but is also omitted as it is only supported by Nvidia.
+    Unknown,
+
+    R32G32B32A32_TYPELESS,
+    R32G32B32_TYPELESS,
+    R32G32_TYPELESS,
+    R32_TYPELESS,
+
+    R16G16B16A16_TYPELESS,
+    R16G16_TYPELESS,
+    R16_TYPELESS,
+
+    R8G8B8A8_TYPELESS,
+    R8G8_TYPELESS,
+    R8_TYPELESS,
+    B8G8R8A8_TYPELESS,
+
+    R32G32B32A32_FLOAT,
+    R32G32B32_FLOAT,
+    R32G32_FLOAT,
+    R32_FLOAT,
+
+    R16G16B16A16_FLOAT,
+    R16G16_FLOAT,
+    R16_FLOAT,
+
+    R64_UINT,
+
+    R32G32B32A32_UINT,
+    R32G32B32_UINT,
+    R32G32_UINT,
+    R32_UINT,
+
+    R16G16B16A16_UINT,
+    R16G16_UINT,
+    R16_UINT,
+
+    R8G8B8A8_UINT,
+    R8G8_UINT,
+    R8_UINT,
+
+    R64_SINT,
+
+    R32G32B32A32_SINT,
+    R32G32B32_SINT,
+    R32G32_SINT,
+    R32_SINT,
+
+    R16G16B16A16_SINT,
+    R16G16_SINT,
+    R16_SINT,
+
+    R8G8B8A8_SINT,
+    R8G8_SINT,
+    R8_SINT,
+
+    R16G16B16A16_UNORM,
+    R16G16_UNORM,
+    R16_UNORM,
+
+    R8G8B8A8_UNORM,
+    R8G8B8A8_UNORM_SRGB,
+    R8G8_UNORM,
+    R8_UNORM,
+    B8G8R8A8_UNORM,
+    B8G8R8A8_UNORM_SRGB,
+    B8G8R8X8_UNORM,
+    B8G8R8X8_UNORM_SRGB,
+
+    R16G16B16A16_SNORM,
+    R16G16_SNORM,
+    R16_SNORM,
+
+    R8G8B8A8_SNORM,
+    R8G8_SNORM,
+    R8_SNORM,
+
+    D32_FLOAT,
+    D16_UNORM,
+
+    B4G4R4A4_UNORM,
+    B5G6R5_UNORM,
+    B5G5R5A1_UNORM,
+
+    R9G9B9E5_SHAREDEXP,
+    R10G10B10A2_TYPELESS,
+    R10G10B10A2_UNORM,
+    R10G10B10A2_UINT,
+    R11G11B10_FLOAT,
+
+    BC1_UNORM,
+    BC1_UNORM_SRGB,
+    BC2_UNORM,
+    BC2_UNORM_SRGB,
+    BC3_UNORM,
+    BC3_UNORM_SRGB,
+    BC4_UNORM,
+    BC4_SNORM,
+    BC5_UNORM,
+    BC5_SNORM,
+    BC6H_UF16,
+    BC6H_SF16,
+    BC7_UNORM,
+    BC7_UNORM_SRGB,
+
+    _Count,
+};
+
+public struct FormatInfo
+{
+    public GfxCount channelCount; ///< The number of channels in the format. Only set if the channelType is set
+    public uint8_t channelType;   ///< A SlangScalarType value; None if the format isn't made up of elements of one scalar type. TODO: Change to uint32_t?
+
+    public Size blockSizeInBytes;   ///< The size of a block in bytes.
+    public GfxCount pixelsPerBlock; ///< The number of pixels contained in a block.
+    public GfxCount blockWidth;     ///< The width of a block in pixels.
+    public GfxCount blockHeight;    ///< The height of a block in pixels.
+};
+
+public enum class InputSlotClass
+{
+    PerVertex, PerInstance
+};
+
+public struct InputElementDesc
+{
+    public NativeString semanticName; ///< The name of the corresponding parameter in shader code.
+    public GfxIndex semanticIndex;   ///< The index of the corresponding parameter in shader code. Only needed if multiple parameters share a semantic name.
+    public Format format;            ///< The format of the data being fetched for this element.
+    public Offset offset;            ///< The offset in bytes of this element from the start of the corresponding chunk of vertex stream data.
+    public GfxIndex bufferSlotIndex; ///< The index of the vertex stream to fetch this element's data from.
+};
+
+public struct VertexStreamDesc
+{
+    public Size stride;                   ///< The stride in bytes for this vertex stream.
+    public InputSlotClass slotClass;      ///< Whether the stream contains per-vertex or per-instance data.
+    public GfxCount instanceDataStepRate; ///< How many instances to draw per chunk of data.
+};
+
+public enum class PrimitiveType
+{
+    Point, Line, Triangle, Patch
+};
+
+public enum class PrimitiveTopology
+{
+    TriangleList, TriangleStrip, PointList, LineList, LineStrip
+};
+
+public enum class ResourceState
+{
+    Undefined,
+    General,
+    PreInitialized,
+    VertexBuffer,
+    IndexBuffer,
+    ConstantBuffer,
+    StreamOutput,
+    ShaderResource,
+    UnorderedAccess,
+    RenderTarget,
+    DepthRead,
+    DepthWrite,
+    Present,
+    IndirectArgument,
+    CopySource,
+    CopyDestination,
+    ResolveSource,
+    ResolveDestination,
+    AccelerationStructure,
+    AccelerationStructureBuildInput,
+    _Count
+};
+
+public struct ResourceStateSet // A set of ResourceState values stored as a bit mask.
+{
+    public uint64_t m_bitFields; // Bit N is set when the state whose enum value is N belongs to the set.
+
+    [mutating]
+    public void add(ResourceState state) { m_bitFields |= (1LL << (uint32_t)state); } // Sets the bit for `state`.
+
+    public bool contains(ResourceState state) { return (m_bitFields & (1LL << (uint32_t)state)) != 0; } // True when the bit for `state` is set.
+    public __init() { m_bitFields = 0; } // Starts as the empty set.
+    public __init(ResourceState state) { add(state); } // NOTE(review): add() ORs into m_bitFields, which is not assigned here first - confirm it starts at 0.
+};
+
+public ResourceStateSet operator &(ResourceStateSet val, ResourceStateSet that) // Intersection of two state sets.
+{
+    ResourceStateSet result;
+    result.m_bitFields = val.m_bitFields & that.m_bitFields; // keeps only the bits set in both operands
+    return result;
+}
+
+/// Describes how memory for the resource should be allocated for CPU access.
+public enum class MemoryType
+{
+    DeviceLocal,
+    Upload,
+    ReadBack,
+};
+
+public enum class InteropHandleAPI
+{
+    Unknown,
+    D3D12,                    // A D3D12 object pointer.
+    Vulkan,                   // A general Vulkan object handle.
+    CUDA,                     // A general CUDA object handle.
+    Win32,                    // A general Win32 HANDLE.
+    FileDescriptor,           // A file descriptor.
+    DeviceAddress,            // A device address.
+    D3D12CpuDescriptorHandle, // A D3D12_CPU_DESCRIPTOR_HANDLE value.
+    Metal,                    // A general Metal object handle.
+};
+
+public struct InteropHandle
+{
+    public InteropHandleAPI api = InteropHandleAPI::Unknown;
+    public uint64_t handleValue = 0LLU;
+};
+
+// Describes an input layout as two pointer/count pairs: input elements and vertex streams.
+public struct InputLayoutDesc
+{
+    public InputElementDesc *inputElements;
+    public GfxCount inputElementCount; // presumably the number of entries behind inputElements - confirm
+    public VertexStreamDesc *vertexStreams;
+    public GfxCount vertexStreamCount; // presumably the number of entries behind vertexStreams - confirm
+};
+
+[COM("45223711-a84b-455c-befa-4937421e8e2e")]
+public interface IInputLayout
+{   
+};
+
+/// The type of resource.
+/// NOTE! The order needs to be such that all texture types are at or after Texture1D (otherwise isTexture won't work correctly)
+public enum class ResourceType
+{
+    Unknown,     ///< Unknown
+    Buffer,      ///< A buffer (like a constant/index/vertex buffer)
+    Texture1D,   ///< A 1d texture
+    Texture2D,   ///< A 2d texture
+    Texture3D,   ///< A 3d texture
+    TextureCube, ///< A cubemap consists of 6 Texture2D like faces
+    _Count,
+};
+
+/// Base class for Descs
+public struct ResourceDescBase
+{
+    public ResourceType type = ResourceType::Unknown;
+    public ResourceState defaultState = ResourceState::Undefined;
+    public ResourceStateSet allowedStates = {};
+    public MemoryType memoryType = MemoryType::DeviceLocal;
+    public InteropHandle existingHandle = {};
+    public bool isShared = false;
+};
+
+[COM("a0e39f34-8398-4522-95c2-ebc0f984ef3f")]
+public interface IResource
+{
+    public ResourceType getType();
+    public Result getNativeResourceHandle(out InteropHandle outHandle);
+    public Result getSharedHandle(out InteropHandle outHandle);
+    public Result setDebugName(NativeString name);
+    public NativeString getDebugName();
+};
+
+public struct MemoryRange
+{
+    // TODO: Change to Offset/Size?
+    public uint64_t offset;
+    public uint64_t size;
+};
+
+public struct BufferResourceDesc : ResourceDescBase
+{
+    public Size sizeInBytes = 0; ///< Total size in bytes
+    public Size elementSize = 0; ///< Get the element stride. If > 0, this is a structured buffer
+    public Format format = Format::Unknown;
+};
+
+[COM("1b274efe-5e37-492b-826e-7ee7e8f5a49b")]
+public interface IBufferResource : IResource
+{
+    public BufferResourceDesc *getDesc();
+    public DeviceAddress getDeviceAddress();
+    public Result map(MemoryRange *rangeToRead, void **outPointer);
+    public Result unmap(MemoryRange* writtenRange);
+};
+
+public struct DepthStencilClearValue
+{
+    public float depth = 1.0f;
+    public uint32_t stencil = 0;
+};
+
+public struct ColorClearValue // Clear color held as four floats; both setters write the same storage.
+{
+    public float4 values;
+
+    [mutating]
+    public void setValue(uint4 uintVal) // Stores an unsigned integer clear color.
+    {
+        values = reinterpret<float4, uint4>(uintVal); // reinterpret, not a numeric conversion to float
+    }
+
+    [mutating]
+    public void setValue(float4 floatVal) // Stores a floating point clear color as given.
+    {
+        values = floatVal;
+    }
+};
+
+public struct ClearValue
+{
+    public ColorClearValue color;
+    public DepthStencilClearValue depthStencil;
+};
+
+public struct BufferRange
+{
+    public Offset offset;   ///< Offset in bytes.
+    public Size size;       ///< Size in bytes.
+};
+
+public enum class TextureAspect : uint32_t
+{
+    Default = 0,
+    Color = 0x00000001,
+    Depth = 0x00000002,
+    Stencil = 0x00000004,
+    MetaData = 0x00000008,
+    Plane0 = 0x00000010,
+    Plane1 = 0x00000020,
+    Plane2 = 0x00000040,
+
+    DepthStencil = 0x6,
+};
+
+public struct SubresourceRange
+{
+    public TextureAspect aspectMask;
+    public GfxIndex mipLevel;
+    public GfxCount mipLevelCount;
+    public GfxIndex baseArrayLayer; // For Texture3D, this is WSlice.
+    public GfxCount layerCount;     // For cube maps, this is a multiple of 6.
+};
+
+public static const Size kRemainingTextureSize = 0xFFFFFFFF;
+public struct TextureResourceSampleDesc
+{
+    public GfxCount numSamples; ///< Number of samples per pixel
+    public int quality;         ///< The quality measure for the samples
+};
+
+public struct TextureResourceDesc : ResourceDescBase // Texture description; adds texture fields to ResourceDescBase.
+{
+    public int3 size;
+
+    public GfxCount arraySize = 0; ///< Array size
+
+    public GfxCount numMipLevels = 0;         ///< Number of mip levels - if 0 will create all mip levels
+    public Format format;             ///< The resource's format
+    public TextureResourceSampleDesc sampleDesc; ///< How the resource is sampled
+    public ClearValue* optimalClearValue;
+};
+
+/// Data for a single subresource of a texture.
+///
+/// Each subresource is a tensor with `1 <= rank <= 3`,
+/// where the rank is determined by the base shape of the
+/// texture (Buffer, 1D, 2D, 3D, or Cube). For the common
+/// case of a 2D texture, `rank == 2` and each subresource
+/// is a 2D image.
+///
+/// Subresource tensors must be stored in a row-major layout,
+/// so that the X axis strides over texels, the Y axis strides
+/// over 1D rows of texels, and the Z axis strides over 2D
+/// "layers" of texels.
+///
+/// For a texture with multiple mip levels or array elements,
+/// each mip level and array element is stores as a distinct
+/// subresource. When indexing into an array of subresources,
+/// the index of a subresoruce for mip level `m` and array
+/// index `a` is `m + a*mipLevelCount`.
+///
+public struct SubresourceData
+{
+    /// Pointer to texel data for the subresource tensor.
+    public void *data;
+
+    /// Stride in bytes between rows of the subresource tensor.
+    ///
+    /// This is the number of bytes to add to a pointer to a texel
+    /// at (X,Y,Z) to get to a texel at (X,Y+1,Z).
+    ///
+    /// Devices may not support all possible values for `strideY`.
+    /// In particular, they may only support strictly positive strides.
+    ///
+    public gfx::Size strideY;
+
+    /// Stride in bytes between layers of the subresource tensor.
+    ///
+    /// This is the number of bytes to add to a pointer to a texel
+    /// at (X,Y,Z) to get to a texel at (X,Y,Z+1).
+    ///
+    /// Devices may not support all possible values for `strideZ`.
+    /// In particular, they may only support strictly positive strides.
+    ///
+    public gfx::Size strideZ;
+};
+
+/// COM interface for texture resources; derives from `IResource`.
+[COM("cf88a31c-6187-46c5-a4b7-eb-58-c7-33-40-17")]
+public interface ITextureResource : IResource
+{
+    /// Returns a pointer to the `TextureResourceDesc` of this texture.
+    public TextureResourceDesc* getDesc();
+};
+
+/// Comparison functions, stored as `uint8_t`.
+/// In this file they are used by `SamplerStateDesc::comparisonFunc`,
+/// `DepthStencilOpDesc::stencilFunc` and `DepthStencilDesc::depthFunc`.
+public enum class ComparisonFunc : uint8_t
+{
+    Never = 0x0,
+    Less = 0x1,
+    Equal = 0x2,
+    LessEqual = 0x3,
+    Greater = 0x4,
+    NotEqual = 0x5,
+    GreaterEqual = 0x6,
+    Always = 0x7,
+};
+
+/// Texture filtering modes; the type of the `minFilter`, `magFilter` and
+/// `mipFilter` fields of `SamplerStateDesc`.
+public enum class TextureFilteringMode
+{
+    Point,
+    Linear,
+};
+
+/// Texture addressing modes; the type of the `addressU`, `addressV` and
+/// `addressW` fields of `SamplerStateDesc`.
+public enum class TextureAddressingMode
+{
+    Wrap,
+    ClampToEdge,
+    ClampToBorder,
+    MirrorRepeat,
+    MirrorOnce,
+};
+
+/// Texture reduction operations; the type of `SamplerStateDesc::reductionOp`.
+public enum class TextureReductionOp
+{
+    Average,
+    Comparison,
+    Minimum,
+    Maximum,
+};
+
+/// Describes a sampler state.
+///
+/// The initializer assigns a default to every field: linear filtering,
+/// averaging reduction, wrap addressing, no LOD bias, anisotropy 1,
+/// `ComparisonFunc::Never`, an all-ones border color and the full
+/// float range for the LOD clamp.
+public struct SamplerStateDesc
+{
+    public TextureFilteringMode minFilter;
+    public TextureFilteringMode magFilter;
+    public TextureFilteringMode mipFilter;
+    public TextureReductionOp reductionOp;
+    public TextureAddressingMode addressU;
+    public TextureAddressingMode addressV;
+    public TextureAddressingMode addressW;
+    public float mipLODBias;
+    public uint32_t maxAnisotropy;
+    public ComparisonFunc comparisonFunc;
+    public float4 borderColor;
+    public float minLOD;
+    public float maxLOD;
+
+    /// Fills in the default sampler settings.
+    public __init()
+    {
+        // Filtering and reduction.
+        mipFilter = TextureFilteringMode::Linear;
+        magFilter = TextureFilteringMode::Linear;
+        minFilter = TextureFilteringMode::Linear;
+        reductionOp = TextureReductionOp::Average;
+
+        // Addressing on all three axes, plus the border color.
+        addressW = TextureAddressingMode::Wrap;
+        addressV = TextureAddressingMode::Wrap;
+        addressU = TextureAddressingMode::Wrap;
+        borderColor = float4(1.0f, 1.0f, 1.0f, 1.0f);
+
+        // Level-of-detail: no bias, clamp range spans all finite floats.
+        maxLOD = float.maxValue;
+        minLOD = -float.maxValue;
+        mipLODBias = 0.0f;
+
+        // Anisotropy and comparison.
+        comparisonFunc = ComparisonFunc::Never;
+        maxAnisotropy = 1;
+    }
+};
+
+/// COM interface for sampler state objects.
+[COM("8b8055df-9377-401d-91ff-3f-a3-bf-66-64-f4")]
+public interface ISamplerState
+{
+    /// Returns a native API handle representing this sampler state object.
+    /// When using D3D12, this will be a D3D12_CPU_DESCRIPTOR_HANDLE.
+    /// When using Vulkan, this will be a VkSampler.
+    /// The handle is written to `outNativeHandle`; the `Result` reports the outcome.
+    public Result getNativeHandle(InteropHandle *outNativeHandle);
+};
+
+/// Kinds of resource view; the type of `ResourceViewDesc::type`.
+public enum class ResourceViewType
+{
+    Unknown,
+
+    RenderTarget,
+    DepthStencil,
+    ShaderResource,
+    UnorderedAccess,
+    AccelerationStructure,
+
+    // Last enumerator; its value equals the number of enumerators above it.
+    CountOf_,
+};
+
+/// Render-target specific part of a `ResourceViewDesc`.
+public struct RenderTargetDesc
+{
+    // The resource shape of this render target view.
+    public ResourceType shape;
+};
+
+/// Describes a resource view: its kind, format, and the part of the
+/// underlying resource it covers.
+public struct ResourceViewDesc
+{
+    public ResourceViewType type;
+    public Format format;
+
+    // Required fields for `RenderTarget` and `DepthStencil` views.
+    public RenderTargetDesc renderTarget;
+    // Specifies the range of a texture resource for a ShaderResource/UnorderedAccess/RenderTarget/DepthStencil view.
+    public SubresourceRange subresourceRange;
+    // Specifies the range of a buffer resource for a ShaderResource/UnorderedAccess view.
+    public BufferRange bufferRange;
+};
+
+/// COM interface for resource views.
+[COM("7b6c4926-0884-408c-ad8a-50-3a-8e-23-98-a4")]
+public interface IResourceView
+{
+    /// Returns a pointer to the `ResourceViewDesc` of this view.
+    public ResourceViewDesc* getViewDesc();
+
+    /// Returns a native API handle representing this resource view object.
+    /// When using D3D12, this will be a D3D12_CPU_DESCRIPTOR_HANDLE or a buffer device address depending
+    /// on the type of the resource view.
+    /// When using Vulkan, this will be a VkImageView, VkBufferView, VkAccelerationStructure or a VkBuffer
+    /// depending on the type of the resource view.
+    public Result getNativeHandle(InteropHandle *outNativeHandle);
+};
+
+/// Kind of acceleration structure. `AccelerationStructureBuildInputs` uses it
+/// to choose between instance descriptors (`TopLevel`) and geometry
+/// descriptors (`BottomLevel`).
+public enum class AccelerationStructureKind
+{
+    TopLevel,
+    BottomLevel
+};
+
+// The enum values are intentionally consistent with
+// D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS.
+// Each flag other than `None` is a distinct power of two.
+public enum AccelerationStructureBuildFlags
+{
+    None,
+    AllowUpdate = 1,
+    AllowCompaction = 2,
+    PreferFastTrace = 4,
+    PreferFastBuild = 8,
+    MinimizeMemory = 16,
+    PerformUpdate = 32
+};
+
+/// Geometry kinds; the type of `GeometryDesc::type`.
+public enum class GeometryType
+{
+    Triangles, ProcedurePrimitives
+};
+
+/// Wrapper struct that scopes the geometry flag enum as `GeometryFlags::Enum`
+/// (the type of `GeometryDesc::flags`).
+public struct GeometryFlags
+{
+    // The enum values are intentionally consistent with
+    // D3D12_RAYTRACING_GEOMETRY_FLAGS.
+    public enum Enum
+    {
+        None,
+        Opaque = 1,
+        NoDuplicateAnyHitInvocation = 2
+    };
+};
+
+/// Triangle geometry description (see `GeometryDesc::triangles`).
+/// Transform, index and vertex data are referenced through `DeviceAddress`
+/// values rather than host pointers.
+/// NOTE(review): `GeometryDesc::proceduralAABBs` reinterprets this struct's
+/// storage, so its field layout should not be changed casually.
+public struct TriangleDesc
+{
+    public DeviceAddress transform3x4;
+    public Format indexFormat;
+    public Format vertexFormat;
+    public GfxCount indexCount;
+    public GfxCount vertexCount;
+    public DeviceAddress indexData;
+    public DeviceAddress vertexData;
+    public Size vertexStride;
+};
+
+/// An axis-aligned bounding box stored as six floats: the minimum corner
+/// (`minX`, `minY`, `minZ`) followed by the maximum corner (`maxX`, `maxY`, `maxZ`).
+public struct ProceduralAABB
+{
+    public float minX;
+    public float minY;
+    public float minZ;
+    public float maxX;
+    public float maxY;
+    public float maxZ;
+};
+
+/// Describes an array of `ProceduralAABB` values in device memory
+/// (exposed through `GeometryDesc::proceduralAABBs`).
+public struct ProceduralAABBDesc
+{
+    /// Number of AABBs.
+    public GfxCount count;
+
+    /// Pointer to an array of `ProceduralAABB` values in device memory.
+    public DeviceAddress data;
+
+    /// Stride in bytes of the AABB values array.
+    public Size stride;
+};
+
+/// Describes one geometry entry: its kind, its flags, and the geometry payload.
+///
+/// The payload is stored in `triangles`. `proceduralAABBs` does not add
+/// storage; it reads and writes the same bytes reinterpreted as a
+/// `ProceduralAABBDesc`.
+public struct GeometryDesc
+{
+    public GeometryType type;
+    public GeometryFlags::Enum flags;
+    public TriangleDesc triangles;
+
+    /// View of the `triangles` storage as a `ProceduralAABBDesc`.
+    public property ProceduralAABBDesc proceduralAABBs
+    {
+        get
+        {
+            return reinterpret<ProceduralAABBDesc, TriangleDesc>(triangles);
+        }
+        set
+        {
+            triangles = reinterpret<TriangleDesc, ProceduralAABBDesc>(newValue);
+        }
+    }
+};
+
+// The enum values are kept consistent with D3D12_RAYTRACING_INSTANCE_FLAGS
+// and VkGeometryInstanceFlagBitsKHR.
+// Used as the type of the `InstanceDesc::flags` property.
+public enum GeometryInstanceFlags
+{
+    None = 0,
+    TriangleFacingCullDisable = 0x00000001,
+    TriangleFrontCounterClockwise = 0x00000002,
+    ForceOpaque = 0x00000004,
+    NoOpaque = 0x00000008
+};
+
+// TODO: Should any of these be changed?
+// The layout of this struct is intentionally consistent with D3D12_RAYTRACING_INSTANCE_DESC
+// and VkAccelerationStructureInstanceKHR.
+//
+// Two 32-bit words each pack a 24-bit value (low bits) with an 8-bit value
+// (high bits); the properties below read and write one part while leaving
+// the other part of the word untouched.
+public struct InstanceDesc
+{
+    public float transform[3][4];
+
+    /// Packed word: instance ID in bits 0..23, instance mask in bits 24..31.
+    public uint32_t instanceID24_mask8;
+
+    /// The low 24 bits of `instanceID24_mask8`.
+    public property uint32_t instanceID
+    {
+        get { return instanceID24_mask8 & 0x00FFFFFF; }
+        set { instanceID24_mask8 = (newValue & 0x00FFFFFF) | (instanceID24_mask8 & 0xFF000000); }
+    }
+
+    /// The high 8 bits of `instanceID24_mask8`.
+    public property uint32_t instanceMask
+    {
+        get { return instanceID24_mask8 >> 24; }
+        set { instanceID24_mask8 = (instanceID24_mask8 & 0x00FFFFFF) | (newValue << 24); }
+    }
+
+    /// Packed word: hit-group index contribution in bits 0..23, flags in bits 24..31.
+    public uint32_t instanceContributionToHitGroupIndex24_flags8;
+
+    /// The low 24 bits of `instanceContributionToHitGroupIndex24_flags8`.
+    public property uint32_t instanceContributionToHitGroupIndex
+    {
+        get { return instanceContributionToHitGroupIndex24_flags8 & 0x00FFFFFF; }
+        set { instanceContributionToHitGroupIndex24_flags8 = (newValue & 0x00FFFFFF) | (instanceContributionToHitGroupIndex24_flags8 & 0xFF000000); }
+    }
+
+    /// The high 8 bits of `instanceContributionToHitGroupIndex24_flags8`,
+    /// viewed as `GeometryInstanceFlags`.
+    public property GeometryInstanceFlags flags
+    {
+        get { return (GeometryInstanceFlags)(instanceContributionToHitGroupIndex24_flags8 >> 24); }
+        set { instanceContributionToHitGroupIndex24_flags8 = (instanceContributionToHitGroupIndex24_flags8 & 0x00FFFFFF) | ((uint32_t)newValue << 24); }
+    }
+
+    public DeviceAddress accelerationStructure;
+};
+
+/// Size information for an acceleration structure build: the maximum result
+/// size and the scratch sizes for a build and for an update.
+/// NOTE(review): sizes are presumably in bytes, as with other `Size` fields
+/// in this file - confirm.
+public struct AccelerationStructurePrebuildInfo
+{
+    public Size resultDataMaxSize;
+    public Size scratchDataSize;
+    public Size updateScratchDataSize;
+};
+
+/// Inputs to an acceleration structure build. Which descriptor array is
+/// used depends on `kind` (see the field comments below).
+public struct AccelerationStructureBuildInputs
+{
+    public AccelerationStructureKind kind;
+
+    public AccelerationStructureBuildFlags flags;
+
+    /// NOTE(review): presumably the element count of whichever descriptor
+    /// array below is in use - confirm.
+    public GfxCount descCount;
+
+    /// Array of `InstanceDesc` values in device memory.
+    /// Used when `kind` is `TopLevel`.
+    public DeviceAddress instanceDescs;
+
+    /// Array of `GeometryDesc` values.
+    /// Used when `kind` is `BottomLevel`.
+    public GeometryDesc *geometryDescs;
+};
+
+/// Parameters for creating an acceleration structure: its kind and an
+/// offset/size range within a buffer resource.
+public struct AccelerationStructureCreateDesc
+{
+    public AccelerationStructureKind kind;
+    public NativeRef<IBufferResource> buffer;
+    public Offset offset;
+    public Size size;
+};
+
+/// Parameters for `IRayTracingCommandEncoder::buildAccelerationStructure`:
+/// the build inputs, source and destination structures, and the device
+/// address of scratch data.
+public struct AccelerationStructureBuildDesc
+{
+    public AccelerationStructureBuildInputs inputs;
+    public NativeRef<IAccelerationStructure> source;
+    public NativeRef<IAccelerationStructure> dest;
+    public DeviceAddress scratchData;
+};
+
+/// COM interface for acceleration structures; derives from `IResourceView`.
+[COM("a5cdda3c-1d4e-4df7-8ef2-b7-3f-ce-04-de-3b")]
+public interface IAccelerationStructure : IResourceView
+{
+    /// Returns the device address of this acceleration structure.
+    public DeviceAddress getDeviceAddress();
+};
+
+/// Parameters for creating a fence: its initial value and whether it is shared.
+public struct FenceDesc
+{
+    public uint64_t initialValue;
+    public bool isShared;
+};
+
+/// COM interface for fences carrying a 64-bit value.
+[COM("7fe1c283-d3f4-48ed-aaf3-01-51-96-4e-7c-b5")]
+public interface IFence
+{
+    /// Returns the currently signaled value on the device.
+    public Result getCurrentValue(uint64_t *outValue);
+
+    /// Signals the fence from the host with the specified value.
+    public Result setCurrentValue(uint64_t value);
+
+    /// Writes a shared handle for this fence to `outHandle`.
+    public Result getSharedHandle(InteropHandle *outHandle);
+    /// Writes the native API handle for this fence to `outNativeHandle`.
+    public Result getNativeHandle(InteropHandle *outNativeHandle);
+};
+
+/// Location inside a shader object, passed to the `IShaderObject` setters.
+/// Combines a uniform offset with a binding range index and an index into
+/// that binding range's array. All three default to 0.
+public struct ShaderOffset
+{
+    public Int uniformOffset = 0; // TODO: Change to Offset?
+    public GfxIndex bindingRangeIndex = 0;
+    public GfxIndex bindingArrayIndex = 0;
+}
+
+/// Container kind of a shader object (returned by `IShaderObject::getContainerType`).
+public enum class ShaderObjectContainerType
+{
+    None, Array, StructuredBuffer
+};
+
+/// COM interface for shader objects: containers for shader parameter data,
+/// sub-objects, resources and samplers, addressed via `ShaderOffset`.
+[COM("c1fa997e-5ca2-45ae-9bcb-c4-35-9e-85-05-85")]
+public interface IShaderObject
+{
+    /// Returns the reflection type layout of this object's element type.
+    public slang::TypeLayoutReflection* getElementTypeLayout();
+    /// Returns the container kind of this object.
+    public ShaderObjectContainerType getContainerType();
+    /// Returns the number of entry points.
+    public GfxCount getEntryPointCount();
+    /// Retrieves the entry point at `index` into `entryPoint`.
+    public Result getEntryPoint(GfxIndex index, out Optional<IShaderObject> entryPoint);
+    /// Writes `size` bytes from `data` at `offset`.
+    public Result setData(ShaderOffset *offset, void *data, Size size);
+    /// Retrieves the sub-object bound at `offset` into `object`.
+    public Result getObject(ShaderOffset *offset, out Optional<IShaderObject> object);
+    /// Binds a sub-object at `offset`.
+    public Result setObject(ShaderOffset* offset, IShaderObject object);
+    /// Binds a resource view at `offset`.
+    public Result setResource(ShaderOffset* offset, IResourceView resourceView);
+    /// Binds a sampler at `offset`.
+    public Result setSampler(ShaderOffset* offset, ISamplerState sampler);
+    /// Binds a texture view together with a sampler at `offset`.
+    public Result setCombinedTextureSampler(ShaderOffset* offset, IResourceView textureView, ISamplerState sampler);
+
+    /// Manually overrides the specialization argument for the sub-object binding at `offset`.
+    /// Specialization arguments are passed to the shader compiler to specialize the type
+    /// of interface-typed shader parameters.
+    public Result setSpecializationArgs(
+        ShaderOffset* offset,
+        slang::SpecializationArg *args,
+        GfxCount count);
+
+    /// Retrieves the current version of this object into `outObject`.
+    /// NOTE(review): the role of `transientHeap` is not visible here - confirm
+    /// against the implementation.
+    public Result getCurrentVersion(
+        ITransientResourceHeap transientHeap,
+        out IShaderObject outObject);
+
+    /// Returns a pointer to this object's raw data.
+    public void* getRawData();
+
+    /// Returns this object's size.
+    /// NOTE(review): presumably the byte size of the data returned by
+    /// `getRawData` - confirm.
+    public Size getSize();
+
+    /// Use the provided constant buffer instead of the internally created one.
+    public Result setConstantBufferOverride(IBufferResource constantBuffer);
+};
+
+/// Stencil operations, stored as `uint8_t`; the type of the `stencil*Op`
+/// fields of `DepthStencilOpDesc`.
+public enum class StencilOp : uint8_t
+{
+    Keep,
+    Zero,
+    Replace,
+    IncrementSaturate,
+    DecrementSaturate,
+    Invert,
+    IncrementWrap,
+    DecrementWrap,
+};
+
+/// Polygon fill modes, stored as `uint8_t`; the type of `RasterizerDesc::fillMode`.
+public enum class FillMode : uint8_t
+{
+    Solid,
+    Wireframe,
+};
+
+/// Face culling modes, stored as `uint8_t`; the type of `RasterizerDesc::cullMode`.
+public enum class CullMode : uint8_t
+{
+    None,
+    Front,
+    Back,
+};
+
+/// Winding orders, stored as `uint8_t`; the type of `RasterizerDesc::frontFace`.
+public enum class FrontFaceMode : uint8_t
+{
+    CounterClockwise,
+    Clockwise,
+};
+
+/// Stencil behaviour for one face (see `DepthStencilDesc::frontFace` and
+/// `DepthStencilDesc::backFace`).
+///
+/// Defaults: every operation is `StencilOp::Keep` and the comparison is
+/// `ComparisonFunc::Always`. The same defaults appear both as field
+/// initializers and in the explicit initializer.
+public struct DepthStencilOpDesc
+{
+    public StencilOp stencilFailOp = StencilOp::Keep;
+    public StencilOp stencilDepthFailOp = StencilOp::Keep;
+    public StencilOp stencilPassOp = StencilOp::Keep;
+    public ComparisonFunc stencilFunc = ComparisonFunc::Always;
+
+    /// Sets every field to its default value.
+    public __init()
+    {
+        // Comparison first, then the three operations.
+        stencilFunc = ComparisonFunc::Always;
+        stencilPassOp = StencilOp::Keep;
+        stencilDepthFailOp = StencilOp::Keep;
+        stencilFailOp = StencilOp::Keep;
+    }
+};
+
+/// Depth and stencil state of a graphics pipeline.
+///
+/// Defaults: depth test off, depth write on, `ComparisonFunc::Less`,
+/// stencil off, both stencil masks 0xFFFFFFFF, stencil reference 0.
+/// The explicit initializer does not assign `frontFace` or `backFace`.
+public struct DepthStencilDesc
+{
+    public bool depthTestEnable = false;
+    public bool depthWriteEnable = true;
+    public ComparisonFunc depthFunc = ComparisonFunc::Less;
+
+    public bool stencilEnable = false;
+    public uint32_t stencilReadMask = 0xFFFFFFFF;
+    public uint32_t stencilWriteMask = 0xFFFFFFFF;
+    public DepthStencilOpDesc frontFace;
+    public DepthStencilOpDesc backFace;
+
+    public uint32_t stencilRef = 0;
+
+    /// Sets the scalar depth/stencil fields to their default values.
+    public __init()
+    {
+        // Stencil settings.
+        stencilRef = 0;
+        stencilWriteMask = 0xFFFFFFFF;
+        stencilReadMask = 0xFFFFFFFF;
+        stencilEnable = false;
+
+        // Depth settings.
+        depthFunc = ComparisonFunc::Less;
+        depthWriteEnable = true;
+        depthTestEnable = false;
+    }
+};
+
+/// Rasterizer state of a graphics pipeline.
+///
+/// Defaults: solid fill, no culling, counter-clockwise front faces, no
+/// depth bias, depth clipping on, and scissor, multisampling, line
+/// antialiasing and conservative rasterization all off.
+public struct RasterizerDesc
+{
+    public FillMode fillMode = FillMode::Solid;
+    public CullMode cullMode = CullMode::None;
+    public FrontFaceMode frontFace = FrontFaceMode::CounterClockwise;
+    public int32_t depthBias = 0;
+    public float depthBiasClamp = 0.0f;
+    public float slopeScaledDepthBias = 0.0f;
+    public bool depthClipEnable = true;
+    public bool scissorEnable = false;
+    public bool multisampleEnable = false;
+    public bool antialiasedLineEnable = false;
+    public bool enableConservativeRasterization = false;
+    public uint32_t forcedSampleCount = 0;
+
+    /// Sets every field to its default value.
+    public __init()
+    {
+        // Polygon setup.
+        frontFace = FrontFaceMode::CounterClockwise;
+        cullMode = CullMode::None;
+        fillMode = FillMode::Solid;
+
+        // Depth bias and clipping.
+        slopeScaledDepthBias = 0.0f;
+        depthBiasClamp = 0.0f;
+        depthBias = 0;
+        depthClipEnable = true;
+
+        // Optional features, all disabled.
+        enableConservativeRasterization = false;
+        antialiasedLineEnable = false;
+        multisampleEnable = false;
+        scissorEnable = false;
+        forcedSampleCount = 0;
+    }
+};
+
+/// Logic operations for blending; the type of `TargetBlendDesc::logicOp`.
+/// Only `NoOp` is defined.
+public enum class LogicOp
+{
+    NoOp,
+};
+
+/// Blend operations; the type of `AspectBlendDesc::op`.
+public enum class BlendOp
+{
+    Add,
+    Subtract,
+    ReverseSubtract,
+    Min,
+    Max,
+};
+
+/// Blend factors; the type of `AspectBlendDesc::srcFactor` and
+/// `AspectBlendDesc::dstFactor`.
+public enum class BlendFactor
+{
+    Zero,
+    One,
+    SrcColor,
+    InvSrcColor,
+    SrcAlpha,
+    InvSrcAlpha,
+    DestAlpha,
+    InvDestAlpha,
+    DestColor,
+    InvDestColor,
+    SrcAlphaSaturate,
+    BlendColor,
+    InvBlendColor,
+    SecondarySrcColor,
+    InvSecondarySrcColor,
+    SecondarySrcAlpha,
+    InvSecondarySrcAlpha,
+};
+
+/// Per-channel write-enable bits for a render target; the type of
+/// `TargetBlendDesc::writeMask`.
+public enum RenderTargetWriteMask
+{
+    EnableNone = 0,
+    EnableRed = 0x01,
+    EnableGreen = 0x02,
+    EnableBlue = 0x04,
+    EnableAlpha = 0x08,
+    // All four channel bits combined (0x01 | 0x02 | 0x04 | 0x08).
+    EnableAll = 0x0F,
+};
+
+/// Blend settings for one aspect; `TargetBlendDesc` holds one instance for
+/// color and one for alpha.
+///
+/// Defaults: source factor `One`, destination factor `Zero`, operation `Add`.
+public struct AspectBlendDesc
+{
+    public BlendFactor srcFactor = BlendFactor::One;
+    public BlendFactor dstFactor = BlendFactor::Zero;
+    public BlendOp op = BlendOp::Add;
+
+    /// Sets every field to its default value.
+    ///
+    /// Declared `public` like the initializers of the other descriptor
+    /// structs in this file; without the modifier it would only have the
+    /// module-internal default visibility.
+    public __init()
+    {
+        srcFactor = BlendFactor::One;
+        dstFactor = BlendFactor::Zero;
+        op = BlendOp::Add;
+    }
+};
+
+/// Blend state for a single render target.
+///
+/// The initializer disables blending, selects `LogicOp::NoOp` and enables
+/// writes to all channels; it does not assign `color` or `alpha`.
+public struct TargetBlendDesc
+{
+    public AspectBlendDesc color;
+    public AspectBlendDesc alpha;
+    public bool enableBlend;
+    public LogicOp logicOp;
+    public RenderTargetWriteMask writeMask;
+
+    /// Sets the blend enable, logic op and write mask defaults.
+    public __init()
+    {
+        writeMask = RenderTargetWriteMask::EnableAll;
+        logicOp = LogicOp::NoOp;
+        enableBlend = false;
+    }
+};
+
+/// Blend state of a graphics pipeline: a fixed-size array of per-target
+/// blend descriptors, a target count, and the alpha-to-coverage switch.
+public struct BlendDesc
+{
+    // Storage for `kMaxRenderTargetCount` targets.
+    // NOTE(review): presumably only the first `targetCount` entries are used - confirm.
+    public TargetBlendDesc targets[kMaxRenderTargetCount] = {};
+    public GfxCount targetCount = 0;
+
+    public bool alphaToCoverageEnable = false;
+};
+
+/// Format and sample count of one framebuffer target
+/// (see `FramebufferLayoutDesc`).
+public struct FramebufferTargetLayout
+{
+    public Format format;
+    public GfxCount sampleCount;
+};
+
+/// Describes a framebuffer layout: a render target count with a pointer to
+/// the render target layouts, and a pointer to the depth/stencil layout.
+/// NOTE(review): `depthStencil` is presumably null when there is no
+/// depth/stencil target - confirm.
+public struct FramebufferLayoutDesc
+{
+    public GfxCount renderTargetCount;
+    public FramebufferTargetLayout *renderTargets;
+    public FramebufferTargetLayout *depthStencil;
+};
+
+/// COM interface for framebuffer layout objects. It declares no methods.
+[COM("0a838785-c13a-4832-ad88-64-06-b5-4b-5e-ba")]
+public interface IFramebufferLayout
+{
+};
+
+/// Describes a graphics pipeline state: the shader program, input and
+/// framebuffer layouts, primitive type, and fixed-function state.
+public struct GraphicsPipelineStateDesc
+{
+    public NativeRef<IShaderProgram> program;
+
+    public NativeRef<IInputLayout> inputLayout;
+    public NativeRef<IFramebufferLayout> framebufferLayout;
+    public PrimitiveType primitiveType;
+    public DepthStencilDesc depthStencil;
+    public RasterizerDesc rasterizer;
+    public BlendDesc blend;
+
+    /// Sets the primitive type to `Triangle`, default-initializes the
+    /// fixed-function descriptors, and initializes each object reference
+    /// from a default-constructed interface value.
+    public __init()
+    {
+        // Fixed-function state.
+        blend = {};
+        rasterizer = {};
+        depthStencil = {};
+        primitiveType = PrimitiveType::Triangle;
+
+        // Object references.
+        framebufferLayout = {IFramebufferLayout()};
+        inputLayout = {IInputLayout()};
+        program = {IShaderProgram()};
+    }
+};
+
+/// Describes a compute pipeline state: the shader program and an untyped
+/// pointer for overriding the D3D12 root signature.
+/// NOTE(review): `d3d12RootSignatureOverride` is presumably null when no
+/// override is wanted - confirm.
+public struct ComputePipelineStateDesc
+{
+    public NativeRef<IShaderProgram> program;
+    public void *d3d12RootSignatureOverride;
+};
+
+/// Ray tracing pipeline flag bits; the type of
+/// `RayTracingPipelineStateDesc::flags`.
+public enum RayTracingPipelineFlags
+{
+    None = 0,
+    SkipTriangles = 1,
+    SkipProcedurals = 2,
+};
+
+/// Names a hit group and the entry points it consists of: closest-hit,
+/// any-hit, and intersection.
+public struct HitGroupDesc
+{
+    public NativeString hitGroupName;
+    public NativeString closestHitEntryPoint;
+    public NativeString anyHitEntryPoint;
+    public NativeString intersectionEntryPoint;
+};
+
+/// Describes a ray tracing pipeline state: the shader program, the hit
+/// groups, recursion and size limits, and pipeline flags.
+public struct RayTracingPipelineStateDesc
+{
+    public NativeRef<IShaderProgram> program;
+    public GfxCount hitGroupCount = 0;
+    // NOTE(review): presumably points to `hitGroupCount` entries - confirm.
+    public HitGroupDesc *hitGroups;
+    public int maxRecursion = 0;
+    public Size maxRayPayloadSize = 0;
+    public Size maxAttributeSizeInBytes = 8;
+    public RayTracingPipelineFlags flags = RayTracingPipelineFlags::None;
+};
+
+// Specifies the bytes to overwrite into a record in the shader table.
+// The payload array holds 8 bytes.
+// NOTE(review): `size` is presumably limited to those 8 bytes - confirm.
+public struct ShaderRecordOverwrite
+{
+    public Offset offset;   // Offset within the shader record.
+    public Size size;       // Number of bytes to overwrite.
+    public uint8_t data[8]; // Content to overwrite.
+};
+
+/// Describes a shader table: three groups of entries (ray generation,
+/// miss, hit group), each given as a count, an array of names, and an
+/// array of record overwrites, plus the shader program.
+/// NOTE(review): each array presumably holds as many elements as the
+/// matching count - confirm.
+public struct ShaderTableDesc
+{
+    public GfxCount rayGenShaderCount;
+    public NativeString* rayGenShaderEntryPointNames;
+    public ShaderRecordOverwrite *rayGenShaderRecordOverwrites;
+
+    public GfxCount missShaderCount;
+    public NativeString *missShaderEntryPointNames;
+    public ShaderRecordOverwrite *missShaderRecordOverwrites;
+
+    public GfxCount hitGroupCount;
+    public NativeString *hitGroupNames;
+    public ShaderRecordOverwrite *hitGroupRecordOverwrites;
+
+    // Declared `public` like every other field of this struct; without the
+    // modifier the field would only have the module-internal default
+    // visibility and could not be set by users of this module.
+    public NativeRef<IShaderProgram> program;
+};
+
+/// COM interface for shader table objects. It declares no methods.
+[COM("a721522c-df31-4c2f-a5e7-3b-e0-12-4b-31-78")]
+public interface IShaderTable
+{
+
+};
+
+/// COM interface for pipeline state objects.
+[COM("0ca7e57d-8a90-44f3-bdb1-fe-9b-35-3f-5a-72")]
+public interface IPipelineState
+{
+    /// Writes the native API handle for this pipeline state to `outHandle`.
+    /// Marked `public` explicitly, matching every other interface method in this file.
+    public Result getNativeHandle(InteropHandle *outHandle);
+};
+
+/// Scissor rectangle given by integer minimum and maximum coordinates on
+/// the X and Y axes.
+public struct ScissorRect
+{
+    public int32_t minX;
+    public int32_t minY;
+    public int32_t maxX;
+    public int32_t maxY;
+};
+
+/// Viewport given by an origin, an extent, and a Z range.
+/// Defaults: origin and extent are 0, and the Z range is [0, 1].
+public struct Viewport
+{
+    public float originX = 0.0f;
+    public float originY = 0.0f;
+    public float extentX = 0.0f;
+    public float extentY = 0.0f;
+    public float minZ = 0.0f;
+    public float maxZ = 1.0f;
+};
+
+/// Describes a framebuffer: render target views, a depth/stencil view, and
+/// the framebuffer layout.
+public struct FramebufferDesc
+{
+    public GfxCount renderTargetCount;
+    // NOTE(review): presumably points to `renderTargetCount` views - confirm.
+    public NativeRef<IResourceView> *renderTargetViews;
+    public NativeRef<IResourceView> depthStencilView;
+    public NativeRef<IFramebufferLayout> layout;
+};
+
+/// COM interface for framebuffer objects. It declares no methods.
+[COM("0f0c0d9a-4ef3-4e18-9ba9-34-60-ea-69-87-95")]
+public interface IFramebuffer
+{
+};
+
+/// Kind of native window handle stored in a `WindowHandle`.
+public enum class WindowHandleType
+{
+    Unknown,
+    Win32Handle,
+    XLibHandle,
+};
+
+/// A native window handle: a type tag plus two pointer-sized slots whose
+/// meaning depends on the tag.
+public struct WindowHandle
+{
+    public WindowHandleType type;
+    public void* handleValues[2];
+
+    /// Builds a `Win32Handle` window handle.
+    /// Slot 0 holds `hwnd`; slot 1 stays null.
+    public static WindowHandle fromHwnd(void *hwnd)
+    {
+        WindowHandle result = {WindowHandleType::Unknown, {nullptr, nullptr}};
+        result.handleValues[0] = hwnd;
+        result.type = WindowHandleType::Win32Handle;
+        return result;
+    }
+
+    /// Builds an `XLibHandle` window handle.
+    /// Slot 0 holds `xdisplay`; slot 1 holds `xwindow` cast to a pointer.
+    public static WindowHandle fromXWindow(void *xdisplay, uint32_t xwindow)
+    {
+        WindowHandle result = {WindowHandleType::Unknown, {nullptr, nullptr}};
+        result.handleValues[1] = (void*)xwindow;
+        result.handleValues[0] = xdisplay;
+        result.type = WindowHandleType::XLibHandle;
+        return result;
+    }
+};
+
+/// Face selection bits: `Front` and `Back` occupy separate bits.
+public enum FaceMask
+{
+    Front = 1, Back = 2
+};
+
+/// Load operations for a render pass target; the type of
+/// `TargetAccessDesc::loadOp` and `TargetAccessDesc::stencilLoadOp`.
+public enum class TargetLoadOp
+{
+    Load, Clear, DontCare
+};
+/// Store operations for a render pass target; the type of
+/// `TargetAccessDesc::storeOp` and `TargetAccessDesc::stencilStoreOp`.
+public enum class TargetStoreOp
+{
+    Store, DontCare
+};
+/// Describes how a render pass accesses one target: load and store
+/// operations (with separate stencil variants) and the initial and final
+/// resource states.
+public struct TargetAccessDesc
+{
+    public TargetLoadOp loadOp;
+    public TargetLoadOp stencilLoadOp;
+    public TargetStoreOp storeOp;
+    public TargetStoreOp stencilStoreOp;
+    public ResourceState initialState;
+    public ResourceState finalState;
+};
+/// Describes a render pass layout: the framebuffer layout plus access
+/// descriptors for the render targets and the depth/stencil target.
+public struct RenderPassLayoutDesc
+{
+    public NativeRef<IFramebufferLayout> framebufferLayout;
+    public GfxCount renderTargetCount;
+    // NOTE(review): presumably points to `renderTargetCount` entries - confirm.
+    public TargetAccessDesc *renderTargetAccess;
+    public TargetAccessDesc *depthStencilAccess;
+};
+
+/// COM interface for render pass layout objects. It declares no methods.
+[COM("daab0b1a-f45d-4ae9-bf2c-e0-bb-76-7d-fa-d1")]
+public interface IRenderPassLayout
+{
+};
+
+/// Kinds of query; used by `QueryPoolDesc::type` and
+/// `AccelerationStructureQueryDesc::queryType`.
+public enum class QueryType
+{
+    Timestamp,
+    AccelerationStructureCompactedSize,
+    AccelerationStructureSerializedSize,
+    AccelerationStructureCurrentSize,
+};
+
+/// Describes a query pool: the query kind and a count.
+/// NOTE(review): `count` is presumably the number of queries in the pool - confirm.
+public struct QueryPoolDesc
+{
+    public QueryType type;
+    public GfxCount count;
+};
+
+/// COM interface for query pools.
+[COM("c2cc3784-12da-480a-a874-8b-31-96-1c-a4-36")]
+public interface IQueryPool
+{
+    /// Reads query results as 64-bit values into `data`, for `count`
+    /// queries starting at `queryIndex`.
+    public Result getResult(GfxIndex queryIndex, GfxCount count, uint64_t *data);
+    /// Resets the query pool.
+    public Result reset();
+};
+
+/// Base COM interface of all command encoders.
+[COM("77ea6383-be3d-40aa-8b45-fd-f0-d7-5b-fa-34")]
+public interface ICommandEncoder
+{
+    /// Ends encoding with this encoder.
+    public void endEncoding();
+    /// Records a timestamp write into slot `queryIndex` of `queryPool`.
+    public void writeTimestamp(IQueryPool queryPool, GfxIndex queryIndex);
+};
+
+/// Argument record for an indirect compute dispatch: thread group counts
+/// along X, Y and Z.
+public struct IndirectDispatchArguments
+{
+    public GfxCount ThreadGroupCountX;
+    public GfxCount ThreadGroupCountY;
+    public GfxCount ThreadGroupCountZ;
+};
+
+/// Argument record for an indirect (non-indexed) draw.
+public struct IndirectDrawArguments
+{
+    public GfxCount VertexCountPerInstance;
+    public GfxCount InstanceCount;
+    public GfxIndex StartVertexLocation;
+    public GfxIndex StartInstanceLocation;
+};
+
+/// Argument record for an indirect indexed draw.
+public struct IndirectDrawIndexedArguments
+{
+    public GfxCount IndexCountPerInstance;
+    public GfxCount InstanceCount;
+    public GfxIndex StartIndexLocation;
+    public GfxIndex BaseVertexLocation;
+    public GfxIndex StartInstanceLocation;
+};
+
+/// A sample position given by two signed 8-bit coordinates
+/// (see `IRenderCommandEncoder::setSamplePositions`).
+public struct SamplePosition
+{
+    public int8_t x;
+    public int8_t y;
+};
+
+/// Flag bits passed to `IResourceCommandEncoder::clearResourceView`.
+/// Each flag other than `None` is a distinct power of two.
+public enum ClearResourceViewFlags
+{
+    None = 0,
+    ClearDepth = 1,
+    ClearStencil = 2,
+    FloatClearValues = 4
+};
+
+/// Command encoder for resource operations: copies, uploads, barriers,
+/// clears, resolves and debug events. Derives from `ICommandEncoder`.
+[COM("F99A00E9-ED50-4088-8A0E-3B26755031EA")]
+public interface IResourceCommandEncoder : ICommandEncoder
+{
+    /// Copies `size` bytes from `src` at `srcOffset` to `dst` at `dstOffset`.
+    public void copyBuffer(
+                 IBufferResource dst,
+                 Offset dstOffset,
+                 IBufferResource src,
+                 Offset srcOffset,
+                 Size size);
+    /// Copies texture from src to dst. If dstSubresource and srcSubresource has mipLevelCount = 0
+    /// and layerCount = 0, the entire resource is being copied and dstOffset, srcOffset and extent
+    /// arguments are ignored.
+    public void copyTexture(
+        ITextureResource dst,
+        ResourceState dstState,
+        SubresourceRange dstSubresource,
+        int3 dstOffset,
+        NativeRef<ITextureResource> src,
+        ResourceState srcState,
+        SubresourceRange srcSubresource,
+        int3 srcOffset,
+        int3 extent);
+
+    /// Copies texture to a buffer. Each row is aligned to kTexturePitchAlignment.
+    public void copyTextureToBuffer(
+        IBufferResource dst,
+        Offset dstOffset,
+        Size dstSize,
+        Size dstRowStride,
+        ITextureResource src,
+        ResourceState srcState,
+        SubresourceRange srcSubresource,
+        int3 srcOffset,
+        int3 extent);
+    /// Uploads `subResourceDataCount` entries of `subResourceData` into the
+    /// given subresource range and region of `dst`.
+    public void uploadTextureData(
+        ITextureResource dst,
+        SubresourceRange subResourceRange,
+        int3 offset,
+        int3 extent,
+        SubresourceData *subResourceData,
+        GfxCount subResourceDataCount);
+    /// Uploads `size` bytes from `data` into `dst` at `offset`.
+    public void uploadBufferData(IBufferResource dst, Offset offset, Size size, void *data);
+    /// Records a state transition from `src` to `dst` for `count` textures.
+    public void textureBarrier(
+        GfxCount count, NativeRef<ITextureResource> *textures, ResourceState src, ResourceState dst);
+    /// Records a state transition from `src` to `dst` for one subresource
+    /// range of `texture`.
+    public void textureSubresourceBarrier(
+        ITextureResource texture,
+        SubresourceRange subresourceRange,
+        ResourceState src,
+        ResourceState dst);
+    /// Records a state transition from `src` to `dst` for `count` buffers.
+    public void bufferBarrier(
+        GfxCount count, NativeRef<IBufferResource> *buffers, ResourceState src, ResourceState dst);
+    /// Clears `view` using `clearValue`; `flags` selects what is cleared
+    /// (see `ClearResourceViewFlags`).
+    public void clearResourceView(
+        IResourceView view, ClearValue *clearValue, ClearResourceViewFlags flags);
+    /// Resolves `sourceRange` of `source` into `destRange` of `dest`.
+    public void resolveResource(
+        ITextureResource source,
+        ResourceState sourceState,
+        SubresourceRange sourceRange,
+        ITextureResource dest,
+        ResourceState destState,
+        SubresourceRange destRange);
+    /// Resolves `count` queries of `queryPool`, starting at `index`, into
+    /// `buffer` at `offset`.
+    public void resolveQuery(
+        IQueryPool queryPool,
+        GfxIndex index,
+        GfxCount count,
+        IBufferResource buffer,
+        Offset offset);
+    /// Begins a named debug event with an RGB color.
+    public void beginDebugEvent(NativeString name, float rgbColor[3]);
+    /// Ends a debug event.
+    public void endDebugEvent();
+};
+
+/// Command encoder for rendering: pipeline binding, viewport/scissor state,
+/// vertex/index buffers and draw calls. Derives from `IResourceCommandEncoder`.
+[COM("7A8D56D0-53E6-4AD6-85F7-D14DC110FDCE")]
+public interface IRenderCommandEncoder : IResourceCommandEncoder
+{
+    // Sets the current pipeline state. This method returns a transient shader object for
+    // writing shader parameters. This shader object will not retain any resources or
+    // sub-shader-objects bound to it. The user is responsible for ensuring that any
+    // resources or shader objects that are set into `outRootShaderObject` stay alive during
+    // the execution of the command buffer.
+    public Result bindPipeline(IPipelineState state, out IShaderObject outRootShaderObject);
+
+    // Sets the current pipeline state along with a pre-created mutable root shader object.
+    public Result bindPipelineWithRootObject(IPipelineState state, NativeRef<IShaderObject> rootObject);
+
+    // Sets `count` viewports / scissor rectangles.
+    public void setViewports(GfxCount count, Viewport *viewports);
+    public void setScissorRects(GfxCount count, ScissorRect *scissors);
+
+    // Sets the primitive topology.
+    public void setPrimitiveTopology(PrimitiveTopology topology);
+    // Binds `slotCount` vertex buffers starting at slot `startSlot`, each with its own offset.
+    public void setVertexBuffers(
+        GfxIndex startSlot,
+        GfxCount slotCount,
+        NativeRef<IBufferResource>* buffers,
+        Offset *offsets);
+
+    // Binds the index buffer with the given index format and offset.
+    public void setIndexBuffer(IBufferResource buffer, Format indexFormat, Offset offset);
+    // Non-indexed draw.
+    public void draw(GfxCount vertexCount, GfxIndex startVertex);
+    // Indexed draw; `startIndex` and `baseVertex` default to 0.
+    public void drawIndexed(GfxCount indexCount, GfxIndex startIndex = 0, GfxIndex baseVertex = 0);
+    // Indirect draw with arguments read from `argBuffer` at `argOffset`.
+    // NOTE(review): the draw count presumably comes from `countBuffer`, capped at `maxDrawCount` - confirm.
+    public void drawIndirect(
+        GfxCount maxDrawCount,
+        IBufferResource argBuffer,
+        Offset argOffset,
+        NativeRef<IBufferResource> countBuffer,
+        Offset countOffset = 0);
+    // Indexed variant of `drawIndirect`.
+    public void drawIndexedIndirect(
+        GfxCount maxDrawCount,
+        IBufferResource argBuffer,
+        Offset argOffset,
+        NativeRef<IBufferResource> countBuffer,
+        Offset countOffset = 0);
+    // Sets the stencil reference value.
+    public void setStencilReference(uint32_t referenceValue);
+    // Sets custom sample positions.
+    public Result setSamplePositions(
+        GfxCount samplesPerPixel, GfxCount pixelCount, SamplePosition *samplePositions);
+    // Instanced non-indexed draw.
+    public void drawInstanced(
+        GfxCount vertexCount,
+        GfxCount instanceCount,
+        GfxIndex startVertex,
+        GfxIndex startInstanceLocation);
+    // Instanced indexed draw.
+    public void drawIndexedInstanced(
+        GfxCount indexCount,
+        GfxCount instanceCount,
+        GfxIndex startIndexLocation,
+        GfxIndex baseVertexLocation,
+        GfxIndex startInstanceLocation);
+};
+
+/// Command encoder for compute work: pipeline binding and dispatches.
+/// Derives from `IResourceCommandEncoder`.
+[COM("88AA9322-82F7-4FE6-A68A-29C7FE798737")]
+public interface IComputeCommandEncoder : IResourceCommandEncoder
+{
+    // Sets the current pipeline state. This method returns a transient shader object for
+    // writing shader parameters. This shader object will not retain any resources or
+    // sub-shader-objects bound to it. The user is responsible for ensuring that any
+    // resources or shader objects that are set into `outRootShaderObject` stay alive during
+    // the execution of the command buffer.
+    public Result bindPipeline(IPipelineState state, out Optional<IShaderObject> outRootShaderObject);
+
+    // Sets the current pipeline state along with a pre-created mutable root shader object.
+    public Result bindPipelineWithRootObject(IPipelineState state, IShaderObject rootObject);
+
+    // Dispatches compute work with the given counts along X, Y and Z.
+    public void dispatchCompute(int x, int y, int z);
+    // Dispatches compute work with arguments read from `cmdBuffer` at `offset`.
+    public void dispatchComputeIndirect(IBufferResource cmdBuffer, Offset offset);
+};
+
+/// Copy modes for `IRayTracingCommandEncoder::copyAccelerationStructure`.
+public enum class AccelerationStructureCopyMode
+{
+    Clone, Compact
+};
+
+/// Describes an acceleration structure property query: the query kind,
+/// the query pool, and the first query index in that pool.
+public struct AccelerationStructureQueryDesc
+{
+    public QueryType queryType;
+
+    public NativeRef<IQueryPool> queryPool;
+
+    public GfxIndex firstQueryIndex;
+};
+
+/// Command encoder for ray tracing: acceleration structure operations,
+/// pipeline binding and ray dispatch. Derives from `IResourceCommandEncoder`.
+[COM("9a672b87-5035-45e3-967c-1f-85-cd-b3-63-4f")]
+public interface IRayTracingCommandEncoder : IResourceCommandEncoder
+{
+    /// Builds an acceleration structure as described by `desc`, with
+    /// `propertyQueryCount` property queries given in `queryDescs`.
+    public void buildAccelerationStructure(
+        AccelerationStructureBuildDesc *desc,
+        GfxCount propertyQueryCount,
+        AccelerationStructureQueryDesc *queryDescs);
+    /// Copies `src` into `dest` using the given copy mode.
+    public void copyAccelerationStructure(
+        NativeRef<IAccelerationStructure> dest,
+        NativeRef<IAccelerationStructure> src,
+        AccelerationStructureCopyMode mode);
+    /// Issues `queryCount` property queries over `accelerationStructureCount`
+    /// acceleration structures.
+    public void queryAccelerationStructureProperties(
+        GfxCount accelerationStructureCount,
+        NativeRef<IAccelerationStructure> *accelerationStructures,
+        GfxCount queryCount,
+        AccelerationStructureQueryDesc *queryDescs);
+    /// Serializes `source` to the device address `dest`.
+    public void serializeAccelerationStructure(DeviceAddress dest, IAccelerationStructure source);
+    /// Deserializes into `dest` from the device address `source`.
+    public void deserializeAccelerationStructure(IAccelerationStructure dest, DeviceAddress source);
+
+    // Sets the current pipeline state and returns the root shader object in `rootObject`.
+    public Result bindPipeline(IPipelineState state, out IShaderObject rootObject);
+    // Sets the current pipeline state along with a pre-created mutable root shader object.
+    public Result bindPipelineWithRootObject(IPipelineState state, IShaderObject rootObject);
+
+    /// Issues a dispatch command to start ray tracing workload with a ray tracing pipeline.
+    /// `rayGenShaderIndex` specifies the index into the shader table that identifies the ray generation shader.
+    public void dispatchRays(
+        GfxIndex rayGenShaderIndex,
+        NativeRef<IShaderTable> shaderTable,
+        GfxCount width,
+        GfxCount height,
+        GfxCount depth);
+};
+
+/// COM interface for command buffers. Commands are recorded through
+/// encoders obtained from the `encode*Commands` methods.
+[COM("5d56063f-91d4-4723-a7a7-7a-15-af-93-eb-48")]
+public interface ICommandBuffer
+{
+    // Only one encoder may be open at a time. User must call `ICommandEncoder::endEncoding`
+    // before calling other `encode*Commands` methods.
+    // Once `endEncoding` is called, the `ICommandEncoder` object becomes obsolete and is
+    // invalid for further use. To continue recording, the user must request a new encoder
+    // object by calling one of the `encode*Commands` methods again.
+    public void encodeRenderCommands(
+        IRenderPassLayout renderPass,
+        IFramebuffer framebuffer,
+        out IRenderCommandEncoder outEncoder);
+
+    // Opens a compute command encoder.
+    public void encodeComputeCommands(out Optional<IComputeCommandEncoder> encoder);
+
+    // Opens a resource command encoder.
+    public void encodeResourceCommands(out Optional<IResourceCommandEncoder> outEncoder);
+
+    // Opens a ray tracing command encoder.
+    public void encodeRayTracingCommands(out Optional<IRayTracingCommandEncoder> outEncoder);
+
+    // Closes the command buffer.
+    public void close();
+
+    // Writes the native API handle for this command buffer to `outHandle`.
+    public Result getNativeHandle(out InteropHandle outHandle);
+};
+
+public enum class QueueType // Queue kinds; `Graphics` is the only value declared.
+{
+    Graphics
+};
+public struct CommandQueueDesc // Descriptor passed to `IDevice::createCommandQueue`.
+{
+    public QueueType type; // kind of queue requested
+};
+
+[COM("14e2bed0-0ad0-4dc8-b341-06-3f-e7-2d-bf-0e")]
+public interface ICommandQueue // Submits command buffers and offers host-side and device-side waits.
+{
+    public const CommandQueueDesc* getDesc(); // returns a pointer to a CommandQueueDesc
+
+    public void executeCommandBuffers(
+        GfxCount count, // presumably the number of entries in `commandBuffers` — confirm
+        NativeRef<ICommandBuffer> *commandBuffers,
+        Optional<IFence> fenceToSignal, // Optional: a fence may be omitted
+        uint64_t newFenceValue); // presumably the value `fenceToSignal` is set to — confirm
+
+    public Result getNativeHandle(out InteropHandle outHandle); // native handle is returned through `outHandle`
+
+    public void waitOnHost();
+
+    /// Queues a device-side wait for the given fences.
+    public Result waitForFenceValuesOnDevice(GfxCount fenceCount, NativeRef<IFence> *fences, uint64_t *waitValues);
+};
+
+public enum TransientResourceHeapFlags // Values for `TransientResourceHeapDesc::flags`.
+{
+    None = 0,
+    AllowResizing = 0x1,
+};
+
+public struct TransientResourceHeapDesc // Descriptor passed to `IDevice::createTransientResourceHeap`.
+{
+    public TransientResourceHeapFlags flags;
+    public Size constantBufferSize;
+    public GfxCount samplerDescriptorCount;
+    public GfxCount uavDescriptorCount;
+    public GfxCount srvDescriptorCount;
+    public GfxCount constantBufferDescriptorCount;
+    public GfxCount accelerationStructureDescriptorCount;
+};
+
+[COM("cd48bd29-ee72-41b8-bcff-0a-2b-3a-aa-6d-0b")]
+public interface ITransientResourceHeap // Per-frame heap from which command buffers are created.
+{
+    // Waits until the GPU commands issued before the last call to `finish()` have completed, and
+    // resets all transient resources held by the heap.
+    // This method must be called before using the transient heap to issue new GPU commands.
+    // In most situations this method should be called at the beginning of each frame.
+    public Result synchronizeAndReset();
+
+    // Must be called when the application is done using this heap to issue commands. In most
+    // situations this method should be called at the end of each frame.
+    public Result finish();
+
+    // Command buffers are one-time use. Once a command buffer is submitted to the queue via
+    // `executeCommandBuffers` it is no longer valid for any further use. Command
+    // buffers must be closed before submission. The current D3D12 implementation has a limitation
+    // that only one command buffer may be recorded at a time. The user must finish recording a
+    // command buffer before creating another command buffer.
+    public Result createCommandBuffer(out Optional<ICommandBuffer> outCommandBuffer);
+};
+
+public struct SwapchainDesc // Descriptor passed to `IDevice::createSwapchain`.
+{
+    public Format format;
+    public GfxCount width, height;
+    public GfxCount imageCount;
+    public NativeRef<ICommandQueue> queue;
+    public bool enableVSync;
+};
+
+[COM("be91ba6c-0784-4308-a1-00-19-c3-66-83-44-b2")]
+public interface ISwapchain // Back-buffer access, presentation, resizing and full-screen control.
+{
+    public const SwapchainDesc* getDesc(); // returns a pointer to a SwapchainDesc
+
+    /// Returns the back buffer image at `index` through `outResource`.
+    public Result getImage(GfxIndex index, out ITextureResource outResource);
+
+    /// Presents the next image in the swapchain.
+    public Result present();
+
+    /// Returns the index of the next back buffer image that will be presented by the next
+    /// `present` call. If the swapchain is invalid/out-of-date, this method returns -1.
+    public int acquireNextImage();
+
+    /// Resizes the back buffers of this swapchain. All render target views and framebuffers
+    /// referencing the back buffer images must be freed before calling this method.
+    public Result resize(GfxCount width, GfxCount height);
+
+    // Checks whether the window is occluded.
+    public bool isOccluded();
+
+    // Toggles full-screen mode.
+    public Result setFullScreenMode(bool mode);
+};
+
+public struct DeviceInfo // Returned (by pointer) from `IDevice::getDeviceInfo`.
+{
+    public DeviceType deviceType;
+
+    public BindingStyle bindingStyle;
+
+    public ProjectionStyle projectionStyle;
+
+    /// A projection matrix that ensures the mapping of x, y to pixels
+    /// is the same on all targets (stored as 16 floats).
+    public float identityProjectionMatrix[16];
+
+    /// The name of the graphics API being used by this device.
+    public NativeString apiName;
+
+    /// The name of the graphics adapter.
+    public NativeString adapterName;
+
+    /// The clock frequency used in timestamp queries.
+    public uint64_t timestampFrequency;
+};
+
+public enum class DebugMessageType // Message category passed to `IDebugCallback::handleMessage`.
+{
+    Info, Warning, Error
+};
+public enum class DebugMessageSource // Message origin passed to `IDebugCallback::handleMessage`.
+{
+    Layer, Driver, Slang
+};
+
+[COM("B219D7E8-255A-2572-D46C-A0E5D99CEB90")]
+public interface IDebugCallback // Debug-message receiver; installed with `gfxSetDebugCallback`.
+{
+    public void handleMessage(DebugMessageType type, DebugMessageSource source, NativeString message);
+};
+
+public struct SlangDesc // Slang compiler configuration; embedded in `DeviceDesc::slang`.
+{
+    public NativeRef<slang::IGlobalSession> slangGlobalSession = {slang::IGlobalSession()}; // (optional) A Slang global session object. If null, one is created automatically.
+
+    public slang::SlangMatrixLayoutMode defaultMatrixLayoutMode = slang::SlangMatrixLayoutMode::SLANG_MATRIX_LAYOUT_ROW_MAJOR;
+
+    public NativeString *searchPaths = nullptr; // presumably an array of `searchPathCount` entries — confirm
+    public GfxCount searchPathCount = 0;
+
+    public slang::PreprocessorMacroDesc *preprocessorMacros = nullptr; // presumably an array of `preprocessorMacroCount` entries — confirm
+    public GfxCount preprocessorMacroCount = 0;
+
+    public NativeString targetProfile = ""; // (optional) Target shader profile. Original note: if null, a platform-dependent default is used. NOTE(review): the default here is "" — confirm it is treated like null.
+    public slang::SlangFloatingPointMode floatingPointMode = slang::SlangFloatingPointMode::SLANG_FLOATING_POINT_MODE_DEFAULT;
+    public slang::SlangOptimizationLevel optimizationLevel = slang::SlangOptimizationLevel::SLANG_OPTIMIZATION_LEVEL_DEFAULT;
+    public slang::SlangTargetFlags targetFlags = slang::SlangTargetFlags.None;
+    public slang::SlangLineDirectiveMode lineDirectiveMode = slang::SlangLineDirectiveMode::SLANG_LINE_DIRECTIVE_MODE_DEFAULT;
+};
+
+public struct ShaderCacheDesc // Shader cache configuration; embedded in `DeviceDesc::shaderCache`.
+{
+    // The root directory for the shader cache. If not set, the shader cache is disabled.
+    public NativeString shaderCachePath = "";
+    // The maximum number of entries stored in the cache (defaults to 0).
+    public GfxCount maxEntryCount = 0;
+};
+
+public struct DeviceInteropHandles // Three native handle slots; used by `DeviceDesc` and `IDevice::getNativeDeviceHandles`.
+{
+    public InteropHandle handles[3] = {};
+};
+
+public struct DeviceDesc // Descriptor passed to `gfxCreateDevice`.
+{
+    // The underlying API/Platform of the device.
+    public DeviceType deviceType = DeviceType::Default;
+    // The device's handles (if they exist) and their associated API. For D3D12, this contains a single InteropHandle
+    // for the ID3D12Device. For Vulkan, the first InteropHandle is the VkInstance, the second is the VkPhysicalDevice,
+    // and the third is the VkDevice. For CUDA, this only contains a single value for the CUDADevice.
+    public DeviceInteropHandles existingDeviceHandles = {};
+    // Name to identify the adapter to use
+    public NativeString adapter = "";
+    // Number of required features.
+    public GfxCount requiredFeatureCount = 0;
+    // Array of required feature names, whose size is `requiredFeatureCount`.
+    public NativeString *requiredFeatures = nullptr;
+    // A command dispatcher object that intercepts and handles actual low-level API calls (public, like its sibling fields).
+    public void *apiCommandDispatcher = nullptr;
+    // The slot (typically UAV) used to identify NVAPI intrinsics. If >=0 NVAPI is required.
+    public GfxIndex nvapiExtnSlot = -1;
+    // Configurations for the shader cache.
+    public ShaderCacheDesc shaderCache = {};
+    // Configurations for Slang compiler.
+    public SlangDesc slang = {};
+
+    public GfxCount extendedDescCount = 0; // presumably the number of entries in `extendedDescs` — confirm
+    public void **extendedDescs = nullptr;
+};
+
+[COM("715bdf26-5135-11eb-AE93-02-42-AC-13-00-02")]
+public interface IDevice // Factory for resources, views, pipelines, queues, fences and related objects.
+{
+    public Result getNativeDeviceHandles(out DeviceInteropHandles outHandles);
+
+    public bool hasFeature(NativeString feature);
+
+    /// Returns a list of features supported by the renderer.
+    public Result getFeatures(NativeString *outFeatures, Size bufferSize, GfxCount *outFeatureCount);
+
+    public Result getFormatSupportedResourceStates(Format format, ResourceStateSet *outStates);
+
+    public Result getSlangSession(NativeRef<slang::ISession>* outSlangSession);
+
+    public Result createTransientResourceHeap(
+        TransientResourceHeapDesc *desc,
+        out Optional<ITransientResourceHeap> outHeap);
+
+    /// Creates a texture resource.
+    ///
+    /// If `initData` is non-null, then it must point to an array of
+    /// `ITextureResource::SubresourceData` with one element for each
+    /// subresource of the texture being created.
+    ///
+    /// The number of subresources in a texture is:
+    ///
+    ///     effectiveElementCount * mipLevelCount
+    ///
+    /// where the effective element count is computed as:
+    ///
+    ///     effectiveElementCount = (isArray ? arrayElementCount : 1) * (isCube ? 6 : 1);
+    ///
+    public Result createTextureResource(
+        TextureResourceDesc* desc,
+        SubresourceData *initData,
+        out ITextureResource outResource);
+
+    public Result createTextureFromNativeHandle(
+        InteropHandle handle,
+        TextureResourceDesc* srcDesc,
+        out ITextureResource outResource);
+
+    public Result createTextureFromSharedHandle(
+        InteropHandle handle,
+        TextureResourceDesc *srcDesc,
+        Size size,
+        out ITextureResource outResource);
+
+    /// Creates a buffer resource.
+    public Result createBufferResource(
+        BufferResourceDesc* desc,
+        void *initData,
+        out Optional<IBufferResource> outResource);
+
+    public Result createBufferFromNativeHandle(
+        InteropHandle handle,
+        BufferResourceDesc* srcDesc,
+        out IBufferResource outResource);
+
+    public Result createBufferFromSharedHandle(
+        InteropHandle handle,
+        BufferResourceDesc* srcDesc,
+        out IBufferResource outResource);
+
+    public Result createSamplerState(SamplerStateDesc* desc, out ISamplerState outSampler);
+
+    public Result createTextureView(
+        ITextureResource texture, ResourceViewDesc* desc, out IResourceView outView);
+
+    public Result createBufferView(
+        IBufferResource buffer,
+        Optional<IBufferResource> counterBuffer, // Optional: a counter buffer may be omitted
+        ResourceViewDesc* desc,
+        out Optional<IResourceView> outView);
+
+    public Result createFramebufferLayout(FramebufferLayoutDesc* desc, out IFramebufferLayout outFrameBuffer);
+
+    public Result createFramebuffer(FramebufferDesc* desc, out IFramebuffer outFrameBuffer);
+
+    public Result createRenderPassLayout(
+        RenderPassLayoutDesc* desc,
+        out IRenderPassLayout outRenderPassLayout);
+
+    public Result createSwapchain(
+        SwapchainDesc* desc, WindowHandle window, out ISwapchain outSwapchain);
+
+    public Result createInputLayout(
+        InputLayoutDesc* desc, out IInputLayout outLayout);
+
+    public Result createCommandQueue(CommandQueueDesc* desc, out Optional<ICommandQueue> outQueue);
+
+    public Result createShaderObject(
+        slang::TypeReflection *type,
+        ShaderObjectContainerType container,
+        out IShaderObject outObject);
+
+    public Result createMutableShaderObject(
+        slang::TypeReflection *type,
+        ShaderObjectContainerType container,
+        out IShaderObject outObject);
+
+    public Result createShaderObjectFromTypeLayout(
+        slang::TypeLayoutReflection *typeLayout, out IShaderObject outObject);
+
+    public Result createMutableShaderObjectFromTypeLayout(
+        slang::TypeLayoutReflection *typeLayout, out IShaderObject outObject);
+
+    public Result createMutableRootShaderObject(
+        IShaderProgram program,
+        out IShaderObject outObject);
+
+    public Result createShaderTable(ShaderTableDesc* desc, out IShaderTable outTable);
+
+    public Result createProgram(
+        void *desc, // untyped descriptor pointer; NOTE(review): expected layout is not visible here
+        out IShaderProgram outProgram,
+        out slang::ISlangBlob outDiagnosticBlob);
+
+    public Result createProgram2(
+        ShaderProgramDesc2 *desc,
+        out Optional<IShaderProgram> outProgram,
+        out Optional<slang::ISlangBlob> outDiagnosticBlob);
+
+    public Result createGraphicsPipelineState(
+        GraphicsPipelineStateDesc *desc,
+        out Optional<IPipelineState> outState);
+
+    public Result createComputePipelineState(
+        ComputePipelineStateDesc* desc,
+        out Optional<IPipelineState> outState);
+
+    public Result createRayTracingPipelineState(
+        RayTracingPipelineStateDesc *desc, out Optional<IPipelineState> outState);
+
+    /// Reads back a texture resource and stores the result in `outBlob`.
+    public Result readTextureResource(
+        ITextureResource resource,
+        ResourceState state,
+        out slang::ISlangBlob outBlob,
+        out Size outRowPitch,
+        out Size outPixelSize);
+
+    public Result readBufferResource(
+        IBufferResource buffer,
+        Offset offset,
+        Size size,
+        out Optional<slang::ISlangBlob> outBlob);
+
+    /// Returns a pointer to the `DeviceInfo` describing this device.
+    public DeviceInfo* getDeviceInfo();
+
+    public Result createQueryPool(
+        QueryPoolDesc* desc, out IQueryPool outPool);
+
+    public Result getAccelerationStructurePrebuildInfo(
+        AccelerationStructureBuildInputs* buildInputs,
+        out AccelerationStructurePrebuildInfo outPrebuildInfo);
+
+    public Result createAccelerationStructure(
+        AccelerationStructureCreateDesc* desc,
+        out IAccelerationStructure outView);
+
+    public Result createFence(FenceDesc* desc, out IFence outFence);
+
+    /// Waits on the host for the fences to be signaled.
+    /// `timeout` is in nanoseconds, can be set to `kTimeoutInfinite`.
+    public Result waitForFences(
+        GfxCount fenceCount,
+        NativeRef<IFence>* fences,
+        uint64_t *values,
+        bool waitForAll,
+        uint64_t timeout);
+
+    public Result getTextureAllocationInfo(
+        TextureResourceDesc* desc, out Size outSize, out Size outAlignment);
+
+    public Result getTextureRowAlignment(out Size outAlignment);
+};
+
+public struct ShaderCacheStats // Counters returned through `IShaderCache::getShaderCacheStats`.
+{
+    public GfxCount hitCount;
+    public GfxCount missCount;
+    public GfxCount entryCount;
+};
+
+[COM("8eccc8ec-5c04-4a51-9975-13-f8-fe-a1-59-f3")]
+public interface IShaderCache // GUID must differ from IDevice's; value mirrors SLANG_UUID_IShaderCache in slang-gfx.h — confirm against the shipped gfx binary.
+{
+    public Result clearShaderCache();
+    public Result getShaderCacheStats(out ShaderCacheStats outStats); // counters are returned through `outStats`
+    public Result resetShaderCacheStats();
+};
+
+#define SLANG_GFX_IMPORT [DllImport("gfx")]
+/// Checks if format is compressed
+SLANG_GFX_IMPORT public bool gfxIsCompressedFormat(Format format);
+
+/// Checks if format is typeless
+SLANG_GFX_IMPORT public bool gfxIsTypelessFormat(Format format);
+
+/// Gets information about the format
+SLANG_GFX_IMPORT public Result gfxGetFormatInfo(Format format, FormatInfo *outInfo);
+
+/// Creates a device described by `desc`; the device is returned through `outDevice`.
+SLANG_GFX_IMPORT public Result gfxCreateDevice(const DeviceDesc* desc, out Optional<IDevice> outDevice);
+
+/// Reports current set of live objects in gfx.
+/// Currently this only calls D3D's ReportLiveObjects.
+SLANG_GFX_IMPORT public Result gfxReportLiveObjects();
+
+/// Sets a callback for receiving debug messages.
+/// The layer does not hold a strong reference to the callback object.
+/// The user is responsible for keeping the callback object alive.
+SLANG_GFX_IMPORT public Result gfxSetDebugCallback(IDebugCallback callback);
+
+/// Enables the debug layer. The debug layer will check all `gfx` calls and verify that uses are valid.
+SLANG_GFX_IMPORT public void gfxEnableDebugLayer();
+
+SLANG_GFX_IMPORT public NativeString gfxGetDeviceTypeName(DeviceType type);
+
+public bool succeeded(Result code) // True exactly when `code` is not negative.
+{
+    return !(code < 0);
+}
+
+}
diff --git a/external/slang/bin/slang-glsl-module.dll b/external/slang/bin/slang-glsl-module.dll
new file mode 100644
index 00000000..e33b19ad
Binary files /dev/null and b/external/slang/bin/slang-glsl-module.dll differ
diff --git a/external/slang/bin/slang-glslang.dll b/external/slang/bin/slang-glslang.dll
new file mode 100644
index 00000000..e1c8780d
Binary files /dev/null and b/external/slang/bin/slang-glslang.dll differ
diff --git a/external/slang/bin/windows-x64/release/slang-llvm.dll b/external/slang/bin/slang-llvm.dll
similarity index 70%
rename from external/slang/bin/windows-x64/release/slang-llvm.dll
rename to external/slang/bin/slang-llvm.dll
index eda4fce3..2a38a33e 100644
Binary files a/external/slang/bin/windows-x64/release/slang-llvm.dll and b/external/slang/bin/slang-llvm.dll differ
diff --git a/external/slang/bin/slang-rt.dll b/external/slang/bin/slang-rt.dll
new file mode 100644
index 00000000..64a37478
Binary files /dev/null and b/external/slang/bin/slang-rt.dll differ
diff --git a/external/slang/bin/slang.dll b/external/slang/bin/slang.dll
new file mode 100644
index 00000000..96493165
Binary files /dev/null and b/external/slang/bin/slang.dll differ
diff --git a/external/slang/bin/slang.slang b/external/slang/bin/slang.slang
new file mode 100644
index 00000000..26ad7dce
--- /dev/null
+++ b/external/slang/bin/slang.slang
@@ -0,0 +1,444 @@
+public namespace slang
+{
+
+public typedef int32_t Result; // 32-bit signed result code
+public typedef uint64_t Size;  // 64-bit unsigned size
+public typedef int64_t Int;    // 64-bit signed integer
+public typedef uint64_t UInt;  // 64-bit unsigned integer
+
+/*!
+@brief Severity of a diagnostic generated by the compiler.
+Values come from the enum below, with higher values representing more severe
+conditions, and all values >= SLANG_SEVERITY_ERROR indicating compilation
+failure.
+*/
+public enum SlangSeverity
+{
+    SLANG_SEVERITY_DISABLED = 0, /**< A message that is disabled, filtered out. */
+    SLANG_SEVERITY_NOTE,         /**< An informative message. */
+    SLANG_SEVERITY_WARNING,      /**< A warning, which indicates a possible problem. */
+    SLANG_SEVERITY_ERROR,        /**< An error, indicating that compilation failed. */
+    SLANG_SEVERITY_FATAL,        /**< An unrecoverable error, which forced compilation to abort. */
+    SLANG_SEVERITY_INTERNAL,     /**< An internal error, indicating a logic error in the compiler. */
+};
+
+public enum SlangDiagnosticFlags // Diagnostic option bits; each value is a distinct power of two.
+{
+    SLANG_DIAGNOSTIC_FLAG_VERBOSE_PATHS = 0x01,
+    SLANG_DIAGNOSTIC_FLAG_TREAT_WARNINGS_AS_ERRORS = 0x02
+};
+
+public enum SlangBindableResourceType // Resource categories; numbering starts at SLANG_NON_BINDABLE = 0.
+{
+    SLANG_NON_BINDABLE = 0,
+    SLANG_TEXTURE,
+    SLANG_SAMPLER,
+    SLANG_UNIFORM_BUFFER,
+    SLANG_STORAGE_BUFFER,
+};
+
+public enum SlangCompileTarget // Code generation target formats; used by `TargetDesc::format`.
+{
+    SLANG_TARGET_UNKNOWN,
+    SLANG_TARGET_NONE,
+    SLANG_GLSL,
+    SLANG_GLSL_VULKAN,          //< deprecated: just use `SLANG_GLSL`
+    SLANG_GLSL_VULKAN_ONE_DESC, //< deprecated
+    SLANG_HLSL,
+    SLANG_SPIRV,
+    SLANG_SPIRV_ASM,
+    SLANG_DXBC,
+    SLANG_DXBC_ASM,
+    SLANG_DXIL,
+    SLANG_DXIL_ASM,
+    SLANG_C_SOURCE,              ///< The C language
+    SLANG_CPP_SOURCE,            ///< C++ code for shader kernels.
+    SLANG_CPP_PYTORCH_BINDING,
+    SLANG_HOST_EXECUTABLE,       ///< Standalone binary executable (for hosting CPU/OS)
+    SLANG_SHADER_SHARED_LIBRARY, ///< A shared library/Dll for shader kernels (for hosting CPU/OS)
+    SLANG_SHADER_HOST_CALLABLE,  ///< A CPU target that makes the compiled shader code available to be run immediately
+    SLANG_CUDA_SOURCE,           ///< CUDA source
+    SLANG_PTX,                   ///< PTX
+    SLANG_OBJECT_CODE,           ///< Object code that can be used for later linking
+    SLANG_HOST_CPP_SOURCE,       ///< C++ code for host library or executable.
+    SLANG_HOST_HOST_CALLABLE,    ///<
+    SLANG_TARGET_COUNT_OF,       // last enumerator
+};
+
+/* A "container format" describes the way that the outputs
+for multiple files, entry points, targets, etc. should be
+combined into a single artifact for output. */
+public enum SlangContainerFormat
+{
+    /* Don't generate a container. */
+    SLANG_CONTAINER_FORMAT_NONE,
+
+    /* Generate a container in the `.slang-module` format,
+    which includes reflection information, compiled kernels, etc. */
+    SLANG_CONTAINER_FORMAT_SLANG_MODULE, // last declared format
+};
+
+public enum SlangPassThrough : int // Identifiers for pass-through (downstream) tools; backed by `int`.
+{
+    SLANG_PASS_THROUGH_NONE,
+    SLANG_PASS_THROUGH_FXC,
+    SLANG_PASS_THROUGH_DXC,
+    SLANG_PASS_THROUGH_GLSLANG,
+    SLANG_PASS_THROUGH_SPIRV_DIS,
+    SLANG_PASS_THROUGH_CLANG,         ///< Clang C/C++ compiler
+    SLANG_PASS_THROUGH_VISUAL_STUDIO, ///< Visual Studio C/C++ compiler
+    SLANG_PASS_THROUGH_GCC,           ///< GCC C/C++ compiler
+    SLANG_PASS_THROUGH_GENERIC_C_CPP, ///< Generic C or C++ compiler, which is decided by the source type
+    SLANG_PASS_THROUGH_NVRTC,         ///< NVRTC CUDA compiler
+    SLANG_PASS_THROUGH_LLVM,          ///< LLVM 'compiler' - includes LLVM and Clang
+    SLANG_PASS_THROUGH_SPIRV_OPT,
+    SLANG_PASS_THROUGH_COUNT_OF,      // last enumerator
+};
+
+/* Defines an archive type used to hold a 'file system' type structure. */
+public enum SlangArchiveType : int
+{
+    SLANG_ARCHIVE_TYPE_UNDEFINED,
+    SLANG_ARCHIVE_TYPE_ZIP,
+    SLANG_ARCHIVE_TYPE_RIFF, ///< Riff container with no compression
+    SLANG_ARCHIVE_TYPE_RIFF_DEFLATE,
+    SLANG_ARCHIVE_TYPE_RIFF_LZ4,
+    SLANG_ARCHIVE_TYPE_COUNT_OF, // last enumerator
+};
+
+/*!
+Flags to control compilation behavior. The deprecated entries are defined as 0.
+*/
+public enum SlangCompileFlags
+{
+    /* Do as little mangling of names as possible, to try to preserve original names */
+    SLANG_COMPILE_FLAG_NO_MANGLING = 1 << 3,
+
+    /* Skip the code generation step; just check the code and generate layout */
+    SLANG_COMPILE_FLAG_NO_CODEGEN = 1 << 4,
+
+    /* Obfuscate shader names on release products */
+    SLANG_COMPILE_FLAG_OBFUSCATE = 1 << 5,
+
+    /* Deprecated flags: kept around to allow existing applications to
+    compile. Note that the relevant features will still be left in
+    their default state. */
+    SLANG_COMPILE_FLAG_NO_CHECKING = 0,
+    SLANG_COMPILE_FLAG_SPLIT_MIXED_TYPES = 0,
+};
+
+/*!
+@brief Flags to control code generation behavior of a compilation target. */
+public enum SlangTargetFlags
+{
+    None = 0, // no flags set
+
+    /* When compiling for a D3D Shader Model 5.1 or higher target, allocate
+       distinct register spaces for parameter blocks.
+
+       @deprecated This behavior is now enabled unconditionally.
+    */
+    SLANG_TARGET_FLAG_PARAMETER_BLOCKS_USE_REGISTER_SPACES = 1 << 4,
+
+    /* When set, will generate target code that contains all entry points defined
+       in the input source or specified via the `spAddEntryPoint` function in a
+       single output module (library/source file).
+    */
+    SLANG_TARGET_FLAG_GENERATE_WHOLE_PROGRAM = 1 << 8,
+
+    /* When set, will dump out the IR between intermediate compilation steps. */
+    SLANG_TARGET_FLAG_DUMP_IR = 1 << 9,
+
+    /* When set, will generate SPIRV directly instead of going through glslang. */
+    SLANG_TARGET_FLAG_GENERATE_SPIRV_DIRECTLY = 1 << 10,
+};
+
+/*!
+@brief Options to control floating-point precision guarantees for a target (see `TargetDesc::floatingPointMode`).
+*/
+public enum SlangFloatingPointMode
+{
+    SLANG_FLOATING_POINT_MODE_DEFAULT = 0,
+    SLANG_FLOATING_POINT_MODE_FAST,
+    SLANG_FLOATING_POINT_MODE_PRECISE,
+};
+
+/*!
+@brief Options to control emission of `#line` directives.
+*/
+public enum SlangLineDirectiveMode
+{
+    SLANG_LINE_DIRECTIVE_MODE_DEFAULT = 0, /**< Default behavior: pick behavior based on target. */
+    SLANG_LINE_DIRECTIVE_MODE_NONE,        /**< Don't emit line directives at all. */
+    SLANG_LINE_DIRECTIVE_MODE_STANDARD,    /**< Emit standard C-style `#line` directives. */
+    SLANG_LINE_DIRECTIVE_MODE_GLSL,        /**< Emit GLSL-style directives with file *number* instead of name. */
+};
+
+public enum SlangSourceLanguage : int // Source language identifiers; backed by `int`.
+{
+    SLANG_SOURCE_LANGUAGE_UNKNOWN,
+    SLANG_SOURCE_LANGUAGE_SLANG,
+    SLANG_SOURCE_LANGUAGE_HLSL,
+    SLANG_SOURCE_LANGUAGE_GLSL,
+    SLANG_SOURCE_LANGUAGE_C,
+    SLANG_SOURCE_LANGUAGE_CPP,
+    SLANG_SOURCE_LANGUAGE_CUDA,
+    SLANG_SOURCE_LANGUAGE_COUNT_OF, // last enumerator
+};
+
+public enum SlangProfileID // Only the UNKNOWN value is declared here; used by `TargetDesc::profile`.
+{
+    SLANG_PROFILE_UNKNOWN,
+};
+
+public enum SlangCapabilityID // Only the UNKNOWN value (0) is declared here.
+{
+    SLANG_CAPABILITY_UNKNOWN = 0,
+};
+
+public enum SlangMatrixLayoutMode // Matrix layout choices; used by `SessionDesc::defaultMatrixLayoutMode`.
+{
+    SLANG_MATRIX_LAYOUT_MODE_UNKNOWN = 0,
+    SLANG_MATRIX_LAYOUT_ROW_MAJOR,
+    SLANG_MATRIX_LAYOUT_COLUMN_MAJOR,
+};
+
+public enum SlangStage // Shader stage identifiers, numbered implicitly from SLANG_STAGE_NONE.
+{
+    SLANG_STAGE_NONE,
+    SLANG_STAGE_VERTEX,
+    SLANG_STAGE_HULL,
+    SLANG_STAGE_DOMAIN,
+    SLANG_STAGE_GEOMETRY,
+    SLANG_STAGE_FRAGMENT,
+    SLANG_STAGE_COMPUTE,
+    SLANG_STAGE_RAY_GENERATION,
+    SLANG_STAGE_INTERSECTION,
+    SLANG_STAGE_ANY_HIT,
+    SLANG_STAGE_CLOSEST_HIT,
+    SLANG_STAGE_MISS,
+    SLANG_STAGE_CALLABLE,
+    SLANG_STAGE_MESH,
+    SLANG_STAGE_AMPLIFICATION,
+};
+
+public enum SlangDebugInfoLevel // Amount of debug information to emit.
+{
+    SLANG_DEBUG_INFO_LEVEL_NONE = 0, /**< Don't emit debug information at all. */
+    SLANG_DEBUG_INFO_LEVEL_MINIMAL,  /**< Emit as little debug information as possible, while still supporting stack traces. */
+    SLANG_DEBUG_INFO_LEVEL_STANDARD, /**< Emit whatever is the standard level of debug information for each target. */
+    SLANG_DEBUG_INFO_LEVEL_MAXIMAL,  /**< Emit as much debug information as possible for each target. */
+};
+
+public enum SlangOptimizationLevel // Optimization effort; used by `TargetDesc::optimizationLevel`.
+{
+    SLANG_OPTIMIZATION_LEVEL_NONE = 0, /**< Don't optimize at all. */
+    SLANG_OPTIMIZATION_LEVEL_DEFAULT,  /**< Default optimization level: balance code quality and compilation time. */
+    SLANG_OPTIMIZATION_LEVEL_HIGH,     /**< Optimize aggressively. */
+    SLANG_OPTIMIZATION_LEVEL_MAXIMAL,  /**< Include optimizations that may take a very long time, or may involve severe space-vs-speed tradeoffs. */
+};
+public enum SlangTypeKind // Type categories, numbered implicitly from NONE.
+{
+    NONE,
+    STRUCT,
+    ARRAY,
+    MATRIX,
+    VECTOR,
+    SCALAR,
+    CONSTANT_BUFFER,
+    RESOURCE,
+    SAMPLER_STATE,
+    TEXTURE_BUFFER,
+    SHADER_STORAGE_BUFFER,
+    PARAMETER_BLOCK,
+    GENERIC_TYPE_PARAMETER,
+    INTERFACE,
+    OUTPUT_STREAM,
+    SPECIALIZED,
+    FEEDBACK,
+    COUNT, // last enumerator
+};
+
+public enum SlangScalarType // Scalar type identifiers, numbered implicitly from NONE.
+{
+    NONE,
+    VOID,
+    BOOL,
+    INT32,
+    UINT32,
+    INT64,
+    UINT64,
+    FLOAT16,
+    FLOAT32,
+    FLOAT64,
+    INT8,
+    UINT8,
+    INT16,
+    UINT16,
+};
+
+public struct TypeReflection // Empty declaration; the visible code only refers to it through pointers.
+{
+};
+
+public enum CompileStdLibFlags // Single flag bit declared: WriteDocumentation.
+{
+    WriteDocumentation = 0x1,
+};
+
+[COM("8BA5FB08-5195-40e2-AC58-0D-98-9C-3A-01-02")]
+public interface ISlangBlob // Exposes a buffer as a pointer plus a size.
+{
+    public void *getBufferPointer(); // pointer to the buffer
+    public Size getBufferSize(); // buffer size; presumably in bytes — confirm
+};
+
+/** Description of a code generation target.
+ */
+public struct TargetDesc
+{
+    /** The size of this structure, in bytes (hard-coded to 40; presumably the padded native layout — confirm).
+     */
+    public Size structureSize = 40;
+
+    /** The target format to generate code for (e.g., SPIR-V, DXIL, etc.)
+     */
+    public SlangCompileTarget format = SlangCompileTarget.SLANG_TARGET_UNKNOWN;
+
+    /** The compilation profile supported by the target (e.g., "Shader Model 5.1")
+     */
+    public SlangProfileID profile = SlangProfileID.SLANG_PROFILE_UNKNOWN;
+
+    /** Flags for the code generation target. Currently unused. */
+    public SlangTargetFlags flags = SlangTargetFlags.None;
+
+    /** Default mode to use for floating-point operations on the target.
+     */
+    public SlangFloatingPointMode floatingPointMode = SlangFloatingPointMode.SLANG_FLOATING_POINT_MODE_DEFAULT;
+
+    /** Optimization level to use for the target.
+     */
+    public SlangOptimizationLevel optimizationLevel = SlangOptimizationLevel.SLANG_OPTIMIZATION_LEVEL_DEFAULT;
+
+    /** The line directive mode for output source code.
+     */
+    public SlangLineDirectiveMode lineDirectiveMode = SlangLineDirectiveMode.SLANG_LINE_DIRECTIVE_MODE_DEFAULT;
+
+    /** Whether to force `scalar` layout for GLSL shader storage buffers.
+     */
+    public bool forceGLSLScalarBufferLayout = false;
+};
+
+public enum SessionFlags // Values for `SessionDesc::flags`; only the empty set is declared.
+{
+    kSessionFlags_None = 0
+};
+
+public struct PreprocessorMacroDesc // A preprocessor macro given as a name/value string pair.
+{
+    public NativeString name;
+    public NativeString value;
+};
+
+public struct SessionDesc // Configuration for a session: targets, flags, search paths, macros, file system.
+{
+    /** The size of this structure, in bytes (hard-coded to 72; presumably assumes 64-bit pointers and 4-byte enums — confirm).
+     */
+    public Size structureSize = 72;
+
+    /** Code generation targets to include in the session.
+     */
+    public TargetDesc *targets = nullptr;
+    public Int targetCount = 0;
+
+    /** Flags to configure the session.
+     */
+    public SessionFlags flags = SessionFlags.kSessionFlags_None;
+
+    /** Default layout to assume for variables with matrix types.
+     */
+    public SlangMatrixLayoutMode defaultMatrixLayoutMode = SlangMatrixLayoutMode.SLANG_MATRIX_LAYOUT_ROW_MAJOR;
+
+    /** Paths to use when searching for `#include`d or `import`ed files.
+     */
+    public NativeString *searchPaths = nullptr;
+    public Int searchPathCount = 0;
+
+    public PreprocessorMacroDesc *preprocessorMacros = nullptr; // presumably an array of `preprocessorMacroCount` entries — confirm
+    public Int preprocessorMacroCount = 0;
+
+    public void *fileSystem = nullptr; // untyped pointer; NOTE(review): expected interface is not visible here
+};
+
+/** A global session for interaction with the Slang library.
+
+An application may create and re-use a single global session across
+multiple sessions, in order to amortize startup costs (in current
+Slang this is mostly the cost of loading the Slang standard library).
+
+The global session is currently *not* thread-safe and objects created from
+a single global session should only be used from a single thread at
+a time. No methods are declared on this interface in this file.
+*/
+[COM("c140b5fd-0c78-452e-ba7c-1a-1e-70-c7-f7-1c")]
+public interface IGlobalSession
+{
+};
+
+public enum class ContainerType // Container kinds, numbered implicitly from None.
+{
+    None, UnsizedArray, StructuredBuffer, ConstantBuffer, ParameterBlock
+};
+
+/** A session provides a scope for code that is loaded.
+
+A session can be used to load modules of Slang source code,
+and to request target-specific compiled binaries and layout
+information.
+
+In order to be able to load code, the session owns a set
+of active "search paths" for resolving `#include` directives
+and `import` declarations, as well as a set of global
+preprocessor definitions that will be used for all code
+that gets `import`ed in the session.
+
+If multiple user shaders are loaded in the same session,
+and import the same module (e.g., two source files do `import X`)
+then there will only be one copy of `X` loaded within the session.
+
+In order to be able to generate target code, the session
+owns a list of available compilation targets, which specify
+code generation options.
+
+Code loaded and compiled within a session is owned by the session
+and will remain resident in memory until the session is released.
+Applications wishing to control the memory usage for compiled
+and loaded code should use multiple sessions.
+*/
+[COM("67618701-d116-468f-ab3b-47-4b-ed-ce-0e-3d")]
+public interface ISession // no methods are declared on this interface in this file
+{
+};
+
+[COM("5bc42be8-5c50-4929-9e5e-d15e7c24015f")]
+public interface IComponentType // no methods are declared on this interface in this file
+{
+}
+
+public struct TypeLayoutReflection { } // Empty declaration with no members.
+
+/** The kind of specialization argument; stored as `int32_t`. */
+public enum class SpecializationArgKind : int32_t
+{
+    Unknown, /**< An invalid specialization argument. */
+    Type,    /**< Specialize to a type. */
+};
+
+public struct SpecializationArg // A specialization argument: a kind tag plus its payload.
+{
+    public SpecializationArgKind kind;
+    /** A type specialization argument, used for `SpecializationArgKind::Type`. */
+    public TypeReflection *type;
+}
+
+}
diff --git a/external/slang/bin/slangc.exe b/external/slang/bin/slangc.exe
new file mode 100644
index 00000000..941202f2
Binary files /dev/null and b/external/slang/bin/slangc.exe differ
diff --git a/external/slang/bin/slangd.exe b/external/slang/bin/slangd.exe
new file mode 100644
index 00000000..39848ffc
Binary files /dev/null and b/external/slang/bin/slangd.exe differ
diff --git a/external/slang/bin/windows-x64/release/gfx.dll b/external/slang/bin/windows-x64/release/gfx.dll
deleted file mode 100644
index c836649e..00000000
Binary files a/external/slang/bin/windows-x64/release/gfx.dll and /dev/null differ
diff --git a/external/slang/bin/windows-x64/release/gfx.lib b/external/slang/bin/windows-x64/release/gfx.lib
deleted file mode 100644
index 70749dee..00000000
Binary files a/external/slang/bin/windows-x64/release/gfx.lib and /dev/null differ
diff --git a/external/slang/bin/windows-x64/release/slang-glslang.dll b/external/slang/bin/windows-x64/release/slang-glslang.dll
deleted file mode 100644
index 2c5ac457..00000000
Binary files a/external/slang/bin/windows-x64/release/slang-glslang.dll and /dev/null differ
diff --git a/external/slang/bin/windows-x64/release/slang-rt.dll b/external/slang/bin/windows-x64/release/slang-rt.dll
deleted file mode 100644
index dbd2620a..00000000
Binary files a/external/slang/bin/windows-x64/release/slang-rt.dll and /dev/null differ
diff --git a/external/slang/bin/windows-x64/release/slang-rt.lib b/external/slang/bin/windows-x64/release/slang-rt.lib
deleted file mode 100644
index b162fa1c..00000000
Binary files a/external/slang/bin/windows-x64/release/slang-rt.lib and /dev/null differ
diff --git a/external/slang/bin/windows-x64/release/slang.dll b/external/slang/bin/windows-x64/release/slang.dll
deleted file mode 100644
index 575af41e..00000000
Binary files a/external/slang/bin/windows-x64/release/slang.dll and /dev/null differ
diff --git a/external/slang/bin/windows-x64/release/slang.lib b/external/slang/bin/windows-x64/release/slang.lib
deleted file mode 100644
index 9cb366c1..00000000
Binary files a/external/slang/bin/windows-x64/release/slang.lib and /dev/null differ
diff --git a/external/slang/bin/windows-x64/release/slangc.exe b/external/slang/bin/windows-x64/release/slangc.exe
deleted file mode 100644
index 322a6c14..00000000
Binary files a/external/slang/bin/windows-x64/release/slangc.exe and /dev/null differ
diff --git a/external/slang/bin/windows-x64/release/slangd.exe b/external/slang/bin/windows-x64/release/slangd.exe
deleted file mode 100644
index 726822e1..00000000
Binary files a/external/slang/bin/windows-x64/release/slangd.exe and /dev/null differ
diff --git a/external/slang/cmake/slangConfig.cmake b/external/slang/cmake/slangConfig.cmake
new file mode 100644
index 00000000..988e985a
--- /dev/null
+++ b/external/slang/cmake/slangConfig.cmake
@@ -0,0 +1,44 @@
+
+
+####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() #######
+####### Any changes to this file will be overwritten by the next CMake run ####
+####### The input file was SlangConfig.cmake.in                            ########
+
+get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../" ABSOLUTE)
+
+macro(set_and_check _var _file)  # assign ${_file} to the variable named by _var, then verify the path
+  set(${_var} "${_file}")  # macro (not function): the variable is set in the caller's scope
+  if(NOT EXISTS "${_file}")  # a missing file or directory aborts configuration
+    message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !")
+  endif()
+endmacro()
+
+macro(check_required_components _NAME)  # set <_NAME>_FOUND to FALSE if a required component is missing
+  foreach(comp ${${_NAME}_FIND_COMPONENTS})  # iterate over every component listed for this package
+    if(NOT ${_NAME}_${comp}_FOUND)
+      if(${_NAME}_FIND_REQUIRED_${comp})  # components not flagged as required may be absent
+        set(${_NAME}_FOUND FALSE)
+      endif()
+    endif()
+  endforeach()
+endmacro()
+
+####################################################################################
+
+if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")  # targets file is not loaded for Emscripten builds
+  include("${CMAKE_CURRENT_LIST_DIR}/slangTargets.cmake")  # sibling file in this cmake/ directory
+  check_required_components("slang")  # may set slang_FOUND to FALSE (see macro above)
+endif()
+
+if(ON)  # constant condition, presumably substituted from SlangConfig.cmake.in; block always runs
+
+  find_program(SLANGC_EXECUTABLE "slangc" HINTS ENV PATH "${PACKAGE_PREFIX_DIR}/bin")
+
+  if (NOT SLANGC_EXECUTABLE)  # not finding slangc is reported but is not an error
+      message(STATUS "slangc executable not found; ensure it is available in your PATH.")
+  endif()
+    
+  set(SLANG_EXECUTABLE ${SLANGC_EXECUTABLE} CACHE STRING "Path to the slangc executable")  # set even when lookup failed
+
+endif()
+
diff --git a/external/slang/cmake/slangConfigVersion.cmake b/external/slang/cmake/slangConfigVersion.cmake
new file mode 100644
index 00000000..b7693245
--- /dev/null
+++ b/external/slang/cmake/slangConfigVersion.cmake
@@ -0,0 +1,65 @@
+# This is a basic version file for the Config-mode of find_package().
+# It is used by write_basic_package_version_file() as input file for configure_file()
+# to create a version-file which can be installed along a config.cmake file.
+#
+# The created file sets PACKAGE_VERSION_EXACT if the current version string and
+# the requested version string are exactly the same and it sets
+# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version,
+# but only if the requested major version is the same as the current one.
+# The variable CVF_VERSION must be set before calling configure_file().
+
+
+set(PACKAGE_VERSION "2025.6.3")  # version of this Slang package, compared against the requested one
+
+if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)  # package older than requested: never compatible
+  set(PACKAGE_VERSION_COMPATIBLE FALSE)
+else()
+
+  if("2025.6.3" MATCHES "^([0-9]+)\\.")  # capture the major component (digits before the first '.')
+    set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}")
+    if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0)
+      string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}")  # drop leading zeros for the STREQUAL tests below
+    endif()
+  else()
+    set(CVF_VERSION_MAJOR "2025.6.3")  # no '.' in the version: treat the whole string as the major
+  endif()
+
+  if(PACKAGE_FIND_VERSION_RANGE)
+    # both endpoints of the range must have the expected major version
+    math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1")
+    if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
+        OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR)
+          OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT)))
+      set(PACKAGE_VERSION_COMPATIBLE FALSE)
+    elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
+        AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX)
+        OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX)))
+      set(PACKAGE_VERSION_COMPATIBLE TRUE)
+    else()
+      set(PACKAGE_VERSION_COMPATIBLE FALSE)
+    endif()
+  else()
+    if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR)  # single version requested: majors must match
+      set(PACKAGE_VERSION_COMPATIBLE TRUE)
+    else()
+      set(PACKAGE_VERSION_COMPATIBLE FALSE)
+    endif()
+
+    if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)  # exact match is flagged in addition to compatibility
+      set(PACKAGE_VERSION_EXACT TRUE)
+    endif()
+  endif()
+endif()
+
+
+# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it:
+if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "")  # "8" is the installed side's literal, so only the first test can be true
+  return()
+endif()
+
+# check that the installed version has the same 32/64bit-ness as the one which is currently searching:
+if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "8")  # consumer pointer size differs from the packaged 8 bytes
+  math(EXPR installedBits "8 * 8")  # 8 bytes * 8 bits = 64
+  set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)")  # annotate the reported version with the bitness
+  set(PACKAGE_VERSION_UNSUITABLE TRUE)
+endif()
diff --git a/external/slang/docs/building.md b/external/slang/docs/building.md
deleted file mode 100644
index 89ee9a99..00000000
--- a/external/slang/docs/building.md
+++ /dev/null
@@ -1,340 +0,0 @@
-# Building Slang From Source
-
-We support building with both Premake and CMake. Both systems are described below.
-
-# Building Slang with Premake
-
-## Windows Using Visual Studio
-
-If you are using Visual Studio on Windows, then you can just open `slang.sln` and build your desired platform/configuration. `slang.sln` and associated project files are actually just generated using [`premake5`](https://premake.github.io/). See instructions in premake section below for further explanation.
-
-Whilst using the provided `slang.sln` solution is a fast and easy way to get a build to work, it does not make all binary dependencies available which can add features and improve performance (such as [slang-llvm](https://github.com/shader-slang/slang-llvm)). To get the binary dependencies create the solution using [`premake5`](https://premake.github.io/) described in a later section.
-
-## Other Targets
-
-Slang uses [`premake5`](https://premake.github.io/) to generate projects (such as `Makefile`s) that can then be used to build Slang binaries from source.
-
-For Linux and other targets the section below on `premake` describes the process.
-
-Some targets below are described as 'unofficial'. In practice this means that they are not tested as part of contiguous integration. Thus unfortunately it is quite possible from time to time for them to break on a merge of a PR. That said, if broken it is likely only very minor changes are needed to make them work again.
-
-### Generated Files
-
-Slang as part of it's build process generates header files, which are then used to compile the main Slang project. If you use `premake` to create your project, it will automatically generate these files before compiling the rest of the Slang. These are the current header generations which are created via the `slang-generate` and other tools...
-
-* core.meta.slang -> core.meta.slang.h
-* hlsl.meta.slang -> hlsl.meta.slang.h
-
-Other files that are generated have `generated` as part of their name.
-
-It may be necessary or desirable to create a build of Slang without using `premake`.
-
-One way to do this would be to first compile slang-generate and then invoke it directly or as a dependency in your build. Another perhaps simpler way would be to first compile the same Slang source on another system that does support `premake`, or using a preexisting build mechanism (such as Visual Studio projects on Windows). Then copy the generated header files to your target system. This is appropriate because the generated files are indentical across platforms. It does of course mean that if `core.meta.slang` or `hlsl.meta.slang` files change the headers will need to be regenerated.
-
-## Premake
-
-Slang uses the tool [`premake5`](https://premake.github.io/) in order to generate projects that can be built on different targets. On Linux premake will generate Makefile/s and on windows it will generate a Visual Studio solution. Information on invoking premake for different kinds of targets can be found [here](https://github.com/premake/premake-core/wiki/Using-Premake).
-
-Slang includes `premake5` as part of `slang-binaries` which is in the `external` directory. For the external directory to be setup it is necessary to have updated submodules with `git submodule update --init`.
-
-If you are on a unix-like operating system such as OSX/Linux, it may be necesary to make premake5 executable. Use
-
-```
-% chmod u+x external/slang-binaries/premake/***path to premake version and os***/premake5
-```
-
-Alternatively you can download and install [`premake5`](https://premake.github.io/) on your build system.
-
-Run `premake5` with `--help` to in the root of the Slang project to see available command line options (assuming `premake5` is in your `PATH`):
-
-```
-% premake5 --help
-```
-
-To download and use binaries for a particular architecture the [slang-pack](https://github.com/shader-slang/slang-binaries/tree/master/lua-modules) package manager can be invoked via the additional `--deps` and `--arch` options. If `--arch` isn't specified it defaults to `x64`. On Windows targets, the Visual Studio platform setting should be consistent with the `--arch` option such that the appropriate binary dependencies are available. The `--deps=true` option just indicates that on invoking premake it should make the binary dependencies for the `arch` available.
-
-Supported `--arch` options are
-
-* x64
-* x86
-* aarch64
-* arm
-
-For Unix like targets that might have `clang` or `gcc` compilers available you can select which one via the `-cc` option. For example...
-
-```
-% premake5 gmake2 --cc=clang --deps=true --arch=x64
-```
-
-or
-
-```
-% premake5 gmake2 --cc=gcc --deps=true --arch=x64
-```
-
-If you want to build the [`glslang`](https://github.com/KhronosGroup/glslang) library that Slang uses, add the option `--build-glslang=true`.
-
-# Projects using `make`
-
-The Slang project does not include Makefiles by default - they need to be generated via `premake`. Please read the section on your target operating system on how to use `premake` to create Makefiles.
-
-If building a Makefile based project, for example on Linux, OSX or [Cygwin](https://cygwin.com/), the configuration needs to be specified when invoking make, the following are typical...
-
-```
-% make config=release_x64
-% make config=debug_x64
-% make config=release_x86
-% make config=debug_x86
-% make config=release_aarch64
-% make config=debug_aarch64
-```
-
-To check what compiler is being used/command line options you can add `verbose=1` to `make` command line. For example
-
-```
-% make config=debug_x64 verbose=1
-```
-
-### Windows
-
-First download and install [`premake5`](https://premake.github.io/) on your build system. Open up a command line and go to the root directory of the slang source tree (ie the directory containing `slang.h`).
-
-Assuming premake5 is in your `PATH`, you can create a Visual Studio 2017 project for Slang with the following command line
-
-```
-% premake5 vs2017 --deps=true --arch=x64
-```
-
-For Visual Studio 2019 use
-
-```
-% premake5 vs2019 --deps=true --arch=x64
-```
-
-These should create a slang.sln in the same directory and which you can then open in the appropriate Visual Studio. Building will build all of Slang, examples and it's test infrastructure.
-
-### Linux
-
-On Linux we need to generate Makefiles using `premake`. Please read the `premake` section for more details.
-
-In the terminal go to the root directory of the slang source tree (ie the directory containing `slang.h`). Assuming `premake5` is in your `PATH` use
-
-```
-% premake5 gmake2 --deps=true --arch=x64
-```
-
-To create a release build use
-
-```
-% make config=release_x64
-```
-
-You can vary the compiler to use via the --cc option with 'gcc' or 'clang' for example
-
-### Mac OSX
-
-Note that OSX isn't an official target.
-
-On Mac OSX to generate Makefiles or an XCode project we use `premake`. Please read the `premake` section for more details.
-
-```
-% premake5 gmake2 --deps=true --arch=x64
-```
-
-If you want to build `glslang` (necessary for Slang to output SPIR-V for example), then the additional `--build-glslang` option should be used
-
-```
-% premake5 gmake2 --build-glslang=true --deps=true --arch=x64
-```
-
-To build for release you can use...
-
-```
-% make config=release_x64
-```
-
-Slang can also be built within the Xcode IDE. Invoke `premake` as follows
-
-```
-% premake5 xcode4 --deps=true --arch=x64
-```
-
-Then open the `slang.xcworkspace` project inside of Xcode and build.
-
-### Cygwin
-
-Note that Cygwin isn't an official target.
-
-One issue with building on [Cygwin](https://cygwin.com/), is that there isn't a binary version of `premake` currently available. It may be possible to make this work by building `premake` from source, and then just doing `premake5 gmake2`. Here we use another approach - using the windows `premake` to create a Cygwin project. To do this use the command line...
-
-```
-% premake5 --target-detail=cygwin gmake2 --deps=true --arch=x64
-```
-
-## Testing
-
-When slang is built from source it also builds tools to be able to test the Slang compiler. Testing is achieved using the `slang-test` tool. The binaries are placed in the appropriate directory underneath `bin`. It is important that you initiate the test binary from the root directory of the slang source tree, such that all tests can be correctly located.
-
-For example to run the tests on a windows release x64 build from the command line, in the root directory of slang source tree you can use...
-
-```
-% bin\windows-x64\release\slang-test
-```
-
-Note that on windows if you want to run all of the tests from inside visual studio, it is necessary to set the `Working Directory` under "slang-test project" > "Configuration Properties" > "Debugging" > "Working Directory" to the root directory of the slang source tree. You can do this by setting it to `$(ProjectDir)/../..` for all configurations.
-
-If you only see 'unit-tests' being run (unit tests are prefixed with 'unit-tests/') then the working directory is not correctly set. Most tests are text files describing the test held in the `tests` directory in the root of the slang project.
-
-See the [documentation on testing](../tools/slang-test/README.md) for more information.
-
-# Building Slang with CMake
-
-### TLDR
-
-`cmake --workflow --preset release` to configure, build, and package a release
-version of Slang.
-
-## Prerequisites:
-
-Please install:
-
-- CMake
-- A C++ compiler with support for C++17. GCC, Clang and MSVC are supported
-- A CMake compatible backend, for example Visual Studio or Ninja
-
-Optional dependencies include
-
-- CUDA
-- OptiX
-- NVAPI
-- Aftermath
-- X11
-
-## Get the Source Code
-
-Clone [this](https://github.com/shader-slang/slang) repository. Make sure to
-fetch the submodules also.
-
-```bash
-git clone https://github.com/shader-slang/slang --recursive
-```
-
-## Configure and build
-
-For a Ninja based build system (all platforms) run:
-```bash
-cmake --preset default
-cmake --build --preset release # or --preset debug
-```
-
-For Visual Studio run:
-```bash
-cmake --preset vs2022 # or --preset vs2019
-start devenv .\build\slang.sln # to optionally open the project in Visual Studio
-cmake --build --preset release # to build from the CLI
-```
-
-## Testing
-
-```bash
-build/Debug/bin/slang-test
-```
-
-See the [documentation on testing](../tools/slang-test/README.md) for more information.
-
-## More niche topics
-
-### CMake options
-
-| Option                            | Default          | Description                                                        |
-|-----------------------------------|------------------|--------------------------------------------------------------------|
-| `SLANG_VERSION`                   | Latest `v*` tag  | The project version, detected using git if available               |
-| `SLANG_EMBED_STDLIB`              | `FALSE`          | Build slang with an embedded version of the stdlib                 |
-| `SLANG_EMBED_STDLIB_SOURCE`       | `TRUE`           | Embed stdlib source in the binary                                  |
-| `SLANG_ENABLE_ASAN`               | `FALSE`          | Enable ASAN (address sanitizer)                                    |
-| `SLANG_ENABLE_FULL_IR_VALIDATION` | `FALSE`          | Enable full IR validation (SLOW!)                                  |
-| `SLANG_ENABLE_GFX`                | `TRUE`           | Enable gfx targets                                                 |
-| `SLANG_ENABLE_SLANGD`             | `TRUE`           | Enable language server target                                      |
-| `SLANG_ENABLE_SLANGC`             | `TRUE`           | Enable standalone compiler target                                  |
-| `SLANG_ENABLE_SLANGRT`            | `TRUE`           | Enable runtime target                                              |
-| `SLANG_ENABLE_SLANG_GLSLANG`      | `TRUE`           | Enable glslang dependency and slang-glslang wrapper target         |
-| `SLANG_ENABLE_TESTS`              | `TRUE`           | Enable test targets, requires SLANG_ENABLE_GFX, SLANG_ENABLE_SLANGD and SLANG_ENABLE_SLANGRT |
-| `SLANG_ENABLE_EXAMPLES`           | `TRUE`           | Enable example targets, requires SLANG_ENABLE_GFX                  |
-| `SLANG_LIB_TYPE`                  | `SHARED`         | How to build the slang library                                     |
-| `SLANG_SLANG_LLVM_FLAVOR`         | `FETCH_BINARY`   | How to set up llvm support                                         |
-| `SLANG_SLANG_LLVM_BINARY_URL`     | System dependent | URL specifying the location of the slang-llvm prebuilt library     |
-| `SLANG_GENERATORS_PATH`           | ``               | Path to an installed `all-generators` target for cross compilation |
-
-The following options relate to optional dependencies for additional backends
-and running additional tests. Left unchanged they are auto detected, however
-they can be set to `OFF` to prevent their usage, or set to `ON` to make it an
-error if they can't be found.
-
-| Option                   | CMake hints                    | Notes                                                               |
-|--------------------------|--------------------------------|---------------------------------------------------------------------|
-| `SLANG_ENABLE_CUDA`      | `CUDAToolkit_ROOT` `CUDA_PATH` |                                                                     |
-| `SLANG_ENABLE_OPTIX`     | `Optix_ROOT_DIR`               | Requires CUDA                                                       |
-| `SLANG_ENABLE_NVAPI`     | `NVAPI_ROOT_DIR`               | Only available for builds targeting Windows                         |
-| `SLANG_ENABLE_AFTERMATH` | `Aftermath_ROOT_DIR`           | Enable Aftermath in GFX, and add aftermath crash example to project |
-| `SLANG_ENABLE_XLIB`      |                                |                                                                     |
-
-### LLVM Support
-
-There are several options for getting llvm-support:
-
-- Use a prebuilt binary slang-llvm library: `-DSLANG_SLANG_LLVM_FLAVOR=FETCH_BINARY`,
-  this is the default
-    - You can set `SLANG_SLANG_LLVM_BINARY_URL` to point to a local
-      `libslang-llvm.so/slang-llvm.dll` or set it to a URL of an zip/archive
-      containing such a file
-- Use a system supplied LLVM: `-DSLANG_SLANG_LLVM_FLAVOR=USE_SYSTEM_LLVM`, you
-  must have llvm-13.0 and a matching libclang installed. It's important that
-  either:
-    - You don't end up linking to a dynamic libllvm.so, this will almost
-      certainly cause multiple versions of LLVM to be loaded at runtime,
-      leading to errors like `opt: CommandLine Error: Option
-      'asm-macro-max-nesting-depth' registered more than once!`. Avoid this by
-      compiling LLVM without the dynamic library.
-    - Anything else which may be linked in (for example Mesa, also dynamically
-      loads the same llvm object)
-- Have the Slang build system build LLVM:
-  `-DSLANG_SLANG_LLVM_FLAVOR=BUILD_LLVM`, this will build LLVM binaries at
-  configure time and use that. This is only intended to be used as part of the
-  process of generating the portable binary slang-llvm library. This always
-  builds a `Release` LLVM, so is unsuitable to use when building a `Debug`
-  `slang-llvm` on Windows as the runtime libraries will be incompatible.
-- Do not enable LLVM support: `-DSLANG_SLANG_LLVM_FLAVOR=DISABLE`
-
-To build only a standalone slang-llvm, you can run:
-
-```bash
-cmake --workflow --preset slang-llvm
-```
-
-This will generate `build/dist-release/slang-slang-llvm.zip` containing the
-library. This, of course, uses the system LLVM to build slang-llvm, otherwise
-it would just be a convoluted way to download a prebuilt binary.
-
-### Cross compiling
-
-Slang generates some code at build time, using generators build from this
-codebase. Due to this, for cross compilation one must already have built these
-generators for the build platform. Build them with the `generators` preset, and
-pass the install path to the cross building CMake invocation using
-`SLANG_GENERATORS_PATH`
-
-```bash
-# build the generators
-cmake --workflow --preset generators --fresh
-mkdir my-build-platform-generators
-unzip build/dist-release/slang-generators.zip -d my-build-platform-generators
-# reconfigure, pointing to these generators
-cmake \
-  --preset release \
-  --fresh \
-  -DSLANG_GENERATORS_PATH=my-build-platform-generators/bin \
-  -Dwhatever-other-necessary-options-for-your-cross-build
-# perform the final build
-cmake --workflow --preset release
-```
-
diff --git a/external/slang/docs/command-line-slangc.md b/external/slang/docs/command-line-slangc.md
deleted file mode 100644
index d9536508..00000000
--- a/external/slang/docs/command-line-slangc.md
+++ /dev/null
@@ -1,209 +0,0 @@
-Using the `slangc` Command-Line Compiler
-========================================
-
-The `slangc` command-line tool is used to compile or cross-compile shader source code.
-
-```
-slangc [<options>] <file1> [<file2>...]
-```
-
-## Options
-
-The available options are in [the command line option reference](command-line-slangc-reference.md). 
-
-This information is also available from `slangc` via 
-
-```
-slangc -h
-```
-
-The sections below describe usage in more detail.
-
-Simple Examples
----------------
-
-### HLSL
-
-When compiling an HLSL shader, you must specify the path to your shader code file as well as the target shader model (profile) and shader stage to use.
-For example, to see D3D bytecode assembly for a fragment shader entry point:
-
-    slangc my-shader.hlsl -profile sm_5_0 -stage fragment
-
-To direct that output to a bytecode file:
-
-    slangc my-shader.hlsl -profile sm_5_0 -stage fragment -o my-shader.dxbc
-
-If the entry-point function has a name other than the default `main`, then this is specified with `-entry`:
-
-    slangc my-shader.hlsl -profile sm_5_0 -entry psMain -stage fragment 
-
-If you are using the `[shader("...")]` syntax to mark your entry points, then you may leave off the `-stage` option:
-
-    slangc my-shader.hlsl -profile sm_5_0 -entry psMain
-
-### Slang
-
-Compiling an entry point from a Slang file is similar to HLSL, except that you must also specify a desired code generation target, because there is no assumed default (like DXBC for Direct3D Shader Model 5.x).
-
-To get DXBC assembly written to the console:
-
-    slangc my-shader.slang -profile sm_5_0 -stage fragment -entry main -target dxbc
-
-To get SPIR-V assembly:
-
-    slangc my-shader.slang -profile sm_5_0 -stage fragment -entry main -target spriv
-
-The code generation target is implicit when writing to a file with an appropriate extension.
-To write DXBC, SPIR-V, or GLSL to files, use:
-
-    slangc my-shader.slang -profile sm_5_0 -entry main -stage fragment -o my-shader.dxbc
-    slangc my-shader.slang -profile sm_6_0 -entry main -stage fragment -o my-shader.dxil
-    slangc my-shader.slang -profile glsl_450 -entry main -stage fragment -o my-shader.spv
-
-Usage
------
-
-## Multiple Entry Points
-
-`slangc` can compile multiple entry points, which may span multiple files in a single invocation.
-This is useful when you are taking advantage of Slang's ability to automatically assign binding locations to shader parameters, because the compiler can take all of your entry points into account when assigning location (avoiding overlap between entry points that will be used together).
-
-When specifying multiple entry points, you use multiple `-entry` options on the command line.
-The main thing to be aware of is that any `-stage` options apply to the most recent `-entry` point, and the same goes for any `-o` options to specify per-entry-point output files.
-For example, here is a command line to compile both vertex and fragment shader entry points from a single file and output them to distinct DXBC files:
-
-    slangc -profile sm_5_0 my-shader.hlsl 
-                          -entry vsMain -stage vertex   -o my-shader.vs.dxbc
-                          -entry fsMain -stage fragment -o my-shader.fs.dxbc
-
-If your shader entry points are spread across multiple HLSL files, then each `-entry` option indicates an entry point in the preceding file.
-For example, if the preceding example put its vertex and fragment entry points in distinct files, the command line would be:
-
-    slangc -profile sm_5_0 my-shader.vs.hlsl -entry vsMain -stage vertex   -o my-shader.vs.dxbc
-                           my-shader.fs.hlsl -entry fsMain -stage fragment -o my-shader.fs.dxbc
-
-Note that when compiling multiple `.slang` files in one invocation, they will all be compiled together as a single module (with a single global namespace) so that the relative order of `-entry` options and source files does not matter.
-
-These long command lines obviously aren't pleasant.
-We encourage applications that require complex shader compilation workflows to use the Slang API directly so that they can implement compilation that follows application conventions/policy.
-The ability to specify compilation actions like this on the command line is primarily intended a testing and debugging tool.
-
-<a id="downstream-arguments"></a>
-## Downstream Arguments
-
-During a Slang compilation work may be performed by multiple other stages including downstream compilers and linkers. It isn't possible in general or perhaps even desirable to provide Slang command line equivalents of every option available at every stage of compilation. It is useful to be able to set options specific to a particular compilation stage - to alter code generation, linkage and other options.
-
-The mechanism used here is based on the `-X` mechanism used in GCC, to specify arguments to the linker.
-
-```
--Xlinker option
-```
-
-When used, `option` is not interpreted by GCC, but is passed to the linker once compilation is complete. Slang extends this idea in several ways. First there are many more 'downstream' stages available to Slang than just `linker`. These different stages are known as `SlangPassThrough` types in the API and have the following names
-
-* `fxc` - FXC HLSL compiler
-* `dxc` - DXC HLSL compiler
-* `glslang` - GLSLANG GLSL compiler
-* `visualstudio` - Visual Studio C/C++ compiler
-* `clang` - Clang C/C++ compiler
-* `gcc` - GCC C/C++ compiler
-* `genericcpp` - A generic C++ compiler (can be any one of visual studio, clang or gcc depending on system and availability)
-* `nvrtc` - NVRTC CUDA compiler
-
-The Slang command line allows you to specify an argument to these downstream compilers, by using their name after the `-X`. So for example to send an option `-Gfa` through to DXC you can use 
-
-```
--Xdxc -Gfa
-```
-
-Note that if an option is available via normal Slang command line options then these should be used. This will generally work across multiple targets, but also avoids options clashing which is undefined behavior currently. The `-X` mechanism is best used for options that are unavailable through normal Slang mechanisms. 
-
-If you want to pass multiple options using this mechanism the `-Xdxc` needs to be in front of every options. For example 
-
-```
--Xdxc -Gfa -Xdxc -Vd
-```
-
-Would reach `dxc` as 
-
-```
--Gfa -Vd
-```
-
-This can get a little repetitive especially if there are many parameters, so Slang adds a mechanism to have multiple options passed by using an ellipsis `...`. The syntax is as follows
-
-```
--Xdxc... -Gfa -Vd -X.
-```
-
-The `...` at the end indicates all the following parameters should be sent to `dxc` until it reaches the matching terminating `-X.` or the end of the command line. 
-
-It is also worth noting that `-X...` options can be nested. This would allow a GCC downstream compilation to control linking, for example with
-
-```
--Xgcc -Xlinker --split -X.
-```
-
-In this example gcc would see
-
-```
--Xlinker --split
-```
-
-And the linker would see (as passed through by gcc) 
-
-```
---split
-```
-
-Setting options for tools that aren't used in a Slang compilation has no effect. This allows for setting `-X` options specific for all downstream tools on a command line, and they are only used as part of a compilation that needs them.
-
-NOTE! Not all tools that Slang uses downstream make command line argument parsing available. `FXC` and `GLSLANG` currently do not have any command line argument passing as part of their integration, although this could change in the future.
-
-The `-X` mechanism is also supported by render-test tool. In this usage `slang` becomes a downstream tool. Thus you can use the `dxc` option `-Gfa` in a render-test via 
-
-```
--Xslang... -Xdxc -Gfa -X.
-```
-
-Means that the dxc compilation in the render test (assuming dxc is invoked) will receive 
-
-```
--Gfa
-```
-
-Some options are made available via the same mechanism for all downstream compilers. 
-
-* Use `-I` to specify include path for downstream compilers
-
-For example to specify an include path "somePath" to DXC you can use...
-
-```
--Xdxc -IsomePath
-```
-
-## Specifying where dlls/shared libraries are loaded from
-
-On windows if you want a dll loaded from a specific path, the path must be specified absolutely. See the [LoadLibrary documentation](https://docs.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-loadlibrarya) for more details. A relative path will cause Windows to check all locations along it's search procedure.
-
-On linux it's similar, but any path (relative or not) will override the regular search mechanism. See [dlopen](https://man7.org/linux/man-pages/man3/dlopen.3.html) for more details. 
-
-See [the reference for a complete list](#command-line-slangc-reference.md#none-path)
-
-* `-dxc-path`: Sets the path where dxc dll/shared libraries are loaded from (dxcompiler & dxil).
-* `-fxc-path`: Sets the path where fxc dll is loaded from (d3dcompiler_47.dll). 
-* `-glslang-path`: Sets where the Slang specific 'slang-glslang' is loaded from
-
-Paths can specify a directory that holds the appropriate binaries. It can also be used to name a specific downstream binary - be it a shared library or an executable. Note that if it is a shared library, it is not necessary to provide the full filesystem name - just the path and/or name that will be used to load it. For example on windows `fxc` can be loaded from `D:/mydlls` with
-
-* `D:/mydlls` - will look for `d3dcompiler_47.dll` in this directory
-* `D:/mydlls/d3dcompiler_47` - it's not necessary to specify .dll to load a dll on windows
-* `D:/mydlls/d3dcompiler_47.dll` - it is also possible name the shared library explicitly for example
-
-The name of the shared library/executable can be used to specify a specific version, for example by using `D:/mydlls/dxcompiler-some-version` for a specific version of `dxc`. 
-
-Limitations
------------
-
-A major limitation of the `slangc` command today is that there is no provision for getting reflection data out along with the compiled shader code.
-For now, the command-line tool is best seen as a debugging/testing tool, and all serious applications should drive Slang through the API.
diff --git a/external/slang/include/slang-com-helper.h b/external/slang/include/slang-com-helper.h
new file mode 100644
index 00000000..557b278d
--- /dev/null
+++ b/external/slang/include/slang-com-helper.h
@@ -0,0 +1,200 @@
+#ifndef SLANG_COM_HELPER_H
+#define SLANG_COM_HELPER_H
+
+/** \file slang-com-helper.h
+ */
+
+#include "slang.h"
+
+#include <atomic>
+
+/* !!!!!!!!!!!!!!!!!!!!! Macros to help checking SlangResult !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
+
+/*! Set SLANG_HANDLE_RESULT_FAIL(x) to code to be executed whenever an error occurs, and is detected
+ * by one of the macros */
+#ifndef SLANG_HANDLE_RESULT_FAIL
+    #define SLANG_HANDLE_RESULT_FAIL(x)
+#endif
+
+//! Helper macro, that makes it easy to add result checking to calls in functions/methods that
+//! themselves return Result.
+#define SLANG_RETURN_ON_FAIL(x)             \
+    {                                       \
+        SlangResult _res = (x);             \
+        if (SLANG_FAILED(_res))             \
+        {                                   \
+            SLANG_HANDLE_RESULT_FAIL(_res); \
+            return _res;                    \
+        }                                   \
+    }
+//! Helper macro that can be used to test the return value from a call, and will return in a void
+//! method/function
+#define SLANG_RETURN_VOID_ON_FAIL(x)        \
+    {                                       \
+        SlangResult _res = (x);             \
+        if (SLANG_FAILED(_res))             \
+        {                                   \
+            SLANG_HANDLE_RESULT_FAIL(_res); \
+            return;                         \
+        }                                   \
+    }
+//! Helper macro that will return false on failure.
+#define SLANG_RETURN_FALSE_ON_FAIL(x)       \
+    {                                       \
+        SlangResult _res = (x);             \
+        if (SLANG_FAILED(_res))             \
+        {                                   \
+            SLANG_HANDLE_RESULT_FAIL(_res); \
+            return false;                   \
+        }                                   \
+    }
+//! Helper macro that will return nullptr on failure.
+#define SLANG_RETURN_NULL_ON_FAIL(x)        \
+    {                                       \
+        SlangResult _res = (x);             \
+        if (SLANG_FAILED(_res))             \
+        {                                   \
+            SLANG_HANDLE_RESULT_FAIL(_res); \
+            return nullptr;                 \
+        }                                   \
+    }
+
+//! Helper macro that will assert if the return code from a call is failure, also returns the
+//! failure.
+#define SLANG_ASSERT_ON_FAIL(x) \
+    {                           \
+        SlangResult _res = (x); \
+        if (SLANG_FAILED(_res)) \
+        {                       \
+            assert(false);      \
+            return _res;        \
+        }                       \
+    }
+//! Helper macro that will assert if the result from a call is a failure, also returns.
+#define SLANG_ASSERT_VOID_ON_FAIL(x) \
+    {                                \
+        SlangResult _res = (x);      \
+        if (SLANG_FAILED(_res))      \
+        {                            \
+            assert(false);           \
+            return;                  \
+        }                            \
+    }
+
+/* !!!!!!!!!!!!!!!!!!!!!!! C++ helpers !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
+
+#if defined(__cplusplus)
+namespace Slang
+{
+
+// Alias SlangResult to Slang::Result
+typedef SlangResult Result;
+// Alias SlangUUID to Slang::Guid
+typedef SlangUUID Guid;
+
+} // namespace Slang
+
+// Operator == and != for Guid/SlangUUID
+
+SLANG_FORCE_INLINE bool operator==(const Slang::Guid& aIn, const Slang::Guid& bIn)
+{
+    using namespace Slang;
+    // Compare as 32-bit words: the largest type that honors the alignment of Guid
+    typedef uint32_t CmpType;
+    union GuidCompare
+    {
+        Guid guid;
+        CmpType data[sizeof(Guid) / sizeof(CmpType)];
+    };
+    // Type pun through the union - so the compiler can 'see' the pun and not break aliasing rules
+    const CmpType* a = reinterpret_cast<const GuidCompare&>(aIn).data;
+    const CmpType* b = reinterpret_cast<const GuidCompare&>(bIn).data;
+    // XOR each word pair and OR the results (non-zero if any bit differs): one branch, no short circuit
+    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])) == 0;
+}
+
+SLANG_FORCE_INLINE bool operator!=(const Slang::Guid& a, const Slang::Guid& b)
+{
+    return !(a == b);
+}
+
+    /* !!!!!!!! Macros to simplify implementing COM interfaces !!!!!!!!!!!!!!!!!!!!!!!!!!!! */
+
+    /* Assumes underlying implementation has a member m_refCount that is initialized to 0 and can
+    have ++ and -- operate on it. For SLANG_IUNKNOWN_QUERY_INTERFACE to work - must have a method
+    'getInterface' that returns valid pointers for the Guid, or nullptr if not found. */
+
+    #define SLANG_IUNKNOWN_QUERY_INTERFACE                     \
+        SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface( \
+            SlangUUID const& uuid,                             \
+            void** outObject) SLANG_OVERRIDE                   \
+        {                                                      \
+            ISlangUnknown* intf = getInterface(uuid);          \
+            if (intf)                                          \
+            {                                                  \
+                addRef();                                      \
+                *outObject = intf;                             \
+                return SLANG_OK;                               \
+            }                                                  \
+            return SLANG_E_NO_INTERFACE;                       \
+        }
+
+    #define SLANG_IUNKNOWN_ADD_REF                   \
+        SLANG_NO_THROW uint32_t SLANG_MCALL addRef() \
+        {                                            \
+            return ++m_refCount;                     \
+        }
+    /* release(): deletes at zero. The count is captured once; re-reading m_refCount could race a delete. */
+    #define SLANG_IUNKNOWN_RELEASE                    \
+        SLANG_NO_THROW uint32_t SLANG_MCALL release() \
+        {                                             \
+            const uint32_t count = --m_refCount;      \
+            if (count == 0)                           \
+            {                                         \
+                delete this;                          \
+                return 0;                             \
+            }                                         \
+            return count;                             \
+        }
+
+    #define SLANG_IUNKNOWN_ALL         \
+        SLANG_IUNKNOWN_QUERY_INTERFACE \
+        SLANG_IUNKNOWN_ADD_REF         \
+        SLANG_IUNKNOWN_RELEASE
+
+    // ------------------------ RefObject IUnknown -----------------------------
+
+    #define SLANG_REF_OBJECT_IUNKNOWN_QUERY_INTERFACE          \
+        SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface( \
+            SlangUUID const& uuid,                             \
+            void** outObject) SLANG_OVERRIDE                   \
+        {                                                      \
+            void* intf = getInterface(uuid);                   \
+            if (intf)                                          \
+            {                                                  \
+                addReference();                                \
+                *outObject = intf;                             \
+                return SLANG_OK;                               \
+            }                                                  \
+            return SLANG_E_NO_INTERFACE;                       \
+        }
+
+    #define SLANG_REF_OBJECT_IUNKNOWN_ADD_REF                       \
+        SLANG_NO_THROW uint32_t SLANG_MCALL addRef() SLANG_OVERRIDE \
+        {                                                           \
+            return (uint32_t)addReference();                        \
+        }
+    #define SLANG_REF_OBJECT_IUNKNOWN_RELEASE                        \
+        SLANG_NO_THROW uint32_t SLANG_MCALL release() SLANG_OVERRIDE \
+        {                                                            \
+            return (uint32_t)releaseReference();                     \
+        }
+
+    #define SLANG_REF_OBJECT_IUNKNOWN_ALL         \
+        SLANG_REF_OBJECT_IUNKNOWN_QUERY_INTERFACE \
+        SLANG_REF_OBJECT_IUNKNOWN_ADD_REF         \
+        SLANG_REF_OBJECT_IUNKNOWN_RELEASE
+
+#endif // defined(__cplusplus)
+
+#endif
diff --git a/external/slang/include/slang-com-ptr.h b/external/slang/include/slang-com-ptr.h
new file mode 100644
index 00000000..e9d211d9
--- /dev/null
+++ b/external/slang/include/slang-com-ptr.h
@@ -0,0 +1,210 @@
+#ifndef SLANG_COM_PTR_H
+#define SLANG_COM_PTR_H
+
+#include "slang-com-helper.h"
+
+#include <assert.h>
+#include <cstddef>
+
+namespace Slang
+{
+
+/*! \brief ComPtr is a simple smart pointer that manages types which implement COM based interfaces.
+\details A class implementing a COM interface must derive from the IUnknown interface or a type that
+matches its layout exactly (such as ISlangUnknown). Trying to use this template with a class that
+doesn't follow these rules, will lead to undefined behavior. This is a 'strong' pointer type, and
+will AddRef when a non null pointer is set and Release when the pointer leaves scope. Using 'detach'
+allows a pointer to be removed from the management of the ComPtr. To set the smart pointer to null,
+there is the method setNull, or alternatively just assign SLANG_NULL/nullptr.
+
+One edge case using the template is that sometimes you want access as a pointer to a pointer.
+Sometimes this is to write into the smart pointer, other times to pass as an array. To handle these
+different behaviors there are the methods readRef and writeRef, which are used instead of the &
+(ref) operator. For example
+
+\code
+Void doSomething(ID3D12Resource** resources, IndexT numResources);
+// ...
+ComPtr<ID3D12Resource> resources[3];
+doSomething(resources[0].readRef(), SLANG_COUNT_OF(resources));
+\endcode
+
+A more common scenario writing to the pointer
+
+\code
+IUnknown* unk = ...;
+
+ComPtr<ID3D12Resource> resource;
+Result res = unk->QueryInterface(resource.writeRef());
+\endcode
+*/
+
+// Enum to force initializing as an attach (without adding a reference)
+enum InitAttach
+{
+    INIT_ATTACH
+};
+
+template<class T>
+class ComPtr
+{
+public:
+    typedef T Type;
+    typedef ComPtr ThisType;
+    typedef ISlangUnknown* Ptr;
+
+    /// Constructors
+    /// Default Ctor. Sets to nullptr
+    SLANG_FORCE_INLINE ComPtr()
+        : m_ptr(nullptr)
+    {
+    }
+    SLANG_FORCE_INLINE ComPtr(std::nullptr_t)
+        : m_ptr(nullptr)
+    {
+    }
+    /// Sets, and ref counts.
+    SLANG_FORCE_INLINE explicit ComPtr(T* ptr)
+        : m_ptr(ptr)
+    {
+        if (ptr)
+            ((Ptr)ptr)->addRef();
+    }
+    /// The copy ctor
+    SLANG_FORCE_INLINE ComPtr(const ThisType& rhs)
+        : m_ptr(rhs.m_ptr)
+    {
+        if (m_ptr)
+            ((Ptr)m_ptr)->addRef();
+    }
+
+    /// Ctor without adding to ref count.
+    SLANG_FORCE_INLINE explicit ComPtr(InitAttach, T* ptr)
+        : m_ptr(ptr)
+    {
+    }
+    /// Ctor without adding to ref count
+    SLANG_FORCE_INLINE ComPtr(InitAttach, const ThisType& rhs)
+        : m_ptr(rhs.m_ptr)
+    {
+    }
+
+#ifdef SLANG_HAS_MOVE_SEMANTICS
+    /// Move Ctor
+    SLANG_FORCE_INLINE ComPtr(ThisType&& rhs)
+        : m_ptr(rhs.m_ptr)
+    {
+        rhs.m_ptr = nullptr;
+    }
+    /// Move assign
+    SLANG_FORCE_INLINE ComPtr& operator=(ThisType&& rhs)
+    {
+        T* swap = m_ptr;
+        m_ptr = rhs.m_ptr;
+        rhs.m_ptr = swap;
+        return *this;
+    }
+#endif
+
+    /// Destructor releases the pointer, assuming it is set
+    SLANG_FORCE_INLINE ~ComPtr()
+    {
+        if (m_ptr)
+            ((Ptr)m_ptr)->release();
+    }
+
+    // !!! Operators !!!
+
+    /// Returns the dumb pointer
+    SLANG_FORCE_INLINE operator T*() const { return m_ptr; }
+
+    SLANG_FORCE_INLINE T& operator*() { return *m_ptr; }
+    /// For making method invocations through the smart pointer work through the dumb pointer
+    SLANG_FORCE_INLINE T* operator->() const { return m_ptr; }
+
+    /// Assign
+    SLANG_FORCE_INLINE const ThisType& operator=(const ThisType& rhs);
+    /// Assign from dumb ptr
+    SLANG_FORCE_INLINE T* operator=(T* in);
+
+    /// Get the pointer and don't ref
+    SLANG_FORCE_INLINE T* get() const { return m_ptr; }
+    /// Release the contained pointer (if non-null) and reset it to nullptr
+    SLANG_FORCE_INLINE void setNull();
+
+    /// Detach
+    SLANG_FORCE_INLINE T* detach()
+    {
+        T* ptr = m_ptr;
+        m_ptr = nullptr;
+        return ptr;
+    }
+    /// Set to a pointer without changing the ref count
+    SLANG_FORCE_INLINE void attach(T* in) { m_ptr = in; }
+
+    /// Get ready for writing (nulls contents)
+    SLANG_FORCE_INLINE T** writeRef()
+    {
+        setNull();
+        return &m_ptr;
+    }
+    /// Get for read access
+    SLANG_FORCE_INLINE T* const* readRef() const { return &m_ptr; }
+
+    /// Swap
+    void swap(ThisType& rhs);
+
+protected:
+    /// Gets the address of the dumb pointer.
+    // Disabled: use writeRef and readRef to get a reference based on usage.
+#ifndef SLANG_COM_PTR_ENABLE_REF_OPERATOR
+    SLANG_FORCE_INLINE T** operator&() = delete;
+#endif
+
+    T* m_ptr;
+};
+
+//----------------------------------------------------------------------------
+template<typename T>
+void ComPtr<T>::setNull()
+{
+    if (m_ptr)
+    {
+        ((Ptr)m_ptr)->release();
+        m_ptr = nullptr;
+    }
+}
+//----------------------------------------------------------------------------
+template<typename T>
+const ComPtr<T>& ComPtr<T>::operator=(const ThisType& rhs)
+{
+    if (rhs.m_ptr)
+        ((Ptr)rhs.m_ptr)->addRef();
+    if (m_ptr)
+        ((Ptr)m_ptr)->release();
+    m_ptr = rhs.m_ptr;
+    return *this;
+}
+//----------------------------------------------------------------------------
+template<typename T>
+T* ComPtr<T>::operator=(T* ptr)
+{
+    if (ptr)
+        ((Ptr)ptr)->addRef();
+    if (m_ptr)
+        ((Ptr)m_ptr)->release();
+    m_ptr = ptr;
+    return m_ptr;
+}
+//----------------------------------------------------------------------------
+template<typename T>
+void ComPtr<T>::swap(ThisType& rhs)
+{
+    T* tmp = m_ptr;
+    m_ptr = rhs.m_ptr;
+    rhs.m_ptr = tmp;
+}
+
+} // namespace Slang
+
+#endif // SLANG_COM_PTR_H
diff --git a/external/slang/include/slang-cpp-host-prelude.h b/external/slang/include/slang-cpp-host-prelude.h
new file mode 100644
index 00000000..8bc0f5ca
--- /dev/null
+++ b/external/slang/include/slang-cpp-host-prelude.h
@@ -0,0 +1,58 @@
+#ifndef SLANG_CPP_HOST_PRELUDE_H
+#define SLANG_CPP_HOST_PRELUDE_H
+
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+
+#define SLANG_COM_PTR_ENABLE_REF_OPERATOR 1
+
+#include "../source/slang-rt/slang-rt.h"
+#include "slang-com-ptr.h"
+#include "slang-cpp-types.h"
+
+#ifdef SLANG_LLVM
+#include "slang-llvm.h"
+#else // SLANG_LLVM
+#if SLANG_GCC_FAMILY && __GNUC__ < 6
+#include <cmath>
+#define SLANG_PRELUDE_STD std::
+#else
+#include <math.h>
+#define SLANG_PRELUDE_STD
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#endif // SLANG_LLVM
+
+#if defined(_MSC_VER)
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
+#else
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
+// #   define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
+// __attribute__((__visibility__("default")))
+#endif
+
+#ifdef __cplusplus
+#define SLANG_PRELUDE_EXTERN_C extern "C"
+#define SLANG_PRELUDE_EXTERN_C_START \
+    extern "C"                       \
+    {
+#define SLANG_PRELUDE_EXTERN_C_END }
+#else
+#define SLANG_PRELUDE_EXTERN_C
+#define SLANG_PRELUDE_EXTERN_C_START
+#define SLANG_PRELUDE_EXTERN_C_END
+#endif
+
+#include "slang-cpp-scalar-intrinsics.h"
+
+using namespace Slang;
+
+template<typename TResult, typename... Args>
+using Slang_FuncType = TResult(SLANG_MCALL*)(Args...);
+
+#endif
diff --git a/external/slang/include/slang-cpp-prelude.h b/external/slang/include/slang-cpp-prelude.h
new file mode 100644
index 00000000..4dacac9c
--- /dev/null
+++ b/external/slang/include/slang-cpp-prelude.h
@@ -0,0 +1,322 @@
+#ifndef SLANG_CPP_PRELUDE_H
+#define SLANG_CPP_PRELUDE_H
+
+// Because the signature of isnan, isfinite, and isinf changed in C++, we use the macro
+// to use the version in the std namespace.
+// https://stackoverflow.com/questions/39130040/cmath-hides-isnan-in-math-h-in-c14-c11
+
+#ifdef SLANG_LLVM
+#include "slang-llvm.h"
+#else // SLANG_LLVM
+#if SLANG_GCC_FAMILY && __GNUC__ < 6
+#include <cmath>
+#define SLANG_PRELUDE_STD std::
+#else
+#include <math.h>
+#define SLANG_PRELUDE_STD
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#endif // SLANG_LLVM
+
+#if defined(_MSC_VER)
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
+#else
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
+// #   define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
+// __attribute__((__visibility__("default")))
+#endif
+
+#ifdef __cplusplus
+#define SLANG_PRELUDE_EXTERN_C extern "C"
+#define SLANG_PRELUDE_EXTERN_C_START \
+    extern "C"                       \
+    {
+#define SLANG_PRELUDE_EXTERN_C_END }
+#else
+#define SLANG_PRELUDE_EXTERN_C
+#define SLANG_PRELUDE_EXTERN_C_START
+#define SLANG_PRELUDE_EXTERN_C_END
+#endif
+
+#define SLANG_PRELUDE_EXPORT SLANG_PRELUDE_EXTERN_C SLANG_PRELUDE_SHARED_LIB_EXPORT
+#define SLANG_PRELUDE_EXPORT_START SLANG_PRELUDE_EXTERN_C_START SLANG_PRELUDE_SHARED_LIB_EXPORT
+#define SLANG_PRELUDE_EXPORT_END SLANG_PRELUDE_EXTERN_C_END
+
+#ifndef INFINITY
+// Must overflow for double
+#define INFINITY float(1e+300 * 1e+300)
+#endif
+
+#ifndef SLANG_INFINITY
+#define SLANG_INFINITY INFINITY
+#endif
+
+// Detect the compiler type
+
+#ifndef SLANG_COMPILER
+#define SLANG_COMPILER
+
+/*
+Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/
+NOTE that SLANG_VC holds the compiler version - not just 1 or 0
+*/
+#if defined(_MSC_VER)
+#if _MSC_VER >= 1900
+#define SLANG_VC 14
+#elif _MSC_VER >= 1800
+#define SLANG_VC 12
+#elif _MSC_VER >= 1700
+#define SLANG_VC 11
+#elif _MSC_VER >= 1600
+#define SLANG_VC 10
+#elif _MSC_VER >= 1500
+#define SLANG_VC 9
+#else
+#error "unknown version of Visual C++ compiler"
+#endif
+#elif defined(__clang__)
+#define SLANG_CLANG 1
+#elif defined(__SNC__)
+#define SLANG_SNC 1
+#elif defined(__ghs__)
+#define SLANG_GHS 1
+#elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */
+#define SLANG_GCC 1
+#else
+#error "unknown compiler"
+#endif
+/*
+Any compilers not detected by the above logic are now explicitly zeroed out.
+*/
+#ifndef SLANG_VC
+#define SLANG_VC 0
+#endif
+#ifndef SLANG_CLANG
+#define SLANG_CLANG 0
+#endif
+#ifndef SLANG_SNC
+#define SLANG_SNC 0
+#endif
+#ifndef SLANG_GHS
+#define SLANG_GHS 0
+#endif
+#ifndef SLANG_GCC
+#define SLANG_GCC 0
+#endif
+#endif /* SLANG_COMPILER */
+
+/*
+The following section attempts to detect the target platform being compiled for.
+
+If an application defines `SLANG_PLATFORM` before including this header,
+they take responsibility for setting any compiler-dependent macros
+used later in the file.
+
+Most applications should not need to touch this section.
+*/
+#ifndef SLANG_PLATFORM
+#define SLANG_PLATFORM
+/**
+Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSystems/
+*/
+#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_PARTITION_APP
+#define SLANG_WINRT 1 /* Windows Runtime, either on Windows RT or Windows 8 */
+#elif defined(XBOXONE)
+#define SLANG_XBOXONE 1
+#elif defined(_WIN64) /* note: XBOXONE implies _WIN64 */
+#define SLANG_WIN64 1
+#elif defined(_M_PPC)
+#define SLANG_X360 1
+#elif defined(_WIN32) /* note: _M_PPC implies _WIN32 */
+#define SLANG_WIN32 1
+#elif defined(__ANDROID__)
+#define SLANG_ANDROID 1
+#elif defined(__linux__) || defined(__CYGWIN__) /* note: __ANDROID__ implies __linux__ */
+#define SLANG_LINUX 1
+#elif defined(__APPLE__) && !defined(SLANG_LLVM)
+#include "TargetConditionals.h"
+#if TARGET_OS_MAC
+#define SLANG_OSX 1
+#else
+#define SLANG_IOS 1
+#endif
+#elif defined(__APPLE__)
+// On `slang-llvm` we can't include "TargetConditionals.h" in general, so for now assume it's
+// OSX.
+#define SLANG_OSX 1
+#elif defined(__CELLOS_LV2__)
+#define SLANG_PS3 1
+#elif defined(__ORBIS__)
+#define SLANG_PS4 1
+#elif defined(__SNC__) && defined(__arm__)
+#define SLANG_PSP2 1
+#elif defined(__ghs__)
+#define SLANG_WIIU 1
+#else
+#error "unknown target platform"
+#endif
+
+
+/*
+Any platforms not detected by the above logic are now explicitly zeroed out.
+*/
+#ifndef SLANG_WINRT
+#define SLANG_WINRT 0
+#endif
+#ifndef SLANG_XBOXONE
+#define SLANG_XBOXONE 0
+#endif
+#ifndef SLANG_WIN64
+#define SLANG_WIN64 0
+#endif
+#ifndef SLANG_X360
+#define SLANG_X360 0
+#endif
+#ifndef SLANG_WIN32
+#define SLANG_WIN32 0
+#endif
+#ifndef SLANG_ANDROID
+#define SLANG_ANDROID 0
+#endif
+#ifndef SLANG_LINUX
+#define SLANG_LINUX 0
+#endif
+#ifndef SLANG_IOS
+#define SLANG_IOS 0
+#endif
+#ifndef SLANG_OSX
+#define SLANG_OSX 0
+#endif
+#ifndef SLANG_PS3
+#define SLANG_PS3 0
+#endif
+#ifndef SLANG_PS4
+#define SLANG_PS4 0
+#endif
+#ifndef SLANG_PSP2
+#define SLANG_PSP2 0
+#endif
+#ifndef SLANG_WIIU
+#define SLANG_WIIU 0
+#endif
+#endif /* SLANG_PLATFORM */
+
+/* Shorthands for "families" of compilers/platforms */
+#define SLANG_GCC_FAMILY (SLANG_CLANG || SLANG_SNC || SLANG_GHS || SLANG_GCC)
+#define SLANG_WINDOWS_FAMILY (SLANG_WINRT || SLANG_WIN32 || SLANG_WIN64)
+#define SLANG_MICROSOFT_FAMILY (SLANG_XBOXONE || SLANG_X360 || SLANG_WINDOWS_FAMILY)
+#define SLANG_LINUX_FAMILY (SLANG_LINUX || SLANG_ANDROID)
+#define SLANG_APPLE_FAMILY (SLANG_IOS || SLANG_OSX) /* equivalent to #if __APPLE__ */
+#define SLANG_UNIX_FAMILY \
+    (SLANG_LINUX_FAMILY || SLANG_APPLE_FAMILY) /* shortcut for unix/posix platforms */
+
+// GCC Specific
+#if SLANG_GCC_FAMILY
+#define SLANG_ALIGN_OF(T) __alignof__(T)
+
+#define SLANG_BREAKPOINT(id) __builtin_trap()
+
+// Use this macro instead of offsetof, because gcc produces warning if offsetof is used on a
+// non POD type, even though it produces the correct result
+#define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1)
+#endif // SLANG_GCC_FAMILY
+
+// Microsoft VC specific
+#if SLANG_VC
+#define SLANG_ALIGN_OF(T) __alignof(T)
+
+#define SLANG_BREAKPOINT(id) __debugbreak();
+
+#endif // SLANG_VC
+
+// Default impls
+
+#ifndef SLANG_OFFSET_OF
+#define SLANG_OFFSET_OF(X, Y) offsetof(X, Y)
+#endif
+
+#ifndef SLANG_BREAKPOINT
+// Make it crash with a write to 0!
+#define SLANG_BREAKPOINT(id) (*((int*)0) = int(id));
+#endif
+
+// If slang.h has been included we don't need any of these definitions
+#ifndef SLANG_H
+
+/* Macro for declaring if a method is no throw. Should be set before the return parameter. */
+#ifndef SLANG_NO_THROW
+#if SLANG_WINDOWS_FAMILY && !defined(SLANG_DISABLE_EXCEPTIONS)
+#define SLANG_NO_THROW __declspec(nothrow)
+#endif
+#endif
+#ifndef SLANG_NO_THROW
+#define SLANG_NO_THROW
+#endif
+
+/* The `SLANG_STDCALL` and `SLANG_MCALL` defines are used to set the calling
+convention for interface methods.
+*/
+#ifndef SLANG_STDCALL
+#if SLANG_MICROSOFT_FAMILY
+#define SLANG_STDCALL __stdcall
+#else
+#define SLANG_STDCALL
+#endif
+#endif
+#ifndef SLANG_MCALL
+#define SLANG_MCALL SLANG_STDCALL
+#endif
+
+#ifndef SLANG_FORCE_INLINE
+#define SLANG_FORCE_INLINE inline
+#endif
+
+// TODO(JS): Should these be in slang-cpp-types.h?
+// They are more likely to clash with slang.h
+
+struct SlangUUID
+{
+    uint32_t data1;
+    uint16_t data2;
+    uint16_t data3;
+    uint8_t data4[8];
+};
+
+typedef int32_t SlangResult;
+
+struct ISlangUnknown
+{
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    queryInterface(SlangUUID const& uuid, void** outObject) = 0;
+    virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() = 0;
+    virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() = 0;
+};
+
+#define SLANG_COM_INTERFACE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)             \
+public:                                                                          \
+    SLANG_FORCE_INLINE static const SlangUUID& getTypeGuid()                     \
+    {                                                                            \
+        static const SlangUUID guid = {a, b, c, d0, d1, d2, d3, d4, d5, d6, d7}; \
+        return guid;                                                             \
+    }
+#endif // SLANG_H
+
+// Includes
+
+#include "slang-cpp-scalar-intrinsics.h"
+#include "slang-cpp-types.h"
+
+// TODO(JS): Hack! Output C++ code from slang can copy uninitialized variables.
+#if defined(_MSC_VER)
+#pragma warning(disable : 4700)
+#endif
+
+#ifndef SLANG_UNROLL
+#define SLANG_UNROLL
+#endif
+
+#endif
diff --git a/external/slang/include/slang-cpp-scalar-intrinsics.h b/external/slang/include/slang-cpp-scalar-intrinsics.h
new file mode 100644
index 00000000..22b5e12e
--- /dev/null
+++ b/external/slang/include/slang-cpp-scalar-intrinsics.h
@@ -0,0 +1,838 @@
+#ifndef SLANG_PRELUDE_SCALAR_INTRINSICS_H
+#define SLANG_PRELUDE_SCALAR_INTRINSICS_H
+
+#if !defined(SLANG_LLVM) && SLANG_PROCESSOR_X86_64 && SLANG_VC
+//  If we have visual studio and 64 bit processor, we can assume we have popcnt, and can include
+//  x86 intrinsics
+#include <intrin.h>
+#endif
+
+#ifndef SLANG_FORCE_INLINE
+#define SLANG_FORCE_INLINE inline
+#endif
+
+#ifdef SLANG_PRELUDE_NAMESPACE
+namespace SLANG_PRELUDE_NAMESPACE
+{
+#endif
+
+#ifndef SLANG_PRELUDE_PI
+#define SLANG_PRELUDE_PI 3.14159265358979323846
+#endif
+
+
+union Union32
+{
+    uint32_t u;
+    int32_t i;
+    float f;
+};
+
+union Union64
+{
+    uint64_t u;
+    int64_t i;
+    double d;
+};
+
+// 32 bit cast conversions
+SLANG_FORCE_INLINE int32_t _bitCastFloatToInt(float f)
+{
+    Union32 u;
+    u.f = f;
+    return u.i;
+}
+SLANG_FORCE_INLINE float _bitCastIntToFloat(int32_t i)
+{
+    Union32 u;
+    u.i = i;
+    return u.f;
+}
+SLANG_FORCE_INLINE uint32_t _bitCastFloatToUInt(float f)
+{
+    Union32 u;
+    u.f = f;
+    return u.u;
+}
+SLANG_FORCE_INLINE float _bitCastUIntToFloat(uint32_t ui)
+{
+    Union32 u;
+    u.u = ui;
+    return u.f;
+}
+
+// ----------------------------- F16 -----------------------------------------
+
+
+// This impl is based on FloatToHalf that is in Slang codebase
+SLANG_FORCE_INLINE uint32_t f32tof16(const float value)
+{
+    const uint32_t inBits = _bitCastFloatToUInt(value);
+
+    // bits initially set to just the sign bit
+    uint32_t bits = (inBits >> 16) & 0x8000;
+    // Mantissa can't be used as is, as it holds last bit, for rounding.
+    uint32_t m = (inBits >> 12) & 0x07ff;
+    uint32_t e = (inBits >> 23) & 0xff;
+
+    if (e < 103)
+    {
+        // It's zero
+        return bits;
+    }
+    if (e == 0xff)
+    {
+        // Could be a NAN or INF. Is INF if *input* mantissa is 0.
+
+        // Remove last bit for rounding to make output mantissa.
+        m >>= 1;
+
+        // We *assume* float16/float32 signaling bit and remaining bits
+        // semantics are the same. (The signalling bit convention is target specific!).
+        // Non signal bit's usage within mantissa for a NAN are also target specific.
+
+        // If the m is 0, it could be because the result is INF, but it could also be because all
+        // the bits that made NAN were dropped as we have less mantissa bits in f16.
+
+        // To fix for this we make non zero if m is 0 and the input mantissa was not.
+        // This will (typically) produce a signalling NAN.
+        m += uint32_t(m == 0 && (inBits & 0x007fffffu));
+
+        // Combine for output
+        return (bits | 0x7c00u | m);
+    }
+    if (e > 142)
+    {
+        // INF.
+        return bits | 0x7c00u;
+    }
+    if (e < 113)
+    {
+        m |= 0x0800u;
+        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
+        return bits;
+    }
+    bits |= ((e - 112) << 10) | (m >> 1);
+    bits += m & 1;
+    return bits;
+}
+
+static const float g_f16tof32Magic = _bitCastIntToFloat((127 + (127 - 15)) << 23);
+
+SLANG_FORCE_INLINE float f16tof32(const uint32_t value)
+{
+    const uint32_t sign = (value & 0x8000) << 16;
+    uint32_t exponent = (value & 0x7c00) >> 10;
+    uint32_t mantissa = (value & 0x03ff);
+
+    if (exponent == 0)
+    {
+        // If mantissa is 0 we are done, as output is 0.
+        // If it's not zero we must have a denormal.
+        if (mantissa)
+        {
+            // We have a denormal so use the magic to do exponent adjust
+            return _bitCastIntToFloat(sign | ((value & 0x7fff) << 13)) * g_f16tof32Magic;
+        }
+    }
+    else
+    {
+        // If the exponent is NAN or INF exponent is 0x1f on input.
+        // If that's the case, we just need to set the exponent to 0xff on output
+        // and the mantissa can just stay the same. If its 0 it's INF, else it is NAN and we just
+        // copy the bits
+        //
+        // Else we need to correct the exponent in the normalized case.
+        exponent = (exponent == 0x1F) ? 0xff : (exponent + (-15 + 127));
+    }
+
+    return _bitCastUIntToFloat(sign | (exponent << 23) | (mantissa << 13));
+}
+
+// ----------------------------- F32 -----------------------------------------
+
+// Helpers
+SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians);
+
+#ifdef SLANG_LLVM
+
+SLANG_PRELUDE_EXTERN_C_START
+
+// Unary
+float F32_ceil(float f);
+float F32_floor(float f);
+float F32_round(float f);
+float F32_sin(float f);
+float F32_cos(float f);
+float F32_tan(float f);
+float F32_asin(float f);
+float F32_acos(float f);
+float F32_atan(float f);
+float F32_sinh(float f);
+float F32_cosh(float f);
+float F32_tanh(float f);
+float F32_log2(float f);
+float F32_log(float f);
+float F32_log10(float f);
+float F32_exp2(float f);
+float F32_exp(float f);
+float F32_abs(float f);
+float F32_trunc(float f);
+float F32_sqrt(float f);
+
+bool F32_isnan(float f);
+bool F32_isfinite(float f);
+bool F32_isinf(float f);
+
+// Binary
+SLANG_FORCE_INLINE float F32_min(float a, float b)
+{
+    return a < b ? a : b;
+}
+SLANG_FORCE_INLINE float F32_max(float a, float b)
+{
+    return a > b ? a : b;
+}
+float F32_pow(float a, float b);
+float F32_fmod(float a, float b);
+float F32_remainder(float a, float b);
+float F32_atan2(float a, float b);
+
+float F32_frexp(float x, int* e);
+
+float F32_modf(float x, float* ip);
+
+// Ternary
+SLANG_FORCE_INLINE float F32_fma(float a, float b, float c)
+{
+    return a * b + c;
+}
+
+SLANG_PRELUDE_EXTERN_C_END
+
+#else
+
+// Unary: each wrapper forwards to the global-namespace single-precision C math function.
+SLANG_FORCE_INLINE float F32_ceil(float f)
+{
+    return ::ceilf(f);
+}
+SLANG_FORCE_INLINE float F32_floor(float f)
+{
+    return ::floorf(f);
+}
+SLANG_FORCE_INLINE float F32_round(float f)
+{
+    return ::roundf(f);
+}
+SLANG_FORCE_INLINE float F32_sin(float f)
+{
+    return ::sinf(f);
+}
+SLANG_FORCE_INLINE float F32_cos(float f)
+{
+    return ::cosf(f);
+}
+SLANG_FORCE_INLINE float F32_tan(float f)
+{
+    return ::tanf(f);
+}
+SLANG_FORCE_INLINE float F32_asin(float f)
+{
+    return ::asinf(f);
+}
+SLANG_FORCE_INLINE float F32_acos(float f)
+{
+    return ::acosf(f);
+}
+SLANG_FORCE_INLINE float F32_atan(float f)
+{
+    return ::atanf(f);
+}
+SLANG_FORCE_INLINE float F32_sinh(float f)
+{
+    return ::sinhf(f);
+}
+SLANG_FORCE_INLINE float F32_cosh(float f)
+{
+    return ::coshf(f);
+}
+SLANG_FORCE_INLINE float F32_tanh(float f)
+{
+    return ::tanhf(f);
+}
+SLANG_FORCE_INLINE float F32_log2(float f)
+{
+    return ::log2f(f);
+}
+SLANG_FORCE_INLINE float F32_log(float f)
+{
+    return ::logf(f);
+}
+SLANG_FORCE_INLINE float F32_log10(float f)
+{
+    return ::log10f(f);
+}
+SLANG_FORCE_INLINE float F32_exp2(float f)
+{
+    return ::exp2f(f);
+}
+SLANG_FORCE_INLINE float F32_exp(float f)
+{
+    return ::expf(f);
+}
+SLANG_FORCE_INLINE float F32_abs(float f)
+{
+    return ::fabsf(f);
+}
+SLANG_FORCE_INLINE float F32_trunc(float f)
+{
+    return ::truncf(f);
+}
+SLANG_FORCE_INLINE float F32_sqrt(float f)
+{
+    return ::sqrtf(f);
+}
+
+SLANG_FORCE_INLINE bool F32_isnan(float f)
+{
+    return SLANG_PRELUDE_STD isnan(f); // SLANG_PRELUDE_STD is defined outside this section
+}
+SLANG_FORCE_INLINE bool F32_isfinite(float f)
+{
+    return SLANG_PRELUDE_STD isfinite(f);
+}
+SLANG_FORCE_INLINE bool F32_isinf(float f)
+{
+    return SLANG_PRELUDE_STD isinf(f);
+}
+
+// Binary: here min/max forward to ::fminf/::fmaxf rather than using a bare comparison.
+SLANG_FORCE_INLINE float F32_min(float a, float b)
+{
+    return ::fminf(a, b);
+}
+SLANG_FORCE_INLINE float F32_max(float a, float b)
+{
+    return ::fmaxf(a, b);
+}
+SLANG_FORCE_INLINE float F32_pow(float a, float b)
+{
+    return ::powf(a, b);
+}
+SLANG_FORCE_INLINE float F32_fmod(float a, float b)
+{
+    return ::fmodf(a, b);
+}
+SLANG_FORCE_INLINE float F32_remainder(float a, float b)
+{
+    return ::remainderf(a, b);
+}
+SLANG_FORCE_INLINE float F32_atan2(float a, float b)
+{
+    return float(::atan2(a, b)); // NOTE(review): the only wrapper here without an f-suffixed call
+}
+
+SLANG_FORCE_INLINE float F32_frexp(float x, int* e)
+{
+    return ::frexpf(x, e); // the exponent is written through e
+}
+
+SLANG_FORCE_INLINE float F32_modf(float x, float* ip)
+{
+    return ::modff(x, ip); // the integral part is written through ip
+}
+
+// Ternary
+SLANG_FORCE_INLINE float F32_fma(float a, float b, float c)
+{
+    return ::fmaf(a, b, c);
+}
+
+#endif
+
+// Wraps an angle into the 0 - 2pi range by discarding whole turns.
+SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians)
+{
+    const float twoPi = float(SLANG_PRELUDE_PI * 2);
+    // Express the angle in turns, then keep only the fractional turn (0 - 1 range).
+    const float turns = radians * (1.0f / twoPi);
+    const float fraction = turns - F32_floor(turns);
+    return fraction * twoPi; // scale the fraction back up to radians
+}
+
+// Reciprocal square root, expressed through F32_sqrt.
+SLANG_FORCE_INLINE float F32_rsqrt(float f) {
+    return 1.0f / F32_sqrt(f);
+}
+// Sign of f: -1 below zero, f itself when it compares equal to zero, otherwise 1.
+SLANG_FORCE_INLINE float F32_sign(float f) {
+    return (f < 0.0f) ? -1.0f : ((f == 0.0f) ? f : 1.0f);
+}
+// Fractional part relative to the floor, i.e. f - F32_floor(f).
+SLANG_FORCE_INLINE float F32_frac(float f) {
+    return f - F32_floor(f);
+}
+
+// Bit pattern of f read back as an unsigned 32-bit integer (Union32 members f and u).
+SLANG_FORCE_INLINE uint32_t F32_asuint(float f) {
+    Union32 bits;
+    bits.f = f;
+    return bits.u;
+}
+// Bit pattern of f read back as a signed 32-bit integer (Union32 members f and i).
+SLANG_FORCE_INLINE int32_t F32_asint(float f) {
+    Union32 bits;
+    bits.f = f;
+    return bits.i;
+}
+
+// ----------------------------- F64 -----------------------------------------
+
+SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians);
+
+#ifdef SLANG_LLVM
+
+SLANG_PRELUDE_EXTERN_C_START
+
+// Unary. Prototypes only in this SLANG_LLVM branch: no bodies are provided here.
+double F64_ceil(double f);
+double F64_floor(double f);
+double F64_round(double f);
+double F64_sin(double f);
+double F64_cos(double f);
+double F64_tan(double f);
+double F64_asin(double f);
+double F64_acos(double f);
+double F64_atan(double f);
+double F64_sinh(double f);
+double F64_cosh(double f);
+double F64_tanh(double f);
+double F64_log2(double f);
+double F64_log(double f);
+double F64_log10(double f);
+double F64_exp2(double f);
+double F64_exp(double f);
+double F64_abs(double f);
+double F64_trunc(double f);
+double F64_sqrt(double f);
+
+bool F64_isnan(double f);
+bool F64_isfinite(double f);
+bool F64_isinf(double f);
+
+// Binary. min/max are defined inline in this branch; the others are prototypes only.
+SLANG_FORCE_INLINE double F64_min(double a, double b)
+{
+    return a < b ? a : b; // yields b whenever the comparison is false
+}
+SLANG_FORCE_INLINE double F64_max(double a, double b)
+{
+    return a > b ? a : b; // yields b whenever the comparison is false
+}
+double F64_pow(double a, double b);
+double F64_fmod(double a, double b);
+double F64_remainder(double a, double b);
+double F64_atan2(double a, double b);
+
+double F64_frexp(double x, int* e);
+
+double F64_modf(double x, double* ip);
+
+// Ternary
+SLANG_FORCE_INLINE double F64_fma(double a, double b, double c)
+{
+    return a * b + c; // written as a plain expression; the #else branch forwards to ::fma
+}
+
+SLANG_PRELUDE_EXTERN_C_END
+
+#else // SLANG_LLVM
+
+// Unary: each wrapper forwards to the global-namespace C math function of the same name.
+SLANG_FORCE_INLINE double F64_ceil(double f)
+{
+    return ::ceil(f);
+}
+SLANG_FORCE_INLINE double F64_floor(double f)
+{
+    return ::floor(f);
+}
+SLANG_FORCE_INLINE double F64_round(double f)
+{
+    return ::round(f);
+}
+SLANG_FORCE_INLINE double F64_sin(double f)
+{
+    return ::sin(f);
+}
+SLANG_FORCE_INLINE double F64_cos(double f)
+{
+    return ::cos(f);
+}
+SLANG_FORCE_INLINE double F64_tan(double f)
+{
+    return ::tan(f);
+}
+SLANG_FORCE_INLINE double F64_asin(double f)
+{
+    return ::asin(f);
+}
+SLANG_FORCE_INLINE double F64_acos(double f)
+{
+    return ::acos(f);
+}
+SLANG_FORCE_INLINE double F64_atan(double f)
+{
+    return ::atan(f);
+}
+SLANG_FORCE_INLINE double F64_sinh(double f)
+{
+    return ::sinh(f);
+}
+SLANG_FORCE_INLINE double F64_cosh(double f)
+{
+    return ::cosh(f);
+}
+SLANG_FORCE_INLINE double F64_tanh(double f)
+{
+    return ::tanh(f);
+}
+SLANG_FORCE_INLINE double F64_log2(double f)
+{
+    return ::log2(f);
+}
+SLANG_FORCE_INLINE double F64_log(double f)
+{
+    return ::log(f);
+}
+SLANG_FORCE_INLINE double F64_log10(double f) // double, matching the SLANG_LLVM prototype
+{
+    return ::log10(f); // base-10 logarithm; a float parameter would round f to single precision
+}
+SLANG_FORCE_INLINE double F64_exp2(double f)
+{
+    return ::exp2(f);
+}
+SLANG_FORCE_INLINE double F64_exp(double f)
+{
+    return ::exp(f);
+}
+SLANG_FORCE_INLINE double F64_abs(double f)
+{
+    return ::fabs(f);
+}
+SLANG_FORCE_INLINE double F64_trunc(double f)
+{
+    return ::trunc(f);
+}
+SLANG_FORCE_INLINE double F64_sqrt(double f)
+{
+    return ::sqrt(f);
+}
+
+
+SLANG_FORCE_INLINE bool F64_isnan(double f)
+{
+    return SLANG_PRELUDE_STD isnan(f); // SLANG_PRELUDE_STD is defined outside this section
+}
+SLANG_FORCE_INLINE bool F64_isfinite(double f)
+{
+    return SLANG_PRELUDE_STD isfinite(f);
+}
+SLANG_FORCE_INLINE bool F64_isinf(double f)
+{
+    return SLANG_PRELUDE_STD isinf(f);
+}
+
+// Binary: here min/max forward to ::fmin/::fmax rather than using a bare comparison.
+SLANG_FORCE_INLINE double F64_min(double a, double b)
+{
+    return ::fmin(a, b);
+}
+SLANG_FORCE_INLINE double F64_max(double a, double b)
+{
+    return ::fmax(a, b);
+}
+SLANG_FORCE_INLINE double F64_pow(double a, double b)
+{
+    return ::pow(a, b);
+}
+SLANG_FORCE_INLINE double F64_fmod(double a, double b)
+{
+    return ::fmod(a, b);
+}
+SLANG_FORCE_INLINE double F64_remainder(double a, double b)
+{
+    return ::remainder(a, b);
+}
+SLANG_FORCE_INLINE double F64_atan2(double a, double b)
+{
+    return ::atan2(a, b);
+}
+
+SLANG_FORCE_INLINE double F64_frexp(double x, int* e)
+{
+    return ::frexp(x, e); // the exponent is written through e
+}
+
+SLANG_FORCE_INLINE double F64_modf(double x, double* ip)
+{
+    return ::modf(x, ip); // the integral part is written through ip
+}
+
+// Ternary
+SLANG_FORCE_INLINE double F64_fma(double a, double b, double c)
+{
+    return ::fma(a, b, c);
+}
+
+#endif // SLANG_LLVM
+
+// Reciprocal square root, expressed through F64_sqrt.
+SLANG_FORCE_INLINE double F64_rsqrt(double f) {
+    return 1.0 / F64_sqrt(f);
+}
+// Sign of f: -1 below zero, f itself when it compares equal to zero, otherwise 1.
+SLANG_FORCE_INLINE double F64_sign(double f) {
+    return (f < 0.0) ? -1.0 : ((f == 0.0) ? f : 1.0);
+}
+// Fractional part relative to the floor, i.e. f - F64_floor(f).
+SLANG_FORCE_INLINE double F64_frac(double f) {
+    return f - F64_floor(f);
+}
+
+// Splits the 64-bit pattern of d into its lower (*low) and upper (*hi) unsigned 32-bit halves.
+SLANG_FORCE_INLINE void F64_asuint(double d, uint32_t* low, uint32_t* hi) {
+    Union64 bits;
+    bits.d = d;
+    *low = uint32_t(bits.u);
+    *hi = uint32_t(bits.u >> 32);
+}
+
+// Same split as F64_asuint, with each half converted to a signed 32-bit integer.
+SLANG_FORCE_INLINE void F64_asint(double d, int32_t* low, int32_t* hi) {
+    Union64 bits;
+    bits.d = d;
+    *low = int32_t(bits.u);
+    *hi = int32_t(bits.u >> 32);
+}
+
+SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians)
+{
+    // Convert radians to turns so that each full 2pi cycle maps onto one unit. The reciprocal
+    // uses a double literal (was 1.0f) so the whole expression is written in double precision.
+    double a = radians * (1.0 / (SLANG_PRELUDE_PI * 2));
+    a = a - F64_floor(a); // keep only the fractional turn, a value in the 0 - 1 range
+    // Convert back to 0 - 2pi range
+    return (a * (SLANG_PRELUDE_PI * 2));
+}
+
+// ----------------------------- I32 -----------------------------------------
+
+SLANG_FORCE_INLINE int32_t I32_abs(int32_t f)
+{
+    return (f < 0) ? int32_t(0u - uint32_t(f)) : f; // unsigned negate: no UB for INT32_MIN
+}
+
+// Smaller of two signed 32-bit integers.
+SLANG_FORCE_INLINE int32_t I32_min(int32_t a, int32_t b) {
+    return (b > a) ? a : b;
+}
+// Larger of two signed 32-bit integers.
+SLANG_FORCE_INLINE int32_t I32_max(int32_t a, int32_t b) {
+    return (b < a) ? a : b;
+}
+
+// Bit pattern of x read back as a float (Union32 members i and f).
+SLANG_FORCE_INLINE float I32_asfloat(int32_t x) {
+    Union32 bits;
+    bits.i = x;
+    return bits.f;
+}
+// Converts x to the unsigned 32-bit type.
+SLANG_FORCE_INLINE uint32_t I32_asuint(int32_t x) {
+    return static_cast<uint32_t>(x);
+}
+// Builds a double from two 32-bit halves: `hi` supplies bits 32-63 and `low` bits 0-31.
+SLANG_FORCE_INLINE double I32_asdouble(int32_t low, int32_t hi) {
+    Union64 bits;
+    bits.u = uint32_t(low) | (uint64_t(hi) << 32);
+    return bits.d;
+}
+
+// ----------------------------- U32 -----------------------------------------
+
+// Absolute value of an unsigned value is the value itself.
+SLANG_FORCE_INLINE uint32_t U32_abs(uint32_t f) {
+    return f;
+}
+
+// Smaller of two unsigned 32-bit integers.
+SLANG_FORCE_INLINE uint32_t U32_min(uint32_t a, uint32_t b) {
+    return (b > a) ? a : b;
+}
+// Larger of two unsigned 32-bit integers.
+SLANG_FORCE_INLINE uint32_t U32_max(uint32_t a, uint32_t b) {
+    return (b < a) ? a : b;
+}
+
+// Bit pattern of x read back as a float (Union32 members u and f).
+SLANG_FORCE_INLINE float U32_asfloat(uint32_t x) {
+    Union32 bits;
+    bits.u = x;
+    return bits.f;
+}
+SLANG_FORCE_INLINE int32_t U32_asint(uint32_t x) // unsigned in, signed out (types were swapped)
+{
+    return int32_t(x); // same 32 bits viewed as a signed integer, mirroring I32_asuint
+}
+
+// Builds a double from two 32-bit halves: `hi` supplies bits 32-63 and `low` bits 0-31.
+SLANG_FORCE_INLINE double U32_asdouble(uint32_t low, uint32_t hi) {
+    Union64 bits;
+    bits.u = low | (uint64_t(hi) << 32);
+    return bits.d;
+}
+
+
+// Number of set bits in v, using a compiler intrinsic where one of the branches below applies.
+SLANG_FORCE_INLINE uint32_t U32_countbits(uint32_t v) {
+#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
+    return __builtin_popcount(v);
+#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
+    return __popcnt(v);
+#else
+    // Portable fallback: each pass clears the lowest set bit, so it loops once per set bit.
+    uint32_t setBits = 0;
+    for (; v != 0; v &= v - 1)
+    {
+        ++setBits;
+    }
+    return setBits;
+#endif
+}
+
+// ----------------------------- U64 -----------------------------------------
+
+// Absolute value of an unsigned value is the value itself.
+SLANG_FORCE_INLINE uint64_t U64_abs(uint64_t f) {
+    return f;
+}
+
+// Smaller of two unsigned 64-bit integers.
+SLANG_FORCE_INLINE uint64_t U64_min(uint64_t a, uint64_t b) {
+    return (b > a) ? a : b;
+}
+// Larger of two unsigned 64-bit integers.
+SLANG_FORCE_INLINE uint64_t U64_max(uint64_t a, uint64_t b) {
+    return (b < a) ? a : b;
+}
+
+// TODO(JS): We don't define countbits for 64bit in the core module currently.
+// It's not clear from documentation if it should return 32 or 64 bits, if it exists.
+// 32 bits can always hold the result, and will be implicitly promoted.
+SLANG_FORCE_INLINE uint32_t U64_countbits(uint64_t v)
+{
+#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
+    return uint32_t(__builtin_popcountll(v)); // `ll`: `long` can be 32-bit and drop the top half
+#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
+    return uint32_t(__popcnt64(v));
+#else
+    uint32_t c = 0;
+    while (v) // portable fallback: each pass clears the lowest set bit
+    {
+        c++;
+        v &= v - 1;
+    }
+    return c;
+#endif
+}
+
+// ----------------------------- I64 -----------------------------------------
+
+SLANG_FORCE_INLINE int64_t I64_abs(int64_t f)
+{
+    return (f < 0) ? int64_t(uint64_t(0) - uint64_t(f)) : f; // unsigned negate: no UB for INT64_MIN
+}
+
+// Smaller of two signed 64-bit integers.
+SLANG_FORCE_INLINE int64_t I64_min(int64_t a, int64_t b) {
+    return (b > a) ? a : b;
+}
+// Larger of two signed 64-bit integers.
+SLANG_FORCE_INLINE int64_t I64_max(int64_t a, int64_t b) {
+    return (b < a) ? a : b;
+}
+
+// ----------------------------- UPTR -----------------------------------------
+
+// Absolute value of an unsigned value is the value itself.
+SLANG_FORCE_INLINE uintptr_t UPTR_abs(uintptr_t f) {
+    return f;
+}
+
+// Smaller of two pointer-sized unsigned integers.
+SLANG_FORCE_INLINE uintptr_t UPTR_min(uintptr_t a, uintptr_t b) {
+    return (b > a) ? a : b;
+}
+
+// Larger of two pointer-sized unsigned integers.
+SLANG_FORCE_INLINE uintptr_t UPTR_max(uintptr_t a, uintptr_t b) {
+    return (b < a) ? a : b;
+}
+
+// ----------------------------- IPTR -----------------------------------------
+
+SLANG_FORCE_INLINE intptr_t IPTR_abs(intptr_t f)
+{
+    return (f < 0) ? intptr_t(uintptr_t(0) - uintptr_t(f)) : f; // unsigned negate: no UB at minimum
+}
+
+// Smaller of two pointer-sized signed integers.
+SLANG_FORCE_INLINE intptr_t IPTR_min(intptr_t a, intptr_t b) {
+    return (b > a) ? a : b;
+}
+
+// Larger of two pointer-sized signed integers.
+SLANG_FORCE_INLINE intptr_t IPTR_max(intptr_t a, intptr_t b) {
+    return (b < a) ? a : b;
+}
+
+// ----------------------------- Interlocked ---------------------------------
+
+#if SLANG_LLVM
+
+#else // SLANG_LLVM
+
+#ifdef _WIN32
+#include <intrin.h>
+#endif
+
+SLANG_FORCE_INLINE void InterlockedAdd(uint32_t* dest, uint32_t value, uint32_t* oldValue)
+{
+#ifdef _WIN32
+    *oldValue = _InterlockedExchangeAdd((long*)dest, (long)value); // intrinsic takes long operands
+#else
+    *oldValue = __sync_fetch_and_add(dest, value); // the builtin's result is stored to *oldValue
+#endif
+}
+
+#endif // SLANG_LLVM
+
+
+// ----------------------- fmod --------------------------
+// fmod overload for float operands; forwards to F32_fmod.
+SLANG_FORCE_INLINE float _slang_fmod(float x, float y) {
+    return F32_fmod(x, y);
+}
+// fmod overload for double operands; forwards to F64_fmod.
+SLANG_FORCE_INLINE double _slang_fmod(double x, double y) {
+    return F64_fmod(x, y);
+}
+
+#ifdef SLANG_PRELUDE_NAMESPACE
+}
+#endif
+
+#endif
diff --git a/external/slang/include/slang-cpp-types-core.h b/external/slang/include/slang-cpp-types-core.h
new file mode 100644
index 00000000..82674fef
--- /dev/null
+++ b/external/slang/include/slang-cpp-types-core.h
@@ -0,0 +1,696 @@
+#ifndef SLANG_PRELUDE_CPP_TYPES_CORE_H
+#define SLANG_PRELUDE_CPP_TYPES_CORE_H
+
+#ifndef SLANG_PRELUDE_ASSERT
+#ifdef SLANG_PRELUDE_ENABLE_ASSERT
+#define SLANG_PRELUDE_ASSERT(VALUE) assert(VALUE)
+#else
+#define SLANG_PRELUDE_ASSERT(VALUE)
+#endif
+#endif
+
+// Since we are using unsigned arithmetic, care is needed in this comparison.
+// It is *assumed* that sizeInBytes >= elemSize, which means (sizeInBytes - elemSize) >= 0,
+// so only a single test is needed.
+
+// Asserts for bounds checking. It is assumed index/count are unsigned types.
+// Macro arguments are parenthesised so that expression arguments keep their own precedence.
+#define SLANG_BOUND_ASSERT(index, count) SLANG_PRELUDE_ASSERT((index) < (count));
+#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+    SLANG_PRELUDE_ASSERT((index) <= ((sizeInBytes) - (elemSize)) && ((index) & 3) == 0);
+
+// Macros to zero index if an access is out of range (index must be an assignable lvalue)
+#define SLANG_BOUND_ZERO_INDEX(index, count) index = ((index) < (count)) ? (index) : 0;
+#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+    index = ((index) <= ((sizeInBytes) - (elemSize))) ? (index) : 0;
+
+// The 'FIX' macros define how the index is fixed. The default is to do nothing. If
+// SLANG_ENABLE_BOUND_ZERO_INDEX is defined, the fix macros zero the index when it is out of range.
+#ifdef SLANG_ENABLE_BOUND_ZERO_INDEX
+#define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
+#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+    SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
+#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
+#else
+#define SLANG_BOUND_FIX(index, count)
+#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
+#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
+#endif
+
+#ifndef SLANG_BOUND_CHECK // skipped when SLANG_BOUND_CHECK is already defined at this point
+#define SLANG_BOUND_CHECK(index, count) \
+    SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count)
+#endif
+
+#ifndef SLANG_BOUND_CHECK_BYTE_ADDRESS // same pattern: assert first, then apply the fix macro
+#define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+    SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes)    \
+    SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
+#endif
+
+#ifndef SLANG_BOUND_CHECK_FIXED_ARRAY // variant used by FixedArray::operator[]
+#define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) \
+    SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
+#endif
+
+struct TypeInfo
+{
+    size_t typeSize; // NOTE(review): presumably the described type's size in bytes — confirm
+};
+
+// Array wrapper with a compile-time element count; indexing passes through the
+// SLANG_BOUND_CHECK_FIXED_ARRAY hook before touching the storage.
+template<typename T, size_t SIZE>
+struct FixedArray
+{
+    T m_data[SIZE];
+
+    T& operator[](size_t index) {
+        SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE);
+        return m_data[index];
+    }
+    const T& operator[](size_t index) const {
+        SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE);
+        return m_data[index];
+    }
+};
+
+// Unsized arrays are lowered to 'Array': a pointer plus an element count, so that indexing can
+// be routed through SLANG_BOUND_CHECK.
+template<typename T>
+struct Array
+{
+    T* data;
+    size_t count;
+
+    // Mutable element access.
+    T& operator[](size_t index) {
+        SLANG_BOUND_CHECK(index, count);
+        return data[index];
+    }
+    // Read-only element access.
+    const T& operator[](size_t index) const {
+        SLANG_BOUND_CHECK(index, count);
+        return data[index];
+    }
+};
+
+/* Constant buffers become a pointer to the contained type, so ConstantBuffer<T> becomes T* in C++
+ * code.
+ */
+
+template<typename T, int COUNT>
+struct Vector;
+
+// One-component vector. It converts implicitly to and from its scalar type, and every index
+// refers to the single component x.
+template<typename T>
+struct Vector<T, 1>
+{
+    T x;
+
+    Vector() = default;
+    Vector(T scalar) { x = scalar; }
+    // Element-wise converting copy from another one-component vector.
+    template<typename U>
+    Vector(Vector<U, 1> other) { x = (T)other.x; }
+    // Converting copy from a vector of another size: copies min(1, otherSize) components.
+    template<typename U, int otherSize>
+    Vector(Vector<U, otherSize> other)
+    {
+        const int copied = (otherSize < 1) ? otherSize : 1;
+        for (int i = 0; i < copied; i++)
+            (*this)[i] = (T)other[i];
+    }
+    operator T() const { return x; }
+    T& operator[](size_t /*index*/) { return x; }
+    const T& operator[](size_t /*index*/) const { return x; }
+};
+
+// Two-component vector. Index 0 selects x; any other index selects y.
+template<typename T>
+struct Vector<T, 2>
+{
+    T x, y;
+    Vector() = default;
+    Vector(T scalar) { x = y = scalar; }
+    Vector(T _x, T _y)
+    {
+        x = _x;
+        y = _y;
+    }
+    template<typename U>
+    Vector(Vector<U, 2> other)
+    {
+        x = (T)other.x;
+        y = (T)other.y;
+    }
+    // Converting copy from a vector of another size: copies min(2, otherSize) components.
+    template<typename U, int otherSize>
+    Vector(Vector<U, otherSize> other)
+    {
+        const int copied = (otherSize < 2) ? otherSize : 2;
+        for (int i = 0; i < copied; i++)
+            (*this)[i] = (T)other[i];
+    }
+    T& operator[](size_t index) { return index == 0 ? x : y; }
+    const T& operator[](size_t index) const { return index == 0 ? x : y; }
+};
+
+template<typename T>
+struct Vector<T, 3>
+{
+    T x, y, z; // operator[] offsets from `this` as a T*, so it relies on x, y, z being contiguous
+    const T& operator[](size_t index) const { return *((T*)(this) + index); }
+    T& operator[](size_t index) { return *((T*)(this) + index); }
+
+    Vector() = default;
+    Vector(T scalar) { x = y = z = scalar; } // every component receives the same scalar
+    Vector(T _x, T _y, T _z)
+    {
+        x = _x;
+        y = _y;
+        z = _z;
+    }
+    template<typename U>
+    Vector(Vector<U, 3> other) // same-size conversion, component by component
+    {
+        x = (T)other.x;
+        y = (T)other.y;
+        z = (T)other.z;
+    }
+    template<typename U, int otherSize>
+    Vector(Vector<U, otherSize> other) // other sizes: copies min(3, otherSize) components
+    {
+        int minSize = 3;
+        if (otherSize < minSize)
+            minSize = otherSize;
+        for (int i = 0; i < minSize; i++)
+            (*this)[i] = (T)other[i];
+    }
+};
+
+template<typename T>
+struct Vector<T, 4>
+{
+    T x, y, z, w; // operator[] offsets from `this` as a T*, so it relies on contiguous members
+
+    const T& operator[](size_t index) const { return *((T*)(this) + index); }
+    T& operator[](size_t index) { return *((T*)(this) + index); }
+    Vector() = default;
+    Vector(T scalar) { x = y = z = w = scalar; } // every component receives the same scalar
+    Vector(T _x, T _y, T _z, T _w)
+    {
+        x = _x;
+        y = _y;
+        z = _z;
+        w = _w;
+    }
+    template<typename U, int otherSize>
+    Vector(Vector<U, otherSize> other) // converting copy of min(4, otherSize) components
+    {
+        int minSize = 4;
+        if (otherSize < minSize)
+            minSize = otherSize;
+        for (int i = 0; i < minSize; i++)
+            (*this)[i] = (T)other[i];
+    }
+};
+
+// Component-wise select: lane i of the result is v0[i] where condition[i] is true, else v1[i].
+// All three arguments are taken by value.
+template<typename T, int N>
+SLANG_FORCE_INLINE Vector<T, N> _slang_select(
+    Vector<bool, N> condition,
+    Vector<T, N> v0,
+    Vector<T, N> v1)
+{
+    Vector<T, N> picked;
+    for (int lane = 0; lane < N; ++lane)
+        picked[lane] = condition[lane] ? v0[lane] : v1[lane];
+    return picked;
+}
+
+// Scalar select: returns v0 when condition is true and v1 otherwise.
+template<typename T>
+SLANG_FORCE_INLINE T _slang_select(bool condition, T v0, T v1) {
+    return condition ? v0 : v1;
+}
+
+// Returns component `index` of x by value (x itself is passed by value).
+template<typename T, int N>
+SLANG_FORCE_INLINE T _slang_vector_get_element(Vector<T, N> x, int index) {
+    return x[index];
+}
+
+// Address of component `index` inside a read-only vector.
+template<typename T, int N>
+SLANG_FORCE_INLINE const T* _slang_vector_get_element_ptr(const Vector<T, N>* x, int index) {
+    return &((*x)[index]);
+}
+
+// Address of component `index` inside a mutable vector.
+template<typename T, int N>
+SLANG_FORCE_INLINE T* _slang_vector_get_element_ptr(Vector<T, N>* x, int index) {
+    return &((*x)[index]);
+}
+
+// Converts `other` to n components of type T: copies the first min(n, m) components and
+// fills any remaining ones with zero.
+template<typename T, int n, typename OtherT, int m>
+SLANG_FORCE_INLINE Vector<T, n> _slang_vector_reshape(const Vector<OtherT, m> other)
+{
+    Vector<T, n> reshaped;
+    for (int lane = 0; lane < n; ++lane)
+    {
+        const OtherT source = (lane < m) ? _slang_vector_get_element(other, lane) : OtherT(T(0));
+        *_slang_vector_get_element_ptr(&reshaped, lane) = (T)source;
+    }
+    return reshaped;
+}
+
+typedef uint32_t uint;
+
+#define SLANG_VECTOR_BINARY_OP(T, op) /* element-wise `a op b`; result elements are T */ \
+    template<int n>                              \
+    SLANG_FORCE_INLINE Vector<T, n> operator op( \
+        const Vector<T, n>& thisVal,             \
+        const Vector<T, n>& other)               \
+    {                                            \
+        Vector<T, n> result;                     \
+        for (int i = 0; i < n; i++)              \
+            result[i] = thisVal[i] op other[i];  \
+        return result;                           \
+    }
+#define SLANG_VECTOR_BINARY_COMPARE_OP(T, op) /* element-wise comparison; result is bool */ \
+    template<int n>                                 \
+    SLANG_FORCE_INLINE Vector<bool, n> operator op( \
+        const Vector<T, n>& thisVal,                \
+        const Vector<T, n>& other)                  \
+    {                                               \
+        Vector<bool, n> result;                     \
+        for (int i = 0; i < n; i++)                 \
+            result[i] = thisVal[i] op other[i];     \
+        return result;                              \
+    }
+
+#define SLANG_VECTOR_UNARY_OP(T, op) /* element-wise prefix `op a` */        \
+    template<int n>                                                          \
+    SLANG_FORCE_INLINE Vector<T, n> operator op(const Vector<T, n>& thisVal) \
+    {                                                                        \
+        Vector<T, n> result;                                                 \
+        for (int i = 0; i < n; i++)                                          \
+            result[i] = op thisVal[i];                                       \
+        return result;                                                       \
+    }
+#define SLANG_INT_VECTOR_OPS(T) /* operator set generated for each integer-like T */ \
+    SLANG_VECTOR_BINARY_OP(T, +)          \
+    SLANG_VECTOR_BINARY_OP(T, -)          \
+    SLANG_VECTOR_BINARY_OP(T, *)          \
+    SLANG_VECTOR_BINARY_OP(T, /)          \
+    SLANG_VECTOR_BINARY_OP(T, &)          \
+    SLANG_VECTOR_BINARY_OP(T, |)          \
+    SLANG_VECTOR_BINARY_OP(T, &&)         \
+    SLANG_VECTOR_BINARY_OP(T, ||)         \
+    SLANG_VECTOR_BINARY_OP(T, ^)          \
+    SLANG_VECTOR_BINARY_OP(T, %)          \
+    SLANG_VECTOR_BINARY_OP(T, >>)         \
+    SLANG_VECTOR_BINARY_OP(T, <<)         \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, >)  \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, <)  \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, >=) \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, <=) \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, ==) \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, !=) \
+    SLANG_VECTOR_UNARY_OP(T, !)           \
+    SLANG_VECTOR_UNARY_OP(T, ~)
+#define SLANG_FLOAT_VECTOR_OPS(T) /* arithmetic, unary minus and comparisons only */ \
+    SLANG_VECTOR_BINARY_OP(T, +)          \
+    SLANG_VECTOR_BINARY_OP(T, -)          \
+    SLANG_VECTOR_BINARY_OP(T, *)          \
+    SLANG_VECTOR_BINARY_OP(T, /)          \
+    SLANG_VECTOR_UNARY_OP(T, -)           \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, >)  \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, <)  \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, >=) \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, <=) \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, ==) \
+    SLANG_VECTOR_BINARY_COMPARE_OP(T, !=)
+
+SLANG_INT_VECTOR_OPS(bool) // bool receives the full integer operator set as well
+SLANG_INT_VECTOR_OPS(int)
+SLANG_INT_VECTOR_OPS(int8_t)
+SLANG_INT_VECTOR_OPS(int16_t)
+SLANG_INT_VECTOR_OPS(int64_t)
+SLANG_INT_VECTOR_OPS(uint)
+SLANG_INT_VECTOR_OPS(uint8_t)
+SLANG_INT_VECTOR_OPS(uint16_t)
+SLANG_INT_VECTOR_OPS(uint64_t)
+
+SLANG_FLOAT_VECTOR_OPS(float)
+SLANG_FLOAT_VECTOR_OPS(double)
+
+#define SLANG_VECTOR_INT_NEG_OP(T) /* unary minus written as `0 - element` */ \
+    template<int N>                                     \
+    Vector<T, N> operator-(const Vector<T, N>& thisVal) \
+    {                                                   \
+        Vector<T, N> result;                            \
+        for (int i = 0; i < N; i++)                     \
+            result[i] = 0 - thisVal[i];                 \
+        return result;                                  \
+    }
+SLANG_VECTOR_INT_NEG_OP(int)
+SLANG_VECTOR_INT_NEG_OP(int8_t)
+SLANG_VECTOR_INT_NEG_OP(int16_t)
+SLANG_VECTOR_INT_NEG_OP(int64_t)
+SLANG_VECTOR_INT_NEG_OP(uint) // the unsigned element types get unary minus too
+SLANG_VECTOR_INT_NEG_OP(uint8_t)
+SLANG_VECTOR_INT_NEG_OP(uint16_t)
+SLANG_VECTOR_INT_NEG_OP(uint64_t)
+
+#define SLANG_FLOAT_VECTOR_MOD(T) /* element-wise % implemented with _slang_fmod */ \
+    template<int N>                                                             \
+    Vector<T, N> operator%(const Vector<T, N>& left, const Vector<T, N>& right) \
+    {                                                                           \
+        Vector<T, N> result;                                                    \
+        for (int i = 0; i < N; i++)                                             \
+            result[i] = _slang_fmod(left[i], right[i]);                         \
+        return result;                                                          \
+    }
+
+SLANG_FLOAT_VECTOR_MOD(float)
+SLANG_FLOAT_VECTOR_MOD(double)
+#undef SLANG_FLOAT_VECTOR_MOD
+#undef SLANG_VECTOR_BINARY_OP
+#undef SLANG_VECTOR_UNARY_OP
+#undef SLANG_INT_VECTOR_OPS
+#undef SLANG_FLOAT_VECTOR_OPS
+#undef SLANG_VECTOR_INT_NEG_OP
+#undef SLANG_FLOAT_VECTOR_MOD
+
+template<typename T, int ROWS, int COLS>
+struct Matrix // stored as ROWS row vectors of COLS components each
+{
+    Vector<T, COLS> rows[ROWS];
+    const Vector<T, COLS>& operator[](size_t index) const { return rows[index]; }
+    Vector<T, COLS>& operator[](size_t index) { return rows[index]; }
+    Matrix() = default;
+    Matrix(T scalar) // every element receives the same scalar
+    {
+        for (int i = 0; i < ROWS; i++)
+            rows[i] = Vector<T, COLS>(scalar);
+    }
+    Matrix(const Vector<T, COLS>& row0) { rows[0] = row0; } // only row 0 is assigned
+    Matrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1)
+    {
+        rows[0] = row0;
+        rows[1] = row1;
+    }
+    Matrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1, const Vector<T, COLS>& row2)
+    {
+        rows[0] = row0;
+        rows[1] = row1;
+        rows[2] = row2;
+    }
+    Matrix(
+        const Vector<T, COLS>& row0,
+        const Vector<T, COLS>& row1,
+        const Vector<T, COLS>& row2,
+        const Vector<T, COLS>& row3)
+    {
+        rows[0] = row0;
+        rows[1] = row1;
+        rows[2] = row2;
+        rows[3] = row3;
+    }
+    template<typename U, int otherRow, int otherCol>
+    Matrix(const Matrix<U, otherRow, otherCol>& other) // converts the overlapping top-left block
+    {
+        int minRow = ROWS;
+        int minCol = COLS;
+        if (minRow > otherRow)
+            minRow = otherRow;
+        if (minCol > otherCol)
+            minCol = otherCol;
+        for (int i = 0; i < minRow; i++)
+            for (int j = 0; j < minCol; j++)
+                rows[i][j] = (T)other.rows[i][j];
+    }
+    Matrix(T v0, T v1, T v2, T v3) // four scalars fill a 2x2 block in row order
+    {
+        rows[0][0] = v0;
+        rows[0][1] = v1;
+        rows[1][0] = v2;
+        rows[1][1] = v3;
+    }
+    Matrix(T v0, T v1, T v2, T v3, T v4, T v5)
+    {
+        if (COLS == 3) // six scalars: 2 rows of 3 when COLS is 3, otherwise 3 rows of 2
+        {
+            rows[0][0] = v0;
+            rows[0][1] = v1;
+            rows[0][2] = v2;
+            rows[1][0] = v3;
+            rows[1][1] = v4;
+            rows[1][2] = v5;
+        }
+        else
+        {
+            rows[0][0] = v0;
+            rows[0][1] = v1;
+            rows[1][0] = v2;
+            rows[1][1] = v3;
+            rows[2][0] = v4;
+            rows[2][1] = v5;
+        }
+    }
+    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7)
+    {
+        if (COLS == 4) // eight scalars: 2 rows of 4 when COLS is 4, otherwise 4 rows of 2
+        {
+            rows[0][0] = v0;
+            rows[0][1] = v1;
+            rows[0][2] = v2;
+            rows[0][3] = v3;
+            rows[1][0] = v4;
+            rows[1][1] = v5;
+            rows[1][2] = v6;
+            rows[1][3] = v7;
+        }
+        else
+        {
+            rows[0][0] = v0;
+            rows[0][1] = v1;
+            rows[1][0] = v2;
+            rows[1][1] = v3;
+            rows[2][0] = v4;
+            rows[2][1] = v5;
+            rows[3][0] = v6;
+            rows[3][1] = v7;
+        }
+    }
+    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8) // nine scalars fill 3x3
+    {
+        rows[0][0] = v0;
+        rows[0][1] = v1;
+        rows[0][2] = v2;
+        rows[1][0] = v3;
+        rows[1][1] = v4;
+        rows[1][2] = v5;
+        rows[2][0] = v6;
+        rows[2][1] = v7;
+        rows[2][2] = v8;
+    }
+    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11)
+    {
+        if (COLS == 4) // twelve scalars: 3 rows of 4 when COLS is 4, otherwise 4 rows of 3
+        {
+            rows[0][0] = v0;
+            rows[0][1] = v1;
+            rows[0][2] = v2;
+            rows[0][3] = v3;
+            rows[1][0] = v4;
+            rows[1][1] = v5;
+            rows[1][2] = v6;
+            rows[1][3] = v7;
+            rows[2][0] = v8;
+            rows[2][1] = v9;
+            rows[2][2] = v10;
+            rows[2][3] = v11;
+        }
+        else
+        {
+            rows[0][0] = v0;
+            rows[0][1] = v1;
+            rows[0][2] = v2;
+            rows[1][0] = v3;
+            rows[1][1] = v4;
+            rows[1][2] = v5;
+            rows[2][0] = v6;
+            rows[2][1] = v7;
+            rows[2][2] = v8;
+            rows[3][0] = v9;
+            rows[3][1] = v10;
+            rows[3][2] = v11;
+        }
+    }
+    Matrix(
+        T v0,
+        T v1,
+        T v2,
+        T v3,
+        T v4,
+        T v5,
+        T v6,
+        T v7,
+        T v8,
+        T v9,
+        T v10,
+        T v11,
+        T v12,
+        T v13,
+        T v14,
+        T v15)
+    {
+        rows[0][0] = v0; // sixteen scalars fill 4x4 in row order
+        rows[0][1] = v1;
+        rows[0][2] = v2;
+        rows[0][3] = v3;
+        rows[1][0] = v4;
+        rows[1][1] = v5;
+        rows[1][2] = v6;
+        rows[1][3] = v7;
+        rows[2][0] = v8;
+        rows[2][1] = v9;
+        rows[2][2] = v10;
+        rows[2][3] = v11;
+        rows[3][0] = v12;
+        rows[3][1] = v13;
+        rows[3][2] = v14;
+        rows[3][3] = v15;
+    }
+};
+
+#define SLANG_MATRIX_BINARY_OP(T, op) /* element-wise `a op b` over every row and column */  \
+    template<int R, int C>                                                                    \
+    Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal, const Matrix<T, R, C>& other) \
+    {                                                                                         \
+        Matrix<T, R, C> result;                                                               \
+        for (int i = 0; i < R; i++)                                                           \
+            for (int j = 0; j < C; j++)                                                       \
+                result.rows[i][j] = thisVal.rows[i][j] op other.rows[i][j];                   \
+        return result;                                                                        \
+    }
+
+#define SLANG_MATRIX_BINARY_COMPARE_OP(T, op) /* element-wise comparison; result is bool */     \
+    template<int R, int C>                                                                       \
+    Matrix<bool, R, C> operator op(const Matrix<T, R, C>& thisVal, const Matrix<T, R, C>& other) \
+    {                                                                                            \
+        Matrix<bool, R, C> result;                                                               \
+        for (int i = 0; i < R; i++)                                                              \
+            for (int j = 0; j < C; j++)                                                          \
+                result.rows[i][j] = thisVal.rows[i][j] op other.rows[i][j];                      \
+        return result;                                                                           \
+    }
+
+#define SLANG_MATRIX_UNARY_OP(T, op) /* element-wise prefix op; result[i] is a row, not .rows */ \
+    template<int R, int C>                                      \
+    Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal) \
+    {                                                           \
+        Matrix<T, R, C> result;                                 \
+        for (int i = 0; i < R; i++)                             \
+            for (int j = 0; j < C; j++)                         \
+                result.rows[i][j] = op thisVal.rows[i][j];      \
+        return result;                                          \
+    }
+
+#define SLANG_INT_MATRIX_OPS(T) /* like the vector list, but without >> and << */ \
+    SLANG_MATRIX_BINARY_OP(T, +)          \
+    SLANG_MATRIX_BINARY_OP(T, -)          \
+    SLANG_MATRIX_BINARY_OP(T, *)          \
+    SLANG_MATRIX_BINARY_OP(T, /)          \
+    SLANG_MATRIX_BINARY_OP(T, &)          \
+    SLANG_MATRIX_BINARY_OP(T, |)          \
+    SLANG_MATRIX_BINARY_OP(T, &&)         \
+    SLANG_MATRIX_BINARY_OP(T, ||)         \
+    SLANG_MATRIX_BINARY_OP(T, ^)          \
+    SLANG_MATRIX_BINARY_OP(T, %)          \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, >)  \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, <)  \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, >=) \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, <=) \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, ==) \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, !=) \
+    SLANG_MATRIX_UNARY_OP(T, !)           \
+    SLANG_MATRIX_UNARY_OP(T, ~)
+#define SLANG_FLOAT_MATRIX_OPS(T) /* arithmetic, unary minus and comparisons only */ \
+    SLANG_MATRIX_BINARY_OP(T, +)          \
+    SLANG_MATRIX_BINARY_OP(T, -)          \
+    SLANG_MATRIX_BINARY_OP(T, *)          \
+    SLANG_MATRIX_BINARY_OP(T, /)          \
+    SLANG_MATRIX_UNARY_OP(T, -)           \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, >)  \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, <)  \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, >=) \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, <=) \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, ==) \
+    SLANG_MATRIX_BINARY_COMPARE_OP(T, !=)
+SLANG_INT_MATRIX_OPS(int) // no bool instantiation here, unlike the vector operators
+SLANG_INT_MATRIX_OPS(int8_t)
+SLANG_INT_MATRIX_OPS(int16_t)
+SLANG_INT_MATRIX_OPS(int64_t)
+SLANG_INT_MATRIX_OPS(uint)
+SLANG_INT_MATRIX_OPS(uint8_t)
+SLANG_INT_MATRIX_OPS(uint16_t)
+SLANG_INT_MATRIX_OPS(uint64_t)
+
+SLANG_FLOAT_MATRIX_OPS(float)
+SLANG_FLOAT_MATRIX_OPS(double)
+
+#define SLANG_MATRIX_INT_NEG_OP(T) /* unary minus written as `0 - element` */ \
+    template<int R, int C>                                                \
+    SLANG_FORCE_INLINE Matrix<T, R, C> operator-(Matrix<T, R, C> thisVal) \
+    {                                                                     \
+        Matrix<T, R, C> result;                                           \
+        for (int i = 0; i < R; i++)                                       \
+            for (int j = 0; j < C; j++)                                   \
+                result.rows[i][j] = 0 - thisVal.rows[i][j];               \
+        return result;                                                    \
+    }
+SLANG_MATRIX_INT_NEG_OP(int)
+SLANG_MATRIX_INT_NEG_OP(int8_t)
+SLANG_MATRIX_INT_NEG_OP(int16_t)
+SLANG_MATRIX_INT_NEG_OP(int64_t)
+SLANG_MATRIX_INT_NEG_OP(uint) // the unsigned element types get unary minus too
+SLANG_MATRIX_INT_NEG_OP(uint8_t)
+SLANG_MATRIX_INT_NEG_OP(uint16_t)
+SLANG_MATRIX_INT_NEG_OP(uint64_t)
+
+#define SLANG_FLOAT_MATRIX_MOD(T) /* element-wise % implemented with _slang_fmod */          \
+    template<int R, int C>                                                                    \
+    SLANG_FORCE_INLINE Matrix<T, R, C> operator%(Matrix<T, R, C> left, Matrix<T, R, C> right) \
+    {                                                                                         \
+        Matrix<T, R, C> result;                                                               \
+        for (int i = 0; i < R; i++)                                                           \
+            for (int j = 0; j < C; j++)                                                       \
+                result.rows[i][j] = _slang_fmod(left.rows[i][j], right.rows[i][j]);           \
+        return result;                                                                        \
+    }
+
+SLANG_FLOAT_MATRIX_MOD(float)
+SLANG_FLOAT_MATRIX_MOD(double)
+#undef SLANG_FLOAT_MATRIX_MOD
+#undef SLANG_MATRIX_BINARY_OP
+#undef SLANG_MATRIX_UNARY_OP
+#undef SLANG_INT_MATRIX_OPS
+#undef SLANG_FLOAT_MATRIX_OPS
+#undef SLANG_MATRIX_INT_NEG_OP
+#undef SLANG_FLOAT_MATRIX_MOD
+
+template<typename TResult, typename TInput>
+TResult slang_bit_cast(TInput val)
+{
+    return *(TResult*)(&val); // reads val's storage through a TResult pointer; sizes are unchecked
+}
+
+#endif
diff --git a/external/slang/include/slang-cpp-types.h b/external/slang/include/slang-cpp-types.h
new file mode 100644
index 00000000..010ab8d6
--- /dev/null
+++ b/external/slang/include/slang-cpp-types.h
@@ -0,0 +1,1364 @@
+#ifndef SLANG_PRELUDE_CPP_TYPES_H
+#define SLANG_PRELUDE_CPP_TYPES_H
+
+#ifdef SLANG_PRELUDE_NAMESPACE
+namespace SLANG_PRELUDE_NAMESPACE
+{
+#endif
+
+#ifndef SLANG_FORCE_INLINE
+#define SLANG_FORCE_INLINE inline
+#endif
+
+#include "slang-cpp-types-core.h"
+
+using float2 = Vector<float, 2>;
+using float3 = Vector<float, 3>;
+using float4 = Vector<float, 4>;
+
+using int2 = Vector<int32_t, 2>;
+using int3 = Vector<int32_t, 3>;
+using int4 = Vector<int32_t, 4>;
+
+using uint2 = Vector<uint32_t, 2>;
+using uint3 = Vector<uint32_t, 3>;
+using uint4 = Vector<uint32_t, 4>;
+
+// The CPU target needs no special handling for non-uniform resource accesses, so
+// `NonUniformResourceIndex` is simply the plain index type.
+using NonUniformResourceIndex = size_t;
+
+// ----------------------------- ResourceType -----------------------------------------
+
+// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-structuredbuffer-getdimensions
+// Not implemented: the Load(_In_ int Location, _Out_ uint Status) overload that also reports a status.
+
+/// Writable structured buffer: a raw `data` pointer plus element `count`; indexing runs SLANG_BOUND_CHECK.
+template<typename T> struct RWStructuredBuffer
+{
+    SLANG_FORCE_INLINE T& operator[](size_t index) const
+    {
+        SLANG_BOUND_CHECK(index, count);
+        return *(data + index);
+    }
+    const T& Load(size_t index) const
+    {
+        SLANG_BOUND_CHECK(index, count);
+        return *(data + index);
+    }
+    void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride)
+    {
+        *outNumStructs = static_cast<uint32_t>(count); // element count, narrowed to 32 bits
+        *outStride = static_cast<uint32_t>(sizeof(T)); // stride is the size of one element
+    }
+
+    T* data;      ///< First element
+    size_t count; ///< Number of T elements (what GetDimensions reports)
+};
+
+/// Read-only structured buffer: a raw `data` pointer plus element `count`; indexing runs SLANG_BOUND_CHECK.
+template<typename T> struct StructuredBuffer
+{
+    SLANG_FORCE_INLINE const T& operator[](size_t index) const
+    {
+        SLANG_BOUND_CHECK(index, count);
+        return *(data + index);
+    }
+    const T& Load(size_t index) const
+    {
+        SLANG_BOUND_CHECK(index, count);
+        return *(data + index);
+    }
+    void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride)
+    {
+        *outNumStructs = static_cast<uint32_t>(count); // element count, narrowed to 32 bits
+        *outStride = static_cast<uint32_t>(sizeof(T)); // stride is the size of one element
+    }
+
+    T* data;      ///< First element (only const references are handed out)
+    size_t count; ///< Number of T elements (what GetDimensions reports)
+};
+
+
+/// Writable typed buffer: a raw `data` pointer plus element `count`; indexing runs SLANG_BOUND_CHECK.
+template<typename T> struct RWBuffer
+{
+    SLANG_FORCE_INLINE T& operator[](size_t index) const
+    {
+        SLANG_BOUND_CHECK(index, count);
+        return *(data + index);
+    }
+    const T& Load(size_t index) const
+    {
+        SLANG_BOUND_CHECK(index, count);
+        return *(data + index);
+    }
+    void GetDimensions(uint32_t* outCount) { *outCount = static_cast<uint32_t>(count); }
+
+    T* data;      ///< First element
+    size_t count; ///< Number of T elements (what GetDimensions reports)
+};
+
+/// Read-only typed buffer: a raw `data` pointer plus element `count`; indexing runs SLANG_BOUND_CHECK.
+template<typename T> struct Buffer
+{
+    SLANG_FORCE_INLINE const T& operator[](size_t index) const
+    {
+        SLANG_BOUND_CHECK(index, count);
+        return *(data + index);
+    }
+    const T& Load(size_t index) const
+    {
+        SLANG_BOUND_CHECK(index, count);
+        return *(data + index);
+    }
+    void GetDimensions(uint32_t* outCount) { *outCount = static_cast<uint32_t>(count); }
+
+    T* data;      ///< First element (only const references are handed out)
+    size_t count; ///< Number of T elements (what GetDimensions reports)
+};
+
+// Not implemented: the Load(_In_ int Location, _Out_ uint Status) overload that also reports a status.
+struct ByteAddressBuffer // Read-only buffer of 32-bit words, addressed by byte offset.
+{
+    void GetDimensions(uint32_t* outDim) const { *outDim = static_cast<uint32_t>(sizeInBytes); }
+    uint32_t Load(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
+        return data[index / 4]; // byte offset -> word index
+    }
+    uint2 Load2(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
+        const uint32_t* words = data + index / 4;
+        return uint2{words[0], words[1]};
+    }
+    uint3 Load3(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
+        const uint32_t* words = data + index / 4;
+        return uint3{words[0], words[1], words[2]};
+    }
+    uint4 Load4(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
+        const uint32_t* words = data + index / 4;
+        return uint4{words[0], words[1], words[2], words[3]};
+    }
+    template<typename T>
+    T Load(size_t index) const // reads a T at the exact byte offset (not rounded down to a word)
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
+        return *(const T*)((const char*)data + index);
+    }
+
+    const uint32_t* data; ///< Storage viewed as 32-bit words
+    size_t sizeInBytes;   //< Must be multiple of 4
+};
+
+// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-rwbyteaddressbuffer
+// Missing support for Atomic operations
+// Missing support for Load with status
+struct RWByteAddressBuffer // Writable buffer of 32-bit words, addressed by byte offset.
+{
+    void GetDimensions(uint32_t* outDim) const { *outDim = static_cast<uint32_t>(sizeInBytes); }
+
+    uint32_t Load(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
+        return data[index / 4]; // byte offset -> word index
+    }
+    uint2 Load2(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
+        const uint32_t* words = data + index / 4;
+        return uint2{words[0], words[1]};
+    }
+    uint3 Load3(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
+        const uint32_t* words = data + index / 4;
+        return uint3{words[0], words[1], words[2]};
+    }
+    uint4 Load4(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
+        const uint32_t* words = data + index / 4;
+        return uint4{words[0], words[1], words[2], words[3]};
+    }
+    template<typename T>
+    T Load(size_t index) const // reads a T at the exact byte offset (not rounded down to a word)
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
+        return *(const T*)((const char*)data + index);
+    }
+
+    void Store(size_t index, uint32_t v) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
+        data[index / 4] = v;
+    }
+    void Store2(size_t index, uint2 v) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
+        uint32_t* words = data + index / 4;
+        words[0] = v.x;
+        words[1] = v.y;
+    }
+    void Store3(size_t index, uint3 v) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
+        uint32_t* words = data + index / 4;
+        words[0] = v.x;
+        words[1] = v.y;
+        words[2] = v.z;
+    }
+    void Store4(size_t index, uint4 v) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
+        uint32_t* words = data + index / 4;
+        words[0] = v.x;
+        words[1] = v.y;
+        words[2] = v.z;
+        words[3] = v.w;
+    }
+    template<typename T>
+    void Store(size_t index, T const& value) const // writes a T at the exact byte offset
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
+        *(T*)((char*)data + index) = value;
+    }
+
+    uint32_t* data;     ///< Storage viewed as 32-bit words
+    size_t sizeInBytes; //< Must be multiple of 4
+};
+
+// Sampler implementations are opaque in this header; they are only referenced through pointers.
+struct ISamplerComparisonState;
+struct ISamplerState;
+
+/// Value handle around an ISamplerState pointer.
+struct SamplerState
+{
+    ISamplerState* state;
+};
+
+/// Value handle around an ISamplerComparisonState pointer.
+struct SamplerComparisonState { ISamplerComparisonState* state; };
+
+#ifndef SLANG_RESOURCE_SHAPE // skipped when the guard is already defined (presumably by slang.h - confirm)
+#define SLANG_RESOURCE_SHAPE
+typedef unsigned int SlangResourceShape; // packed value: base shape in the low nibble, flags in the high nibble
+enum
+{
+    SLANG_RESOURCE_BASE_SHAPE_MASK = 0x0F, // isolates the base shape (used by TextureDimensions)
+
+    SLANG_RESOURCE_NONE = 0x00,
+
+    SLANG_TEXTURE_1D = 0x01,
+    SLANG_TEXTURE_2D = 0x02,
+    SLANG_TEXTURE_3D = 0x03,
+    SLANG_TEXTURE_CUBE = 0x04,
+    SLANG_TEXTURE_BUFFER = 0x05,
+
+    SLANG_STRUCTURED_BUFFER = 0x06,
+    SLANG_BYTE_ADDRESS_BUFFER = 0x07,
+    SLANG_RESOURCE_UNKNOWN = 0x08,
+    SLANG_ACCELERATION_STRUCTURE = 0x09,
+    SLANG_TEXTURE_SUBPASS = 0x0A,
+
+    SLANG_RESOURCE_EXT_SHAPE_MASK = 0xF0, // covers the modifier flags declared below
+
+    SLANG_TEXTURE_FEEDBACK_FLAG = 0x10,
+    SLANG_TEXTURE_ARRAY_FLAG = 0x40,
+    SLANG_TEXTURE_MULTISAMPLE_FLAG = 0x80,
+
+    SLANG_TEXTURE_1D_ARRAY = SLANG_TEXTURE_1D | SLANG_TEXTURE_ARRAY_FLAG, // convenience: base shape | flag(s)
+    SLANG_TEXTURE_2D_ARRAY = SLANG_TEXTURE_2D | SLANG_TEXTURE_ARRAY_FLAG,
+    SLANG_TEXTURE_CUBE_ARRAY = SLANG_TEXTURE_CUBE | SLANG_TEXTURE_ARRAY_FLAG,
+
+    SLANG_TEXTURE_2D_MULTISAMPLE = SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG,
+    SLANG_TEXTURE_2D_MULTISAMPLE_ARRAY =
+        SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG | SLANG_TEXTURE_ARRAY_FLAG,
+    SLANG_TEXTURE_SUBPASS_MULTISAMPLE = SLANG_TEXTURE_SUBPASS | SLANG_TEXTURE_MULTISAMPLE_FLAG,
+};
+#endif
+
+/// Shape and extents of a texture, as returned by ITexture::GetDimensions.
+struct TextureDimensions
+{
+    uint32_t shape;
+    uint32_t width, height, depth;
+    uint32_t numberOfLevels;
+    uint32_t arrayElementCount; ///< For array types, 0 otherwise
+
+    /// Zeroes every field.
+    void reset()
+    {
+        shape = 0;
+        depth = 0;
+        height = 0;
+        width = 0;
+        numberOfLevels = 0;
+        arrayElementCount = 0;
+    }
+
+    /// Writes the per-axis sizes implied by `shape` into `outDims` and returns how many were
+    /// written. 1D/2D/3D give width[, height[, depth]]; a cube gives width, height, 6. When
+    /// SLANG_TEXTURE_ARRAY_FLAG is set, `arrayElementCount` is appended last. Any other base
+    /// shape contributes nothing.
+    int getDimSizes(uint32_t outDims[4]) const
+    {
+        int written = 0;
+        switch (shape & SLANG_RESOURCE_BASE_SHAPE_MASK)
+        {
+        case SLANG_TEXTURE_1D:
+            outDims[written++] = width;
+            break;
+        case SLANG_TEXTURE_2D:
+            outDims[written++] = width;
+            outDims[written++] = height;
+            break;
+        case SLANG_TEXTURE_3D:
+            outDims[written++] = width;
+            outDims[written++] = height;
+            outDims[written++] = depth;
+            break;
+        case SLANG_TEXTURE_CUBE:
+            outDims[written++] = width;
+            outDims[written++] = height;
+            outDims[written++] = 6;
+            break;
+        default:
+            break;
+        }
+        if ((shape & SLANG_TEXTURE_ARRAY_FLAG) != 0)
+        {
+            outDims[written++] = arrayElementCount;
+        }
+        return written;
+    }
+
+    /// Writes the extents that calcMaxMIPLevels halves and returns their count; cubes count as 2D.
+    int getMIPDims(int outDims[3]) const
+    {
+        int written = 0;
+        switch (shape & SLANG_RESOURCE_BASE_SHAPE_MASK)
+        {
+        case SLANG_TEXTURE_1D:
+            outDims[written++] = width;
+            break;
+        case SLANG_TEXTURE_2D:
+        case SLANG_TEXTURE_CUBE:
+            outDims[written++] = width;
+            outDims[written++] = height;
+            break;
+        case SLANG_TEXTURE_3D:
+            outDims[written++] = width;
+            outDims[written++] = height;
+            outDims[written++] = depth;
+            break;
+        default:
+            break;
+        }
+        return written;
+    }
+
+    /// Counts mip levels by halving every extent from getMIPDims until none exceeds 1. The
+    /// full-size level is counted too, so the result is at least 1.
+    int calcMaxMIPLevels() const
+    {
+        int extents[3];
+        const int extentCount = getMIPDims(extents);
+        int levels = 1;
+        for (;;)
+        {
+            bool anyShrunk = false;
+            for (int axis = 0; axis < extentCount; ++axis)
+            {
+                if (extents[axis] > 1)
+                {
+                    extents[axis] >>= 1;
+                    anyShrunk = true;
+                }
+            }
+            if (!anyShrunk)
+                return levels;
+            ++levels;
+        }
+    }
+};
+
+
+// Texture
+
+struct ITexture // Type-erased texture backend that the Texture*/RWTexture* wrappers in this file call into.
+{
+    virtual TextureDimensions GetDimensions(int mipLevel = -1) = 0; // wrappers omit mipLevel for non-mip queries
+    virtual void Load(const int32_t* v, void* outData, size_t dataSize) = 0; // v: pointer to the first coordinate
+    virtual void Sample(
+        SamplerState samplerState,
+        const float* loc, // pointer to the first coordinate component
+        void* outData,    // wrappers pass the address of a local T here
+        size_t dataSize) = 0; // wrappers pass the size of that T
+    virtual void SampleLevel(
+        SamplerState samplerState,
+        const float* loc,
+        float level,
+        void* outData,
+        size_t dataSize) = 0;
+};
+
+/// 1D texture handle; every query and fetch is forwarded to the type-erased `texture`.
+template<typename T> struct Texture1D
+{
+    void GetDimensions(uint32_t* outWidth) { *outWidth = texture->GetDimensions().width; }
+    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outNumberOfLevels = info.numberOfLevels;
+    }
+
+    void GetDimensions(float* outWidth) { *outWidth = texture->GetDimensions().width; }
+    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+    }
+
+    T Load(const int2& loc) const // `loc` is handed on as a pointer to its first component
+    {
+        T texel;
+        texture->Load(&loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T Sample(SamplerState samplerState, float loc) const
+    {
+        T texel;
+        texture->Sample(samplerState, &loc, &texel, sizeof(T));
+        return texel;
+    }
+    T SampleLevel(SamplerState samplerState, float loc, float level)
+    {
+        T texel;
+        texture->SampleLevel(samplerState, &loc, level, &texel, sizeof(T));
+        return texel;
+    }
+
+    ITexture* texture; ///< Backend that performs the actual work
+};
+
+/// 2D texture handle; every query and fetch is forwarded to the type-erased `texture`.
+template<typename T> struct Texture2D
+{
+    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = info.width;
+        *outHeight = info.height;
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        uint32_t* outWidth,
+        uint32_t* outHeight,
+        uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outNumberOfLevels = info.numberOfLevels;
+    }
+    void GetDimensions(float* outWidth, float* outHeight)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        float* outWidth,
+        float* outHeight,
+        float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+    }
+
+    T Load(const int3& loc) const // `loc` is handed on as a pointer to its first component
+    {
+        T texel;
+        texture->Load(&loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T Sample(SamplerState samplerState, const float2& loc) const
+    {
+        T texel;
+        texture->Sample(samplerState, &loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T SampleLevel(SamplerState samplerState, const float2& loc, float level)
+    {
+        T texel;
+        texture->SampleLevel(samplerState, &loc.x, level, &texel, sizeof(T));
+        return texel;
+    }
+
+    ITexture* texture; ///< Backend that performs the actual work
+};
+
+/// 3D texture handle; every query and fetch is forwarded to the type-erased `texture`.
+template<typename T> struct Texture3D
+{
+    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outDepth = info.depth;
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        uint32_t* outWidth,
+        uint32_t* outHeight,
+        uint32_t* outDepth,
+        uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outDepth = info.depth;
+        *outNumberOfLevels = info.numberOfLevels;
+    }
+    void GetDimensions(float* outWidth, float* outHeight, float* outDepth)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outDepth = static_cast<float>(info.depth);
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        float* outWidth,
+        float* outHeight,
+        float* outDepth,
+        float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outDepth = static_cast<float>(info.depth);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+    }
+
+    T Load(const int4& loc) const // `loc` is handed on as a pointer to its first component
+    {
+        T texel;
+        texture->Load(&loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T Sample(SamplerState samplerState, const float3& loc) const
+    {
+        T texel;
+        texture->Sample(samplerState, &loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T SampleLevel(SamplerState samplerState, const float3& loc, float level)
+    {
+        T texel;
+        texture->SampleLevel(samplerState, &loc.x, level, &texel, sizeof(T));
+        return texel;
+    }
+
+    ITexture* texture; ///< Backend that performs the actual work
+};
+
+/// Cube texture handle (no Load); every query and sample is forwarded to the type-erased `texture`.
+template<typename T> struct TextureCube
+{
+    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = info.width;
+        *outHeight = info.height;
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        uint32_t* outWidth,
+        uint32_t* outHeight,
+        uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outNumberOfLevels = info.numberOfLevels;
+    }
+    void GetDimensions(float* outWidth, float* outHeight)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        float* outWidth,
+        float* outHeight,
+        float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+    }
+
+    T Sample(SamplerState samplerState, const float3& loc) const
+    {
+        T texel;
+        texture->Sample(samplerState, &loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T SampleLevel(SamplerState samplerState, const float3& loc, float level)
+    {
+        T texel;
+        texture->SampleLevel(samplerState, &loc.x, level, &texel, sizeof(T));
+        return texel;
+    }
+
+    ITexture* texture; ///< Backend that performs the actual work
+};
+
+/// 1D texture array handle; every query and fetch is forwarded to the type-erased `texture`.
+template<typename T> struct Texture1DArray
+{
+    void GetDimensions(uint32_t* outWidth, uint32_t* outElements)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = info.width;
+        *outElements = info.arrayElementCount;
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        uint32_t* outWidth,
+        uint32_t* outElements,
+        uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outNumberOfLevels = info.numberOfLevels;
+        *outElements = info.arrayElementCount;
+    }
+    void GetDimensions(float* outWidth, float* outElements)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = static_cast<float>(info.width);
+        *outElements = static_cast<float>(info.arrayElementCount);
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        float* outWidth,
+        float* outElements,
+        float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+        *outElements = static_cast<float>(info.arrayElementCount);
+    }
+
+    T Load(const int3& loc) const // `loc` is handed on as a pointer to its first component
+    {
+        T texel;
+        texture->Load(&loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T Sample(SamplerState samplerState, const float2& loc) const
+    {
+        T texel;
+        texture->Sample(samplerState, &loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T SampleLevel(SamplerState samplerState, const float2& loc, float level)
+    {
+        T texel;
+        texture->SampleLevel(samplerState, &loc.x, level, &texel, sizeof(T));
+        return texel;
+    }
+
+    ITexture* texture; ///< Backend that performs the actual work
+};
+
+/// 2D texture array handle; every query and fetch is forwarded to the type-erased `texture`.
+template<typename T> struct Texture2DArray
+{
+    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outElements = info.arrayElementCount;
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        uint32_t* outWidth,
+        uint32_t* outHeight,
+        uint32_t* outElements,
+        uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outElements = info.arrayElementCount;
+        *outNumberOfLevels = info.numberOfLevels;
+    }
+    // Float flavour: all three outputs are float*, matching the other texture wrappers in this file.
+    void GetDimensions(float* outWidth, float* outHeight, float* outElements)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outElements = static_cast<float>(info.arrayElementCount);
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        float* outWidth,
+        float* outHeight,
+        float* outElements,
+        float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outElements = static_cast<float>(info.arrayElementCount);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+    }
+
+    T Load(const int4& loc) const // `loc` is handed on as a pointer to its first component
+    {
+        T texel;
+        texture->Load(&loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T Sample(SamplerState samplerState, const float3& loc) const
+    {
+        T texel;
+        texture->Sample(samplerState, &loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T SampleLevel(SamplerState samplerState, const float3& loc, float level)
+    {
+        T texel;
+        texture->SampleLevel(samplerState, &loc.x, level, &texel, sizeof(T));
+        return texel;
+    }
+
+    ITexture* texture; ///< Backend that performs the actual work
+};
+
+/// Cube texture array handle (no Load); every query and sample is forwarded to `texture`.
+template<typename T> struct TextureCubeArray
+{
+    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outElements = info.arrayElementCount;
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        uint32_t* outWidth,
+        uint32_t* outHeight,
+        uint32_t* outElements,
+        uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outElements = info.arrayElementCount;
+        *outNumberOfLevels = info.numberOfLevels;
+    }
+    // Float flavour: all three outputs are float*, matching the other texture wrappers in this file.
+    void GetDimensions(float* outWidth, float* outHeight, float* outElements)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outElements = static_cast<float>(info.arrayElementCount);
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        float* outWidth,
+        float* outHeight,
+        float* outElements,
+        float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outElements = static_cast<float>(info.arrayElementCount);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+    }
+
+    T Sample(SamplerState samplerState, const float4& loc) const
+    {
+        T texel;
+        texture->Sample(samplerState, &loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T SampleLevel(SamplerState samplerState, const float4& loc, float level)
+    {
+        T texel;
+        texture->SampleLevel(samplerState, &loc.x, level, &texel, sizeof(T));
+        return texel;
+    }
+
+    ITexture* texture; ///< Backend that performs the actual work
+};
+
+/* !!!!!!!!!!!!!!!!!!!!!!!!!!! RWTexture !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
+
+struct IRWTexture : ITexture // ITexture plus direct element access, used by the RWTexture* wrappers
+{
+    /// Returns an untyped pointer to the element at `loc`; the RWTexture* wrappers cast it to T*.
+    virtual void* refAt(const uint32_t* loc) = 0;
+};
+
+/// Writable 1D texture handle; queries, loads and element references are forwarded to `texture`.
+template<typename T> struct RWTexture1D
+{
+    void GetDimensions(uint32_t* outWidth) { *outWidth = texture->GetDimensions().width; }
+    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outNumberOfLevels = info.numberOfLevels;
+    }
+
+    void GetDimensions(float* outWidth) { *outWidth = texture->GetDimensions().width; }
+    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+    }
+
+    T Load(int32_t loc) const
+    {
+        T texel;
+        texture->Load(&loc, &texel, sizeof(T));
+        return texel;
+    }
+    T& operator[](uint32_t loc) { return *static_cast<T*>(texture->refAt(&loc)); }
+    IRWTexture* texture; ///< Backend that performs the actual work
+};
+
+/// Writable 2D texture handle; queries, loads and element references are forwarded to `texture`.
+template<typename T> struct RWTexture2D
+{
+    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = info.width;
+        *outHeight = info.height;
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        uint32_t* outWidth,
+        uint32_t* outHeight,
+        uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outNumberOfLevels = info.numberOfLevels;
+    }
+    void GetDimensions(float* outWidth, float* outHeight)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        float* outWidth,
+        float* outHeight,
+        float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+    }
+
+    T Load(const int2& loc) const
+    {
+        T texel;
+        texture->Load(&loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T& operator[](const uint2& loc) { return *static_cast<T*>(texture->refAt(&loc.x)); }
+    IRWTexture* texture; ///< Backend that performs the actual work
+};
+
+/// Writable 3D texture handle; queries, loads and element references are forwarded to `texture`.
+template<typename T> struct RWTexture3D
+{
+    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outDepth = info.depth;
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        uint32_t* outWidth,
+        uint32_t* outHeight,
+        uint32_t* outDepth,
+        uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outDepth = info.depth;
+        *outNumberOfLevels = info.numberOfLevels;
+    }
+    void GetDimensions(float* outWidth, float* outHeight, float* outDepth)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outDepth = static_cast<float>(info.depth);
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        float* outWidth,
+        float* outHeight,
+        float* outDepth,
+        float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outDepth = static_cast<float>(info.depth);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+    }
+
+    T Load(const int3& loc) const
+    {
+        T texel;
+        texture->Load(&loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T& operator[](const uint3& loc) { return *static_cast<T*>(texture->refAt(&loc.x)); }
+    IRWTexture* texture; ///< Backend that performs the actual work
+};
+
+
+/// Writable 1D texture array handle; queries, loads and element references go to `texture`.
+template<typename T> struct RWTexture1DArray
+{
+    void GetDimensions(uint32_t* outWidth, uint32_t* outElements)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = info.width;
+        *outElements = info.arrayElementCount;
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        uint32_t* outWidth,
+        uint32_t* outElements,
+        uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outElements = info.arrayElementCount;
+        *outNumberOfLevels = info.numberOfLevels;
+    }
+    void GetDimensions(float* outWidth, float* outElements)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = static_cast<float>(info.width);
+        *outElements = static_cast<float>(info.arrayElementCount);
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        float* outWidth,
+        float* outElements,
+        float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outElements = static_cast<float>(info.arrayElementCount);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+    }
+
+    T Load(int2 loc) const
+    {
+        T texel;
+        texture->Load(&loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T& operator[](uint2 loc) { return *static_cast<T*>(texture->refAt(&loc.x)); }
+
+    IRWTexture* texture; ///< Backend that performs the actual work
+};
+
+/// Writable 2D texture array handle; queries, loads and element references go to `texture`.
+template<typename T> struct RWTexture2DArray
+{
+    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outElements = info.arrayElementCount;
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        uint32_t* outWidth,
+        uint32_t* outHeight,
+        uint32_t* outElements,
+        uint32_t* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = info.width;
+        *outHeight = info.height;
+        *outElements = info.arrayElementCount;
+        *outNumberOfLevels = info.numberOfLevels;
+    }
+    void GetDimensions(float* outWidth, float* outHeight, float* outElements)
+    {
+        const TextureDimensions info = texture->GetDimensions();
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outElements = static_cast<float>(info.arrayElementCount);
+    }
+    void GetDimensions(
+        uint32_t mipLevel,
+        float* outWidth,
+        float* outHeight,
+        float* outElements,
+        float* outNumberOfLevels)
+    {
+        const TextureDimensions info = texture->GetDimensions(mipLevel);
+        *outWidth = static_cast<float>(info.width);
+        *outHeight = static_cast<float>(info.height);
+        *outElements = static_cast<float>(info.arrayElementCount);
+        *outNumberOfLevels = static_cast<float>(info.numberOfLevels);
+    }
+
+    T Load(const int3& loc) const
+    {
+        T texel;
+        texture->Load(&loc.x, &texel, sizeof(T));
+        return texel;
+    }
+    T& operator[](const uint3& loc) { return *static_cast<T*>(texture->refAt(&loc.x)); }
+
+    IRWTexture* texture; ///< Backend that performs the actual work
+};
+
+// FeedbackTexture
+
+/// Empty tag types. FeedbackType is the common base; the two derived tags presumably select the
+/// feedback mode of a FeedbackTexture2D<T> (confirm against its users).
+struct FeedbackType {};
+
+/// Tag derived from FeedbackType.
+struct SAMPLER_FEEDBACK_MIN_MIP : FeedbackType {};
+
+/// Tag derived from FeedbackType.
+struct SAMPLER_FEEDBACK_MIP_REGION_USED : FeedbackType {};
+
+struct IFeedbackTexture // Type-erased sampler-feedback backend that FeedbackTexture2D<T> forwards to.
+{
+    virtual TextureDimensions GetDimensions(int mipLevel = -1) = 0;
+
+    // The optional clamp parameter is passed as a pointer; nullptr means "no clamp". This was
+    // preferred over declaring each operation twice and having to give the two variants
+    // different names.
+    virtual void WriteSamplerFeedback(
+        ITexture* tex,         // texture the feedback refers to (wrappers pass Texture2D<S>::texture)
+        SamplerState samp,
+        const float* location, // pointer to the first coordinate component
+        const float* clamp = nullptr) = 0;
+    virtual void WriteSamplerFeedbackBias(
+        ITexture* tex,
+        SamplerState samp,
+        const float* location,
+        float bias,
+        const float* clamp = nullptr) = 0;
+    virtual void WriteSamplerFeedbackGrad(
+        ITexture* tex,
+        SamplerState samp,
+        const float* location,
+        const float* ddx, // pointer to the first component of the x gradient
+        const float* ddy, // pointer to the first component of the y gradient
+        const float* clamp = nullptr) = 0;
+
+    virtual void WriteSamplerFeedbackLevel( // explicit-LOD variant; takes no clamp
+        ITexture* tex,
+        SamplerState samp,
+        const float* location,
+        float lod) = 0;
+};
+
+// Thin wrapper over an IFeedbackTexture*; every method forwards to that interface.
+// NOTE(review): T is not referenced by any member visible here.
+template<typename T>
+struct FeedbackTexture2D
+{
+    // Integer width/height, using the interface's default mip argument.
+    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight)
+    {
+        const auto extent = texture->GetDimensions();
+        *outWidth = extent.width;
+        *outHeight = extent.height;
+    }
+    // Integer width/height plus the level count, for an explicit mip level.
+    void GetDimensions(
+        uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels)
+    {
+        const auto extent = texture->GetDimensions(mipLevel);
+        *outWidth = extent.width;
+        *outHeight = extent.height;
+        *outNumberOfLevels = extent.numberOfLevels;
+    }
+    // Float flavour of the two-output overload.
+    void GetDimensions(float* outWidth, float* outHeight)
+    {
+        const auto extent = texture->GetDimensions();
+        *outWidth = extent.width;
+        *outHeight = extent.height;
+    }
+    // Float flavour of the mip-level overload.
+    void GetDimensions(
+        uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels)
+    {
+        const auto extent = texture->GetDimensions(mipLevel);
+        *outWidth = extent.width;
+        *outHeight = extent.height;
+        *outNumberOfLevels = extent.numberOfLevels;
+    }
+
+    // ---- With clamp: the address of the by-value `clamp` is handed to the interface ----
+
+    template<typename S>
+    void WriteSamplerFeedback(Texture2D<S> tex, SamplerState samp, float2 location, float clamp)
+    {
+        texture->WriteSamplerFeedback(tex.texture, samp, &location.x, &clamp);
+    }
+
+    template<typename S>
+    void WriteSamplerFeedbackBias(
+        Texture2D<S> tex,
+        SamplerState samp,
+        float2 location,
+        float bias,
+        float clamp)
+    {
+        texture->WriteSamplerFeedbackBias(tex.texture, samp, &location.x, bias, &clamp);
+    }
+
+    template<typename S>
+    void WriteSamplerFeedbackGrad(
+        Texture2D<S> tex,
+        SamplerState samp,
+        float2 location,
+        float2 ddx,
+        float2 ddy,
+        float clamp)
+    {
+        texture->WriteSamplerFeedbackGrad(tex.texture, samp, &location.x, &ddx.x, &ddy.x, &clamp);
+    }
+
+    // ---- Explicit level of detail: the interface method takes no clamp ----
+
+    template<typename S>
+    void WriteSamplerFeedbackLevel(Texture2D<S> tex, SamplerState samp, float2 location, float lod)
+    {
+        texture->WriteSamplerFeedbackLevel(tex.texture, samp, &location.x, lod);
+    }
+
+    // ---- Without clamp: the interface's default clamp argument (nullptr) applies ----
+
+    template<typename S>
+    void WriteSamplerFeedback(Texture2D<S> tex, SamplerState samp, float2 location)
+    {
+        texture->WriteSamplerFeedback(tex.texture, samp, &location.x);
+    }
+
+    template<typename S>
+    void WriteSamplerFeedbackBias(Texture2D<S> tex, SamplerState samp, float2 location, float bias)
+    {
+        texture->WriteSamplerFeedbackBias(tex.texture, samp, &location.x, bias);
+    }
+
+    template<typename S>
+    void WriteSamplerFeedbackGrad(
+        Texture2D<S> tex, SamplerState samp, float2 location, float2 ddx, float2 ddy)
+    {
+        texture->WriteSamplerFeedbackGrad(tex.texture, samp, &location.x, &ddx.x, &ddy.x);
+    }
+
+    // Dereferenced unconditionally by every method above.
+    IFeedbackTexture* texture;
+};
+
+// Array counterpart of FeedbackTexture2D: every method forwards to an IFeedbackTexture*.
+// NOTE(review): T is not referenced by any member visible here.
+template<typename T>
+struct FeedbackTexture2DArray
+{
+    // Integer width/height/element count, using the interface's default mip argument.
+    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
+    {
+        const auto extent = texture->GetDimensions();
+        *outWidth = extent.width;
+        *outHeight = extent.height;
+        *outElements = extent.arrayElementCount;
+    }
+    // Integer outputs plus the level count, for an explicit mip level.
+    void GetDimensions(
+        uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight,
+        uint32_t* outElements, uint32_t* outNumberOfLevels)
+    {
+        const auto extent = texture->GetDimensions(mipLevel);
+        *outWidth = extent.width;
+        *outHeight = extent.height;
+        *outElements = extent.arrayElementCount;
+        *outNumberOfLevels = extent.numberOfLevels;
+    }
+    // Float flavour of the three-output overload.
+    void GetDimensions(float* outWidth, float* outHeight, float* outElements)
+    {
+        const auto extent = texture->GetDimensions();
+        *outWidth = extent.width;
+        *outHeight = extent.height;
+        *outElements = extent.arrayElementCount;
+    }
+    // Float flavour of the mip-level overload.
+    void GetDimensions(
+        uint32_t mipLevel, float* outWidth, float* outHeight,
+        float* outElements, float* outNumberOfLevels)
+    {
+        const auto extent = texture->GetDimensions(mipLevel);
+        *outWidth = extent.width;
+        *outHeight = extent.height;
+        *outElements = extent.arrayElementCount;
+        *outNumberOfLevels = extent.numberOfLevels;
+    }
+
+    // ---- With clamp: the address of the by-value `clamp` is handed to the interface ----
+
+    template<typename S>
+    void WriteSamplerFeedback(
+        Texture2DArray<S> texArray, SamplerState samp, float3 location, float clamp)
+    {
+        texture->WriteSamplerFeedback(texArray.texture, samp, &location.x, &clamp);
+    }
+
+    template<typename S>
+    void WriteSamplerFeedbackBias(
+        Texture2DArray<S> texArray,
+        SamplerState samp,
+        float3 location,
+        float bias,
+        float clamp)
+    {
+        texture->WriteSamplerFeedbackBias(texArray.texture, samp, &location.x, bias, &clamp);
+    }
+
+    template<typename S>
+    void WriteSamplerFeedbackGrad(
+        Texture2DArray<S> texArray,
+        SamplerState samp,
+        float3 location,
+        float3 ddx,
+        float3 ddy,
+        float clamp)
+    {
+        texture->WriteSamplerFeedbackGrad(
+            texArray.texture, samp, &location.x, &ddx.x, &ddy.x, &clamp);
+    }
+
+    // ---- Explicit level of detail: the interface method takes no clamp ----
+    template<typename S>
+    void WriteSamplerFeedbackLevel(
+        Texture2DArray<S> texArray,
+        SamplerState samp,
+        float3 location,
+        float lod)
+    {
+        texture->WriteSamplerFeedbackLevel(texArray.texture, samp, &location.x, lod);
+    }
+
+    // ---- Without clamp: the interface's default clamp argument (nullptr) applies ----
+
+    template<typename S>
+    void WriteSamplerFeedback(Texture2DArray<S> texArray, SamplerState samp, float3 location)
+    {
+        texture->WriteSamplerFeedback(texArray.texture, samp, &location.x);
+    }
+
+    template<typename S>
+    void WriteSamplerFeedbackBias(
+        Texture2DArray<S> texArray,
+        SamplerState samp,
+        float3 location,
+        float bias)
+    {
+        texture->WriteSamplerFeedbackBias(texArray.texture, samp, &location.x, bias);
+    }
+
+    template<typename S>
+    void WriteSamplerFeedbackGrad(
+        Texture2DArray<S> texArray,
+        SamplerState samp,
+        float3 location,
+        float3 ddx,
+        float3 ddy)
+    {
+        texture->WriteSamplerFeedbackGrad(texArray.texture, samp, &location.x, &ddx.x, &ddy.x);
+    }
+
+    // Dereferenced unconditionally by every method above.
+    IFeedbackTexture* texture;
+};
+
+/* Varying input for Compute */
+
+/* Used when running a single thread */
+struct ComputeThreadVaryingInput
+{
+    uint3 groupID;       ///< NOTE(review): presumably the thread-group index - confirm
+    uint3 groupThreadID; ///< NOTE(review): presumably the thread index within the group - confirm
+};
+
+struct ComputeVaryingInput
+{
+    uint3 startGroupID; ///< First groupID of the range
+    uint3 endGroupID;   ///< End of the range; this groupID itself is not included
+};
+
+// The uniformEntryPointParams and uniformState must be set to structures that match the layout
+// that the kernel expects. This can be determined via reflection, for example.
+
+// Pointer to a kernel entry that takes the single-thread varying input.
+using ComputeThreadFunc = void (*)(
+    ComputeThreadVaryingInput* varyingInput,
+    void* uniformEntryPointParams,
+    void* uniformState);
+// Pointer to a kernel entry that takes a [startGroupID, endGroupID) range.
+using ComputeFunc =
+    void (*)(ComputeVaryingInput* varyingInput, void* uniformEntryPointParams, void* uniformState);
+
+#ifdef SLANG_PRELUDE_NAMESPACE
+}
+#endif
+
+#endif
diff --git a/external/slang/include/slang-cuda-prelude.h b/external/slang/include/slang-cuda-prelude.h
new file mode 100644
index 00000000..5585ad6e
--- /dev/null
+++ b/external/slang/include/slang-cuda-prelude.h
@@ -0,0 +1,3358 @@
+#define SLANG_PRELUDE_EXPORT
+
+#ifdef __CUDACC_RTC__
+#define SLANG_CUDA_RTC 1
+#else
+#define SLANG_CUDA_RTC 0
+#endif
+
+#if SLANG_CUDA_RTC
+
+#else
+
+#include <cstdint>
+#include <stdio.h>
+
+#endif
+
+// Define SLANG_CUDA_ENABLE_HALF to use the cuda_fp16 include to add half support.
+// For this to work NVRTC needs to have the path to the CUDA SDK.
+//
+// As it stands the includes paths defined for Slang are passed down to NVRTC. Similarly defines
+// defined for the Slang compile are passed down.
+
+#ifdef SLANG_CUDA_ENABLE_HALF
+// We don't want half2 operators, because it will implement comparison operators that return a
+// bool(!). We want to generate those functions. Doing so means that we will have to define all
+// the other half2 operators.
+#define __CUDA_NO_HALF2_OPERATORS__
+#include <cuda_fp16.h>
+#endif
+
+#ifdef SLANG_CUDA_ENABLE_OPTIX
+#include <optix.h>
+#endif
+
+// Define slang offsetof implementation
+#ifndef SLANG_OFFSET_OF
+#define SLANG_OFFSET_OF(type, member) (size_t)((char*)&(((type*)0)->member) - (char*)0)
+#endif
+
+#ifndef SLANG_ALIGN_OF
+#define SLANG_ALIGN_OF(type) __alignof__(type)
+#endif
+
+// Must be large enough to cause overflow and therefore infinity
+#ifndef SLANG_INFINITY
+#define SLANG_INFINITY ((float)(1e+300 * 1e+300))
+#endif
+
+// For now we'll disable any asserts in this prelude
+#define SLANG_PRELUDE_ASSERT(x)
+
+#ifndef SLANG_CUDA_WARP_SIZE
+#define SLANG_CUDA_WARP_SIZE 32
+#endif
+
+#define SLANG_CUDA_WARP_MASK \
+    (SLANG_CUDA_WARP_SIZE - 1) // Used for masking threadIdx.x to the warp lane index
+#define SLANG_CUDA_WARP_BITMASK (~int(0))
+
+//
+// Inlining hints. Both expand to plain `inline`, so the compiler remains free to decide.
+#define SLANG_FORCE_INLINE inline
+#define SLANG_INLINE inline
+
+// Qualifier placed on the prelude's functions so they are compiled as device code.
+#define SLANG_CUDA_CALL __device__
+
+
+// Since we are using unsigned arithmetic, care is needed in this comparison.
+// It is *assumed* that sizeInBytes >= elemSize, which means (sizeInBytes - elemSize) >= 0,
+// which means only a single test is needed.
+
+// Asserts for bounds checking.
+// It is assumed index/count are unsigned types.
+#define SLANG_BOUND_ASSERT(index, count) SLANG_PRELUDE_ASSERT(index < count);
+#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+    SLANG_PRELUDE_ASSERT(index <= (sizeInBytes - elemSize) && (index & 3) == 0);
+
+// Macros to zero index if an access is out of range
+#define SLANG_BOUND_ZERO_INDEX(index, count) index = (index < count) ? index : 0;
+#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+    index = (index <= (sizeInBytes - elemSize)) ? index : 0;
+
+// The 'FIX' macro define how the index is fixed. The default is to do nothing. If
+// SLANG_ENABLE_BOUND_ZERO_INDEX the fix macro will zero the index, if out of range
+#ifdef SLANG_ENABLE_BOUND_ZERO_INDEX
+#define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
+#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+    SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
+#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) /* zero an out-of-range index, once */ \
+    SLANG_BOUND_ZERO_INDEX(index, count)
+#else
+#define SLANG_BOUND_FIX(index, count)
+#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
+#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
+#endif
+
+#ifndef SLANG_BOUND_CHECK
+#define SLANG_BOUND_CHECK(index, count) \
+    SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count)
+#endif
+
+#ifndef SLANG_BOUND_CHECK_BYTE_ADDRESS
+#define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
+    SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes)    \
+    SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
+#endif
+
+#ifndef SLANG_BOUND_CHECK_FIXED_ARRAY
+#define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) \
+    SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
+#endif
+
+// This macro controls how out-of-range surface coordinates are handled.
+// It can equal:
+// cudaBoundaryModeClamp, in which case out-of-range coordinates are clamped to the valid range
+// cudaBoundaryModeZero, in which case out-of-range reads return zero and out-of-range writes are
+// ignored
+// cudaBoundaryModeTrap, in which case out-of-range accesses cause the kernel execution to fail.
+
+#ifndef SLANG_CUDA_BOUNDARY_MODE
+#define SLANG_CUDA_BOUNDARY_MODE cudaBoundaryModeZero
+
+// SLANG_PTX_BOUNDARY_MODE can be one of the modes listed below. It only applies to CUDA
+// operations emitted as *PTX*, which currently is just RWTextureRW format writes
+//
+// .trap         causes an execution trap on out-of-bounds addresses
+// .clamp        stores data at the nearest surface location (sized appropriately)
+// .zero         drops stores to out-of-bounds addresses
+
+#define SLANG_PTX_BOUNDARY_MODE "zero"
+#endif
+
+struct TypeInfo
+{
+    size_t typeSize; // NOTE(review): presumably sizeof the described type in bytes - confirm
+};
+
+// Array whose length SIZE is fixed at compile time. The const operator[] runs
+// SLANG_BOUND_CHECK_FIXED_ARRAY on its by-value index; the mutable one reuses it.
+template<typename T, size_t SIZE>
+struct FixedArray
+{
+    SLANG_CUDA_CALL const T& operator[](size_t index) const
+    {
+        SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE);
+        return *(m_data + index);
+    }
+    SLANG_CUDA_CALL T& operator[](size_t index)
+    {
+        return const_cast<T&>(static_cast<const FixedArray&>(*this)[index]);
+    }
+    T m_data[SIZE];
+};
+
+// An array with no compile-time size becomes an 'Array'. It carries its element count so that
+// SLANG_BOUND_CHECK can (when enabled) validate or fix up an index.
+template<typename T>
+struct Array
+{
+    SLANG_CUDA_CALL const T& operator[](size_t index) const
+    {
+        SLANG_BOUND_CHECK(index, count);
+        return *(data + index);
+    }
+
+    SLANG_CUDA_CALL T& operator[](size_t index)
+    {
+        return const_cast<T&>(static_cast<const Array&>(*this)[index]);
+    }
+
+    T* data;      // first element; NOTE(review): ownership is not expressed here
+    size_t count; // number of elements, used as the bound in operator[]
+};
+
+// Typically defined in cuda.h, but we can't ship/rely on that, so just define here
+typedef unsigned long long CUtexObject;
+typedef unsigned long long CUsurfObject;
+
+// On CUDA sampler state is actually bound up with the texture object. We have a SamplerState type,
+// backed as a pointer, to simplify code generation, with the downside that such a binding will take
+// up uniform space, even though it will have no effect.
+// TODO(JS): Consider ways to strip use of variables of this type so they have no binding.
+struct SamplerStateUnused;
+typedef SamplerStateUnused* SamplerState;
+
+
+// TODO(JS): Not clear yet if this can be handled on CUDA, by just ignoring.
+// For now, just map to the index type.
+typedef size_t NonUniformResourceIndex;
+
+// Code generator will generate the specific type
+template<typename T, int ROWS, int COLS>
+struct Matrix;
+
+typedef int1 bool1;
+typedef int2 bool2;
+typedef int3 bool3;
+typedef int4 bool4;
+
+#if SLANG_CUDA_RTC
+
+typedef signed char int8_t;
+typedef short int16_t;
+typedef int int32_t;
+typedef long long int64_t;
+typedef ptrdiff_t intptr_t;
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+typedef size_t uintptr_t;
+
+#endif
+
+typedef long long longlong;
+typedef unsigned long long ulonglong;
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+
+union Union32 // three 32-bit views sharing the same storage
+{
+    uint32_t u; // unsigned integer view
+    int32_t i;  // signed integer view
+    float f;    // single-precision float view
+};
+
+union Union64 // three 64-bit views sharing the same storage
+{
+    uint64_t u; // unsigned integer view
+    int64_t i;  // signed integer view
+    double d;   // double-precision float view
+};
+
+// Explicitly converts `val` to float.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float make_float(T val) {
+    return float(val);
+}
+
+// Single-precision remainder: forwards to ::fmodf.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float _slang_fmod(float x, float y) {
+    return ::fmodf(x, y);
+}
+// Double-precision remainder: forwards to ::fmod.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double _slang_fmod(double x, double y) {
+    return ::fmod(x, y);
+}
+
+#if SLANG_CUDA_ENABLE_HALF
+
+// Add the other vector half types
+struct __half1 // one-component half vector
+{
+    __half x;
+};
+struct __align__(4) __half3 // three-component half vector, declared with 4-byte alignment
+{
+    __half x, y, z;
+};
+struct __align__(4) __half4 // four-component half vector, declared with 4-byte alignment
+{
+    __half x, y, z, w;
+};
+#endif
+
+#define SLANG_VECTOR_GET_ELEMENT(T) /* by-value read of component `index` of T1..T4; index is not range-checked */ \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##1 x, int index) \
+    {                                                                                 \
+        return ((T*)(&x))[index];                                                     \
+    }                                                                                 \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##2 x, int index) \
+    {                                                                                 \
+        return ((T*)(&x))[index];                                                     \
+    }                                                                                 \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##3 x, int index) \
+    {                                                                                 \
+        return ((T*)(&x))[index];                                                     \
+    }                                                                                 \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##4 x, int index) \
+    {                                                                                 \
+        return ((T*)(&x))[index];                                                     \
+    }
+SLANG_VECTOR_GET_ELEMENT(int)
+SLANG_VECTOR_GET_ELEMENT(uint)
+SLANG_VECTOR_GET_ELEMENT(short)
+SLANG_VECTOR_GET_ELEMENT(ushort)
+SLANG_VECTOR_GET_ELEMENT(char)
+SLANG_VECTOR_GET_ELEMENT(uchar)
+SLANG_VECTOR_GET_ELEMENT(longlong)
+SLANG_VECTOR_GET_ELEMENT(ulonglong)
+SLANG_VECTOR_GET_ELEMENT(float)
+SLANG_VECTOR_GET_ELEMENT(double)
+
+#define SLANG_VECTOR_GET_ELEMENT_PTR(T) /* address of component `index` inside *x; index is not range-checked */ \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##1 * x, int index) \
+    {                                                                                        \
+        return ((T*)(x)) + index;                                                            \
+    }                                                                                        \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##2 * x, int index) \
+    {                                                                                        \
+        return ((T*)(x)) + index;                                                            \
+    }                                                                                        \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##3 * x, int index) \
+    {                                                                                        \
+        return ((T*)(x)) + index;                                                            \
+    }                                                                                        \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##4 * x, int index) \
+    {                                                                                        \
+        return ((T*)(x)) + index;                                                            \
+    }
+SLANG_VECTOR_GET_ELEMENT_PTR(int)
+SLANG_VECTOR_GET_ELEMENT_PTR(uint)
+SLANG_VECTOR_GET_ELEMENT_PTR(short)
+SLANG_VECTOR_GET_ELEMENT_PTR(ushort)
+SLANG_VECTOR_GET_ELEMENT_PTR(char)
+SLANG_VECTOR_GET_ELEMENT_PTR(uchar)
+SLANG_VECTOR_GET_ELEMENT_PTR(longlong)
+SLANG_VECTOR_GET_ELEMENT_PTR(ulonglong)
+SLANG_VECTOR_GET_ELEMENT_PTR(float)
+SLANG_VECTOR_GET_ELEMENT_PTR(double)
+
+#if SLANG_CUDA_ENABLE_HALF
+SLANG_VECTOR_GET_ELEMENT(__half)
+SLANG_VECTOR_GET_ELEMENT_PTR(__half)
+#endif
+
+#define SLANG_CUDA_VECTOR_BINARY_OP(T, n, op) /* component-wise `op`; the result has the operand type T##n */ \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator op(T##n thisVal, T##n other)             \
+    {                                                                                         \
+        T##n result;                                                                          \
+        for (int i = 0; i < n; i++)                                                           \
+            *_slang_vector_get_element_ptr(&result, i) =                                      \
+                _slang_vector_get_element(thisVal, i) op _slang_vector_get_element(other, i); \
+        return result;                                                                        \
+    }
+#define SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, op) /* component-wise compare; each result is cast to int and stored in a bool##n */ \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL bool##n operator op(T##n thisVal, T##n other) \
+    {                                                                                \
+        bool##n result;                                                              \
+        for (int i = 0; i < n; i++)                                                  \
+            *_slang_vector_get_element_ptr(&result, i) =                             \
+                (int)(_slang_vector_get_element(thisVal, i)                          \
+                          op _slang_vector_get_element(other, i));                   \
+        return result;                                                               \
+    }
+#define SLANG_CUDA_VECTOR_UNARY_OP(T, n, op) /* component-wise prefix `op`; the result has the operand type T##n */ \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator op(T##n thisVal)                              \
+    {                                                                                              \
+        T##n result;                                                                               \
+        for (int i = 0; i < n; i++)                                                                \
+            *_slang_vector_get_element_ptr(&result, i) = op _slang_vector_get_element(thisVal, i); \
+        return result;                                                                             \
+    }
+
+#define SLANG_CUDA_VECTOR_INT_OP(T, n) /* all operator overloads generated for the integer vector type T##n */ \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, +)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, -)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, *)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, /)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, %)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, ^)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, &)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, |)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, &&)         \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, ||)         \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, >>)         \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, <<)         \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >)  \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <)  \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >=) \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <=) \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, ==) \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, !=) \
+    SLANG_CUDA_VECTOR_UNARY_OP(T, n, !)           \
+    SLANG_CUDA_VECTOR_UNARY_OP(T, n, -)           \
+    SLANG_CUDA_VECTOR_UNARY_OP(T, n, ~)
+
+#define SLANG_CUDA_VECTOR_INT_OPS(T) /* instantiates the set for T2, T3 and T4 (no 1-component form) */ \
+    SLANG_CUDA_VECTOR_INT_OP(T, 2)   \
+    SLANG_CUDA_VECTOR_INT_OP(T, 3)   \
+    SLANG_CUDA_VECTOR_INT_OP(T, 4)
+
+SLANG_CUDA_VECTOR_INT_OPS(int)
+SLANG_CUDA_VECTOR_INT_OPS(uint)
+SLANG_CUDA_VECTOR_INT_OPS(ushort)
+SLANG_CUDA_VECTOR_INT_OPS(short)
+SLANG_CUDA_VECTOR_INT_OPS(char)
+SLANG_CUDA_VECTOR_INT_OPS(uchar)
+SLANG_CUDA_VECTOR_INT_OPS(longlong)
+SLANG_CUDA_VECTOR_INT_OPS(ulonglong)
+
+#define SLANG_CUDA_VECTOR_FLOAT_OP(T, n) /* float set: no %, bitwise, shift, ! or ~ overloads here */ \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, +)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, -)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, *)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, /)          \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, &&)         \
+    SLANG_CUDA_VECTOR_BINARY_OP(T, n, ||)         \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >)  \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <)  \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >=) \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <=) \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, ==) \
+    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, !=) \
+    SLANG_CUDA_VECTOR_UNARY_OP(T, n, -)
+#define SLANG_CUDA_VECTOR_FLOAT_OPS(T) /* instantiates the set for T2, T3 and T4 */ \
+    SLANG_CUDA_VECTOR_FLOAT_OP(T, 2)   \
+    SLANG_CUDA_VECTOR_FLOAT_OP(T, 3)   \
+    SLANG_CUDA_VECTOR_FLOAT_OP(T, 4)
+
+SLANG_CUDA_VECTOR_FLOAT_OPS(float)
+SLANG_CUDA_VECTOR_FLOAT_OPS(double)
+#if SLANG_CUDA_ENABLE_HALF
+SLANG_CUDA_VECTOR_FLOAT_OPS(__half)
+#endif
+#define SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, n) /* operator% for float vectors: _slang_fmod applied per component */ \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator%(const T##n& left, const T##n& right) \
+    {                                                                                      \
+        T##n result;                                                                       \
+        for (int i = 0; i < n; i++)                                                        \
+            *_slang_vector_get_element_ptr(&result, i) = _slang_fmod(                      \
+                _slang_vector_get_element(left, i),                                        \
+                _slang_vector_get_element(right, i));                                      \
+        return result;                                                                     \
+    }
+#define SLANG_CUDA_FLOAT_VECTOR_MOD(T) /* instantiates operator% for T2, T3 and T4 */ \
+    SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 2) \
+    SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 3) \
+    SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 4)
+
+SLANG_CUDA_FLOAT_VECTOR_MOD(float)
+SLANG_CUDA_FLOAT_VECTOR_MOD(double)
+
+#if SLANG_CUDA_RTC || SLANG_CUDA_ENABLE_HALF
+#define SLANG_MAKE_VECTOR(T) /* defines make_T2/3/4 taking one argument per component */ \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x, T y)           \
+    {                                                                       \
+        return T##2 {x, y};                                                 \
+    }                                                                       \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x, T y, T z)      \
+    {                                                                       \
+        return T##3 {x, y, z};                                              \
+    }                                                                       \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x, T y, T z, T w) \
+    {                                                                       \
+        return T##4 {x, y, z, w};                                           \
+    }
+#endif
+
+#if SLANG_CUDA_RTC
+SLANG_MAKE_VECTOR(int)
+SLANG_MAKE_VECTOR(uint)
+SLANG_MAKE_VECTOR(short)
+SLANG_MAKE_VECTOR(ushort)
+SLANG_MAKE_VECTOR(char)
+SLANG_MAKE_VECTOR(uchar)
+SLANG_MAKE_VECTOR(float)
+SLANG_MAKE_VECTOR(double)
+SLANG_MAKE_VECTOR(longlong)
+SLANG_MAKE_VECTOR(ulonglong)
+#endif
+
+#if SLANG_CUDA_ENABLE_HALF
+SLANG_MAKE_VECTOR(__half)
+#endif
+
+// boolN is a typedef of intN in this prelude, so each bool argument is stored as an int.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool1 make_bool1(bool x) {
+    return bool1{x};
+}
+// Two-component bool vector from individual components.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool2 make_bool2(bool x, bool y) {
+    return bool2{x, y};
+}
+// Three-component bool vector from individual components.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool3 make_bool3(bool x, bool y, bool z) {
+    return bool3{x, y, z};
+}
+// Four-component bool vector from individual components.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool4 make_bool4(bool x, bool y, bool z, bool w) {
+    return bool4{x, y, z, w};
+}
+// Splat: both components take the value of `x`.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool2 make_bool2(bool x) {
+    return make_bool2(x, x);
+}
+// Splat: all three components take the value of `x`.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool3 make_bool3(bool x) {
+    return make_bool3(x, x, x);
+}
+// Splat: all four components take the value of `x`.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool4 make_bool4(bool x) {
+    return make_bool4(x, x, x, x);
+}
+
+#if SLANG_CUDA_RTC
+#define SLANG_MAKE_VECTOR_FROM_SCALAR(T) /* RTC build: also defines make_T1; the others splat x into every component */ \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##1 make_##T##1(T x) \
+    {                                                        \
+        return T##1 {x};                                     \
+    }                                                        \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x) \
+    {                                                        \
+        return make_##T##2(x, x);                            \
+    }                                                        \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x) \
+    {                                                        \
+        return make_##T##3(x, x, x);                         \
+    }                                                        \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x) \
+    {                                                        \
+        return make_##T##4(x, x, x, x);                      \
+    }
+#else
+#define SLANG_MAKE_VECTOR_FROM_SCALAR(T) /* non-RTC build: same splat overloads, but no make_T1 is defined here */ \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x) \
+    {                                                        \
+        return make_##T##2(x, x);                            \
+    }                                                        \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x) \
+    {                                                        \
+        return make_##T##3(x, x, x);                         \
+    }                                                        \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x) \
+    {                                                        \
+        return make_##T##4(x, x, x, x);                      \
+    }
+#endif
+SLANG_MAKE_VECTOR_FROM_SCALAR(int)
+SLANG_MAKE_VECTOR_FROM_SCALAR(uint)
+SLANG_MAKE_VECTOR_FROM_SCALAR(short)
+SLANG_MAKE_VECTOR_FROM_SCALAR(ushort)
+SLANG_MAKE_VECTOR_FROM_SCALAR(char)
+SLANG_MAKE_VECTOR_FROM_SCALAR(uchar)
+SLANG_MAKE_VECTOR_FROM_SCALAR(longlong)
+SLANG_MAKE_VECTOR_FROM_SCALAR(ulonglong)
+SLANG_MAKE_VECTOR_FROM_SCALAR(float)
+SLANG_MAKE_VECTOR_FROM_SCALAR(double)
+#if SLANG_CUDA_ENABLE_HALF
+SLANG_MAKE_VECTOR_FROM_SCALAR(__half)
+#if !SLANG_CUDA_RTC
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half1 make___half1(__half x)
+{
+    return __half1{x};
+}
+#endif
+#endif
+
+#define SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(Fn, T, N) /* calls Fn once per component, so the vector as a whole is not updated in a single atomic step */ \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##N Fn(T##N* address, T##N val)                           \
+    {                                                                                             \
+        T##N result;                                                                              \
+        for (int i = 0; i < N; i++)                                                               \
+            *_slang_vector_get_element_ptr(&result, i) =                                          \
+                Fn(_slang_vector_get_element_ptr(address, i), _slang_vector_get_element(val, i)); \
+        return result;                                                                            \
+    }
+
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 900
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, float, 2)
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, float, 4)
+#endif
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, float, 3)
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, int, 2)
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, int, 3)
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, int, 4)
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, uint, 2)
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, uint, 3)
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, uint, 4)
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, ulonglong, 2)
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, ulonglong, 3)
+SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, ulonglong, 4)
+
+template<typename T, int n>
+struct GetVectorTypeImpl // primary template is empty; GET_VECTOR_TYPE_IMPL supplies the specializations
+{
+};
+
+#define GET_VECTOR_TYPE_IMPL(T, n) /* maps (T, n) to the vector type T##n and its splat constructor */ \
+    template<>                                                         \
+    struct GetVectorTypeImpl<T, n>                                     \
+    {                                                                  \
+        typedef T##n type;                                             \
+        static SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n fromScalar(T v) \
+        {                                                              \
+            return make_##T##n(v);                                     \
+        }                                                              \
+    };
+#define GET_VECTOR_TYPE_IMPL_N(T) /* specializes for n = 1, 2, 3 and 4 */ \
+    GET_VECTOR_TYPE_IMPL(T, 1)    \
+    GET_VECTOR_TYPE_IMPL(T, 2)    \
+    GET_VECTOR_TYPE_IMPL(T, 3)    \
+    GET_VECTOR_TYPE_IMPL(T, 4)
+
+GET_VECTOR_TYPE_IMPL_N(int)
+GET_VECTOR_TYPE_IMPL_N(uint)
+GET_VECTOR_TYPE_IMPL_N(short)
+GET_VECTOR_TYPE_IMPL_N(ushort)
+GET_VECTOR_TYPE_IMPL_N(char)
+GET_VECTOR_TYPE_IMPL_N(uchar)
+GET_VECTOR_TYPE_IMPL_N(longlong)
+GET_VECTOR_TYPE_IMPL_N(ulonglong)
+GET_VECTOR_TYPE_IMPL_N(float)
+GET_VECTOR_TYPE_IMPL_N(double)
+#if SLANG_CUDA_ENABLE_HALF
+GET_VECTOR_TYPE_IMPL_N(__half)
+#endif
+template<typename T, int n> // Vector<T, n> resolves to GetVectorTypeImpl<T, n>::type, e.g. Vector<float, 3> is float3.
+using Vector = typename GetVectorTypeImpl<T, n>::type;
+
+// Converts an m-element vector of OtherT into an n-element vector of T.
+// Elements that exist in `other` are converted with a C-style cast. When n > m the
+// remaining elements of the result are set to T(0); when n < m the surplus source
+// elements are dropped.
+template<typename T, int n, typename OtherT, int m>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector<T, n> _slang_vector_reshape(const Vector<OtherT, m> other)
+{
+    Vector<T, n> result;
+    for (int i = 0; i < n; i++)
+    {
+        // Build the padding zero directly in the destination type. Routing T(0)
+        // through an OtherT temporary required a T -> OtherT conversion that served
+        // no purpose other than producing the padding value.
+        T element = T(0);
+        if (i < m)
+            element = (T)_slang_vector_get_element(other, i);
+        *_slang_vector_get_element_ptr(&result, i) = element;
+    }
+    return result;
+}
+
+// Matrix stored as an array of ROWS row vectors, each of type Vector<T, COLS>.
+template<typename T, int ROWS, int COLS>
+struct Matrix
+{
+    Vector<T, COLS> rows[ROWS];
+    // Mutable access to row `index`. No bounds checking is performed.
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector<T, COLS>& operator[](size_t index)
+    {
+        return rows[index];
+    }
+    // Read-only access so that const matrices and const references can be indexed as well.
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL const Vector<T, COLS>& operator[](size_t index) const
+    {
+        return rows[index];
+    }
+};
+
+
+// Builds a matrix whose every row is GetVectorTypeImpl<T, COLS>::fromScalar(scalar).
+template<typename T, int ROWS, int COLS>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T scalar)
+{
+    Matrix<T, ROWS, COLS> filled;
+    int rowIndex = 0;
+    while (rowIndex < ROWS)
+    {
+        filled.rows[rowIndex] = GetVectorTypeImpl<T, COLS>::fromScalar(scalar);
+        ++rowIndex;
+    }
+    return filled;
+}
+
+// Builds a matrix from a single row vector; only row 0 is assigned.
+template<typename T, int ROWS, int COLS>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector<T, COLS>& row0)
+{
+    Matrix<T, ROWS, COLS> m;
+    m[0] = row0;
+    return m;
+}
+
+// Builds a matrix from two row vectors; rows 0 and 1 are assigned.
+template<typename T, int ROWS, int COLS>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+    const Vector<T, COLS>& row0,
+    const Vector<T, COLS>& row1)
+{
+    Matrix<T, ROWS, COLS> m;
+    m[0] = row0;
+    m[1] = row1;
+    return m;
+}
+
+// Builds a matrix from three row vectors; rows 0 through 2 are assigned.
+template<typename T, int ROWS, int COLS>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+    const Vector<T, COLS>& row0,
+    const Vector<T, COLS>& row1,
+    const Vector<T, COLS>& row2)
+{
+    Matrix<T, ROWS, COLS> m;
+    m[0] = row0;
+    m[1] = row1;
+    m[2] = row2;
+    return m;
+}
+
+// Builds a matrix from four row vectors; rows 0 through 3 are assigned.
+template<typename T, int ROWS, int COLS>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+    const Vector<T, COLS>& row0,
+    const Vector<T, COLS>& row1,
+    const Vector<T, COLS>& row2,
+    const Vector<T, COLS>& row3)
+{
+    Matrix<T, ROWS, COLS> m;
+    m[0] = row0;
+    m[1] = row1;
+    m[2] = row2;
+    m[3] = row3;
+    return m;
+}
+
+// Converts a matrix of another element type and/or shape into Matrix<T, ROWS, COLS>.
+// Elements that exist in both shapes are converted with a C-style cast. Elements of
+// the result that have no counterpart in `other` are set to T(0), mirroring what
+// _slang_vector_reshape does for vectors; previously they were left uninitialized.
+template<typename T, int ROWS, int COLS, typename U, int otherRow, int otherCol>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+    const Matrix<U, otherRow, otherCol>& other)
+{
+    Matrix<T, ROWS, COLS> result;
+    for (int i = 0; i < ROWS; i++)
+    {
+        for (int j = 0; j < COLS; j++)
+        {
+            T element = T(0);
+            // Only read from `other` inside its own bounds.
+            if (i < otherRow && j < otherCol)
+                element = (T)_slang_vector_get_element(other.rows[i], j);
+            *_slang_vector_get_element_ptr(result.rows + i, j) = element;
+        }
+    }
+    return result;
+}
+
+// Builds a matrix from four scalars: (v0, v1) go to row 0 and (v2, v3) to row 1.
+template<typename T, int ROWS, int COLS>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3)
+{
+    Matrix<T, ROWS, COLS> m;
+    Vector<T, COLS>& top = m.rows[0];
+    Vector<T, COLS>& bottom = m.rows[1];
+    top.x = v0;
+    top.y = v1;
+    bottom.x = v2;
+    bottom.y = v3;
+    return m;
+}
+
+// Builds a matrix from six scalars given row by row.
+// With COLS == 3 the values fill two rows of three; otherwise they fill three rows of two.
+template<typename T, int ROWS, int COLS>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+    T v0,
+    T v1,
+    T v2,
+    T v3,
+    T v4,
+    T v5)
+{
+    Matrix<T, ROWS, COLS> m;
+    if (COLS != 3)
+    {
+        // Three rows, two columns each.
+        m.rows[0].x = v0;
+        m.rows[0].y = v1;
+        m.rows[1].x = v2;
+        m.rows[1].y = v3;
+        m.rows[2].x = v4;
+        m.rows[2].y = v5;
+    }
+    else
+    {
+        // Two rows, three columns each. Indexed element access is used here
+        // (presumably so this branch still compiles when the row type has no third member).
+        Vector<T, COLS>* const first = &m.rows[0];
+        Vector<T, COLS>* const second = &m.rows[1];
+        *_slang_vector_get_element_ptr(first, 0) = v0;
+        *_slang_vector_get_element_ptr(first, 1) = v1;
+        *_slang_vector_get_element_ptr(first, 2) = v2;
+        *_slang_vector_get_element_ptr(second, 0) = v3;
+        *_slang_vector_get_element_ptr(second, 1) = v4;
+        *_slang_vector_get_element_ptr(second, 2) = v5;
+    }
+    return m;
+}
+
+// Builds a matrix from eight scalars given row by row.
+// With COLS == 4 the values fill two rows of four; otherwise they fill four rows of two.
+template<typename T, int ROWS, int COLS>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+    T v0,
+    T v1,
+    T v2,
+    T v3,
+    T v4,
+    T v5,
+    T v6,
+    T v7)
+{
+    Matrix<T, ROWS, COLS> m;
+    if (COLS != 4)
+    {
+        // Four rows, two columns each.
+        m.rows[0].x = v0;
+        m.rows[0].y = v1;
+        m.rows[1].x = v2;
+        m.rows[1].y = v3;
+        m.rows[2].x = v4;
+        m.rows[2].y = v5;
+        m.rows[3].x = v6;
+        m.rows[3].y = v7;
+    }
+    else
+    {
+        // Two rows, four columns each, written through indexed element access.
+        Vector<T, COLS>* const first = &m.rows[0];
+        Vector<T, COLS>* const second = &m.rows[1];
+        *_slang_vector_get_element_ptr(first, 0) = v0;
+        *_slang_vector_get_element_ptr(first, 1) = v1;
+        *_slang_vector_get_element_ptr(first, 2) = v2;
+        *_slang_vector_get_element_ptr(first, 3) = v3;
+        *_slang_vector_get_element_ptr(second, 0) = v4;
+        *_slang_vector_get_element_ptr(second, 1) = v5;
+        *_slang_vector_get_element_ptr(second, 2) = v6;
+        *_slang_vector_get_element_ptr(second, 3) = v7;
+    }
+    return m;
+}
+
+// Builds a matrix from nine scalars given row by row: three rows of three (x, y, z).
+template<typename T, int ROWS, int COLS>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+    T v0,
+    T v1,
+    T v2,
+    T v3,
+    T v4,
+    T v5,
+    T v6,
+    T v7,
+    T v8)
+{
+    Matrix<T, ROWS, COLS> m;
+    Vector<T, COLS>& r0 = m.rows[0];
+    Vector<T, COLS>& r1 = m.rows[1];
+    Vector<T, COLS>& r2 = m.rows[2];
+    r0.x = v0;
+    r0.y = v1;
+    r0.z = v2;
+    r1.x = v3;
+    r1.y = v4;
+    r1.z = v5;
+    r2.x = v6;
+    r2.y = v7;
+    r2.z = v8;
+    return m;
+}
+
+// Builds a matrix from twelve scalars given row by row.
+// With COLS == 4 the values fill three rows of four; otherwise they fill four rows of three.
+template<typename T, int ROWS, int COLS>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+    T v0,
+    T v1,
+    T v2,
+    T v3,
+    T v4,
+    T v5,
+    T v6,
+    T v7,
+    T v8,
+    T v9,
+    T v10,
+    T v11)
+{
+    Matrix<T, ROWS, COLS> m;
+    if (COLS != 4)
+    {
+        // Four rows, three columns each.
+        m.rows[0].x = v0;
+        m.rows[0].y = v1;
+        m.rows[0].z = v2;
+        m.rows[1].x = v3;
+        m.rows[1].y = v4;
+        m.rows[1].z = v5;
+        m.rows[2].x = v6;
+        m.rows[2].y = v7;
+        m.rows[2].z = v8;
+        m.rows[3].x = v9;
+        m.rows[3].y = v10;
+        m.rows[3].z = v11;
+    }
+    else
+    {
+        // Three rows, four columns each, written through indexed element access.
+        Vector<T, COLS>* const first = &m.rows[0];
+        Vector<T, COLS>* const second = &m.rows[1];
+        Vector<T, COLS>* const third = &m.rows[2];
+        *_slang_vector_get_element_ptr(first, 0) = v0;
+        *_slang_vector_get_element_ptr(first, 1) = v1;
+        *_slang_vector_get_element_ptr(first, 2) = v2;
+        *_slang_vector_get_element_ptr(first, 3) = v3;
+        *_slang_vector_get_element_ptr(second, 0) = v4;
+        *_slang_vector_get_element_ptr(second, 1) = v5;
+        *_slang_vector_get_element_ptr(second, 2) = v6;
+        *_slang_vector_get_element_ptr(second, 3) = v7;
+        *_slang_vector_get_element_ptr(third, 0) = v8;
+        *_slang_vector_get_element_ptr(third, 1) = v9;
+        *_slang_vector_get_element_ptr(third, 2) = v10;
+        *_slang_vector_get_element_ptr(third, 3) = v11;
+    }
+    return m;
+}
+
+// Builds a matrix from sixteen scalars given row by row: four rows of four (x, y, z, w).
+template<typename T, int ROWS, int COLS>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(
+    T v0,
+    T v1,
+    T v2,
+    T v3,
+    T v4,
+    T v5,
+    T v6,
+    T v7,
+    T v8,
+    T v9,
+    T v10,
+    T v11,
+    T v12,
+    T v13,
+    T v14,
+    T v15)
+{
+    Matrix<T, ROWS, COLS> m;
+    Vector<T, COLS>& r0 = m.rows[0];
+    Vector<T, COLS>& r1 = m.rows[1];
+    Vector<T, COLS>& r2 = m.rows[2];
+    Vector<T, COLS>& r3 = m.rows[3];
+    r0.x = v0;
+    r0.y = v1;
+    r0.z = v2;
+    r0.w = v3;
+    r1.x = v4;
+    r1.y = v5;
+    r1.z = v6;
+    r1.w = v7;
+    r2.x = v8;
+    r2.y = v9;
+    r2.z = v10;
+    r2.w = v11;
+    r3.x = v12;
+    r3.y = v13;
+    r3.z = v14;
+    r3.w = v15;
+    return m;
+}
+
+// Generates an element-wise binary `operator op` for Matrix<T, R, C>: each result
+// element is the corresponding element of thisVal combined with that of other via `op`.
+#define SLANG_MATRIX_BINARY_OP(T, op)                                          \
+    template<int R, int C>                                                     \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator op(            \
+        const Matrix<T, R, C>& thisVal,                                        \
+        const Matrix<T, R, C>& other)                                          \
+    {                                                                          \
+        Matrix<T, R, C> out;                                                   \
+        for (int row = 0; row < R; ++row)                                      \
+        {                                                                      \
+            for (int col = 0; col < C; ++col)                                  \
+            {                                                                  \
+                *_slang_vector_get_element_ptr(&out.rows[row], col) =          \
+                    _slang_vector_get_element(thisVal.rows[row], col)          \
+                        op _slang_vector_get_element(other.rows[row], col);    \
+            }                                                                  \
+        }                                                                      \
+        return out;                                                            \
+    }
+
+// Generates an element-wise prefix `operator op` for Matrix<T, R, C>.
+#define SLANG_MATRIX_UNARY_OP(T, op)                                                               \
+    template<int R, int C>                                                                         \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal) \
+    {                                                                                              \
+        Matrix<T, R, C> out;                                                                       \
+        for (int row = 0; row < R; ++row)                                                          \
+        {                                                                                          \
+            for (int col = 0; col < C; ++col)                                                      \
+            {                                                                                      \
+                *_slang_vector_get_element_ptr(&out.rows[row], col) =                              \
+                    op _slang_vector_get_element(thisVal.rows[row], col);                          \
+            }                                                                                      \
+        }                                                                                          \
+        return out;                                                                                \
+    }
+#define SLANG_INT_MATRIX_OPS(T)   /* Operator set for integer element types; unary minus is not in this list. */ \
+    SLANG_MATRIX_BINARY_OP(T, +)  \
+    SLANG_MATRIX_BINARY_OP(T, -)  \
+    SLANG_MATRIX_BINARY_OP(T, *)  \
+    SLANG_MATRIX_BINARY_OP(T, /)  \
+    SLANG_MATRIX_BINARY_OP(T, &)  \
+    SLANG_MATRIX_BINARY_OP(T, |)  \
+    SLANG_MATRIX_BINARY_OP(T, &&) \
+    SLANG_MATRIX_BINARY_OP(T, ||) \
+    SLANG_MATRIX_BINARY_OP(T, ^)  \
+    SLANG_MATRIX_BINARY_OP(T, %)  \
+    SLANG_MATRIX_UNARY_OP(T, !)   \
+    SLANG_MATRIX_UNARY_OP(T, ~)
+#define SLANG_FLOAT_MATRIX_OPS(T) /* Operator set for floating-point element types: + - * / and unary minus. */ \
+    SLANG_MATRIX_BINARY_OP(T, +)  \
+    SLANG_MATRIX_BINARY_OP(T, -)  \
+    SLANG_MATRIX_BINARY_OP(T, *)  \
+    SLANG_MATRIX_BINARY_OP(T, /)  \
+    SLANG_MATRIX_UNARY_OP(T, -)
+SLANG_INT_MATRIX_OPS(int) // Integer element types receive the integer operator set.
+SLANG_INT_MATRIX_OPS(uint)
+SLANG_INT_MATRIX_OPS(short)
+SLANG_INT_MATRIX_OPS(ushort)
+SLANG_INT_MATRIX_OPS(char)
+SLANG_INT_MATRIX_OPS(uchar)
+SLANG_INT_MATRIX_OPS(longlong)
+SLANG_INT_MATRIX_OPS(ulonglong)
+SLANG_FLOAT_MATRIX_OPS(float) // Floating-point element types receive the floating-point operator set.
+SLANG_FLOAT_MATRIX_OPS(double)
+#if SLANG_CUDA_ENABLE_HALF // __half operators exist only when half support is enabled.
+SLANG_FLOAT_MATRIX_OPS(__half)
+#endif
+// Generates unary minus for integer matrices; each element is computed as `0 - element`.
+#define SLANG_MATRIX_INT_NEG_OP(T)                                                        \
+    template<int R, int C>                                                                \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator-(Matrix<T, R, C> thisVal) \
+    {                                                                                     \
+        Matrix<T, R, C> negated;                                                          \
+        for (int row = 0; row < R; ++row)                                                 \
+        {                                                                                 \
+            for (int col = 0; col < C; ++col)                                             \
+            {                                                                             \
+                *_slang_vector_get_element_ptr(&negated.rows[row], col) =                 \
+                    0 - _slang_vector_get_element(thisVal.rows[row], col);                \
+            }                                                                             \
+        }                                                                                 \
+        return negated;                                                                   \
+    }
+SLANG_MATRIX_INT_NEG_OP(int)
+SLANG_MATRIX_INT_NEG_OP(uint)
+SLANG_MATRIX_INT_NEG_OP(short)
+SLANG_MATRIX_INT_NEG_OP(ushort)
+SLANG_MATRIX_INT_NEG_OP(char)
+SLANG_MATRIX_INT_NEG_OP(uchar)
+SLANG_MATRIX_INT_NEG_OP(longlong)
+SLANG_MATRIX_INT_NEG_OP(ulonglong)
+
+// Generates element-wise `operator%` for floating-point matrices by calling
+// _slang_fmod on each pair of corresponding elements.
+#define SLANG_FLOAT_MATRIX_MOD(T)                                                  \
+    template<int R, int C>                                                         \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator%(                  \
+        Matrix<T, R, C> left,                                                      \
+        Matrix<T, R, C> right)                                                     \
+    {                                                                              \
+        Matrix<T, R, C> remainder;                                                 \
+        for (int row = 0; row < R; ++row)                                          \
+        {                                                                          \
+            for (int col = 0; col < C; ++col)                                      \
+            {                                                                      \
+                *_slang_vector_get_element_ptr(&remainder.rows[row], col) =        \
+                    _slang_fmod(                                                   \
+                        _slang_vector_get_element(left.rows[row], col),            \
+                        _slang_vector_get_element(right.rows[row], col));          \
+            }                                                                      \
+        }                                                                          \
+        return remainder;                                                          \
+    }
+
+SLANG_FLOAT_MATRIX_MOD(float)
+SLANG_FLOAT_MATRIX_MOD(double)
+#if SLANG_CUDA_ENABLE_HALF
+// Element-wise remainder for __half matrices: each pair of elements is widened to
+// float, passed to _slang_fmod, and the result is narrowed back to __half.
+template<int R, int C>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<__half, R, C> operator%(
+    Matrix<__half, R, C> left,
+    Matrix<__half, R, C> right)
+{
+    Matrix<__half, R, C> remainder;
+    for (int row = 0; row < R; ++row)
+    {
+        for (int col = 0; col < C; ++col)
+        {
+            const float numerator = __half2float(_slang_vector_get_element(left.rows[row], col));
+            const float denominator =
+                __half2float(_slang_vector_get_element(right.rows[row], col));
+            *_slang_vector_get_element_ptr(&remainder.rows[row], col) =
+                __float2half(_slang_fmod(numerator, denominator));
+        }
+    }
+    return remainder;
+}
+#endif
+// The matrix operator generator macros are only needed above; remove them so they
+// do not leak further. (SLANG_FLOAT_MATRIX_MOD used to be undefined twice.)
+#undef SLANG_FLOAT_MATRIX_MOD
+#undef SLANG_MATRIX_BINARY_OP
+#undef SLANG_MATRIX_UNARY_OP
+#undef SLANG_INT_MATRIX_OPS
+#undef SLANG_FLOAT_MATRIX_OPS
+#undef SLANG_MATRIX_INT_NEG_OP
+
+// Generates the N-wide _slang_select for element type T: for each lane the result
+// takes the element of v0 when the matching element of `condition` is true, and the
+// element of v1 otherwise.
+#define SLANG_SELECT_IMPL(T, N)                                                                   \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector<T, N> _slang_select(                                \
+        bool##N condition,                                                                        \
+        Vector<T, N> v0,                                                                          \
+        Vector<T, N> v1)                                                                          \
+    {                                                                                             \
+        Vector<T, N> picked;                                                                      \
+        for (int lane = 0; lane < N; ++lane)                                                      \
+        {                                                                                         \
+            if (_slang_vector_get_element(condition, lane))                                       \
+                *_slang_vector_get_element_ptr(&picked, lane) = _slang_vector_get_element(v0, lane); \
+            else                                                                                  \
+                *_slang_vector_get_element_ptr(&picked, lane) = _slang_vector_get_element(v1, lane); \
+        }                                                                                         \
+        return picked;                                                                            \
+    }
+#define SLANG_SELECT_T(T)   /* Emits the 2-, 3- and 4-wide _slang_select overloads for element type T. */ \
+    SLANG_SELECT_IMPL(T, 2) \
+    SLANG_SELECT_IMPL(T, 3) \
+    SLANG_SELECT_IMPL(T, 4)
+
+SLANG_SELECT_T(int) // Element types that receive vector _slang_select overloads.
+SLANG_SELECT_T(uint)
+SLANG_SELECT_T(short)
+SLANG_SELECT_T(ushort)
+SLANG_SELECT_T(char)
+SLANG_SELECT_T(uchar)
+SLANG_SELECT_T(float)
+SLANG_SELECT_T(double)
+
+// Scalar select: yields v0 when `condition` is true, otherwise v1.
+template<typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_select(bool condition, T v0, T v1)
+{
+    if (condition)
+        return v0;
+    return v1;
+}
+
+//
+// Half support
+//
+
+#if SLANG_CUDA_ENABLE_HALF
+SLANG_SELECT_T(__half) // Vector _slang_select overloads for __half, emitted only inside the half-support section.
+
+// Convenience functions ushort -> half
+
+// Applies the scalar __ushort_as_half to both elements of a ushort2 and packs the
+// results into a __half2.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 __ushort_as_half(const ushort2& i)
+{
+    const __half first = __ushort_as_half(i.x);
+    const __half second = __ushort_as_half(i.y);
+    return __halves2half2(first, second);
+}
+// Applies the scalar __ushort_as_half to each element of a ushort3.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 __ushort_as_half(const ushort3& i)
+{
+    const __half hx = __ushort_as_half(i.x);
+    const __half hy = __ushort_as_half(i.y);
+    const __half hz = __ushort_as_half(i.z);
+    return __half3{hx, hy, hz};
+}
+// Applies the scalar __ushort_as_half to each element of a ushort4.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 __ushort_as_half(const ushort4& i)
+{
+    const __half hx = __ushort_as_half(i.x);
+    const __half hy = __ushort_as_half(i.y);
+    const __half hz = __ushort_as_half(i.z);
+    const __half hw = __ushort_as_half(i.w);
+    return __half4{hx, hy, hz, hw};
+}
+
+// Convenience functions half -> ushort
+
+// Applies the scalar __half_as_ushort to both elements of a __half2.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort2 __half_as_ushort(const __half2& i)
+{
+    const unsigned short first = __half_as_ushort(i.x);
+    const unsigned short second = __half_as_ushort(i.y);
+    return make_ushort2(first, second);
+}
+// Applies the scalar __half_as_ushort to each element of a __half3.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort3 __half_as_ushort(const __half3& i)
+{
+    const unsigned short ux = __half_as_ushort(i.x);
+    const unsigned short uy = __half_as_ushort(i.y);
+    const unsigned short uz = __half_as_ushort(i.z);
+    return make_ushort3(ux, uy, uz);
+}
+// Applies the scalar __half_as_ushort to each element of a __half4.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort4 __half_as_ushort(const __half4& i)
+{
+    const unsigned short ux = __half_as_ushort(i.x);
+    const unsigned short uy = __half_as_ushort(i.y);
+    const unsigned short uz = __half_as_ushort(i.z);
+    const unsigned short uw = __half_as_ushort(i.w);
+    return make_ushort4(ux, uy, uz, uw);
+}
+
+// This is a little bit of a hack. Fortunately CUDA has the definitions of the templated types in
+// include/surface_indirect_functions.h
+// Here we find the template definition requires a specialization of __nv_isurf_trait to allow
+// a specialization of the surface write functions.
+// This *isn't* a problem on the read functions as they don't have a return type that uses this
+// mechanism
+
+template<> // Registers __half with __nv_isurf_trait (type = void); per the comment above, the surface write specializations depend on this.
+struct __nv_isurf_trait<__half>
+{
+    typedef void type;
+};
+template<> // Same registration for __half2.
+struct __nv_isurf_trait<__half2>
+{
+    typedef void type;
+};
+template<> // Same registration for __half4. No __half3 specialization is provided.
+struct __nv_isurf_trait<__half4>
+{
+    typedef void type;
+};
+
+#define SLANG_DROP_PARENS(...) __VA_ARGS__ // Expands to its arguments unchanged; written next to a parenthesized list it strips the parentheses.
+
+#define SLANG_SURFACE_READ(FUNC_NAME, TYPE_ARGS, ARGS) /* Specializes FUNC_NAME for __half, __half2 and __half4 by reading ushort data and reinterpreting it. */ \
+    template<>                                                                                     \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL __half FUNC_NAME<__half>(                                   \
+        cudaSurfaceObject_t surfObj,                                                               \
+        SLANG_DROP_PARENS TYPE_ARGS, /* coordinate parameter declarations */                       \
+        cudaSurfaceBoundaryMode boundaryMode)                                                      \
+    {                                                                                              \
+        return __ushort_as_half(FUNC_NAME<ushort>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+    }                                                                                              \
+                                                                                                   \
+    template<>                                                                                     \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 FUNC_NAME<__half2>(                                 \
+        cudaSurfaceObject_t surfObj,                                                               \
+        SLANG_DROP_PARENS TYPE_ARGS,                                                               \
+        cudaSurfaceBoundaryMode boundaryMode)                                                      \
+    {                                                                                              \
+        return __ushort_as_half(                                                                   \
+            FUNC_NAME<ushort2>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode));                    \
+    }                                                                                              \
+                                                                                                   \
+    template<>                                                                                     \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 FUNC_NAME<__half4>(                                 \
+        cudaSurfaceObject_t surfObj,                                                               \
+        SLANG_DROP_PARENS TYPE_ARGS,                                                               \
+        cudaSurfaceBoundaryMode boundaryMode)                                                      \
+    {                                                                                              \
+        return __ushort_as_half(                                                                   \
+            FUNC_NAME<ushort4>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode));                    \
+    }
+
+SLANG_SURFACE_READ(surf1Dread, (int x), (x)) // Second argument: parameter declarations; third: the same names forwarded to the ushort read.
+SLANG_SURFACE_READ(surf2Dread, (int x, int y), (x, y))
+SLANG_SURFACE_READ(surf3Dread, (int x, int y, int z), (x, y, z))
+SLANG_SURFACE_READ(surf1DLayeredread, (int x, int layer), (x, layer))
+SLANG_SURFACE_READ(surf2DLayeredread, (int x, int y, int layer), (x, y, layer))
+SLANG_SURFACE_READ(surfCubemapread, (int x, int y, int face), (x, y, face))
+SLANG_SURFACE_READ(surfCubemapLayeredread, (int x, int y, int layerFace), (x, y, layerFace))
+
+#define SLANG_SURFACE_WRITE(FUNC_NAME, TYPE_ARGS, ARGS) /* Specializes FUNC_NAME for __half, __half2 and __half4 by reinterpreting the data as ushort and writing that. */ \
+    template<>                                                                                     \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half>(                                     \
+        __half data,                                                                               \
+        cudaSurfaceObject_t surfObj,                                                               \
+        SLANG_DROP_PARENS TYPE_ARGS, /* coordinate parameter declarations */                       \
+        cudaSurfaceBoundaryMode boundaryMode)                                                      \
+    {                                                                                              \
+        FUNC_NAME<ushort>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode);  \
+    }                                                                                              \
+                                                                                                   \
+    template<>                                                                                     \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half2>(                                    \
+        __half2 data,                                                                              \
+        cudaSurfaceObject_t surfObj,                                                               \
+        SLANG_DROP_PARENS TYPE_ARGS,                                                               \
+        cudaSurfaceBoundaryMode boundaryMode)                                                      \
+    {                                                                                              \
+        FUNC_NAME<ushort2>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \
+    }                                                                                              \
+                                                                                                   \
+    template<>                                                                                     \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half4>(                                    \
+        __half4 data,                                                                              \
+        cudaSurfaceObject_t surfObj,                                                               \
+        SLANG_DROP_PARENS TYPE_ARGS,                                                               \
+        cudaSurfaceBoundaryMode boundaryMode)                                                      \
+    {                                                                                              \
+        FUNC_NAME<ushort4>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \
+    }
+
+SLANG_SURFACE_WRITE(surf1Dwrite, (int x), (x)) // Second argument: parameter declarations; third: the same names forwarded to the ushort write.
+SLANG_SURFACE_WRITE(surf2Dwrite, (int x, int y), (x, y))
+SLANG_SURFACE_WRITE(surf3Dwrite, (int x, int y, int z), (x, y, z))
+SLANG_SURFACE_WRITE(surf1DLayeredwrite, (int x, int layer), (x, layer))
+SLANG_SURFACE_WRITE(surf2DLayeredwrite, (int x, int y, int layer), (x, y, layer))
+SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face))
+SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace))
+
+// ! Hack to test out reading !!!
+// Only works converting *from* half
+
+// template <typename T>
+// SLANG_FORCE_INLINE SLANG_CUDA_CALL T surf2Dread_convert(cudaSurfaceObject_t surfObj, int x, int
+// y, cudaSurfaceBoundaryMode boundaryMode);
+
+#define SLANG_SURFACE_READ_HALF_CONVERT(FUNC_NAME, TYPE_ARGS, ARGS) /* Declares FUNC_NAME##_convert<T> and specializes it for float, float2 and float4 (no float3). */ \
+                                                                                                 \
+    template<typename T>                                                                         \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL T FUNC_NAME##_convert(                                    \
+        cudaSurfaceObject_t surfObj,                                                             \
+        SLANG_DROP_PARENS TYPE_ARGS,                                                             \
+        cudaSurfaceBoundaryMode boundaryMode);                                                   \
+                                                                                                 \
+    template<>                                                                                   \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL float FUNC_NAME##_convert<float>(                         \
+        cudaSurfaceObject_t surfObj,                                                             \
+        SLANG_DROP_PARENS TYPE_ARGS,                                                             \
+        cudaSurfaceBoundaryMode boundaryMode)                                                    \
+    {                                                                                            \
+        return __ushort_as_half( /* the resulting __half is converted to the float return type */ \
+            FUNC_NAME<uint16_t>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode));                 \
+    }                                                                                            \
+                                                                                                 \
+    template<>                                                                                   \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL float2 FUNC_NAME##_convert<float2>(                       \
+        cudaSurfaceObject_t surfObj,                                                             \
+        SLANG_DROP_PARENS TYPE_ARGS,                                                             \
+        cudaSurfaceBoundaryMode boundaryMode)                                                    \
+    {                                                                                            \
+        const __half2 v =                                                                        \
+            __ushort_as_half(FUNC_NAME<ushort2>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+        return float2{v.x, v.y};                                                                 \
+    }                                                                                            \
+                                                                                                 \
+    template<>                                                                                   \
+    SLANG_FORCE_INLINE SLANG_CUDA_CALL float4 FUNC_NAME##_convert<float4>(                       \
+        cudaSurfaceObject_t surfObj,                                                             \
+        SLANG_DROP_PARENS TYPE_ARGS,                                                             \
+        cudaSurfaceBoundaryMode boundaryMode)                                                    \
+    {                                                                                            \
+        const __half4 v =                                                                        \
+            __ushort_as_half(FUNC_NAME<ushort4>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
+        return float4{v.x, v.y, v.z, v.w};                                                       \
+    }
+
+SLANG_SURFACE_READ_HALF_CONVERT(surf1Dread, (int x), (x)) // Produces surf1Dread_convert; only the 1D, 2D and 3D reads get a converting variant.
+SLANG_SURFACE_READ_HALF_CONVERT(surf2Dread, (int x, int y), (x, y))
+SLANG_SURFACE_READ_HALF_CONVERT(surf3Dread, (int x, int y, int z), (x, y, z))
+
+#endif
+
+// Support for doing format conversion when writing to a surface/RWTexture
+
+// NOTE! For normal surface access x values are *byte* addressed.
+// For the _convert versions they are *not*. They don't need to be because sust.p does not require
+// it.
+
+template<typename T> // Declaration only: bodies are supplied by the explicit specializations that follow.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(
+    T, // value to write (unnamed in the declaration)
+    cudaSurfaceObject_t surfObj,
+    int x,
+    cudaSurfaceBoundaryMode boundaryMode);
+template<typename T> // 2D counterpart of the declaration above.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(
+    T,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    cudaSurfaceBoundaryMode boundaryMode);
+template<typename T> // 3D counterpart of the declaration above.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert(
+    T,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    int z,
+    cudaSurfaceBoundaryMode boundaryMode);
+
+// https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#surface-instructions-sust
+
+// Float
+
+template<> // Stores one float to a 1D surface through an inline PTX sust.p instruction.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float>(
+    float v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    cudaSurfaceBoundaryMode boundaryMode) // Not referenced in the body; the asm string takes its mode from SLANG_PTX_BOUNDARY_MODE.
+{
+    asm volatile(
+        "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2};}\n\t" ::"l"(surfObj), // %0 = surface handle
+        "r"(x), // %1 = x coordinate
+        "f"(v)); // %2 = value to store
+}
+
+template<> // Stores one float to a 2D surface through an inline PTX sust.p instruction.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float>(
+    float v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    cudaSurfaceBoundaryMode boundaryMode) // Not referenced in the body; the asm string takes its mode from SLANG_PTX_BOUNDARY_MODE.
+{
+    asm volatile(
+        "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" ::"l"(surfObj), // %0 = surface handle
+        "r"(x), // %1 = x coordinate
+        "r"(y), // %2 = y coordinate
+        "f"(v)); // %3 = value to store
+}
+
+// Stores one float to a 3D surface through an inline PTX `sust.p` (formatted
+// surface store) instruction.
+//   v            value to store
+//   surfObj      destination surface handle
+//   x, y, z      texel coordinates
+//   boundaryMode not referenced in the body; the instruction's mode text comes from
+//                SLANG_PTX_BOUNDARY_MODE
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float>(
+    float v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    int z,
+    cudaSurfaceBoundaryMode boundaryMode)
+{
+    // Use the .3d geometry (this previously said .2d while passing three coordinates).
+    // PTX takes 3D surface coordinates as a four-element vector whose last element is
+    // ignored, so z is repeated as padding.
+    asm volatile(
+        "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%3}], {%4};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(z),
+        "f"(v));
+}
+
+// Float2
+
+template<> // Stores a float2 to a 1D surface through an inline PTX sust.p instruction.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float2>(
+    float2 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    cudaSurfaceBoundaryMode boundaryMode) // Not referenced in the body; the asm string takes its mode from SLANG_PTX_BOUNDARY_MODE.
+{
+    const float vx = v.x, vy = v.y; // Each component is bound below as its own "f" operand.
+    asm volatile(
+        "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2,%3};}\n\t" ::"l"(surfObj), // %0 = surface handle
+        "r"(x), // %1 = x coordinate
+        "f"(vx), // %2
+        "f"(vy)); // %3
+}
+
+// Stores a float2 to a 2D surface at (x, y) using a PTX `sust.p` instruction.
+//
+// Two data registers are supplied, so the instruction carries the `.v2` vector
+// modifier; the data operand width has to match the modifier.
+// `boundaryMode` is not read: the clamp suffix of the instruction is taken from the
+// SLANG_PTX_BOUNDARY_MODE macro at compile time.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float2>(
+    float2 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    cudaSurfaceBoundaryMode boundaryMode)
+{
+    // Copy the components to scalars so each can be bound to an "f" register operand.
+    const float vx = v.x, vy = v.y;
+    asm volatile(
+        "{sust.p.2d.v2.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "f"(vx),
+        "f"(vy));
+}
+
+// Stores a float2 to a 3D surface at (x, y, z) using a PTX `sust.p` instruction.
+//
+// The instruction must use the `.3d` geometry with a four-element coordinate vector
+// (the last element is ignored, so `z` is repeated as padding), and the `.v2` vector
+// modifier because two data registers are supplied.
+// `boundaryMode` is not read: the clamp suffix of the instruction is taken from the
+// SLANG_PTX_BOUNDARY_MODE macro at compile time.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float2>(
+    float2 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    int z,
+    cudaSurfaceBoundaryMode boundaryMode)
+{
+    // Copy the components to scalars so each can be bound to an "f" register operand.
+    const float vx = v.x, vy = v.y;
+    asm volatile(
+        "{sust.p.3d.v2.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1,%2,%3,%3}], {%4,%5};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(z),
+        "f"(vx),
+        "f"(vy));
+}
+
+// Float4
+// Stores a float4 to a 1D surface at coordinate x using a PTX `sust.p` instruction.
+//
+// Four data registers are supplied, so the instruction carries the `.v4` vector
+// modifier; the data operand width has to match the modifier.
+// `boundaryMode` is not read: the clamp suffix of the instruction is taken from the
+// SLANG_PTX_BOUNDARY_MODE macro at compile time.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float4>(
+    float4 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    cudaSurfaceBoundaryMode boundaryMode)
+{
+    // Copy the components to scalars so each can be bound to an "f" register operand.
+    const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
+    asm volatile(
+        "{sust.p.1d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1}], {%2,%3,%4,%5};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "f"(vx),
+        "f"(vy),
+        "f"(vz),
+        "f"(vw));
+}
+
+// Stores a float4 to a 2D surface at (x, y) using a PTX `sust.p` instruction.
+//
+// Four data registers are supplied, so the instruction carries the `.v4` vector
+// modifier; the data operand width has to match the modifier.
+// `boundaryMode` is not read: the clamp suffix of the instruction is taken from the
+// SLANG_PTX_BOUNDARY_MODE macro at compile time.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float4>(
+    float4 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    cudaSurfaceBoundaryMode boundaryMode)
+{
+    // Copy the components to scalars so each can be bound to an "f" register operand.
+    const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
+    asm volatile(
+        "{sust.p.2d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "f"(vx),
+        "f"(vy),
+        "f"(vz),
+        "f"(vw));
+}
+
+// Stores a float4 to a 3D surface at (x, y, z) using a PTX `sust.p` instruction.
+//
+// The instruction must use the `.3d` geometry with a four-element coordinate vector
+// (the last element is ignored, so `z` is repeated as padding), and the `.v4` vector
+// modifier because four data registers are supplied.
+// `boundaryMode` is not read: the clamp suffix of the instruction is taken from the
+// SLANG_PTX_BOUNDARY_MODE macro at compile time.
+template<>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float4>(
+    float4 v,
+    cudaSurfaceObject_t surfObj,
+    int x,
+    int y,
+    int z,
+    cudaSurfaceBoundaryMode boundaryMode)
+{
+    // Copy the components to scalars so each can be bound to an "f" register operand.
+    const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
+    asm volatile(
+        "{sust.p.3d.v4.b32." SLANG_PTX_BOUNDARY_MODE
+        " [%0, {%1,%2,%3,%3}], {%4,%5,%6,%7};}\n\t" ::"l"(surfObj),
+        "r"(x),
+        "r"(y),
+        "r"(z),
+        "f"(vx),
+        "f"(vy),
+        "f"(vz),
+        "f"(vw));
+}
+
+// ----------------------------- F32 -----------------------------------------
+
+// Unary
+// Single-precision unary helpers. Each function forwards its argument to the
+// global-namespace float math routine named in its body and returns the result.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_ceil(float f)
+{
+    return ::ceilf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_floor(float f)
+{
+    return ::floorf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_round(float f)
+{
+    return ::roundf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sin(float f)
+{
+    return ::sinf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_cos(float f)
+{
+    return ::cosf(f);
+}
+// Writes sin(f) to *s and cos(f) to *c through a single ::sincosf call.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void F32_sincos(float f, float* s, float* c)
+{
+    ::sincosf(f, s, c);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_tan(float f)
+{
+    return ::tanf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_asin(float f)
+{
+    return ::asinf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_acos(float f)
+{
+    return ::acosf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_atan(float f)
+{
+    return ::atanf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sinh(float f)
+{
+    return ::sinhf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_cosh(float f)
+{
+    return ::coshf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_tanh(float f)
+{
+    return ::tanhf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log2(float f)
+{
+    return ::log2f(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log(float f)
+{
+    return ::logf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log10(float f)
+{
+    return ::log10f(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_exp2(float f)
+{
+    return ::exp2f(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_exp(float f)
+{
+    return ::expf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_abs(float f)
+{
+    return ::fabsf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_trunc(float f)
+{
+    return ::truncf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sqrt(float f)
+{
+    return ::sqrtf(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_rsqrt(float f)
+{
+    return ::rsqrtf(f);
+}
+// Sign of f: returns f itself when it compares equal to zero (so a zero input is
+// passed through unchanged), -1.0f when f is below zero and 1.0f otherwise. A NaN
+// fails both comparisons and therefore yields 1.0f.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sign(float f)
+{
+    if (f == 0.0f)
+    {
+        return f;
+    }
+    return (f < 0.0f) ? -1.0f : 1.0f;
+}
+// Fractional part of f, computed as f minus F32_floor(f).
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_frac(float f)
+{
+    const float wholePart = F32_floor(f);
+    return f - wholePart;
+}
+
+// Classification helpers: thin wrappers over the unqualified isnan / isfinite / isinf.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isnan(float f)
+{
+    return isnan(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isfinite(float f)
+{
+    return isfinite(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isinf(float f)
+{
+    return isinf(f);
+}
+
+// Binary
+// Each helper below forwards (a, b) to the float math routine named in its body.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_min(float a, float b)
+{
+    return ::fminf(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_max(float a, float b)
+{
+    return ::fmaxf(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_pow(float a, float b)
+{
+    return ::powf(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_fmod(float a, float b)
+{
+    return ::fmodf(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_remainder(float a, float b)
+{
+    return ::remainderf(a, b);
+}
+// Two-argument arctangent of a / b in single precision.
+// Calls the float routine ::atan2f directly, like the other F32_* helpers, rather
+// than relying on overload resolution of ::atan2 followed by a cast back to float.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_atan2(float a, float b)
+{
+    return ::atan2f(a, b);
+}
+
+// Splits x into a mantissa (returned) and a power-of-two exponent (written to *e) by
+// forwarding to ::frexpf. The call is qualified with `::` like every other math
+// helper in this section, so name lookup cannot pick up an unrelated `frexpf`.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_frexp(float x, int* e)
+{
+    return ::frexpf(x, e);
+}
+
+// Forwards to ::modff: returns its result and lets it write through ip.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_modf(float x, float* ip)
+{
+    return ::modff(x, ip);
+}
+
+// Reinterprets the bits of f as an unsigned 32-bit integer by writing the `f` member
+// of a Union32 and reading its `u` member.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t F32_asuint(float f)
+{
+    Union32 u;
+    u.f = f;
+    return u.u;
+}
+// Same as F32_asuint, but reads the Union32 `i` member to return a signed value.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t F32_asint(float f)
+{
+    Union32 u;
+    u.f = f;
+    return u.i;
+}
+
+// Ternary
+// Forwards (a, b, c) to ::fmaf.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_fma(float a, float b, float c)
+{
+    return ::fmaf(a, b, c);
+}
+
+
+// ----------------------------- F64 -----------------------------------------
+
+// Unary
+// Double-precision unary helpers. Each function forwards its argument to the
+// global-namespace math routine named in its body and returns the result.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_ceil(double f)
+{
+    return ::ceil(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_floor(double f)
+{
+    return ::floor(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_round(double f)
+{
+    return ::round(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sin(double f)
+{
+    return ::sin(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_cos(double f)
+{
+    return ::cos(f);
+}
+// Writes sin(f) to *s and cos(f) to *c through a single ::sincos call.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_sincos(double f, double* s, double* c)
+{
+    ::sincos(f, s, c);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_tan(double f)
+{
+    return ::tan(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_asin(double f)
+{
+    return ::asin(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_acos(double f)
+{
+    return ::acos(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_atan(double f)
+{
+    return ::atan(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sinh(double f)
+{
+    return ::sinh(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_cosh(double f)
+{
+    return ::cosh(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_tanh(double f)
+{
+    return ::tanh(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log2(double f)
+{
+    return ::log2(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log(double f)
+{
+    return ::log(f);
+}
+// Base-10 logarithm in double precision; forwards to ::log10.
+// The parameter is `double` like every other F64_* helper, so a double argument is
+// no longer narrowed to float before the logarithm is taken.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log10(double f)
+{
+    return ::log10(f);
+}
+// More double-precision unary helpers; each forwards to the routine named in its body.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_exp2(double f)
+{
+    return ::exp2(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_exp(double f)
+{
+    return ::exp(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_abs(double f)
+{
+    return ::fabs(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_trunc(double f)
+{
+    return ::trunc(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sqrt(double f)
+{
+    return ::sqrt(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_rsqrt(double f)
+{
+    return ::rsqrt(f);
+}
+// Sign of f: returns f itself when it compares equal to zero (so a zero input is
+// passed through unchanged), -1.0 when f is below zero and 1.0 otherwise. A NaN
+// fails both comparisons and therefore yields 1.0.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sign(double f)
+{
+    if (f == 0.0)
+    {
+        return f;
+    }
+    return (f < 0.0) ? -1.0 : 1.0;
+}
+// Fractional part of f, computed as f minus F64_floor(f).
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_frac(double f)
+{
+    const double wholePart = F64_floor(f);
+    return f - wholePart;
+}
+
+// Classification helpers: thin wrappers over the unqualified isnan / isfinite / isinf.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isnan(double f)
+{
+    return isnan(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isfinite(double f)
+{
+    return isfinite(f);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isinf(double f)
+{
+    return isinf(f);
+}
+
+// Binary
+// Each helper below forwards (a, b) to the double math routine named in its body.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_min(double a, double b)
+{
+    return ::fmin(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_max(double a, double b)
+{
+    return ::fmax(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_pow(double a, double b)
+{
+    return ::pow(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_fmod(double a, double b)
+{
+    return ::fmod(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_remainder(double a, double b)
+{
+    return ::remainder(a, b);
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_atan2(double a, double b)
+{
+    return ::atan2(a, b);
+}
+
+// Forwards to ::frexp: returns its result and lets it write the exponent through e.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_frexp(double x, int* e)
+{
+    return ::frexp(x, e);
+}
+
+// Forwards to ::modf: returns its result and lets it write through ip.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_modf(double x, double* ip)
+{
+    return ::modf(x, ip);
+}
+
+// Reinterprets the bits of d through a Union64 and splits them into two unsigned
+// 32-bit halves: *low receives the least-significant 32 bits, *hi the bits above them.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_asuint(double d, uint32_t* low, uint32_t* hi)
+{
+    Union64 pun;
+    pun.d = d;
+    const auto raw = pun.u;
+    *low = uint32_t(raw);
+    *hi = uint32_t(raw >> 32);
+}
+
+// Reinterprets the bits of d through a Union64 and splits them into two signed
+// 32-bit halves: *low receives the least-significant 32 bits, *hi the bits above them.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_asint(double d, int32_t* low, int32_t* hi)
+{
+    Union64 pun;
+    pun.d = d;
+    const auto raw = pun.u;
+    *low = int32_t(raw);
+    *hi = int32_t(raw >> 32);
+}
+
+// Ternary
+// Forwards (a, b, c) to ::fma.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_fma(double a, double b, double c)
+{
+    return ::fma(a, b, c);
+}
+
+// ----------------------------- I32 -----------------------------------------
+
+// Unary
+// Absolute value by negating negative inputs.
+// NOTE(review): negating the most negative int32_t overflows; confirm callers never
+// pass it.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_abs(int32_t f)
+{
+    return (f < 0) ? -f : f;
+}
+
+// Binary
+// Smaller / larger of two signed 32-bit values.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_min(int32_t a, int32_t b)
+{
+    return a < b ? a : b;
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_max(int32_t a, int32_t b)
+{
+    return a > b ? a : b;
+}
+
+// Reinterprets the bits of x as a float by writing the `i` member of a Union32 and
+// reading its `f` member.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float I32_asfloat(int32_t x)
+{
+    Union32 u;
+    u.i = x;
+    return u.f;
+}
+// Value conversion to unsigned 32-bit.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_asuint(int32_t x)
+{
+    return uint32_t(x);
+}
+// Builds a double from two 32-bit halves: `hi` supplies the upper 32 bits and `low`
+// the lower 32 bits of the 64-bit pattern.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double I32_asdouble(int32_t low, int32_t hi)
+{
+    Union64 u;
+    // `low` goes through uint32_t first so a negative value is not sign-extended into
+    // the upper half when it is OR-ed in.
+    u.u = (uint64_t(hi) << 32) | uint32_t(low);
+    return u.d;
+}
+
+// ----------------------------- U32 -----------------------------------------
+
+// Unary
+// Absolute value of an unsigned integer: returns the input unchanged.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_abs(uint32_t f)
+{
+    return f;
+}
+
+// Binary
+// Smaller / larger of two unsigned 32-bit values.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_min(uint32_t a, uint32_t b)
+{
+    return a < b ? a : b;
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_max(uint32_t a, uint32_t b)
+{
+    return a > b ? a : b;
+}
+
+// Reinterprets the bits of x as a float by writing the `u` member of a Union32 and
+// reading its `f` member.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float U32_asfloat(uint32_t x)
+{
+    Union32 u;
+    u.u = x;
+    return u.f;
+}
+// Converts an unsigned 32-bit value to the signed 32-bit value with the same bits.
+// This is the counterpart of I32_asuint: it takes uint32_t and returns int32_t. The
+// previous declaration had the two types swapped, so "asint" produced an unsigned
+// result.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t U32_asint(uint32_t x)
+{
+    return int32_t(x);
+}
+
+// Builds a double from two 32-bit halves: `hi` supplies the upper 32 bits and `low`
+// the lower 32 bits of the 64-bit pattern.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double U32_asdouble(uint32_t low, uint32_t hi)
+{
+    Union64 u;
+    u.u = (uint64_t(hi) << 32) | low;
+    return u.d;
+}
+
+// Number of set bits in v, via the __popc intrinsic.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_countbits(uint32_t v)
+{
+    // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html#group__CUDA__MATH__INTRINSIC__INT_1g43c9c7d2b9ebf202ff1ef5769989be46
+    return __popc(v);
+}
+
+
+// ----------------------------- I64 -----------------------------------------
+
+// Absolute value by negating negative inputs.
+// NOTE(review): negating the most negative int64_t overflows; confirm callers never
+// pass it.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_abs(int64_t f)
+{
+    return (f < 0) ? -f : f;
+}
+
+// Smaller / larger of two signed 64-bit values.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_min(int64_t a, int64_t b)
+{
+    return a < b ? a : b;
+}
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_max(int64_t a, int64_t b)
+{
+    return a > b ? a : b;
+}
+
+// ----------------------------- U64 -----------------------------------------
+
+// Absolute value of an unsigned 64-bit integer: returns the input unchanged.
+// The return type is uint64_t (matching U32_abs / UPTR_abs) so that values above
+// INT64_MAX are not converted to negative numbers.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint64_t U64_abs(uint64_t f)
+{
+    return f;
+}
+
+// Smaller of two unsigned 64-bit values.
+// The return type is uint64_t (matching U32_min / UPTR_min) so that results above
+// INT64_MAX are not converted to negative numbers.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint64_t U64_min(uint64_t a, uint64_t b)
+{
+    return a < b ? a : b;
+}
+// Larger of two unsigned 64-bit values.
+// The return type is uint64_t (matching U32_max / UPTR_max) so that results above
+// INT64_MAX are not converted to negative numbers.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint64_t U64_max(uint64_t a, uint64_t b)
+{
+    return a > b ? a : b;
+}
+
+// Number of set bits in the 64-bit value v, via the __popcll intrinsic.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v)
+{
+    // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html#group__CUDA__MATH__INTRINSIC__INT_1g43c9c7d2b9ebf202ff1ef5769989be46
+    return __popcll(v);
+}
+
+// ----------------------------- IPTR -----------------------------------------
+
+// Absolute value by negating negative inputs.
+// NOTE(review): negating the most negative intptr_t overflows; confirm callers never
+// pass it.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL intptr_t IPTR_abs(intptr_t f)
+{
+    return (f < 0) ? -f : f;
+}
+
+// Smaller of two intptr_t values.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL intptr_t IPTR_min(intptr_t a, intptr_t b)
+{
+    return a < b ? a : b;
+}
+
+// Larger of two intptr_t values.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL intptr_t IPTR_max(intptr_t a, intptr_t b)
+{
+    return a > b ? a : b;
+}
+
+// ----------------------------- UPTR -----------------------------------------
+
+// Absolute value of an unsigned integer: returns the input unchanged.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uintptr_t UPTR_abs(uintptr_t f)
+{
+    return f;
+}
+
+// Smaller of two uintptr_t values.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uintptr_t UPTR_min(uintptr_t a, uintptr_t b)
+{
+    return a < b ? a : b;
+}
+
+// Larger of two uintptr_t values.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uintptr_t UPTR_max(uintptr_t a, uintptr_t b)
+{
+    return a > b ? a : b;
+}
+
+// ----------------------------- ResourceType -----------------------------------------
+
+
+// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-structuredbuffer-getdimensions
+// Missing  Load(_In_  int  Location, _Out_ uint Status);
+
+// Read-only view over an array of T, mirroring HLSL's StructuredBuffer.
+//
+// Holds a raw `data` pointer and, unless SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT is
+// defined, an element `count`. When the count is present every indexed access goes
+// through SLANG_BOUND_CHECK first; when it is compiled out, accesses are unchecked
+// and GetDimensions is not available.
+template<typename T>
+struct StructuredBuffer
+{
+    // Element access by index.
+    SLANG_CUDA_CALL const T& operator[](size_t index) const
+    {
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+        SLANG_BOUND_CHECK(index, count);
+#endif
+        return data[index];
+    }
+
+    // Same as operator[], under the HLSL method name.
+    SLANG_CUDA_CALL const T& Load(size_t index) const
+    {
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+        SLANG_BOUND_CHECK(index, count);
+#endif
+        return data[index];
+    }
+
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+    // Reports the element count and sizeof(T) as the stride. Declared const (it only
+    // reads members) so it can be called on a const buffer, as
+    // ByteAddressBuffer::GetDimensions already can.
+    SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) const
+    {
+        *outNumStructs = uint32_t(count);
+        *outStride = uint32_t(sizeof(T));
+    }
+#endif
+
+    T* data;
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+    size_t count;
+#endif
+};
+
+// Writable variant of StructuredBuffer<T>: reuses the base members and adds an
+// operator[] that returns a mutable reference.
+template<typename T>
+struct RWStructuredBuffer : StructuredBuffer<T>
+{
+    // Const-qualified yet returns T&: constness applies to the buffer handle (pointer
+    // and count), not to the elements that `data` points at.
+    SLANG_CUDA_CALL T& operator[](size_t index) const
+    {
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+        SLANG_BOUND_CHECK(index, this->count);
+#endif
+        return this->data[index];
+    }
+};
+
+// Missing  Load(_In_  int  Location, _Out_ uint Status);
+// Read-only buffer addressed by byte offset, mirroring HLSL's ByteAddressBuffer.
+//
+// `data` points at 32-bit words and `sizeInBytes` is the total size. Every load runs
+// SLANG_BOUND_CHECK_BYTE_ADDRESS with the byte offset, the access size and the buffer
+// size. The Load/Load2/Load3/Load4 overloads convert the byte offset to a word index
+// with `index >> 2`.
+struct ByteAddressBuffer
+{
+    // Reports the buffer size in bytes.
+    SLANG_CUDA_CALL void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); }
+    // Loads one 32-bit word at byte offset `index`.
+    SLANG_CUDA_CALL uint32_t Load(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
+        return data[index >> 2];
+    }
+    // Loads two consecutive 32-bit words starting at byte offset `index`.
+    SLANG_CUDA_CALL uint2 Load2(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
+        const size_t dataIdx = index >> 2;
+        return uint2{data[dataIdx], data[dataIdx + 1]};
+    }
+    // Loads three consecutive 32-bit words starting at byte offset `index`.
+    SLANG_CUDA_CALL uint3 Load3(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
+        const size_t dataIdx = index >> 2;
+        return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]};
+    }
+    // Loads four consecutive 32-bit words starting at byte offset `index`.
+    SLANG_CUDA_CALL uint4 Load4(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
+        const size_t dataIdx = index >> 2;
+        return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]};
+    }
+    // Loads a value of arbitrary type T from byte offset `index` with memcpy, so the
+    // offset is used as-is rather than being rounded to a word index.
+    template<typename T>
+    SLANG_CUDA_CALL T Load(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
+        // The local shadows the member, hence the explicit this->data below.
+        T data;
+        memcpy(&data, ((const char*)this->data) + index, sizeof(T));
+        return data;
+    }
+    // Views the same storage as a StructuredBuffer<T>.
+    template<typename T>
+    SLANG_CUDA_CALL StructuredBuffer<T> asStructuredBuffer() const
+    {
+        StructuredBuffer<T> rs;
+        rs.data = (T*)data;
+        // StructuredBuffer only has a `count` member when the count is compiled in, so
+        // the assignment must be guarded the same way.
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+        rs.count = sizeInBytes / sizeof(T);
+#endif
+        return rs;
+    }
+    const uint32_t* data;
+    size_t sizeInBytes; //< Must be multiple of 4
+};
+
+// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-rwbyteaddressbuffer
+// Missing support for Atomic operations
+// Missing support for Load with status
+// Read/write buffer addressed by byte offset, mirroring HLSL's RWByteAddressBuffer.
+//
+// `data` points at 32-bit words and `sizeInBytes` is the total size. Every access
+// runs SLANG_BOUND_CHECK_BYTE_ADDRESS with the byte offset, the access size and the
+// buffer size. The fixed-width Load*/Store* overloads convert the byte offset to a
+// word index with `index >> 2`; the templated overloads use memcpy at the exact byte
+// offset. Store methods are const because they modify the pointed-to storage, not
+// the handle itself.
+struct RWByteAddressBuffer
+{
+    // Reports the buffer size in bytes.
+    SLANG_CUDA_CALL void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); }
+
+    // Loads one 32-bit word at byte offset `index`.
+    SLANG_CUDA_CALL uint32_t Load(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
+        return data[index >> 2];
+    }
+    // Loads two consecutive 32-bit words starting at byte offset `index`.
+    SLANG_CUDA_CALL uint2 Load2(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
+        const size_t dataIdx = index >> 2;
+        return uint2{data[dataIdx], data[dataIdx + 1]};
+    }
+    // Loads three consecutive 32-bit words starting at byte offset `index`.
+    SLANG_CUDA_CALL uint3 Load3(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
+        const size_t dataIdx = index >> 2;
+        return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]};
+    }
+    // Loads four consecutive 32-bit words starting at byte offset `index`.
+    SLANG_CUDA_CALL uint4 Load4(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
+        const size_t dataIdx = index >> 2;
+        return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]};
+    }
+    // Loads a value of arbitrary type T from byte offset `index` with memcpy.
+    template<typename T>
+    SLANG_CUDA_CALL T Load(size_t index) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
+        // The local shadows the member, hence the explicit this->data below.
+        T data;
+        memcpy(&data, ((const char*)this->data) + index, sizeof(T));
+        return data;
+    }
+
+    // Stores one 32-bit word at byte offset `index`.
+    SLANG_CUDA_CALL void Store(size_t index, uint32_t v) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
+        data[index >> 2] = v;
+    }
+    // Stores two consecutive 32-bit words starting at byte offset `index`.
+    SLANG_CUDA_CALL void Store2(size_t index, uint2 v) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
+        const size_t dataIdx = index >> 2;
+        data[dataIdx + 0] = v.x;
+        data[dataIdx + 1] = v.y;
+    }
+    // Stores three consecutive 32-bit words starting at byte offset `index`.
+    SLANG_CUDA_CALL void Store3(size_t index, uint3 v) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
+        const size_t dataIdx = index >> 2;
+        data[dataIdx + 0] = v.x;
+        data[dataIdx + 1] = v.y;
+        data[dataIdx + 2] = v.z;
+    }
+    // Stores four consecutive 32-bit words starting at byte offset `index`.
+    SLANG_CUDA_CALL void Store4(size_t index, uint4 v) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
+        const size_t dataIdx = index >> 2;
+        data[dataIdx + 0] = v.x;
+        data[dataIdx + 1] = v.y;
+        data[dataIdx + 2] = v.z;
+        data[dataIdx + 3] = v.w;
+    }
+    // Stores a value of arbitrary type T at byte offset `index` with memcpy.
+    template<typename T>
+    SLANG_CUDA_CALL void Store(size_t index, T const& value) const
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
+        memcpy((char*)data + index, &value, sizeof(T));
+    }
+
+    /// Can be used in the core module to gain access
+    // Returns a typed pointer to the storage at byte offset `index`.
+    template<typename T>
+    SLANG_CUDA_CALL T* _getPtrAt(size_t index)
+    {
+        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
+        return (T*)(((char*)data) + index);
+    }
+    // Views the same storage as an RWStructuredBuffer<T>.
+    template<typename T>
+    SLANG_CUDA_CALL RWStructuredBuffer<T> asStructuredBuffer() const
+    {
+        RWStructuredBuffer<T> rs;
+        rs.data = (T*)data;
+        // StructuredBuffer only has a `count` member when the count is compiled in, so
+        // the assignment must be guarded the same way.
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+        rs.count = sizeInBytes / sizeof(T);
+#endif
+        return rs;
+    }
+    uint32_t* data;
+    size_t sizeInBytes; //< Must be multiple of 4
+};
+
+
+// ---------------------- Wave --------------------------------------
+
+// TODO(JS): It appears that cuda does not have a simple way to get a lane index.
+//
+// Another approach could be...
+// laneId = ((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x) &
+//          SLANG_CUDA_WARP_MASK
+// If that is really true, another way to do this would be for the code generator
+// to add this function with the [numthreads] baked in.
+//
+// For now I'll just assume you have a launch that makes the following correct if the kernel uses
+// WaveGetLaneIndex()
+// Returns the calling thread's lane index within its warp. Two implementations are
+// selected at compile time by SLANG_USE_ASM_LANE_ID.
+#ifndef SLANG_USE_ASM_LANE_ID
+// Default: derives the lane from threadIdx.x only, so it is only right for launches
+// where that matches the hardware lane (see the comments in the body).
+__forceinline__ __device__ uint32_t _getLaneId()
+{
+    // If the launch is (or I guess some multiple of the warp size)
+    // we try this mechanism, which is apparently faster.
+    return threadIdx.x & SLANG_CUDA_WARP_MASK;
+}
+#else
+// Alternative: reads the PTX %laneid special register.
+__forceinline__ __device__ uint32_t _getLaneId()
+{
+    // https://stackoverflow.com/questions/44337309/whats-the-most-efficient-way-to-calculate-the-warp-id-lane-id-in-a-1-d-grid#
+    // This mechanism is not the fastest way to do it, and that is why the other mechanism
+    // is the default. But the other mechanism relies on a launch that makes the assumption
+    // true.
+    unsigned ret;
+    asm volatile("mov.u32 %0, %laneid;" : "=r"(ret));
+    return ret;
+}
+#endif
+
+// Bit mask over the lanes of a warp; the helpers below treat bit i as lane i.
+typedef int WarpMask;
+
+// It appears that the __activemask() cannot always be used because
+// threads need to be converged.
+//
+// For CUDA the article claims mask has to be used carefully
+// https://devblogs.nvidia.com/using-cuda-warp-level-primitives/
+// With the Warp intrinsics there is no mask, and it's just the 'active lanes'.
+// __activemask() though does not require there is convergence, so that doesn't work.
+//
+// '__ballot_sync' produces a convergence.
+//
+// From the CUDA docs:
+// ```For __all_sync, __any_sync, and __ballot_sync, a mask must be passed that specifies the
+// threads participating in the call. A bit, representing the thread's lane ID, must be set for each
+// participating thread to ensure they are properly converged before the intrinsic is executed by
+// the hardware. All active threads named in mask must execute the same intrinsic with the same
+// mask, or the result is undefined.```
+//
+// Currently there isn't a mechanism to correctly get the mask without it being passed through.
+// Doing so will most likely require some changes to slang code generation to track masks, for now
+// then we use _getActiveMask.
+
+// Return mask of all the lanes less than the current lane
+// Returns a mask with one bit set for every lane index below the calling lane.
+// The shift and subtraction are done in uint32_t: for lane 31 the signed form
+// `(int(1) << 31) - 1` overflows, whereas the unsigned form is well defined and
+// yields 0x7FFFFFFF.
+__forceinline__ __device__ WarpMask _getLaneLtMask()
+{
+    return WarpMask((uint32_t(1) << _getLaneId()) - 1u);
+}
+
+// TODO(JS):
+// THIS IS NOT CORRECT! Determining the appropriate active mask requires proper
+// mask tracking.
+// Approximates the set of active lanes: takes __activemask() and runs a
+// __ballot_sync over it with a predicate that is true on every participating lane.
+__forceinline__ __device__ WarpMask _getActiveMask()
+{
+    return __ballot_sync(__activemask(), true);
+}
+
+// Return a mask suitable for the 'MultiPrefix' style functions
+// Currently an identity mapping: the supplied mask is returned unchanged.
+__forceinline__ __device__ WarpMask _getMultiPrefixMask(int mask)
+{
+    return mask;
+}
+
+// Returns true when at most one bit of `mask` is set.
+// Note: this also returns true if mask is 0, but that's okay, because at least one
+// lane must be active to execute anything.
+// The test runs on the unsigned bit pattern so that a mask containing only lane 31
+// (INT_MIN as a signed value) does not overflow in `mask - 1`.
+__inline__ __device__ bool _waveIsSingleLane(WarpMask mask)
+{
+    const uint32_t bits = uint32_t(mask);
+    // Clearing the lowest set bit leaves zero only if there was at most one bit.
+    return (bits & (bits - 1u)) == 0;
+}
+
+// Returns the power of 2 size of run of set bits. Returns 0 if not a suitable run.
+// Examples:
+// 0b00000000'00000000'00000000'11111111 -> 8
+// 0b11111111'11111111'11111111'11111111 -> 32
+// 0b00000000'00000000'00000000'00011111 -> 0 (since 5 is not a power of 2)
+// 0b00000000'00000000'00000000'11110000 -> 0 (since the run of bits does not start at the LSB)
+// 0b00000000'00000000'00000000'00100111 -> 0 (since it is not a single contiguous run)
+// Returns the length of the run of set bits in `mask` when that run starts at bit 0,
+// is contiguous and has a power-of-two length; returns 0 otherwise.
+// The contiguity test runs on the unsigned bit pattern so that `mask + 1` cannot
+// overflow a signed int (for example for 0x7FFFFFFF).
+__inline__ __device__ int _waveCalcPow2Offset(WarpMask mask)
+{
+    // This should be the most common case, so fast path it
+    if (mask == SLANG_CUDA_WARP_BITMASK)
+    {
+        return SLANG_CUDA_WARP_SIZE;
+    }
+    // Is it a contiguous run of bits?
+    // Adding 1 to a run of ones starting at the LSB carries into the first zero bit,
+    // so the sum shares no bits with the original value.
+    const uint32_t bits = uint32_t(mask);
+    if ((bits & (bits + 1u)) == 0)
+    {
+        // const int offsetSize = __ffs(mask + 1) - 1;
+        const int offset = 32 - __clz(mask);
+        // Is it a power of 2 size
+        if ((offset & (offset - 1)) == 0)
+        {
+            return offset;
+        }
+    }
+    return 0;
+}
+
+// Returns true only on the lowest-numbered lane set in __activemask().
+//
+// The previous form short-circuited on `(mask & 1)`, which made every lane report
+// itself as the first lane whenever lane 0 was active. The lane comparison below
+// handles that case too: with bit 0 set, __ffs(mask) - 1 is 0, so only lane 0 matches.
+__inline__ __device__ bool _waveIsFirstLane()
+{
+    const WarpMask mask = __activemask();
+
+    // mask & -mask, isolates the lowest set bit.
+    // return ((mask & -mask) == (1 << _getLaneId()));
+
+    // This mechanism is most similar to what was in an nVidia post, so assume it is preferred.
+    // __ffs returns the 1-based position of the lowest set bit.
+    return (__ffs(mask) - 1) == int(_getLaneId());
+}
+
+// Operation policies for the wave reductions. Each provides:
+//   getInitial(a) - the starting accumulator value (its argument is ignored by some)
+//   doOp(a, b)    - combines two values
+//   doInverse     - present on some policies only; undoes doOp
+
+// Bitwise OR; starts from 0.
+template<typename T>
+struct WaveOpOr
+{
+    __inline__ __device__ static T getInitial(T a) { return 0; }
+    __inline__ __device__ static T doOp(T a, T b) { return a | b; }
+};
+
+// Bitwise AND; starts from all bits set.
+template<typename T>
+struct WaveOpAnd
+{
+    __inline__ __device__ static T getInitial(T a) { return ~T(0); }
+    __inline__ __device__ static T doOp(T a, T b) { return a & b; }
+};
+
+// Bitwise XOR; starts from 0. XOR is its own inverse.
+template<typename T>
+struct WaveOpXor
+{
+    __inline__ __device__ static T getInitial(T a) { return 0; }
+    __inline__ __device__ static T doOp(T a, T b) { return a ^ b; }
+    __inline__ __device__ static T doInverse(T a, T b) { return a ^ b; }
+};
+
+// Addition; starts from 0, inverse is subtraction.
+template<typename T>
+struct WaveOpAdd
+{
+    __inline__ __device__ static T getInitial(T a) { return 0; }
+    __inline__ __device__ static T doOp(T a, T b) { return a + b; }
+    __inline__ __device__ static T doInverse(T a, T b) { return a - b; }
+};
+
+// Multiplication; starts from 1, inverse is division.
+template<typename T>
+struct WaveOpMul
+{
+    __inline__ __device__ static T getInitial(T a) { return T(1); }
+    __inline__ __device__ static T doOp(T a, T b) { return a * b; }
+    // Using this inverse for int is probably undesirable - because in general it requires T to have
+    // more precision There is also a performance aspect to it, where divides are generally
+    // significantly slower
+    __inline__ __device__ static T doInverse(T a, T b) { return a / b; }
+};
+
+// Maximum; starts from the caller's own value.
+template<typename T>
+struct WaveOpMax
+{
+    __inline__ __device__ static T getInitial(T a) { return a; }
+    __inline__ __device__ static T doOp(T a, T b) { return a > b ? a : b; }
+};
+
+// Minimum; starts from the caller's own value.
+template<typename T>
+struct WaveOpMin
+{
+    __inline__ __device__ static T getInitial(T a) { return a; }
+    __inline__ __device__ static T doOp(T a, T b) { return a < b ? a : b; }
+};
+
+// Maps a scalar, vector or matrix type to its element type through the nested
+// `Type` alias. The primary template is declared but not defined, so only the types
+// listed below are supported.
+template<typename T>
+struct ElementTypeTrait;
+
+// Scalar: the element type is the type itself.
+template<> struct ElementTypeTrait<int> { using Type = int; };
+template<> struct ElementTypeTrait<uint> { using Type = uint; };
+template<> struct ElementTypeTrait<float> { using Type = float; };
+template<> struct ElementTypeTrait<double> { using Type = double; };
+template<> struct ElementTypeTrait<uint64_t> { using Type = uint64_t; };
+template<> struct ElementTypeTrait<int64_t> { using Type = int64_t; };
+
+// Vector: int1..int4
+template<> struct ElementTypeTrait<int1> { using Type = int; };
+template<> struct ElementTypeTrait<int2> { using Type = int; };
+template<> struct ElementTypeTrait<int3> { using Type = int; };
+template<> struct ElementTypeTrait<int4> { using Type = int; };
+
+// Vector: uint1..uint4
+template<> struct ElementTypeTrait<uint1> { using Type = uint; };
+template<> struct ElementTypeTrait<uint2> { using Type = uint; };
+template<> struct ElementTypeTrait<uint3> { using Type = uint; };
+template<> struct ElementTypeTrait<uint4> { using Type = uint; };
+
+// Vector: float1..float4
+template<> struct ElementTypeTrait<float1> { using Type = float; };
+template<> struct ElementTypeTrait<float2> { using Type = float; };
+template<> struct ElementTypeTrait<float3> { using Type = float; };
+template<> struct ElementTypeTrait<float4> { using Type = float; };
+
+// Vector: double1..double4
+template<> struct ElementTypeTrait<double1> { using Type = double; };
+template<> struct ElementTypeTrait<double2> { using Type = double; };
+template<> struct ElementTypeTrait<double3> { using Type = double; };
+template<> struct ElementTypeTrait<double4> { using Type = double; };
+
+// Matrix: the element type is the matrix's scalar parameter.
+template<typename T, int ROWS, int COLS>
+struct ElementTypeTrait<Matrix<T, ROWS, COLS>>
+{
+    using Type = T;
+};
+
+// Scalar
+// Reduces `val` across the lanes named in `mask` using the operation policy INTF
+// (see the WaveOp* structs) and returns the combined value.
+//
+// Three cases:
+//  - _waveCalcPow2Offset(mask) > 0: XOR-shuffle reduction, halving the offset each step.
+//  - otherwise, more than one lane in mask: visit each set bit of the mask in turn and
+//    fold in that lane's value via a broadcast.
+//  - single lane: `val` is returned unchanged.
+template<typename INTF, typename T>
+__device__ T _waveReduceScalar(WarpMask mask, T val)
+{
+    const int offsetSize = _waveCalcPow2Offset(mask);
+    if (offsetSize > 0)
+    {
+        // Fast path O(log2(activeLanes))
+        for (int offset = offsetSize >> 1; offset > 0; offset >>= 1)
+        {
+            val = INTF::doOp(val, __shfl_xor_sync(mask, val, offset));
+        }
+    }
+    else if (!_waveIsSingleLane(mask))
+    {
+        // Slow path: start from the policy's initial value and combine lane by lane.
+        T result = INTF::getInitial(val);
+        int remaining = mask;
+        while (remaining)
+        {
+            // Isolate the lowest set bit still to be processed.
+            const int laneBit = remaining & -remaining;
+            // Get the sourceLane
+            const int srcLane = __ffs(laneBit) - 1;
+            // Broadcast (can also broadcast to self)
+            result = INTF::doOp(result, __shfl_sync(mask, val, srcLane));
+            remaining &= ~laneBit;
+        }
+        return result;
+    }
+    return val;
+}
+
+
+// Multiple values
+// Element-wise reduction of COUNT values of type T across the lanes named in `mask`,
+// using the operation policy INTF. `val` points at COUNT elements and is updated in
+// place. The structure mirrors _waveReduceScalar: an XOR-shuffle fast path when
+// _waveCalcPow2Offset(mask) > 0, otherwise a lane-by-lane broadcast loop; with a
+// single lane the values are left untouched.
+template<typename INTF, typename T, size_t COUNT>
+__device__ void _waveReduceMultiple(WarpMask mask, T* val)
+{
+    const int offsetSize = _waveCalcPow2Offset(mask);
+    if (offsetSize > 0)
+    {
+        // Fast path O(log2(activeLanes))
+        for (int offset = offsetSize >> 1; offset > 0; offset >>= 1)
+        {
+            for (size_t i = 0; i < COUNT; ++i)
+            {
+                val[i] = INTF::doOp(val[i], __shfl_xor_sync(mask, val[i], offset));
+            }
+        }
+    }
+    else if (!_waveIsSingleLane(mask))
+    {
+        // Copy the original
+        // (val[] becomes the accumulator, so the broadcasts must read from a snapshot.)
+        T originalVal[COUNT];
+        for (size_t i = 0; i < COUNT; ++i)
+        {
+            const T v = val[i];
+            originalVal[i] = v;
+            val[i] = INTF::getInitial(v);
+        }
+
+        int remaining = mask;
+        while (remaining)
+        {
+            // Isolate the lowest set bit still to be processed.
+            const int laneBit = remaining & -remaining;
+            // Get the sourceLane
+            const int srcLane = __ffs(laneBit) - 1;
+            // Broadcast (can also broadcast to self)
+            for (size_t i = 0; i < COUNT; ++i)
+            {
+                val[i] = INTF::doOp(val[i], __shfl_sync(mask, originalVal[i], srcLane));
+            }
+            remaining &= ~laneBit;
+        }
+    }
+}
+
+// Convenience overload for aggregate types: looks up the element type of T through
+// ElementTypeTrait, derives the element count from the sizes, and forwards to the
+// element-wise overload with `val` viewed as an array of elements.
+template<typename INTF, typename T>
+__device__ void _waveReduceMultiple(WarpMask mask, T* val)
+{
+    using Elem = typename ElementTypeTrait<T>::Type;
+    constexpr size_t kElemCount = sizeof(T) / sizeof(Elem);
+    Elem* const elems = (Elem*)val;
+    _waveReduceMultiple<INTF, Elem, kElemCount>(mask, elems);
+}
+
+template<typename T>
+__inline__ __device__ T _waveOr(WarpMask mask, T val)
+{
+    return _waveReduceScalar<WaveOpOr<T>, T>(mask, val);
+}
+
+template<typename T>
+__inline__ __device__ T _waveAnd(WarpMask mask, T val)
+{
+    return _waveReduceScalar<WaveOpAnd<T>, T>(mask, val);
+}
+
+template<typename T>
+__inline__ __device__ T _waveXor(WarpMask mask, T val)
+{
+    return _waveReduceScalar<WaveOpXor<T>, T>(mask, val);
+}
+
+template<typename T>
+__inline__ __device__ T _waveProduct(WarpMask mask, T val)
+{
+    return _waveReduceScalar<WaveOpMul<T>, T>(mask, val);
+}
+
+template<typename T>
+__inline__ __device__ T _waveSum(WarpMask mask, T val)
+{
+    return _waveReduceScalar<WaveOpAdd<T>, T>(mask, val);
+}
+
+// Reduces `val` over the lanes in `mask` by delegating to `_waveReduceScalar` with `WaveOpMin<T>`.
+template<typename T>
+__inline__ __device__ T _waveMin(WarpMask mask, T val)
+{
+    typedef WaveOpMin<T> Op;
+    T reduced = _waveReduceScalar<Op, T>(mask, val);
+    return reduced;
+}
+
+// Reduces `val` over the lanes in `mask` by delegating to `_waveReduceScalar` with `WaveOpMax<T>`.
+template<typename T>
+__inline__ __device__ T _waveMax(WarpMask mask, T val)
+{
+    typedef WaveOpMax<T> Op;
+    T reduced = _waveReduceScalar<Op, T>(mask, val);
+    return reduced;
+}
+
+// Fast paths: when compiling for compute capability 8.x or newer, the 32-bit integer
+// reductions below are routed straight to the `__reduce_*_sync` warp intrinsics instead of the
+// generic `_waveReduceScalar` implementation.
+#if __CUDA_ARCH__ >= 800 // 8.x or higher
+template<>
+__inline__ __device__ unsigned _waveOr<unsigned>(WarpMask mask, unsigned val)
+{
+    const unsigned reduced = __reduce_or_sync(mask, val);
+    return reduced;
+}
+
+template<>
+__inline__ __device__ unsigned _waveAnd<unsigned>(WarpMask mask, unsigned val)
+{
+    const unsigned reduced = __reduce_and_sync(mask, val);
+    return reduced;
+}
+
+template<>
+__inline__ __device__ unsigned _waveXor<unsigned>(WarpMask mask, unsigned val)
+{
+    const unsigned reduced = __reduce_xor_sync(mask, val);
+    return reduced;
+}
+
+template<>
+__inline__ __device__ unsigned _waveSum<unsigned>(WarpMask mask, unsigned val)
+{
+    const unsigned total = __reduce_add_sync(mask, val);
+    return total;
+}
+
+template<>
+__inline__ __device__ int _waveSum<int>(WarpMask mask, int val)
+{
+    const int total = __reduce_add_sync(mask, val);
+    return total;
+}
+
+template<>
+__inline__ __device__ unsigned _waveMin<unsigned>(WarpMask mask, unsigned val)
+{
+    const unsigned smallest = __reduce_min_sync(mask, val);
+    return smallest;
+}
+
+template<>
+__inline__ __device__ int _waveMin<int>(WarpMask mask, int val)
+{
+    const int smallest = __reduce_min_sync(mask, val);
+    return smallest;
+}
+
+template<>
+__inline__ __device__ unsigned _waveMax<unsigned>(WarpMask mask, unsigned val)
+{
+    const unsigned largest = __reduce_max_sync(mask, val);
+    return largest;
+}
+
+template<>
+__inline__ __device__ int _waveMax<int>(WarpMask mask, int val)
+{
+    const int largest = __reduce_max_sync(mask, val);
+    return largest;
+}
+#endif
+
+
+// Multiple
+
+// Reduces each component of `val` across the lanes in `mask` using `WaveOpOr` on the element
+// type given by `ElementTypeTrait<T>`, and returns the reduced aggregate.
+template<typename T>
+__inline__ __device__ T _waveOrMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    typedef WaveOpOr<Elem> Op;
+    T* const inOut = &val;
+    _waveReduceMultiple<Op>(mask, inOut);
+    return val;
+}
+
+// Reduces each component of `val` across the lanes in `mask` using `WaveOpAnd` on the element
+// type given by `ElementTypeTrait<T>`, and returns the reduced aggregate.
+template<typename T>
+__inline__ __device__ T _waveAndMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    typedef WaveOpAnd<Elem> Op;
+    T* const inOut = &val;
+    _waveReduceMultiple<Op>(mask, inOut);
+    return val;
+}
+
+// Reduces each component of `val` across the lanes in `mask` using `WaveOpXor` on the element
+// type given by `ElementTypeTrait<T>`, and returns the reduced aggregate.
+template<typename T>
+__inline__ __device__ T _waveXorMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    typedef WaveOpXor<Elem> Op;
+    T* const inOut = &val;
+    _waveReduceMultiple<Op>(mask, inOut);
+    return val;
+}
+
+// Reduces each component of `val` across the lanes in `mask` using `WaveOpMul` on the element
+// type given by `ElementTypeTrait<T>`, and returns the reduced aggregate.
+template<typename T>
+__inline__ __device__ T _waveProductMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    typedef WaveOpMul<Elem> Op;
+    T* const inOut = &val;
+    _waveReduceMultiple<Op>(mask, inOut);
+    return val;
+}
+
+// Reduces each component of `val` across the lanes in `mask` using `WaveOpAdd` on the element
+// type given by `ElementTypeTrait<T>`, and returns the reduced aggregate.
+template<typename T>
+__inline__ __device__ T _waveSumMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    typedef WaveOpAdd<Elem> Op;
+    T* const inOut = &val;
+    _waveReduceMultiple<Op>(mask, inOut);
+    return val;
+}
+
+// Reduces each component of `val` across the lanes in `mask` using `WaveOpMin` on the element
+// type given by `ElementTypeTrait<T>`, and returns the reduced aggregate.
+template<typename T>
+__inline__ __device__ T _waveMinMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    typedef WaveOpMin<Elem> Op;
+    T* const inOut = &val;
+    _waveReduceMultiple<Op>(mask, inOut);
+    return val;
+}
+
+// Reduces each component of `val` across the lanes in `mask` using `WaveOpMax` on the element
+// type given by `ElementTypeTrait<T>`, and returns the reduced aggregate.
+template<typename T>
+__inline__ __device__ T _waveMaxMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    typedef WaveOpMax<Elem> Op;
+    T* const inOut = &val;
+    _waveReduceMultiple<Op>(mask, inOut);
+    return val;
+}
+
+
+// Returns true when the predicate written by `__match_all_sync` for `val` over `mask` is
+// non-zero. The mask returned by the intrinsic itself is not used.
+template<typename T>
+__inline__ __device__ bool _waveAllEqual(WarpMask mask, T val)
+{
+    int allLanesMatch;
+    __match_all_sync(mask, val, &allLanesMatch);
+    const bool equal = (allLanesMatch != 0);
+    return equal;
+}
+
+// Component-wise variant of `_waveAllEqual`: runs `__match_all_sync` on each element of `inVal`
+// in order and returns false at the first component whose predicate is zero; returns true if
+// every component passes.
+template<typename T>
+__inline__ __device__ bool _waveAllEqualMultiple(WarpMask mask, T inVal)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    const Elem* const components = (const Elem*)&inVal;
+    const size_t componentCount = sizeof(T) / sizeof(Elem);
+
+    size_t index = 0;
+    while (index < componentCount)
+    {
+        int componentMatches;
+        __match_all_sync(mask, components[index], &componentMatches);
+        if (componentMatches == 0)
+        {
+            return false;
+        }
+        ++index;
+    }
+    return true;
+}
+
+// Returns `val` as held by the lane whose bit is the lowest one set in `mask`
+// (`__ffs(mask) - 1`), fetched with `__shfl_sync`.
+template<typename T>
+__inline__ __device__ T _waveReadFirst(WarpMask mask, T val)
+{
+    return __shfl_sync(mask, val, __ffs(mask) - 1);
+}
+
+// Component-wise variant of `_waveReadFirst`: every element of `inVal` is fetched with
+// `__shfl_sync` from the lane whose bit is the lowest one set in `mask`.
+template<typename T>
+__inline__ __device__ T _waveReadFirstMultiple(WarpMask mask, T inVal)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    const int firstLane = __ffs(mask) - 1;
+
+    T result;
+    Elem* const dest = (Elem*)&result;
+    const Elem* const source = (const Elem*)&inVal;
+    for (size_t c = 0; c < sizeof(T) / sizeof(Elem); ++c)
+    {
+        dest[c] = __shfl_sync(mask, source[c], firstLane);
+    }
+    return result;
+}
+
+// Fetches every element of `inVal` from lane `lane` with `__shfl_sync` and returns the
+// reassembled aggregate.
+template<typename T>
+__inline__ __device__ T _waveShuffleMultiple(WarpMask mask, T inVal, int lane)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+
+    T result;
+    Elem* const dest = (Elem*)&result;
+    const Elem* const source = (const Elem*)&inVal;
+    for (size_t c = 0; c < sizeof(T) / sizeof(Elem); ++c)
+    {
+        dest[c] = __shfl_sync(mask, source[c], lane);
+    }
+    return result;
+}
+
+// Scalar
+
+// "Invertable" (invertible) means that when we get to the end of the scan we can remove this
+// lane's own `val` (to make the result exclusive) by applying the inverse of the op.
+//
+// Returns the exclusive prefix of `val` under `INTF` for the calling lane. Two paths exist:
+//  - `_waveCalcPow2Offset(mask) > 0`: an inclusive shuffle-up scan whose stride doubles on
+//    each step, followed by `INTF::doInverse(result, val)`.
+//  - otherwise: each lane in `mask` is broadcast in turn and a lane accumulates only the values
+//    coming from lanes with a lower id.
+template<typename INTF, typename T>
+__device__ T _wavePrefixInvertableScalar(WarpMask mask, T val)
+{
+    const int offsetSize = _waveCalcPow2Offset(mask);
+
+    const int laneId = _getLaneId();
+    T result;
+    if (offsetSize > 0)
+    {
+        // Sum is calculated inclusive of this lane's value
+        result = val;
+        for (int i = 1; i < offsetSize; i += i)
+        {
+            const T readVal = __shfl_up_sync(mask, result, i, offsetSize);
+            if (laneId >= i)
+            {
+                result = INTF::doOp(result, readVal);
+            }
+        }
+        // Remove val from the result, by applying the inverse
+        result = INTF::doInverse(result, val);
+    }
+    else
+    {
+        result = INTF::getInitial(val);
+        if (!_waveIsSingleLane(mask))
+        {
+            // The set bits are walked with unsigned arithmetic: negating a signed value that
+            // has only bit 31 set overflows, which is undefined behaviour.
+            unsigned int remaining = (unsigned int)mask;
+            while (remaining)
+            {
+                // Isolate the lowest set bit
+                const unsigned int laneBit = remaining & (0u - remaining);
+                // Get the sourceLane
+                const int srcLane = __ffs((int)laneBit) - 1;
+                // Broadcast (can also broadcast to self)
+                const T readValue = __shfl_sync(mask, val, srcLane);
+                // Only accumulate if srcLane is less than this lane
+                if (srcLane < laneId)
+                {
+                    result = INTF::doOp(result, readValue);
+                }
+                remaining &= ~laneBit;
+            }
+        }
+    }
+    return result;
+}
+
+
+// This implementation separately tracks the value to be propagated, and the value
+// that is the final result, so no inverse of the op is needed.
+//
+// Returns the exclusive prefix of `val` under `INTF` for the calling lane; the result starts
+// from `INTF::getInitial(val)`.
+template<typename INTF, typename T>
+__device__ T _wavePrefixScalar(WarpMask mask, T val)
+{
+    const int offsetSize = _waveCalcPow2Offset(mask);
+
+    const int laneId = _getLaneId();
+    T result = INTF::getInitial(val);
+    if (offsetSize > 0)
+    {
+        // For transmitted value we will do it inclusively with this lane's value
+        // For the result we do not include the lane's value. This means an extra op for each
+        // iteration but means we don't need to have a divide at the end and also removes overflow
+        // issues in that scenario.
+        for (int i = 1; i < offsetSize; i += i)
+        {
+            const T readVal = __shfl_up_sync(mask, val, i, offsetSize);
+            if (laneId >= i)
+            {
+                result = INTF::doOp(result, readVal);
+                val = INTF::doOp(val, readVal);
+            }
+        }
+    }
+    else
+    {
+        if (!_waveIsSingleLane(mask))
+        {
+            // The set bits are walked with unsigned arithmetic: negating a signed value that
+            // has only bit 31 set overflows, which is undefined behaviour.
+            unsigned int remaining = (unsigned int)mask;
+            while (remaining)
+            {
+                // Isolate the lowest set bit
+                const unsigned int laneBit = remaining & (0u - remaining);
+                // Get the sourceLane
+                const int srcLane = __ffs((int)laneBit) - 1;
+                // Broadcast (can also broadcast to self)
+                const T readValue = __shfl_sync(mask, val, srcLane);
+                // Only accumulate if srcLane is less than this lane
+                if (srcLane < laneId)
+                {
+                    result = INTF::doOp(result, readValue);
+                }
+                remaining &= ~laneBit;
+            }
+        }
+    }
+    return result;
+}
+
+
+// Copies COUNT elements from `src` to `dst`. INTF is not used by the body; it only keeps the
+// template parameter list parallel with the other `_waveOp*` helpers.
+//
+// The return type is `void`: the function produces no value, and falling off the end of a
+// function declared to return `T` is undefined behaviour.
+template<typename INTF, typename T, size_t COUNT>
+__device__ void _waveOpCopy(T* dst, const T* src)
+{
+    for (size_t j = 0; j < COUNT; ++j)
+    {
+        dst[j] = src[j];
+    }
+}
+
+
+// Applies `INTF::doInverse(inOut[j], val[j])` to each of the COUNT elements, in place.
+//
+// The return type is `void`: the function produces no value, and falling off the end of a
+// function declared to return `T` is undefined behaviour.
+template<typename INTF, typename T, size_t COUNT>
+__device__ void _waveOpDoInverse(T* inOut, const T* val)
+{
+    for (size_t j = 0; j < COUNT; ++j)
+    {
+        inOut[j] = INTF::doInverse(inOut[j], val[j]);
+    }
+}
+
+// Sets each of the COUNT elements of `out` to `INTF::getInitial(val[j])`. `out` and `val` may
+// be the same array (each element is read before it is written).
+//
+// The return type is `void`: the function produces no value, and falling off the end of a
+// function declared to return `T` is undefined behaviour.
+template<typename INTF, typename T, size_t COUNT>
+__device__ void _waveOpSetInitial(T* out, const T* val)
+{
+    for (size_t j = 0; j < COUNT; ++j)
+    {
+        out[j] = INTF::getInitial(val[j]);
+    }
+}
+
+// Multi-component version of `_wavePrefixInvertableScalar`: replaces the COUNT elements of
+// `val` in place with their exclusive prefix under `INTF`, using `INTF::doInverse` to remove
+// the lane's own contribution on the shuffle-up path.
+//
+// The return type is `void`: the result is delivered through `val`, and falling off the end of
+// a function declared to return `T` is undefined behaviour.
+template<typename INTF, typename T, size_t COUNT>
+__device__ void _wavePrefixInvertableMultiple(WarpMask mask, T* val)
+{
+    const int offsetSize = _waveCalcPow2Offset(mask);
+
+    const int laneId = _getLaneId();
+    T originalVal[COUNT];
+    _waveOpCopy<INTF, T, COUNT>(originalVal, val);
+
+    if (offsetSize > 0)
+    {
+        // Sum is calculated inclusive of this lane's value
+        for (int i = 1; i < offsetSize; i += i)
+        {
+            // TODO(JS): Note that here I don't split the laneId outside so it's only tested once.
+            // This may be better but it would also mean that there would be shfl between lanes
+            // that are on different (albeit identical) instructions. So this seems more likely to
+            // work as expected with everything in lock step.
+            for (size_t j = 0; j < COUNT; ++j)
+            {
+                const T readVal = __shfl_up_sync(mask, val[j], i, offsetSize);
+                if (laneId >= i)
+                {
+                    val[j] = INTF::doOp(val[j], readVal);
+                }
+            }
+        }
+        // Remove originalVal from the result, by applying the inverse
+        _waveOpDoInverse<INTF, T, COUNT>(val, originalVal);
+    }
+    else
+    {
+        _waveOpSetInitial<INTF, T, COUNT>(val, val);
+        if (!_waveIsSingleLane(mask))
+        {
+            // The set bits are walked with unsigned arithmetic: negating a signed value that
+            // has only bit 31 set overflows, which is undefined behaviour.
+            unsigned int remaining = (unsigned int)mask;
+            while (remaining)
+            {
+                // Isolate the lowest set bit
+                const unsigned int laneBit = remaining & (0u - remaining);
+                // Get the sourceLane
+                const int srcLane = __ffs((int)laneBit) - 1;
+
+                for (size_t j = 0; j < COUNT; ++j)
+                {
+                    // Broadcast (can also broadcast to self)
+                    const T readValue = __shfl_sync(mask, originalVal[j], srcLane);
+                    // Only accumulate if srcLane is less than this lane
+                    if (srcLane < laneId)
+                    {
+                        val[j] = INTF::doOp(val[j], readValue);
+                    }
+                }
+                // Clear the lane once all of its components are handled, so the loop makes
+                // progress regardless of COUNT.
+                remaining &= ~laneBit;
+            }
+        }
+    }
+}
+
+// Multi-component version of `_wavePrefixScalar`: replaces the COUNT elements of `val` in place
+// with their exclusive prefix under `INTF`. The propagated (inclusive) values live in `work`,
+// so no inverse of the op is needed.
+//
+// The return type is `void`: the result is delivered through `val`, and falling off the end of
+// a function declared to return `T` is undefined behaviour.
+template<typename INTF, typename T, size_t COUNT>
+__device__ void _wavePrefixMultiple(WarpMask mask, T* val)
+{
+    const int offsetSize = _waveCalcPow2Offset(mask);
+
+    const int laneId = _getLaneId();
+
+    T work[COUNT];
+    _waveOpCopy<INTF, T, COUNT>(work, val);
+    _waveOpSetInitial<INTF, T, COUNT>(val, val);
+
+    if (offsetSize > 0)
+    {
+        // For transmitted value we will do it inclusively with this lane's value
+        // For the result we do not include the lane's value. This means an extra op for each
+        // iteration but means we don't need to have a divide at the end and also removes overflow
+        // issues in that scenario.
+        for (int i = 1; i < offsetSize; i += i)
+        {
+            for (size_t j = 0; j < COUNT; ++j)
+            {
+                const T readVal = __shfl_up_sync(mask, work[j], i, offsetSize);
+                if (laneId >= i)
+                {
+                    work[j] = INTF::doOp(work[j], readVal);
+                    val[j] = INTF::doOp(val[j], readVal);
+                }
+            }
+        }
+    }
+    else
+    {
+        if (!_waveIsSingleLane(mask))
+        {
+            // The set bits are walked with unsigned arithmetic: negating a signed value that
+            // has only bit 31 set overflows, which is undefined behaviour.
+            unsigned int remaining = (unsigned int)mask;
+            while (remaining)
+            {
+                // Isolate the lowest set bit
+                const unsigned int laneBit = remaining & (0u - remaining);
+                // Get the sourceLane
+                const int srcLane = __ffs((int)laneBit) - 1;
+
+                for (size_t j = 0; j < COUNT; ++j)
+                {
+                    // Broadcast (can also broadcast to self)
+                    const T readValue = __shfl_sync(mask, work[j], srcLane);
+                    // Only accumulate if srcLane is less than this lane
+                    if (srcLane < laneId)
+                    {
+                        val[j] = INTF::doOp(val[j], readValue);
+                    }
+                }
+                remaining &= ~laneBit;
+            }
+        }
+    }
+}
+
+// Exclusive prefix of `val` under `WaveOpMul<T>`, computed with the non-inverting
+// `_wavePrefixScalar` scan.
+template<typename T>
+__inline__ __device__ T _wavePrefixProduct(WarpMask mask, T val)
+{
+    typedef WaveOpMul<T> Op;
+    T prefix = _wavePrefixScalar<Op, T>(mask, val);
+    return prefix;
+}
+
+// Exclusive prefix of `val` under `WaveOpAdd<T>`, computed with the inverse-based
+// `_wavePrefixInvertableScalar` scan.
+template<typename T>
+__inline__ __device__ T _wavePrefixSum(WarpMask mask, T val)
+{
+    typedef WaveOpAdd<T> Op;
+    T prefix = _wavePrefixInvertableScalar<Op, T>(mask, val);
+    return prefix;
+}
+
+// Exclusive prefix of `val` under `WaveOpXor<T>`, computed with the inverse-based
+// `_wavePrefixInvertableScalar` scan.
+template<typename T>
+__inline__ __device__ T _wavePrefixXor(WarpMask mask, T val)
+{
+    typedef WaveOpXor<T> Op;
+    T prefix = _wavePrefixInvertableScalar<Op, T>(mask, val);
+    return prefix;
+}
+
+// Exclusive prefix of `val` under `WaveOpOr<T>`, computed with the non-inverting
+// `_wavePrefixScalar` scan.
+template<typename T>
+__inline__ __device__ T _wavePrefixOr(WarpMask mask, T val)
+{
+    typedef WaveOpOr<T> Op;
+    T prefix = _wavePrefixScalar<Op, T>(mask, val);
+    return prefix;
+}
+
+// Exclusive prefix of `val` under `WaveOpAnd<T>`, computed with the non-inverting
+// `_wavePrefixScalar` scan.
+template<typename T>
+__inline__ __device__ T _wavePrefixAnd(WarpMask mask, T val)
+{
+    typedef WaveOpAnd<T> Op;
+    T prefix = _wavePrefixScalar<Op, T>(mask, val);
+    return prefix;
+}
+
+
+// Exclusive prefix product of every component of `val` across the lanes in `mask`.
+//
+// This uses the non-inverting `_wavePrefixMultiple` scan, matching the scalar
+// `_wavePrefixProduct`. The inverse-based scan would have to undo a multiply at the end
+// (presumably with a divide), which is unsafe when a lane contributes zero and loses
+// information when an integer product overflows.
+template<typename T>
+__inline__ __device__ T _wavePrefixProductMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type ElemType;
+    _wavePrefixMultiple<WaveOpMul<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(
+        mask,
+        (ElemType*)&val);
+    return val;
+}
+
+// Exclusive prefix of every component of `val` under `WaveOpAdd`, computed in place by
+// `_wavePrefixInvertableMultiple` and returned.
+template<typename T>
+__inline__ __device__ T _wavePrefixSumMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    typedef WaveOpAdd<Elem> Op;
+    Elem* const components = (Elem*)&val;
+    _wavePrefixInvertableMultiple<Op, Elem, sizeof(T) / sizeof(Elem)>(mask, components);
+    return val;
+}
+
+// Exclusive prefix of every component of `val` under `WaveOpXor`, computed in place by
+// `_wavePrefixInvertableMultiple` and returned.
+template<typename T>
+__inline__ __device__ T _wavePrefixXorMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    typedef WaveOpXor<Elem> Op;
+    Elem* const components = (Elem*)&val;
+    _wavePrefixInvertableMultiple<Op, Elem, sizeof(T) / sizeof(Elem)>(mask, components);
+    return val;
+}
+
+// Exclusive prefix of every component of `val` under `WaveOpOr`, computed in place by
+// `_wavePrefixMultiple` and returned.
+template<typename T>
+__inline__ __device__ T _wavePrefixOrMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    typedef WaveOpOr<Elem> Op;
+    Elem* const components = (Elem*)&val;
+    _wavePrefixMultiple<Op, Elem, sizeof(T) / sizeof(Elem)>(mask, components);
+    return val;
+}
+
+// Exclusive prefix of every component of `val` under `WaveOpAnd`, computed in place by
+// `_wavePrefixMultiple` and returned.
+template<typename T>
+__inline__ __device__ T _wavePrefixAndMultiple(WarpMask mask, T val)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    typedef WaveOpAnd<Elem> Op;
+    Elem* const components = (Elem*)&val;
+    _wavePrefixMultiple<Op, Elem, sizeof(T) / sizeof(Elem)>(mask, components);
+    return val;
+}
+
+// Returns the lane mask produced by `__match_all_sync(mask, val, ...)` in the `x` component of
+// a `uint4`; the other components are zero and the predicate output is discarded.
+// NOTE(review): `__match_all_sync` yields either the whole mask or zero. If callers expect the
+// set of lanes whose value equals this lane's value, `__match_any_sync` may have been intended
+// - confirm against the callers.
+template<typename T>
+__inline__ __device__ uint4 _waveMatchScalar(WarpMask mask, T val)
+{
+    int unusedPredicate;
+    const unsigned int matchBits = __match_all_sync(mask, val, &unusedPredicate);
+    return make_uint4(matchBits, 0, 0, 0);
+}
+
+// Component-wise variant of `_waveMatchScalar`: ANDs together the masks returned by
+// `__match_all_sync` for each element of `inVal`, stopping early once the running mask is zero.
+// The result is placed in the `x` component of a `uint4`; the other components are zero.
+// NOTE(review): as with the scalar version, `__match_any_sync` may have been intended - confirm
+// against the callers.
+template<typename T>
+__inline__ __device__ uint4 _waveMatchMultiple(WarpMask mask, const T& inVal)
+{
+    typedef typename ElementTypeTrait<T>::Type Elem;
+    const Elem* const components = (const Elem*)&inVal;
+    const size_t componentCount = sizeof(T) / sizeof(Elem);
+
+    uint matchBits = 0xffffffff;
+    size_t index = 0;
+    while (index < componentCount && matchBits)
+    {
+        int unusedPredicate;
+        matchBits &= __match_all_sync(mask, components[index], &unusedPredicate);
+        ++index;
+    }
+    return make_uint4(matchBits, 0, 0, 0);
+}
+
+// Returns component `b` of `a`, counting from `a.x`. `b` is asserted to lie in [0, 3).
+__device__ uint getAt(dim3 a, int b)
+{
+    SLANG_PRELUDE_ASSERT(b >= 0 && b < 3);
+    // The components are addressed relative to the first member.
+    return *(&a.x + b);
+}
+// Component-wise product of a `uint3` and a `dim3`.
+__device__ uint3 operator*(uint3 a, dim3 b)
+{
+    uint3 product;
+    product.z = a.z * b.z;
+    product.y = a.y * b.y;
+    product.x = a.x * b.x;
+    return product;
+}
+
+// Reinterprets the storage of `val` as a `TResult` and returns that value. No size check is
+// made, so `TResult` should not be larger than `TInput`.
+template<typename TResult, typename TInput>
+__inline__ __device__ TResult slang_bit_cast(TInput val)
+{
+    TResult* const reinterpreted = (TResult*)(&val);
+    return *reinterpreted;
+}
+
+/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
+
+
+/* Type that defines the uniform entry point params. The actual content of this type is dependent on
+the entry point parameters, and can be found via reflection or defined such that it matches the
+shader appropriately.
+*/
+struct UniformEntryPointParams;
+struct UniformState;
+
+// ---------------------- OptiX Ray Payload --------------------------------------
+#ifdef SLANG_CUDA_ENABLE_OPTIX
+// Description of a ray handed to `traceOptiXRay`, which forwards the four fields to
+// `optixTrace`. The field order is part of the layout and must not change.
+// NOTE(review): presumably mirrors the HLSL `RayDesc` structure - confirm.
+struct RayDesc
+{
+    float3 Origin;    // passed to optixTrace as the ray origin
+    float TMin;       // passed to optixTrace as the minimum ray parameter
+    float3 Direction; // passed to optixTrace as the ray direction
+    float TMax;       // passed to optixTrace as the maximum ray parameter
+};
+
+// Rebuilds a 64-bit pointer from two 32-bit halves: `i0` supplies the upper 32 bits and `i1`
+// the lower 32 bits. Inverse of `packOptiXRayPayloadPointer`.
+static __forceinline__ __device__ void* unpackOptiXRayPayloadPointer(uint32_t i0, uint32_t i1)
+{
+    const uint64_t high = static_cast<uint64_t>(i0) << 32;
+    const uint64_t low = static_cast<uint64_t>(i1);
+    return reinterpret_cast<void*>(high | low);
+}
+
+// Splits a pointer into two 32-bit halves: `i0` receives the upper 32 bits and `i1` the lower
+// 32 bits. Inverse of `unpackOptiXRayPayloadPointer`.
+static __forceinline__ __device__ void packOptiXRayPayloadPointer(
+    void* ptr,
+    uint32_t& i0,
+    uint32_t& i1)
+{
+    const uint64_t bits = reinterpret_cast<uint64_t>(ptr);
+    const uint64_t lowMask = 0x00000000ffffffff;
+    i0 = bits >> 32;
+    i1 = bits & lowMask;
+}
+
+// Reads payload registers 0 and 1 (register 0 is read first) and reassembles the pointer they
+// encode with `unpackOptiXRayPayloadPointer`.
+static __forceinline__ __device__ void* getOptiXRayPayloadPtr()
+{
+    const uint32_t upperHalf = optixGetPayload_0();
+    const uint32_t lowerHalf = optixGetPayload_1();
+    void* const payload = unpackOptiXRayPayloadPointer(upperHalf, lowerHalf);
+    return payload;
+}
+
+// Traces `Ray` through `AccelerationStructure` with `optixTrace`. The address of `Payload` is
+// split into two 32-bit payload registers so that the invoked programs can recover it (see
+// `getOptiXRayPayloadPtr`).
+//
+// Returns `Payload`. The function is declared to return `void*`, so falling off the end
+// without a return statement would be undefined behaviour.
+template<typename T>
+__forceinline__ __device__ void* traceOptiXRay(
+    OptixTraversableHandle AccelerationStructure,
+    uint32_t RayFlags,
+    uint32_t InstanceInclusionMask,
+    uint32_t RayContributionToHitGroupIndex,
+    uint32_t MultiplierForGeometryContributionToHitGroupIndex,
+    uint32_t MissShaderIndex,
+    RayDesc Ray,
+    T* Payload)
+{
+    uint32_t r0, r1;
+    packOptiXRayPayloadPointer((void*)Payload, r0, r1);
+    optixTrace(
+        AccelerationStructure,
+        Ray.Origin,
+        Ray.Direction,
+        Ray.TMin,
+        Ray.TMax,
+        0.f, /* Time for motion blur, currently unsupported in slang */
+        InstanceInclusionMask,
+        RayFlags,
+        RayContributionToHitGroupIndex,
+        MultiplierForGeometryContributionToHitGroupIndex,
+        MissShaderIndex,
+        r0,
+        r1);
+    return (void*)Payload;
+}
+
+#endif
+
+// Capacity of the `strides` and `sizes` arrays of `TensorView`.
+static const int kSlangTorchTensorMaxDim = 5;
+
+// TensorView
+//
+// Non-owning view of strided tensor storage. `data` is a byte pointer and every accessor adds
+// `strides[d] * index[d]` directly to it, so strides are expressed in bytes. None of the
+// accessors check indices against `sizes` or `dimensionCount`.
+//
+// Every offset is accumulated in 64 bits: each 32-bit stride is widened before it is
+// multiplied, so offsets of 4 GiB and beyond do not wrap.
+struct TensorView
+{
+    uint8_t* data;
+    uint32_t strides[kSlangTorchTensorMaxDim];
+    uint32_t sizes[kSlangTorchTensorMaxDim];
+    uint32_t dimensionCount;
+
+    // Base address reinterpreted as `T*`.
+    template<typename T>
+    __device__ T* data_ptr()
+    {
+        return reinterpret_cast<T*>(data);
+    }
+
+    // Address of the element at a 1-, 2-, 3- or 4-dimensional index.
+    template<typename T>
+    __device__ T* data_ptr_at(uint32_t index)
+    {
+        uint64_t offset = uint64_t(strides[0]) * index;
+        return reinterpret_cast<T*>(data + offset);
+    }
+
+    template<typename T>
+    __device__ T* data_ptr_at(uint2 index)
+    {
+        uint64_t offset = uint64_t(strides[0]) * index.x + uint64_t(strides[1]) * index.y;
+        return reinterpret_cast<T*>(data + offset);
+    }
+
+    template<typename T>
+    __device__ T* data_ptr_at(uint3 index)
+    {
+        uint64_t offset = uint64_t(strides[0]) * index.x + uint64_t(strides[1]) * index.y +
+                          uint64_t(strides[2]) * index.z;
+        return reinterpret_cast<T*>(data + offset);
+    }
+
+    template<typename T>
+    __device__ T* data_ptr_at(uint4 index)
+    {
+        uint64_t offset = uint64_t(strides[0]) * index.x + uint64_t(strides[1]) * index.y +
+                          uint64_t(strides[2]) * index.z + uint64_t(strides[3]) * index.w;
+        return reinterpret_cast<T*>(data + offset);
+    }
+
+    // Address of the element at an N-dimensional index. N cannot be deduced from the array
+    // parameter and has to be given explicitly.
+    template<typename T, unsigned int N>
+    __device__ T* data_ptr_at(uint index[N])
+    {
+        uint64_t offset = 0;
+        for (unsigned int i = 0; i < N; ++i)
+        {
+            offset += uint64_t(strides[i]) * index[i];
+        }
+        return reinterpret_cast<T*>(data + offset);
+    }
+
+    // Reference to the element at the given index.
+    template<typename T>
+    __device__ T& load(uint32_t x)
+    {
+        return *reinterpret_cast<T*>(data + uint64_t(strides[0]) * x);
+    }
+    template<typename T>
+    __device__ T& load(uint32_t x, uint32_t y)
+    {
+        return *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * x + uint64_t(strides[1]) * y);
+    }
+    template<typename T>
+    __device__ T& load(uint2 index)
+    {
+        return *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * index.x + uint64_t(strides[1]) * index.y);
+    }
+    template<typename T>
+    __device__ T& load(uint32_t x, uint32_t y, uint32_t z)
+    {
+        return *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * x + uint64_t(strides[1]) * y +
+            uint64_t(strides[2]) * z);
+    }
+    template<typename T>
+    __device__ T& load(uint3 index)
+    {
+        return *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * index.x + uint64_t(strides[1]) * index.y +
+            uint64_t(strides[2]) * index.z);
+    }
+    template<typename T>
+    __device__ T& load(uint32_t x, uint32_t y, uint32_t z, uint32_t w)
+    {
+        return *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * x + uint64_t(strides[1]) * y +
+            uint64_t(strides[2]) * z + uint64_t(strides[3]) * w);
+    }
+    template<typename T>
+    __device__ T& load(uint4 index)
+    {
+        return *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * index.x + uint64_t(strides[1]) * index.y +
+            uint64_t(strides[2]) * index.z + uint64_t(strides[3]) * index.w);
+    }
+    template<typename T>
+    __device__ T& load(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4)
+    {
+        return *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * i0 + uint64_t(strides[1]) * i1 +
+            uint64_t(strides[2]) * i2 + uint64_t(strides[3]) * i3 +
+            uint64_t(strides[4]) * i4);
+    }
+
+    // Generic version of load
+    template<typename T, unsigned int N>
+    __device__ T& load(uint index[N])
+    {
+        uint64_t offset = 0;
+        for (unsigned int i = 0; i < N; ++i)
+        {
+            offset += uint64_t(strides[i]) * index[i];
+        }
+        return *reinterpret_cast<T*>(data + offset);
+    }
+
+    // Writes `val` to the element at the given index.
+    template<typename T>
+    __device__ void store(uint32_t x, T val)
+    {
+        *reinterpret_cast<T*>(data + uint64_t(strides[0]) * x) = val;
+    }
+    template<typename T>
+    __device__ void store(uint32_t x, uint32_t y, T val)
+    {
+        *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * x + uint64_t(strides[1]) * y) = val;
+    }
+    template<typename T>
+    __device__ void store(uint2 index, T val)
+    {
+        *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * index.x + uint64_t(strides[1]) * index.y) = val;
+    }
+    template<typename T>
+    __device__ void store(uint32_t x, uint32_t y, uint32_t z, T val)
+    {
+        *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * x + uint64_t(strides[1]) * y +
+            uint64_t(strides[2]) * z) = val;
+    }
+    template<typename T>
+    __device__ void store(uint3 index, T val)
+    {
+        *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * index.x + uint64_t(strides[1]) * index.y +
+            uint64_t(strides[2]) * index.z) = val;
+    }
+    template<typename T>
+    __device__ void store(uint32_t x, uint32_t y, uint32_t z, uint32_t w, T val)
+    {
+        *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * x + uint64_t(strides[1]) * y +
+            uint64_t(strides[2]) * z + uint64_t(strides[3]) * w) = val;
+    }
+    template<typename T>
+    __device__ void store(uint4 index, T val)
+    {
+        *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * index.x + uint64_t(strides[1]) * index.y +
+            uint64_t(strides[2]) * index.z + uint64_t(strides[3]) * index.w) = val;
+    }
+    template<typename T>
+    __device__ void store(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, T val)
+    {
+        *reinterpret_cast<T*>(
+            data + uint64_t(strides[0]) * i0 + uint64_t(strides[1]) * i1 +
+            uint64_t(strides[2]) * i2 + uint64_t(strides[3]) * i3 +
+            uint64_t(strides[4]) * i4) = val;
+    }
+
+    // Generic version
+    template<typename T, unsigned int N>
+    __device__ void store(uint index[N], T val)
+    {
+        uint64_t offset = 0;
+        for (unsigned int i = 0; i < N; ++i)
+        {
+            offset += uint64_t(strides[i]) * index[i];
+        }
+        *reinterpret_cast<T*>(data + offset) = val;
+    }
+};
diff --git a/external/slang/include/slang-deprecated.h b/external/slang/include/slang-deprecated.h
new file mode 100644
index 00000000..2ae91c6d
--- /dev/null
+++ b/external/slang/include/slang-deprecated.h
@@ -0,0 +1,1602 @@
+#pragma once
+
+#include "slang.h"
+
+/* DEPRECATED DEFINITIONS
+
+Everything in this file represents deprecated APIs/definitions that are only
+being kept around for source/binary compatibility with old client code. New
+code should not use any of these declarations, and the Slang API will drop these
+declarations over time.
+*/
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+    /*!
+    @brief Initialize an instance of the Slang library.
+    */
+    SLANG_API SlangSession* spCreateSession(const char* deprecated = 0);
+
+    /*!
+    @brief Clean up after an instance of the Slang library.
+    */
+    SLANG_API void spDestroySession(SlangSession* session);
+
+    /** @see slang::IGlobalSession::setSharedLibraryLoader
+     */
+    SLANG_API void spSessionSetSharedLibraryLoader(
+        SlangSession* session,
+        ISlangSharedLibraryLoader* loader);
+
+    /** @see slang::IGlobalSession::getSharedLibraryLoader
+     */
+    SLANG_API ISlangSharedLibraryLoader* spSessionGetSharedLibraryLoader(SlangSession* session);
+
+    /** @see slang::IGlobalSession::checkCompileTargetSupport
+     */
+    SLANG_API SlangResult
+    spSessionCheckCompileTargetSupport(SlangSession* session, SlangCompileTarget target);
+
+    /** @see slang::IGlobalSession::checkPassThroughSupport
+     */
+    SLANG_API SlangResult
+    spSessionCheckPassThroughSupport(SlangSession* session, SlangPassThrough passThrough);
+
+    /** @see slang::IGlobalSession::addBuiltins
+     */
+    SLANG_API void spAddBuiltins(
+        SlangSession* session,
+        char const* sourcePath,
+        char const* sourceString);
+
+    /** @see slang::IGlobalSession::createCompileRequest
+     */
+    SLANG_API SlangCompileRequest* spCreateCompileRequest(SlangSession* session);
+
+    /*!
+    @brief Destroy a compile request.
+    Note a request is a COM object and can be destroyed via 'Release'.
+    */
+    SLANG_API void spDestroyCompileRequest(SlangCompileRequest* request);
+
+    /*! @see slang::ICompileRequest::setFileSystem */
+    SLANG_API void spSetFileSystem(SlangCompileRequest* request, ISlangFileSystem* fileSystem);
+
+    /*! @see slang::ICompileRequest::setCompileFlags */
+    SLANG_API void spSetCompileFlags(SlangCompileRequest* request, SlangCompileFlags flags);
+
+    /*! @see slang::ICompileRequest::getCompileFlags */
+    SLANG_API SlangCompileFlags spGetCompileFlags(SlangCompileRequest* request);
+
+    /*! @see slang::ICompileRequest::setDumpIntermediates */
+    SLANG_API void spSetDumpIntermediates(SlangCompileRequest* request, int enable);
+
+    /*! @see slang::ICompileRequest::setDumpIntermediatePrefix */
+    SLANG_API void spSetDumpIntermediatePrefix(SlangCompileRequest* request, const char* prefix);
+
+    /*! DEPRECATED: use `spSetTargetLineDirectiveMode` instead.
+        @see slang::ICompileRequest::setLineDirectiveMode */
+    SLANG_API void spSetLineDirectiveMode(
+        SlangCompileRequest* request,
+        SlangLineDirectiveMode mode);
+
+    /*! @see slang::ICompileRequest::setTargetLineDirectiveMode */
+    SLANG_API void spSetTargetLineDirectiveMode(
+        SlangCompileRequest* request,
+        int targetIndex,
+        SlangLineDirectiveMode mode);
+
+    /*! @see slang::ICompileRequest::setTargetForceGLSLScalarBufferLayout */
+    SLANG_API void spSetTargetForceGLSLScalarBufferLayout(
+        SlangCompileRequest* request,
+        int targetIndex,
+        bool forceScalarLayout);
+
+    /*! @see slang::ICompileRequest::setTargetUseMinimumSlangOptimization */
+    SLANG_API void spSetTargetUseMinimumSlangOptimization(
+        slang::ICompileRequest* request,
+        int targetIndex,
+        bool val);
+
+    /*! @see slang::ICompileRequest::setIgnoreCapabilityCheck */
+    SLANG_API void spSetIgnoreCapabilityCheck(slang::ICompileRequest* request, bool val);
+
+    /*! @see slang::ICompileRequest::setCodeGenTarget */
+    SLANG_API void spSetCodeGenTarget(SlangCompileRequest* request, SlangCompileTarget target);
+
+    /*! @see slang::ICompileRequest::addCodeGenTarget */
+    SLANG_API int spAddCodeGenTarget(SlangCompileRequest* request, SlangCompileTarget target);
+
+    /*! @see slang::ICompileRequest::setTargetProfile */
+    SLANG_API void spSetTargetProfile(
+        SlangCompileRequest* request,
+        int targetIndex,
+        SlangProfileID profile);
+
+    /*! @see slang::ICompileRequest::setTargetFlags */
+    SLANG_API void spSetTargetFlags(
+        SlangCompileRequest* request,
+        int targetIndex,
+        SlangTargetFlags flags);
+
+
+    /*! @see slang::ICompileRequest::setTargetFloatingPointMode */
+    SLANG_API void spSetTargetFloatingPointMode(
+        SlangCompileRequest* request,
+        int targetIndex,
+        SlangFloatingPointMode mode);
+
+    /*! @see slang::ICompileRequest::addTargetCapability */
+    SLANG_API void spAddTargetCapability(
+        slang::ICompileRequest* request,
+        int targetIndex,
+        SlangCapabilityID capability);
+
+    /* DEPRECATED: use `spSetMatrixLayoutMode` instead. */
+    SLANG_API void spSetTargetMatrixLayoutMode(
+        SlangCompileRequest* request,
+        int targetIndex,
+        SlangMatrixLayoutMode mode);
+
+    /*! @see slang::ICompileRequest::setMatrixLayoutMode */
+    SLANG_API void spSetMatrixLayoutMode(SlangCompileRequest* request, SlangMatrixLayoutMode mode);
+
+    /*! @see slang::ICompileRequest::setDebugInfoLevel */
+    SLANG_API void spSetDebugInfoLevel(SlangCompileRequest* request, SlangDebugInfoLevel level);
+
+    /*! @see slang::ICompileRequest::setDebugInfoFormat */
+    SLANG_API void spSetDebugInfoFormat(SlangCompileRequest* request, SlangDebugInfoFormat format);
+
+    /*! @see slang::ICompileRequest::setOptimizationLevel */
+    SLANG_API void spSetOptimizationLevel(
+        SlangCompileRequest* request,
+        SlangOptimizationLevel level);
+
+
+    /*! @see slang::ICompileRequest::setOutputContainerFormat */
+    SLANG_API void spSetOutputContainerFormat(
+        SlangCompileRequest* request,
+        SlangContainerFormat format);
+
+    /*! @see slang::ICompileRequest::setPassThrough */
+    SLANG_API void spSetPassThrough(SlangCompileRequest* request, SlangPassThrough passThrough);
+
+    /*! @see slang::ICompileRequest::setDiagnosticCallback */
+    SLANG_API void spSetDiagnosticCallback(
+        SlangCompileRequest* request,
+        SlangDiagnosticCallback callback,
+        void const* userData);
+
+    /*! @see slang::ICompileRequest::setWriter */
+    SLANG_API void spSetWriter(
+        SlangCompileRequest* request,
+        SlangWriterChannel channel,
+        ISlangWriter* writer);
+
+    /*! @see slang::ICompileRequest::getWriter */
+    SLANG_API ISlangWriter* spGetWriter(SlangCompileRequest* request, SlangWriterChannel channel);
+
+    /*! @see slang::ICompileRequest::addSearchPath */
+    SLANG_API void spAddSearchPath(SlangCompileRequest* request, const char* searchDir);
+
+    /*! @see slang::ICompileRequest::addPreprocessorDefine */
+    SLANG_API void spAddPreprocessorDefine(
+        SlangCompileRequest* request,
+        const char* key,
+        const char* value);
+
+    /*! @see slang::ICompileRequest::processCommandLineArguments */
+    SLANG_API SlangResult spProcessCommandLineArguments(
+        SlangCompileRequest* request,
+        char const* const* args,
+        int argCount);
+
+    /*! @see slang::ICompileRequest::addTranslationUnit */
+    SLANG_API int spAddTranslationUnit(
+        SlangCompileRequest* request,
+        SlangSourceLanguage language,
+        char const* name);
+
+
+    /*! @see slang::ICompileRequest::setDefaultModuleName */
+    SLANG_API void spSetDefaultModuleName(
+        SlangCompileRequest* request,
+        const char* defaultModuleName);
+
+    /*! @see slang::ICompileRequest::addTranslationUnitPreprocessorDefine */
+    SLANG_API void spTranslationUnit_addPreprocessorDefine(
+        SlangCompileRequest* request,
+        int translationUnitIndex,
+        const char* key,
+        const char* value);
+
+
+    /*! @see slang::ICompileRequest::addTranslationUnitSourceFile */
+    SLANG_API void spAddTranslationUnitSourceFile(
+        SlangCompileRequest* request,
+        int translationUnitIndex,
+        char const* path);
+
+    /*! @see slang::ICompileRequest::addTranslationUnitSourceString */
+    SLANG_API void spAddTranslationUnitSourceString(
+        SlangCompileRequest* request,
+        int translationUnitIndex,
+        char const* path,
+        char const* source);
+
+
+    /*! @see slang::ICompileRequest::addLibraryReference */
+    SLANG_API SlangResult spAddLibraryReference(
+        SlangCompileRequest* request,
+        const char* basePath,
+        const void* libData,
+        size_t libDataSize);
+
+    /*! @see slang::ICompileRequest::addTranslationUnitSourceStringSpan */
+    SLANG_API void spAddTranslationUnitSourceStringSpan(
+        SlangCompileRequest* request,
+        int translationUnitIndex,
+        char const* path,
+        char const* sourceBegin,
+        char const* sourceEnd);
+
+    /*! @see slang::ICompileRequest::addTranslationUnitSourceBlob */
+    SLANG_API void spAddTranslationUnitSourceBlob(
+        SlangCompileRequest* request,
+        int translationUnitIndex,
+        char const* path,
+        ISlangBlob* sourceBlob);
+
+    /*! @see slang::IGlobalSession::findProfile */
+    SLANG_API SlangProfileID spFindProfile(SlangSession* session, char const* name);
+
+    /*! @see slang::IGlobalSession::findCapability */
+    SLANG_API SlangCapabilityID spFindCapability(SlangSession* session, char const* name);
+
+    /*! @see slang::ICompileRequest::addEntryPoint */
+    SLANG_API int spAddEntryPoint(
+        SlangCompileRequest* request,
+        int translationUnitIndex,
+        char const* name,
+        SlangStage stage);
+
+    /*! @see slang::ICompileRequest::addEntryPointEx */
+    SLANG_API int spAddEntryPointEx(
+        SlangCompileRequest* request,
+        int translationUnitIndex,
+        char const* name,
+        SlangStage stage,
+        int genericArgCount,
+        char const** genericArgs);
+
+    /*! @see slang::ICompileRequest::setGlobalGenericArgs */
+    SLANG_API SlangResult spSetGlobalGenericArgs(
+        SlangCompileRequest* request,
+        int genericArgCount,
+        char const** genericArgs);
+
+    /*! @see slang::ICompileRequest::setTypeNameForGlobalExistentialTypeParam */
+    SLANG_API SlangResult spSetTypeNameForGlobalExistentialTypeParam(
+        SlangCompileRequest* request,
+        int slotIndex,
+        char const* typeName);
+
+    /*! @see slang::ICompileRequest::setTypeNameForEntryPointExistentialTypeParam */
+    SLANG_API SlangResult spSetTypeNameForEntryPointExistentialTypeParam(
+        SlangCompileRequest* request,
+        int entryPointIndex,
+        int slotIndex,
+        char const* typeName);
+
+    /*! @see slang::ICompileRequest::compile */
+    SLANG_API SlangResult spCompile(SlangCompileRequest* request);
+
+
+    /*! @see slang::ICompileRequest::getDiagnosticOutput */
+    SLANG_API char const* spGetDiagnosticOutput(SlangCompileRequest* request);
+
+    /*! @see slang::ICompileRequest::getDiagnosticOutputBlob */
+    SLANG_API SlangResult
+    spGetDiagnosticOutputBlob(SlangCompileRequest* request, ISlangBlob** outBlob);
+
+
+    /*! @see slang::ICompileRequest::getDependencyFileCount */
+    SLANG_API int spGetDependencyFileCount(SlangCompileRequest* request);
+
+    /*! @see slang::ICompileRequest::getDependencyFilePath */
+    SLANG_API char const* spGetDependencyFilePath(SlangCompileRequest* request, int index);
+
+    /*! @see slang::ICompileRequest::getTranslationUnitCount */
+    SLANG_API int spGetTranslationUnitCount(SlangCompileRequest* request);
+
+    /*! @see slang::ICompileRequest::getEntryPointSource */
+    SLANG_API char const* spGetEntryPointSource(SlangCompileRequest* request, int entryPointIndex);
+
+    /*! @see slang::ICompileRequest::getEntryPointCode */
+    SLANG_API void const* spGetEntryPointCode(
+        SlangCompileRequest* request,
+        int entryPointIndex,
+        size_t* outSize);
+
+    /*! @see slang::ICompileRequest::getEntryPointCodeBlob */
+    SLANG_API SlangResult spGetEntryPointCodeBlob(
+        SlangCompileRequest* request,
+        int entryPointIndex,
+        int targetIndex,
+        ISlangBlob** outBlob);
+
+    /*! @see slang::ICompileRequest::getEntryPointHostCallable */
+    SLANG_API SlangResult spGetEntryPointHostCallable(
+        SlangCompileRequest* request,
+        int entryPointIndex,
+        int targetIndex,
+        ISlangSharedLibrary** outSharedLibrary);
+
+    /*! @see slang::ICompileRequest::getTargetCodeBlob */
+    SLANG_API SlangResult
+    spGetTargetCodeBlob(SlangCompileRequest* request, int targetIndex, ISlangBlob** outBlob);
+
+    /*! @see slang::ICompileRequest::getTargetHostCallable */
+    SLANG_API SlangResult spGetTargetHostCallable(
+        SlangCompileRequest* request,
+        int targetIndex,
+        ISlangSharedLibrary** outSharedLibrary);
+
+    /*! @see slang::ICompileRequest::getCompileRequestCode */
+    SLANG_API void const* spGetCompileRequestCode(SlangCompileRequest* request, size_t* outSize);
+
+    /*! @see slang::ICompileRequest::getContainerCode */
+    SLANG_API SlangResult spGetContainerCode(SlangCompileRequest* request, ISlangBlob** outBlob);
+
+    /*! @see slang::ICompileRequest::loadRepro */
+    SLANG_API SlangResult spLoadRepro(
+        SlangCompileRequest* request,
+        ISlangFileSystem* fileSystem,
+        const void* data,
+        size_t size);
+
+    /*! @see slang::ICompileRequest::saveRepro */
+    SLANG_API SlangResult spSaveRepro(SlangCompileRequest* request, ISlangBlob** outBlob);
+
+    /*! @see slang::ICompileRequest::enableReproCapture */
+    SLANG_API SlangResult spEnableReproCapture(SlangCompileRequest* request);
+
+    /*! @see slang::ICompileRequest::getCompileTimeProfile */
+    SLANG_API SlangResult spGetCompileTimeProfile(
+        SlangCompileRequest* request,
+        ISlangProfiler** compileTimeProfile,
+        bool shouldClear);
+
+
+    /** Extract contents of a repro.
+
+    Writes the contained files and manifest with their 'unique' names into fileSystem. For more
+    details read the docs/repro.md documentation.
+
+    @param session          The slang session
+    @param reproData        Holds the repro data
+    @param reproDataSize    The size of the repro data
+    @param fileSystem       File system that the contents of the repro will be written to
+    @returns                A `SlangResult` to indicate success or failure.
+    */
+    SLANG_API SlangResult spExtractRepro(
+        SlangSession* session,
+        const void* reproData,
+        size_t reproDataSize,
+        ISlangMutableFileSystem* fileSystem);
+
+    /** Turns a repro into a file system.
+
+    Makes the contents of the repro available as a file system - that is able to access the files
+    with the same paths as were used on the original repro file system.
+
+    @param session          The slang session
+    @param reproData        The repro data
+    @param reproDataSize    The size of the repro data
+    @param replaceFileSystem  Will attempt to load by unique names from this file system before
+    using contents of the repro. Optional.
+    @param outFileSystem    The file system that can be used to access contents
+    @returns                A `SlangResult` to indicate success or failure.
+    */
+    SLANG_API SlangResult spLoadReproAsFileSystem(
+        SlangSession* session,
+        const void* reproData,
+        size_t reproDataSize,
+        ISlangFileSystem* replaceFileSystem,
+        ISlangFileSystemExt** outFileSystem);
+
+    /*! @see slang::ICompileRequest::overrideDiagnosticSeverity */
+    SLANG_API void spOverrideDiagnosticSeverity(
+        SlangCompileRequest* request,
+        SlangInt messageID,
+        SlangSeverity overrideSeverity);
+
+    /*! @see slang::ICompileRequest::getDiagnosticFlags */
+    SLANG_API SlangDiagnosticFlags spGetDiagnosticFlags(SlangCompileRequest* request);
+
+    /*! @see slang::ICompileRequest::setDiagnosticFlags */
+    SLANG_API void spSetDiagnosticFlags(SlangCompileRequest* request, SlangDiagnosticFlags flags);
+
+
+    // get reflection data from a compilation request
+    SLANG_API SlangReflection* spGetReflection(SlangCompileRequest* request);
+
+    // User Attribute
+    SLANG_API char const* spReflectionUserAttribute_GetName(SlangReflectionUserAttribute* attrib);
+    SLANG_API unsigned int spReflectionUserAttribute_GetArgumentCount(
+        SlangReflectionUserAttribute* attrib);
+    SLANG_API SlangReflectionType* spReflectionUserAttribute_GetArgumentType(
+        SlangReflectionUserAttribute* attrib,
+        unsigned int index);
+    SLANG_API SlangResult spReflectionUserAttribute_GetArgumentValueInt(
+        SlangReflectionUserAttribute* attrib,
+        unsigned int index,
+        int* rs);
+    SLANG_API SlangResult spReflectionUserAttribute_GetArgumentValueFloat(
+        SlangReflectionUserAttribute* attrib,
+        unsigned int index,
+        float* rs);
+
+    /** Returns the string-typed value of a user attribute argument.
+        The string returned is not null-terminated. The length of the string is returned via
+       `outSize`. If index is out of range, or if the specified argument is not a string, the
+       function will return nullptr.
+    */
+    SLANG_API const char* spReflectionUserAttribute_GetArgumentValueString(
+        SlangReflectionUserAttribute* attrib,
+        unsigned int index,
+        size_t* outSize);
+
+    // Type Reflection
+
+    SLANG_API SlangTypeKind spReflectionType_GetKind(SlangReflectionType* type);
+    SLANG_API unsigned int spReflectionType_GetUserAttributeCount(SlangReflectionType* type);
+    SLANG_API SlangReflectionUserAttribute* spReflectionType_GetUserAttribute(
+        SlangReflectionType* type,
+        unsigned int index);
+    SLANG_API SlangReflectionUserAttribute* spReflectionType_FindUserAttributeByName(
+        SlangReflectionType* type,
+        char const* name);
+    SLANG_API SlangReflectionType* spReflectionType_applySpecializations(
+        SlangReflectionType* type,
+        SlangReflectionGeneric* generic);
+
+    SLANG_API unsigned int spReflectionType_GetFieldCount(SlangReflectionType* type);
+    SLANG_API SlangReflectionVariable* spReflectionType_GetFieldByIndex(
+        SlangReflectionType* type,
+        unsigned index);
+
+    /** Returns the number of elements in the given type.
+
+    This operation is valid for vector and array types. For other types it returns zero.
+
+    When invoked on an unbounded-size array it will return `SLANG_UNBOUNDED_SIZE`,
+    which is defined to be `~size_t(0)`.
+
+    If the size of a type cannot be statically computed, perhaps because it depends on
+    a generic parameter that has not been bound to a specific value, this function returns zero.
+    */
+    SLANG_API size_t spReflectionType_GetElementCount(SlangReflectionType* type);
+
+    SLANG_API SlangReflectionType* spReflectionType_GetElementType(SlangReflectionType* type);
+
+    SLANG_API unsigned int spReflectionType_GetRowCount(SlangReflectionType* type);
+    SLANG_API unsigned int spReflectionType_GetColumnCount(SlangReflectionType* type);
+    SLANG_API SlangScalarType spReflectionType_GetScalarType(SlangReflectionType* type);
+
+    SLANG_API SlangResourceShape spReflectionType_GetResourceShape(SlangReflectionType* type);
+    SLANG_API SlangResourceAccess spReflectionType_GetResourceAccess(SlangReflectionType* type);
+    SLANG_API SlangReflectionType* spReflectionType_GetResourceResultType(
+        SlangReflectionType* type);
+
+    SLANG_API char const* spReflectionType_GetName(SlangReflectionType* type);
+    SLANG_API SlangResult
+    spReflectionType_GetFullName(SlangReflectionType* type, ISlangBlob** outNameBlob);
+    SLANG_API SlangReflectionGeneric* spReflectionType_GetGenericContainer(
+        SlangReflectionType* type);
+
+    // Type Layout Reflection
+
+    SLANG_API SlangReflectionType* spReflectionTypeLayout_GetType(SlangReflectionTypeLayout* type);
+    SLANG_API SlangTypeKind spReflectionTypeLayout_getKind(SlangReflectionTypeLayout* type);
+    SLANG_API size_t spReflectionTypeLayout_GetSize(
+        SlangReflectionTypeLayout* type,
+        SlangParameterCategory category);
+    SLANG_API size_t spReflectionTypeLayout_GetStride(
+        SlangReflectionTypeLayout* type,
+        SlangParameterCategory category);
+    SLANG_API int32_t spReflectionTypeLayout_getAlignment(
+        SlangReflectionTypeLayout* type,
+        SlangParameterCategory category);
+
+    SLANG_API uint32_t spReflectionTypeLayout_GetFieldCount(SlangReflectionTypeLayout* type);
+    SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_GetFieldByIndex(
+        SlangReflectionTypeLayout* type,
+        unsigned index);
+
+    SLANG_API SlangInt spReflectionTypeLayout_findFieldIndexByName(
+        SlangReflectionTypeLayout* typeLayout,
+        const char* nameBegin,
+        const char* nameEnd);
+
+    SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_GetExplicitCounter(
+        SlangReflectionTypeLayout* typeLayout);
+
+    SLANG_API size_t spReflectionTypeLayout_GetElementStride(
+        SlangReflectionTypeLayout* type,
+        SlangParameterCategory category);
+    SLANG_API SlangReflectionTypeLayout* spReflectionTypeLayout_GetElementTypeLayout(
+        SlangReflectionTypeLayout* type);
+    SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_GetElementVarLayout(
+        SlangReflectionTypeLayout* type);
+    SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_getContainerVarLayout(
+        SlangReflectionTypeLayout* type);
+
+    SLANG_API SlangParameterCategory
+    spReflectionTypeLayout_GetParameterCategory(SlangReflectionTypeLayout* type);
+
+    SLANG_API unsigned spReflectionTypeLayout_GetCategoryCount(SlangReflectionTypeLayout* type);
+    SLANG_API SlangParameterCategory
+    spReflectionTypeLayout_GetCategoryByIndex(SlangReflectionTypeLayout* type, unsigned index);
+
+    SLANG_API SlangMatrixLayoutMode
+    spReflectionTypeLayout_GetMatrixLayoutMode(SlangReflectionTypeLayout* type);
+
+    SLANG_API int spReflectionTypeLayout_getGenericParamIndex(SlangReflectionTypeLayout* type);
+
+    SLANG_API SlangReflectionTypeLayout* spReflectionTypeLayout_getPendingDataTypeLayout(
+        SlangReflectionTypeLayout* type);
+
+    SLANG_API SlangReflectionVariableLayout*
+    spReflectionTypeLayout_getSpecializedTypePendingDataVarLayout(SlangReflectionTypeLayout* type);
+    SLANG_API SlangInt spReflectionType_getSpecializedTypeArgCount(SlangReflectionType* type);
+    SLANG_API SlangReflectionType* spReflectionType_getSpecializedTypeArgType(
+        SlangReflectionType* type,
+        SlangInt index);
+
+    SLANG_API SlangInt
+    spReflectionTypeLayout_getBindingRangeCount(SlangReflectionTypeLayout* typeLayout);
+    SLANG_API SlangBindingType spReflectionTypeLayout_getBindingRangeType(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt index);
+    SLANG_API SlangInt spReflectionTypeLayout_isBindingRangeSpecializable(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt index);
+    SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeBindingCount(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt index);
+    SLANG_API SlangReflectionTypeLayout* spReflectionTypeLayout_getBindingRangeLeafTypeLayout(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt index);
+    SLANG_API SlangReflectionVariable* spReflectionTypeLayout_getBindingRangeLeafVariable(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt index);
+    SLANG_API SlangImageFormat spReflectionTypeLayout_getBindingRangeImageFormat(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt index);
+    SLANG_API SlangInt spReflectionTypeLayout_getFieldBindingRangeOffset(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt fieldIndex);
+    SLANG_API SlangInt spReflectionTypeLayout_getExplicitCounterBindingRangeOffset(
+        SlangReflectionTypeLayout* inTypeLayout);
+
+    SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeDescriptorSetIndex(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt index);
+    SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeFirstDescriptorRangeIndex(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt index);
+    SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeDescriptorRangeCount(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt index);
+
+    SLANG_API SlangInt
+    spReflectionTypeLayout_getDescriptorSetCount(SlangReflectionTypeLayout* typeLayout);
+    SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetSpaceOffset(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt setIndex);
+    SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetDescriptorRangeCount(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt setIndex);
+    SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetDescriptorRangeIndexOffset(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt setIndex,
+        SlangInt rangeIndex);
+    SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetDescriptorRangeDescriptorCount(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt setIndex,
+        SlangInt rangeIndex);
+    SLANG_API SlangBindingType spReflectionTypeLayout_getDescriptorSetDescriptorRangeType(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt setIndex,
+        SlangInt rangeIndex);
+    SLANG_API SlangParameterCategory spReflectionTypeLayout_getDescriptorSetDescriptorRangeCategory(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt setIndex,
+        SlangInt rangeIndex);
+
+    SLANG_API SlangInt
+    spReflectionTypeLayout_getSubObjectRangeCount(SlangReflectionTypeLayout* typeLayout);
+    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeBindingRangeIndex(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt subObjectRangeIndex);
+    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeSpaceOffset(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt subObjectRangeIndex);
+    SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_getSubObjectRangeOffset(
+        SlangReflectionTypeLayout* typeLayout,
+        SlangInt subObjectRangeIndex);
+
+#if 0 // Disabled: the declarations below are compiled out and are not part of the active API.
+    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeCount(SlangReflectionTypeLayout* typeLayout);
+    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeObjectCount(SlangReflectionTypeLayout* typeLayout, SlangInt index);
+    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeBindingRangeIndex(SlangReflectionTypeLayout* typeLayout, SlangInt index);
+    SLANG_API SlangReflectionTypeLayout* spReflectionTypeLayout_getSubObjectRangeTypeLayout(SlangReflectionTypeLayout* typeLayout, SlangInt index);
+
+    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeCount(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex);
+    SLANG_API SlangBindingType spReflectionTypeLayout_getSubObjectRangeDescriptorRangeBindingType(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject);
+    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeBindingCount(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject);
+    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeIndexOffset(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject);
+    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeSpaceOffset(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject);
+#endif // 0
+
+    // Variable Reflection
+
+    SLANG_API char const* spReflectionVariable_GetName(SlangReflectionVariable* var);
+    SLANG_API SlangReflectionType* spReflectionVariable_GetType(SlangReflectionVariable* var);
+    SLANG_API SlangReflectionModifier* spReflectionVariable_FindModifier(
+        SlangReflectionVariable* var,
+        SlangModifierID modifierID);
+    SLANG_API unsigned int spReflectionVariable_GetUserAttributeCount(SlangReflectionVariable* var);
+    SLANG_API SlangReflectionUserAttribute* spReflectionVariable_GetUserAttribute(
+        SlangReflectionVariable* var,
+        unsigned int index);
+    SLANG_API SlangReflectionUserAttribute* spReflectionVariable_FindUserAttributeByName(
+        SlangReflectionVariable* var,
+        SlangSession* globalSession,
+        char const* name);
+    SLANG_API bool spReflectionVariable_HasDefaultValue(SlangReflectionVariable* inVar);
+    SLANG_API SlangResult
+    spReflectionVariable_GetDefaultValueInt(SlangReflectionVariable* inVar, int64_t* rs);
+    SLANG_API SlangReflectionGeneric* spReflectionVariable_GetGenericContainer(
+        SlangReflectionVariable* var);
+    SLANG_API SlangReflectionVariable* spReflectionVariable_applySpecializations(
+        SlangReflectionVariable* var,
+        SlangReflectionGeneric* generic);
+
+    // Variable Layout Reflection
+
+    SLANG_API SlangReflectionVariable* spReflectionVariableLayout_GetVariable(
+        SlangReflectionVariableLayout* var);
+
+    SLANG_API SlangReflectionTypeLayout* spReflectionVariableLayout_GetTypeLayout(
+        SlangReflectionVariableLayout* var);
+
+    SLANG_API size_t spReflectionVariableLayout_GetOffset(
+        SlangReflectionVariableLayout* var,
+        SlangParameterCategory category);
+    SLANG_API size_t spReflectionVariableLayout_GetSpace(
+        SlangReflectionVariableLayout* var,
+        SlangParameterCategory category);
+    SLANG_API SlangImageFormat
+    spReflectionVariableLayout_GetImageFormat(SlangReflectionVariableLayout* var);
+
+    SLANG_API char const* spReflectionVariableLayout_GetSemanticName(
+        SlangReflectionVariableLayout* var);
+    SLANG_API size_t
+    spReflectionVariableLayout_GetSemanticIndex(SlangReflectionVariableLayout* var);
+
+
+    // Function Reflection
+
+    SLANG_API SlangReflectionDecl* spReflectionFunction_asDecl(SlangReflectionFunction* func);
+    SLANG_API char const* spReflectionFunction_GetName(SlangReflectionFunction* func);
+    SLANG_API SlangReflectionModifier* spReflectionFunction_FindModifier(
+        SlangReflectionFunction* func,
+        SlangModifierID modifierID);
+    SLANG_API unsigned int spReflectionFunction_GetUserAttributeCount(
+        SlangReflectionFunction* func);
+    SLANG_API SlangReflectionUserAttribute* spReflectionFunction_GetUserAttribute(
+        SlangReflectionFunction* func,
+        unsigned int index);
+    SLANG_API SlangReflectionUserAttribute* spReflectionFunction_FindUserAttributeByName(
+        SlangReflectionFunction* func,
+        SlangSession* globalSession,
+        char const* name);
+    SLANG_API unsigned int spReflectionFunction_GetParameterCount(SlangReflectionFunction* func);
+    SLANG_API SlangReflectionVariable* spReflectionFunction_GetParameter(
+        SlangReflectionFunction* func,
+        unsigned index);
+    SLANG_API SlangReflectionType* spReflectionFunction_GetResultType(
+        SlangReflectionFunction* func);
+    SLANG_API SlangReflectionGeneric* spReflectionFunction_GetGenericContainer(
+        SlangReflectionFunction* func);
+    SLANG_API SlangReflectionFunction* spReflectionFunction_applySpecializations(
+        SlangReflectionFunction* func,
+        SlangReflectionGeneric* generic);
+    SLANG_API SlangReflectionFunction* spReflectionFunction_specializeWithArgTypes(
+        SlangReflectionFunction* func,
+        SlangInt argTypeCount,
+        SlangReflectionType* const* argTypes);
+    SLANG_API bool spReflectionFunction_isOverloaded(SlangReflectionFunction* func);
+    SLANG_API unsigned int spReflectionFunction_getOverloadCount(SlangReflectionFunction* func);
+    SLANG_API SlangReflectionFunction* spReflectionFunction_getOverload(
+        SlangReflectionFunction* func,
+        unsigned int index);
+
+    // Abstract Decl Reflection
+
+    SLANG_API unsigned int spReflectionDecl_getChildrenCount(SlangReflectionDecl* parentDecl);
+    SLANG_API SlangReflectionDecl* spReflectionDecl_getChild(
+        SlangReflectionDecl* parentDecl,
+        unsigned int index);
+    SLANG_API char const* spReflectionDecl_getName(SlangReflectionDecl* decl);
+    SLANG_API SlangDeclKind spReflectionDecl_getKind(SlangReflectionDecl* decl);
+    SLANG_API SlangReflectionFunction* spReflectionDecl_castToFunction(SlangReflectionDecl* decl);
+    SLANG_API SlangReflectionVariable* spReflectionDecl_castToVariable(SlangReflectionDecl* decl);
+    SLANG_API SlangReflectionGeneric* spReflectionDecl_castToGeneric(SlangReflectionDecl* decl);
+    SLANG_API SlangReflectionType* spReflection_getTypeFromDecl(SlangReflectionDecl* decl);
+    SLANG_API SlangReflectionDecl* spReflectionDecl_getParent(SlangReflectionDecl* decl);
+
+    // Generic Reflection
+
+    SLANG_API SlangReflectionDecl* spReflectionGeneric_asDecl(SlangReflectionGeneric* generic);
+    SLANG_API char const* spReflectionGeneric_GetName(SlangReflectionGeneric* generic);
+    SLANG_API unsigned int spReflectionGeneric_GetTypeParameterCount(
+        SlangReflectionGeneric* generic);
+    SLANG_API SlangReflectionVariable* spReflectionGeneric_GetTypeParameter(
+        SlangReflectionGeneric* generic,
+        unsigned index);
+    SLANG_API unsigned int spReflectionGeneric_GetValueParameterCount(
+        SlangReflectionGeneric* generic);
+    SLANG_API SlangReflectionVariable* spReflectionGeneric_GetValueParameter(
+        SlangReflectionGeneric* generic,
+        unsigned index);
+    SLANG_API unsigned int spReflectionGeneric_GetTypeParameterConstraintCount(
+        SlangReflectionGeneric* generic,
+        SlangReflectionVariable* typeParam);
+    SLANG_API SlangReflectionType* spReflectionGeneric_GetTypeParameterConstraintType(
+        SlangReflectionGeneric* generic,
+        SlangReflectionVariable* typeParam,
+        unsigned index);
+    SLANG_API SlangDeclKind spReflectionGeneric_GetInnerKind(SlangReflectionGeneric* generic);
+    SLANG_API SlangReflectionDecl* spReflectionGeneric_GetInnerDecl(
+        SlangReflectionGeneric* generic);
+    SLANG_API SlangReflectionGeneric* spReflectionGeneric_GetOuterGenericContainer(
+        SlangReflectionGeneric* generic);
+    SLANG_API SlangReflectionType* spReflectionGeneric_GetConcreteType(
+        SlangReflectionGeneric* generic,
+        SlangReflectionVariable* typeParam);
+    SLANG_API int64_t spReflectionGeneric_GetConcreteIntVal(
+        SlangReflectionGeneric* generic,
+        SlangReflectionVariable* valueParam);
+    SLANG_API SlangReflectionGeneric* spReflectionGeneric_applySpecializations(
+        SlangReflectionGeneric* currGeneric,
+        SlangReflectionGeneric* generic);
+
+
+    /** Get the stage that a variable belongs to (if any).
+
+    A variable "belongs" to a specific stage when it is a varying input/output
+    parameter either defined as part of the parameter list for an entry
+    point *or* at the global scope of a stage-specific GLSL code file (e.g.,
+    an `in` parameter in a GLSL `.vs` file belongs to the vertex stage).
+    */
+    SLANG_API SlangStage spReflectionVariableLayout_getStage(SlangReflectionVariableLayout* var);
+
+
+    SLANG_API SlangReflectionVariableLayout* spReflectionVariableLayout_getPendingDataLayout(
+        SlangReflectionVariableLayout* var);
+
+    // Shader Parameter Reflection
+
+    SLANG_API unsigned spReflectionParameter_GetBindingIndex(SlangReflectionParameter* parameter);
+    SLANG_API unsigned spReflectionParameter_GetBindingSpace(SlangReflectionParameter* parameter);
+
+    SLANG_API SlangResult spIsParameterLocationUsed(
+        SlangCompileRequest* request,
+        SlangInt entryPointIndex,
+        SlangInt targetIndex,
+        SlangParameterCategory category, // is this a `t` register? `s` register?
+        SlangUInt spaceIndex,            // `space` for D3D12, `set` for Vulkan
+        SlangUInt registerIndex,         // `register` for D3D12, `binding` for Vulkan
+        bool& outUsed);
+
+    // Entry Point Reflection
+
+    SLANG_API char const* spReflectionEntryPoint_getName(SlangReflectionEntryPoint* entryPoint);
+
+    SLANG_API char const* spReflectionEntryPoint_getNameOverride(
+        SlangReflectionEntryPoint* entryPoint);
+
+    SLANG_API SlangReflectionFunction* spReflectionEntryPoint_getFunction(
+        SlangReflectionEntryPoint* entryPoint);
+
+    SLANG_API unsigned spReflectionEntryPoint_getParameterCount(
+        SlangReflectionEntryPoint* entryPoint);
+
+    SLANG_API SlangReflectionVariableLayout* spReflectionEntryPoint_getParameterByIndex(
+        SlangReflectionEntryPoint* entryPoint,
+        unsigned index);
+
+    SLANG_API SlangStage spReflectionEntryPoint_getStage(SlangReflectionEntryPoint* entryPoint);
+
+    SLANG_API void spReflectionEntryPoint_getComputeThreadGroupSize(
+        SlangReflectionEntryPoint* entryPoint,
+        SlangUInt axisCount,
+        SlangUInt* outSizeAlongAxis);
+
+    SLANG_API void spReflectionEntryPoint_getComputeWaveSize(
+        SlangReflectionEntryPoint* entryPoint,
+        SlangUInt* outWaveSize);
+
+    SLANG_API int spReflectionEntryPoint_usesAnySampleRateInput(
+        SlangReflectionEntryPoint* entryPoint);
+
+    SLANG_API SlangReflectionVariableLayout* spReflectionEntryPoint_getVarLayout(
+        SlangReflectionEntryPoint* entryPoint);
+
+    SLANG_API SlangReflectionVariableLayout* spReflectionEntryPoint_getResultVarLayout(
+        SlangReflectionEntryPoint* entryPoint);
+
+    SLANG_API int spReflectionEntryPoint_hasDefaultConstantBuffer(
+        SlangReflectionEntryPoint* entryPoint);
+
+    // SlangReflectionTypeParameter
+    SLANG_API char const* spReflectionTypeParameter_GetName(
+        SlangReflectionTypeParameter* typeParam);
+    SLANG_API unsigned spReflectionTypeParameter_GetIndex(SlangReflectionTypeParameter* typeParam);
+    SLANG_API unsigned spReflectionTypeParameter_GetConstraintCount(
+        SlangReflectionTypeParameter* typeParam);
+    SLANG_API SlangReflectionType* spReflectionTypeParameter_GetConstraintByIndex(
+        SlangReflectionTypeParameter* typeParam,
+        unsigned int index);
+
+    // Shader Reflection
+
+    SLANG_API SlangResult spReflection_ToJson(
+        SlangReflection* reflection,
+        SlangCompileRequest* request,
+        ISlangBlob** outBlob);
+
+    SLANG_API unsigned spReflection_GetParameterCount(SlangReflection* reflection);
+    SLANG_API SlangReflectionParameter* spReflection_GetParameterByIndex(
+        SlangReflection* reflection,
+        unsigned index);
+
+    SLANG_API unsigned int spReflection_GetTypeParameterCount(SlangReflection* reflection);
+    SLANG_API SlangReflectionTypeParameter* spReflection_GetTypeParameterByIndex(
+        SlangReflection* reflection,
+        unsigned int index);
+    SLANG_API SlangReflectionTypeParameter* spReflection_FindTypeParameter(
+        SlangReflection* reflection,
+        char const* name);
+
+    SLANG_API SlangReflectionType* spReflection_FindTypeByName(
+        SlangReflection* reflection,
+        char const* name);
+    SLANG_API SlangReflectionTypeLayout* spReflection_GetTypeLayout(
+        SlangReflection* reflection,
+        SlangReflectionType* reflectionType,
+        SlangLayoutRules rules);
+
+    SLANG_API SlangReflectionFunction* spReflection_FindFunctionByName(
+        SlangReflection* reflection,
+        char const* name);
+    SLANG_API SlangReflectionFunction* spReflection_FindFunctionByNameInType(
+        SlangReflection* reflection,
+        SlangReflectionType* reflType,
+        char const* name);
+    SLANG_API SlangReflectionVariable* spReflection_FindVarByNameInType(
+        SlangReflection* reflection,
+        SlangReflectionType* reflType,
+        char const* name);
+
+    SLANG_API SlangUInt spReflection_getEntryPointCount(SlangReflection* reflection);
+    SLANG_API SlangReflectionEntryPoint* spReflection_getEntryPointByIndex(
+        SlangReflection* reflection,
+        SlangUInt index);
+    SLANG_API SlangReflectionEntryPoint* spReflection_findEntryPointByName(
+        SlangReflection* reflection,
+        char const* name);
+
+    SLANG_API SlangUInt spReflection_getGlobalConstantBufferBinding(SlangReflection* reflection);
+    SLANG_API size_t spReflection_getGlobalConstantBufferSize(SlangReflection* reflection);
+
+    SLANG_API SlangReflectionType* spReflection_specializeType(
+        SlangReflection* reflection,
+        SlangReflectionType* type,
+        SlangInt specializationArgCount,
+        SlangReflectionType* const* specializationArgs,
+        ISlangBlob** outDiagnostics);
+
+    SLANG_API SlangReflectionGeneric* spReflection_specializeGeneric(
+        SlangReflection* inProgramLayout,
+        SlangReflectionGeneric* generic,
+        SlangInt argCount,
+        SlangReflectionGenericArgType const* argTypes,
+        SlangReflectionGenericArg const* args,
+        ISlangBlob** outDiagnostics);
+
+    SLANG_API bool spReflection_isSubType(
+        SlangReflection* reflection,
+        SlangReflectionType* subType,
+        SlangReflectionType* superType);
+
+    /// Get the number of hashed strings
+    SLANG_API SlangUInt spReflection_getHashedStringCount(SlangReflection* reflection);
+
+    /// Get a hashed string. The number of chars is written in outCount.
+    /// The count does *NOT* include the terminating 0. The returned string will be 0 terminated.
+    SLANG_API const char* spReflection_getHashedString(
+        SlangReflection* reflection,
+        SlangUInt index,
+        size_t* outCount);
+
+    /// Compute a string hash.
+    /// Count should *NOT* include terminating zero.
+    SLANG_API SlangUInt32 spComputeStringHash(const char* chars, size_t count);
+
+    /// Get a type layout representing reflection information for the global-scope parameters.
+    SLANG_API SlangReflectionTypeLayout* spReflection_getGlobalParamsTypeLayout(
+        SlangReflection* reflection);
+
+    /// Get a variable layout representing reflection information for the global-scope parameters.
+    SLANG_API SlangReflectionVariableLayout* spReflection_getGlobalParamsVarLayout(
+        SlangReflection* reflection);
+
+    SLANG_API char const* spGetTranslationUnitSource(
+        SlangCompileRequest* request,
+        int translationUnitIndex);
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef __cplusplus
+SLANG_API slang::ISession* spReflection_GetSession(SlangReflection* reflection);
+
+namespace slang
+{
+struct IComponentType;
+struct IModule;
+} // namespace slang
+
+extern "C"
+{
+    /** @see slang::ICompileRequest::getProgram
+     */
+    SLANG_API SlangResult
+    spCompileRequest_getProgram(SlangCompileRequest* request, slang::IComponentType** outProgram);
+
+    /** @see slang::ICompileRequest::getProgramWithEntryPoints
+     */
+    SLANG_API SlangResult spCompileRequest_getProgramWithEntryPoints(
+        SlangCompileRequest* request,
+        slang::IComponentType** outProgram);
+
+    /** @see slang::ICompileRequest::getEntryPoint
+     */
+    SLANG_API SlangResult spCompileRequest_getEntryPoint(
+        SlangCompileRequest* request,
+        SlangInt entryPointIndex,
+        slang::IComponentType** outEntryPoint);
+
+    /** @see slang::ICompileRequest::getModule
+     */
+    SLANG_API SlangResult spCompileRequest_getModule(
+        SlangCompileRequest* request,
+        SlangInt translationUnitIndex,
+        slang::IModule** outModule);
+
+    /** @see slang::ICompileRequest::getSession
+     */
+    SLANG_API SlangResult
+    spCompileRequest_getSession(SlangCompileRequest* request, slang::ISession** outSession);
+}
+
+namespace slang
+{
+/*!
+@brief A request for one or more compilation actions to be performed.
+*/
+struct ICompileRequest : public ISlangUnknown
+{
+    SLANG_COM_INTERFACE(
+        0x96d33993,
+        0x317c,
+        0x4db5,
+        {0xaf, 0xd8, 0x66, 0x6e, 0xe7, 0x72, 0x48, 0xe2})
+
+    /** Set the filesystem hook to use for a compile request
+
+    The provided `fileSystem` will be used to load any files that
+    need to be loaded during processing of the compile `request`.
+    This includes:
+
+      - Source files loaded via `spAddTranslationUnitSourceFile`
+      - Files referenced via `#include`
+      - Files loaded to resolve `#import` operations
+        */
+    virtual SLANG_NO_THROW void SLANG_MCALL setFileSystem(ISlangFileSystem* fileSystem) = 0;
+
+    /*!
+    @brief Set flags to be used for compilation.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL setCompileFlags(SlangCompileFlags flags) = 0;
+
+    /*!
+    @brief Returns the compilation flags previously set with `setCompileFlags`
+    */
+    virtual SLANG_NO_THROW SlangCompileFlags SLANG_MCALL getCompileFlags() = 0;
+
+    /*!
+    @brief Set whether to dump intermediate results (for debugging) or not.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL setDumpIntermediates(int enable) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL setDumpIntermediatePrefix(const char* prefix) = 0;
+
+    /*!
+    @brief Set whether (and how) `#line` directives should be output.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL setLineDirectiveMode(SlangLineDirectiveMode mode) = 0;
+
+    /*!
+    @brief Sets the target for code generation.
+    @param target The code generation target. Possible values are:
+    - SLANG_GLSL. Generates GLSL code.
+    - SLANG_HLSL. Generates HLSL code.
+    - SLANG_SPIRV. Generates SPIR-V code.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL setCodeGenTarget(SlangCompileTarget target) = 0;
+
+    /*!
+    @brief Add a code-generation target to be used.
+    */
+    virtual SLANG_NO_THROW int SLANG_MCALL addCodeGenTarget(SlangCompileTarget target) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setTargetProfile(int targetIndex, SlangProfileID profile) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setTargetFlags(int targetIndex, SlangTargetFlags flags) = 0;
+
+    /*!
+    @brief Set the floating point mode (e.g., precise or fast) to use for a target.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setTargetFloatingPointMode(int targetIndex, SlangFloatingPointMode mode) = 0;
+
+    /* DEPRECATED: use `spSetMatrixLayoutMode` instead. */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setTargetMatrixLayoutMode(int targetIndex, SlangMatrixLayoutMode mode) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL setMatrixLayoutMode(SlangMatrixLayoutMode mode) = 0;
+
+    /*!
+    @brief Set the level of debug information to produce.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL setDebugInfoLevel(SlangDebugInfoLevel level) = 0;
+
+    /*!
+    @brief Set the level of optimization to perform.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL setOptimizationLevel(SlangOptimizationLevel level) = 0;
+
+
+    /*!
+    @brief Set the container format to be used for binary output.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setOutputContainerFormat(SlangContainerFormat format) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL setPassThrough(SlangPassThrough passThrough) = 0;
+
+
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setDiagnosticCallback(SlangDiagnosticCallback callback, void const* userData) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setWriter(SlangWriterChannel channel, ISlangWriter* writer) = 0;
+
+    virtual SLANG_NO_THROW ISlangWriter* SLANG_MCALL getWriter(SlangWriterChannel channel) = 0;
+
+    /*!
+    @brief Add a path to use when searching for referenced files.
+    This will be used for both `#include` directives and also for explicit
+    `__import` declarations.
+    @param searchDir The additional search directory.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL addSearchPath(const char* searchDir) = 0;
+
+    /*!
+    @brief Add a macro definition to be used during preprocessing.
+    @param key The name of the macro to define.
+    @param value The value of the macro to define.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    addPreprocessorDefine(const char* key, const char* value) = 0;
+
+    /*!
+    @brief Set options using arguments as if specified via command line.
+    @return Returns SlangResult. On success SLANG_SUCCEEDED(result) is true.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    processCommandLineArguments(char const* const* args, int argCount) = 0;
+
+    /** Add a distinct translation unit to the compilation request
+
+    `name` is optional.
+    Returns the zero-based index of the translation unit created.
+    */
+    virtual SLANG_NO_THROW int SLANG_MCALL
+    addTranslationUnit(SlangSourceLanguage language, char const* name) = 0;
+
+
+    /** Set a default module name. Translation units will default to this module name if one is not
+    passed. If not set each translation unit will get a unique name.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL setDefaultModuleName(const char* defaultModuleName) = 0;
+
+    /** Add a preprocessor definition that is scoped to a single translation unit.
+
+    @param translationUnitIndex The index of the translation unit to get the definition.
+    @param key The name of the macro to define.
+    @param value The value of the macro to define.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitPreprocessorDefine(
+        int translationUnitIndex,
+        const char* key,
+        const char* value) = 0;
+
+
+    /** Add a source file to the given translation unit.
+
+    If a user-defined file system has been specified via
+    `spSetFileSystem`, then it will be used to load the
+    file at `path`. Otherwise, Slang will use the OS
+    file system.
+
+    This function does *not* search for a file using
+    the registered search paths (`spAddSearchPath`),
+    and instead uses the given `path` as-is.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    addTranslationUnitSourceFile(int translationUnitIndex, char const* path) = 0;
+
+    /** Add a source string to the given translation unit.
+
+    @param translationUnitIndex The index of the translation unit to add source to.
+    @param path The file-system path that should be assumed for the source code.
+    @param source A null-terminated UTF-8 encoded string of source code.
+
+    The implementation will make a copy of the source code data.
+    An application may free the buffer immediately after this call returns.
+
+    The `path` will be used in any diagnostic output, as well
+    as to determine the base path when resolving relative
+    `#include`s.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitSourceString(
+        int translationUnitIndex,
+        char const* path,
+        char const* source) = 0;
+
+
+    /** Add a slang library - such that its contents can be referenced during linking.
+    This is equivalent to the -r command line option.
+
+    @param basePath The base path used to lookup referenced modules.
+    @param libData The library data
+    @param libDataSize The size of the library data
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    addLibraryReference(const char* basePath, const void* libData, size_t libDataSize) = 0;
+
+    /** Add a source string to the given translation unit.
+
+    @param translationUnitIndex The index of the translation unit to add source to.
+    @param path The file-system path that should be assumed for the source code.
+    @param sourceBegin A pointer to a buffer of UTF-8 encoded source code.
+    @param sourceEnd A pointer to the end of the buffer specified in `sourceBegin`
+
+    The implementation will make a copy of the source code data.
+    An application may free the buffer immediately after this call returns.
+
+    The `path` will be used in any diagnostic output, as well
+    as to determine the base path when resolving relative
+    `#include`s.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitSourceStringSpan(
+        int translationUnitIndex,
+        char const* path,
+        char const* sourceBegin,
+        char const* sourceEnd) = 0;
+
+    /** Add a blob of source code to the given translation unit.
+
+    @param translationUnitIndex The index of the translation unit to add source to.
+    @param path The file-system path that should be assumed for the source code.
+    @param sourceBlob A blob containing the UTF-8 encoded source code to add to the
+    translation unit.
+
+    The compile request will retain a reference to the blob.
+
+    The `path` will be used in any diagnostic output, as well
+    as to determine the base path when resolving relative
+    `#include`s.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitSourceBlob(
+        int translationUnitIndex,
+        char const* path,
+        ISlangBlob* sourceBlob) = 0;
+
+    /** Add an entry point in a particular translation unit
+     */
+    virtual SLANG_NO_THROW int SLANG_MCALL
+    addEntryPoint(int translationUnitIndex, char const* name, SlangStage stage) = 0;
+
+    /** Add an entry point in a particular translation unit,
+        with additional arguments that specify the concrete
+        type names for entry-point generic type parameters.
+    */
+    virtual SLANG_NO_THROW int SLANG_MCALL addEntryPointEx(
+        int translationUnitIndex,
+        char const* name,
+        SlangStage stage,
+        int genericArgCount,
+        char const** genericArgs) = 0;
+
+    /** Specify the arguments to use for global generic parameters.
+     */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    setGlobalGenericArgs(int genericArgCount, char const** genericArgs) = 0;
+
+    /** Specify the concrete type to be used for a global "existential slot."
+
+    Every shader parameter (or leaf field of a `struct`-type shader parameter)
+    that has an interface or array-of-interface type introduces an existential
+    slot. The number of slots consumed by a shader parameter, and the starting
+    slot of each parameter can be queried via the reflection API using
+    `SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM`.
+
+    In order to generate specialized code, a concrete type needs to be specified
+    for each existential slot. This function specifies the name of the type
+    (or in general a type *expression*) to use for a specific slot at the
+    global scope.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    setTypeNameForGlobalExistentialTypeParam(int slotIndex, char const* typeName) = 0;
+
+    /** Specify the concrete type to be used for an entry-point "existential slot."
+
+    Every shader parameter (or leaf field of a `struct`-type shader parameter)
+    that has an interface or array-of-interface type introduces an existential
+    slot. The number of slots consumed by a shader parameter, and the starting
+    slot of each parameter can be queried via the reflection API using
+    `SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM`.
+
+    In order to generate specialized code, a concrete type needs to be specified
+    for each existential slot. This function specifies the name of the type
+    (or in general a type *expression*) to use for a specific slot at the
+    entry-point scope.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL setTypeNameForEntryPointExistentialTypeParam(
+        int entryPointIndex,
+        int slotIndex,
+        char const* typeName) = 0;
+
+    /** Enable or disable an experimental, best-effort GLSL frontend
+     */
+    virtual SLANG_NO_THROW void SLANG_MCALL setAllowGLSLInput(bool value) = 0;
+
+    /** Execute the compilation request.
+
+    @returns  SlangResult, SLANG_OK on success. Use SLANG_SUCCEEDED() and SLANG_FAILED() to test
+    SlangResult.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL compile() = 0;
+
+
+    /** Get any diagnostic messages reported by the compiler.
+
+    @returns A null-terminated UTF-8 encoded string of diagnostic messages.
+
+    The returned pointer is only guaranteed to be valid
+    until `request` is destroyed. Applications that wish to
+    hold on to the diagnostic output for longer should use
+    `getDiagnosticOutputBlob`.
+    */
+    virtual SLANG_NO_THROW char const* SLANG_MCALL getDiagnosticOutput() = 0;
+
+    /** Get diagnostic messages reported by the compiler.
+
+    @param outBlob A pointer to receive a blob holding a nul-terminated UTF-8 encoded string of
+    diagnostic messages.
+    @returns A `SlangResult` indicating success or failure.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getDiagnosticOutputBlob(ISlangBlob** outBlob) = 0;
+
+
+    /** Get the number of files that this compilation depended on.
+
+    This includes both the explicit source files, as well as any
+    additional files that were transitively referenced (e.g., via
+    a `#include` directive).
+    */
+    virtual SLANG_NO_THROW int SLANG_MCALL getDependencyFileCount() = 0;
+
+    /** Get the path to a file this compilation depended on.
+     */
+    virtual SLANG_NO_THROW char const* SLANG_MCALL getDependencyFilePath(int index) = 0;
+
+    /** Get the number of translation units associated with the compilation request
+     */
+    virtual SLANG_NO_THROW int SLANG_MCALL getTranslationUnitCount() = 0;
+
+    /** Get the output source code associated with a specific entry point.
+
+    The lifetime of the output pointer is the same as `request`.
+    */
+    virtual SLANG_NO_THROW char const* SLANG_MCALL getEntryPointSource(int entryPointIndex) = 0;
+
+    /** Get the output bytecode associated with a specific entry point.
+
+    The lifetime of the output pointer is the same as `request`.
+    */
+    virtual SLANG_NO_THROW void const* SLANG_MCALL
+    getEntryPointCode(int entryPointIndex, size_t* outSize) = 0;
+
+    /** Get the output code associated with a specific entry point.
+
+    @param entryPointIndex The index of the entry point to get code for.
+    @param targetIndex The index of the target to get code for (default: zero).
+    @param outBlob A pointer that will receive the blob of code
+    @returns A `SlangResult` to indicate success or failure.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getEntryPointCodeBlob(int entryPointIndex, int targetIndex, ISlangBlob** outBlob) = 0;
+
+    /** Get entry point 'callable' functions accessible through the ISlangSharedLibrary interface.
+
+    Note that the functions remain in scope as long as the ISlangSharedLibrary interface is in scope.
+
+    NOTE! Requires a compilation target of SLANG_HOST_CALLABLE.
+
+    @param entryPointIndex  The index of the entry point to get code for.
+    @param targetIndex      The index of the target to get code for (default: zero).
+    @param outSharedLibrary A pointer to an ISlangSharedLibrary interface on which functions can
+    be queried.
+    @returns                A `SlangResult` to indicate success or failure.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointHostCallable(
+        int entryPointIndex,
+        int targetIndex,
+        ISlangSharedLibrary** outSharedLibrary) = 0;
+
+    /** Get the output code associated with a specific target.
+
+    @param targetIndex The index of the target to get code for (default: zero).
+    @param outBlob A pointer that will receive the blob of code
+    @returns A `SlangResult` to indicate success or failure.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getTargetCodeBlob(int targetIndex, ISlangBlob** outBlob) = 0;
+
+    /** Get 'callable' functions for a target accessible through the ISlangSharedLibrary interface.
+
+    Note that the functions remain in scope as long as the ISlangSharedLibrary interface is in scope.
+
+    NOTE! Requires a compilation target of SLANG_HOST_CALLABLE.
+
+    @param targetIndex      The index of the target to get code for (default: zero).
+    @param outSharedLibrary A pointer to an ISlangSharedLibrary interface on which functions can
+    be queried.
+    @returns                A `SlangResult` to indicate success or failure.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getTargetHostCallable(int targetIndex, ISlangSharedLibrary** outSharedLibrary) = 0;
+
+    /** Get the output bytecode associated with an entire compile request.
+
+    The lifetime of the output pointer is the same as `request` and the last spCompile.
+
+    @param outSize          The size of the container's contents in bytes. Will be zero if there is
+    no code available.
+    @returns                Pointer to start of the contained data, or nullptr if there is no code
+    available.
+    */
+    virtual SLANG_NO_THROW void const* SLANG_MCALL getCompileRequestCode(size_t* outSize) = 0;
+
+    /** Get the compilation result as a file system.
+    The result is not written to the actual OS file system, but is made available as an
+    in memory representation.
+    */
+    virtual SLANG_NO_THROW ISlangMutableFileSystem* SLANG_MCALL
+    getCompileRequestResultAsFileSystem() = 0;
+
+    /** Return the container code as a blob. The container blob is created as part of a compilation
+    (with spCompile), and a container is produced with a suitable ContainerFormat.
+
+    @param outBlob          The blob containing the container data.
+    @returns                A `SlangResult` to indicate success or failure.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getContainerCode(ISlangBlob** outBlob) = 0;
+
+    /** Load repro from memory specified.
+
+    Should only be performed on a newly created request.
+
+    NOTE! When using the fileSystem, files will be loaded via their `unique names` as if they are
+    part of the flat file system. This mechanism is described more fully in docs/repro.md.
+
+    @param fileSystem       An (optional) filesystem. Pass nullptr to just use contents of repro
+    held in data.
+    @param data             The data to load from.
+    @param size             The size of the data to load from.
+    @returns                A `SlangResult` to indicate success or failure.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    loadRepro(ISlangFileSystem* fileSystem, const void* data, size_t size) = 0;
+
+    /** Save repro state. Should *typically* be performed after spCompile, so that everything
+    that is needed for a compilation is available.
+
+    @param outBlob          Blob that will hold the serialized state
+    @returns                A `SlangResult` to indicate success or failure.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL saveRepro(ISlangBlob** outBlob) = 0;
+
+    /** Enable repro capture.
+
+    Should be set after any ISlangFileSystem has been set, but before any compilation. It ensures
+    that everything that the ISlangFileSystem accesses will be correctly recorded. Note that if a
+    ISlangFileSystem/ISlangFileSystemExt isn't explicitly set (ie the default is used), then the
+    request will automatically be set up to record everything appropriate.
+
+    @returns                A `SlangResult` to indicate success or failure.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL enableReproCapture() = 0;
+
+    /** Get the (linked) program for a compile request.
+
+    The linked program will include all of the global-scope modules for the
+    translation units in the program, plus any modules that they `import`
+    (transitively), specialized to any global specialization arguments that
+    were provided via the API.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getProgram(slang::IComponentType** outProgram) = 0;
+
+    /** Get the (partially linked) component type for an entry point.
+
+    The returned component type will include the entry point at the
+    given index, and will be specialized using any specialization arguments
+    that were provided for it via the API.
+
+    The returned component will *not* include the modules representing
+    the global scope and its dependencies/specialization, so a client
+    program will typically want to compose this component type with
+    the one returned by `spCompileRequest_getProgram` to get a complete
+    and usable component type from which kernel code can be requested.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getEntryPoint(SlangInt entryPointIndex, slang::IComponentType** outEntryPoint) = 0;
+
+    /** Get the (un-linked) module for a translation unit.
+
+    The returned module will not be linked against any dependencies,
+    nor against any entry points (even entry points declared inside
+    the module). Similarly, the module will not be specialized
+    to the arguments that might have been provided via the API.
+
+    This function provides an atomic unit of loaded code that
+    is suitable for looking up types and entry points in the
+    given module, and for linking together to produce a composite
+    program that matches the needs of an application.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getModule(SlangInt translationUnitIndex, slang::IModule** outModule) = 0;
+
+    /** Get the `ISession` handle behind the `SlangCompileRequest`.
+    TODO(JS): Arguably this should just return the session pointer.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getSession(slang::ISession** outSession) = 0;
+
+    /** get reflection data from a compilation request */
+    virtual SLANG_NO_THROW SlangReflection* SLANG_MCALL getReflection() = 0;
+
+    /** Make output specially handled for command line output */
+    virtual SLANG_NO_THROW void SLANG_MCALL setCommandLineCompilerMode() = 0;
+
+    /** Add a defined capability that should be assumed available on the target */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    addTargetCapability(SlangInt targetIndex, SlangCapabilityID capability) = 0;
+
+    /** Get the (linked) program for a compile request, including all entry points.
+
+    The resulting program will include all of the global-scope modules for the
+    translation units in the program, plus any modules that they `import`
+    (transitively), specialized to any global specialization arguments that
+    were provided via the API, as well as all entry points specified for compilation,
+    specialized to their entry-point specialization arguments.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getProgramWithEntryPoints(slang::IComponentType** outProgram) = 0;
+
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL isParameterLocationUsed(
+        SlangInt entryPointIndex,
+        SlangInt targetIndex,
+        SlangParameterCategory category,
+        SlangUInt spaceIndex,
+        SlangUInt registerIndex,
+        bool& outUsed) = 0;
+
+    /** Set the line directive mode for a target.
+     */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setTargetLineDirectiveMode(SlangInt targetIndex, SlangLineDirectiveMode mode) = 0;
+
+    /** Set whether to use scalar buffer layouts for GLSL/Vulkan targets.
+        If true, the generated GLSL/Vulkan code will use `scalar` layout for storage buffers.
+        If false, the resulting code will use std430 for storage buffers.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setTargetForceGLSLScalarBufferLayout(int targetIndex, bool forceScalarLayout) = 0;
+
+    /** Overrides the severity of a specific diagnostic message.
+
+    @param messageID            Numeric identifier of the message to override,
+                                as defined in the 1st parameter of the DIAGNOSTIC macro.
+    @param overrideSeverity     New severity of the message. If the message is originally Error or
+    Fatal, the new severity cannot be lower than that.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    overrideDiagnosticSeverity(SlangInt messageID, SlangSeverity overrideSeverity) = 0;
+
+    /** Returns the currently active flags of the request's diagnostic sink. */
+    virtual SLANG_NO_THROW SlangDiagnosticFlags SLANG_MCALL getDiagnosticFlags() = 0;
+
+    /** Sets the flags of the request's diagnostic sink.
+        The previously specified flags are discarded. */
+    virtual SLANG_NO_THROW void SLANG_MCALL setDiagnosticFlags(SlangDiagnosticFlags flags) = 0;
+
+    /** Set the debug format to be used for debugging information */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setDebugInfoFormat(SlangDebugInfoFormat debugFormat) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL setEnableEffectAnnotations(bool value) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL setReportDownstreamTime(bool value) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL setReportPerfBenchmark(bool value) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL setSkipSPIRVValidation(bool value) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setTargetUseMinimumSlangOptimization(int targetIndex, bool value) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL setIgnoreCapabilityCheck(bool value) = 0;
+
+    // return a copy of internal profiling results, and if `shouldClear` is true, clear the internal
+    // profiling results before returning.
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getCompileTimeProfile(ISlangProfiler** compileTimeProfile, bool shouldClear) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setTargetGenerateWholeProgram(int targetIndex, bool value) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL setTargetForceDXLayout(int targetIndex, bool value) = 0;
+
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setTargetEmbedDownstreamIR(int targetIndex, bool value) = 0;
+};
+
+    #define SLANG_UUID_ICompileRequest ICompileRequest::getTypeGuid()
+
+} // namespace slang
+#endif
diff --git a/external/slang/slang-gfx.h b/external/slang/include/slang-gfx.h
similarity index 67%
rename from external/slang/slang-gfx.h
rename to external/slang/include/slang-gfx.h
index 0ecae4f3..db9dcbac 100644
--- a/external/slang/slang-gfx.h
+++ b/external/slang/include/slang-gfx.h
@@ -1,30 +1,30 @@
 // render.h
 #pragma once
 
-#include <float.h>
-#include <assert.h>
-
-#include "slang.h"
 #include "slang-com-ptr.h"
+#include "slang.h"
+
+#include <assert.h>
+#include <float.h>
 
 
 #if defined(SLANG_GFX_DYNAMIC)
-#    if defined(_MSC_VER)
-#        ifdef SLANG_GFX_DYNAMIC_EXPORT
-#            define SLANG_GFX_API SLANG_DLL_EXPORT
-#        else
-#            define SLANG_GFX_API __declspec(dllimport)
-#        endif
-#    else
-// TODO: need to consider compiler capabilities
-//#     ifdef SLANG_DYNAMIC_EXPORT
-#        define SLANG_GFX_API SLANG_DLL_EXPORT
-//#     endif
-#    endif
+    #if defined(_MSC_VER)
+        #ifdef SLANG_GFX_DYNAMIC_EXPORT
+            #define SLANG_GFX_API SLANG_DLL_EXPORT
+        #else
+            #define SLANG_GFX_API __declspec(dllimport)
+        #endif
+    #else
+        // TODO: need to consider compiler capabilities
+        // #     ifdef SLANG_DYNAMIC_EXPORT
+        #define SLANG_GFX_API SLANG_DLL_EXPORT
+    // #     endif
+    #endif
 #endif
 
 #ifndef SLANG_GFX_API
-#    define SLANG_GFX_API
+    #define SLANG_GFX_API
 #endif
 
 // Needed for building on cygwin with gcc
@@ -34,10 +34,13 @@
 // GLOBAL TODO: doc comments
 // GLOBAL TODO: Rationalize integer types (not a smush of uint/int/Uint/Int/etc)
 //    - need typedefs in gfx namespace for Count, Index, Size, Offset (ex. DeviceAddress)
-//    - Index and Count are for arrays, and indexing into array - like things(XY coordinates of pixels, etc.)
-//         - Count is also for anything where we need to measure how many of something there are. This includes things like extents.
+//    - Index and Count are for arrays, and indexing into array-like things (XY coordinates of
+//    pixels, etc.)
+//         - Count is also for anything where we need to measure how many of something there are.
+//         This includes things like extents.
 //    - Offset and Size are almost always for bytes and things measured in bytes.
-namespace gfx {
+namespace gfx
+{
 
 using Slang::ComPtr;
 
@@ -56,7 +59,10 @@ const uint64_t kTimeoutInfinite = 0xFFFFFFFFFFFFFFFF;
 
 enum class StructType
 {
-    D3D12DeviceExtendedDesc, D3D12ExperimentalFeaturesDesc, SlangSessionExtendedDesc
+    D3D12DeviceExtendedDesc,
+    D3D12ExperimentalFeaturesDesc,
+    SlangSessionExtendedDesc,
+    RayTracingValidationDesc
 };
 
 // TODO: Rename to Stage
@@ -92,6 +98,7 @@ enum class DeviceType
     Metal,
     CPU,
     CUDA,
+    WebGPU,
     CountOf,
 };
 
@@ -135,26 +142,33 @@ class ITransientResourceHeap;
 
 enum class ShaderModuleSourceType
 {
-    SlangSource, // a slang source string in memory.
-    SlangModuleBinary, // a slang module binary code in memory.
-    SlangSourceFile, // a slang source from file.
+    SlangSource,           // a slang source string in memory.
+    SlangModuleBinary,     // a slang module binary code in memory.
+    SlangSourceFile,       // a slang source from file.
     SlangModuleBinaryFile, // a slang module binary code from file.
 };
 
-class IShaderProgram: public ISlangUnknown
+class IShaderProgram : public ISlangUnknown
 {
 public:
     // Defines how linking should be performed for a shader program.
     enum class LinkingStyle
     {
-        // Compose all entry-points in a single program, then compile all entry-points together with the same
-        // set of root shader arguments.
+        // Compose all entry-points in a single program, then compile all entry-points together with
+        // the same set of root shader arguments.
         SingleProgram,
 
-        // Link and compile each entry-point individually, potentially with different specializations.
+        // Link and compile each entry-point individually, potentially with different
+        // specializations.
         SeparateEntryPointCompilation
     };
 
+    enum class DownstreamLinkMode
+    {
+        None,
+        Deferred,
+    };
+
     struct Desc
     {
         // TODO: Tess doesn't like this but doesn't know what to do about it
@@ -162,7 +176,7 @@ class IShaderProgram: public ISlangUnknown
         LinkingStyle linkingStyle = LinkingStyle::SingleProgram;
 
         // The global scope or a Slang composite component that represents the entire program.
-        slang::IComponentType*  slangGlobalScope;
+        slang::IComponentType* slangGlobalScope;
 
         // Number of separate entry point components in the `slangEntryPoints` array to link in.
         // If set to 0, then `slangGlobalScope` must contain Slang EntryPoint components.
@@ -172,6 +186,9 @@ class IShaderProgram: public ISlangUnknown
         // An array of Slang entry points. The size of the array must be `entryPointCount`.
         // Each element must define only 1 Slang EntryPoint.
         slang::IComponentType** slangEntryPoints = nullptr;
+
+        // Indicates whether the app is responsible for final downstream linking.
+        DownstreamLinkMode downstreamLinkMode = DownstreamLinkMode::None;
     };
 
     struct CreateDesc2
@@ -190,14 +207,18 @@ class IShaderProgram: public ISlangUnknown
 
     virtual SLANG_NO_THROW slang::TypeReflection* SLANG_MCALL findTypeByName(const char* name) = 0;
 };
-#define SLANG_UUID_IShaderProgram                                                       \
-    {                                                                                  \
-        0x9d32d0ad, 0x915c, 0x4ffd, { 0x91, 0xe2, 0x50, 0x85, 0x54, 0xa0, 0x4a, 0x76 } \
+#define SLANG_UUID_IShaderProgram                          \
+    {                                                      \
+        0x9d32d0ad, 0x915c, 0x4ffd,                        \
+        {                                                  \
+            0x91, 0xe2, 0x50, 0x85, 0x54, 0xa0, 0x4a, 0x76 \
+        }                                                  \
     }
 
 // TODO: Confirm with Yong that we really want this naming convention
 // TODO: Rename to what?
 // Dont' change without keeping in sync with Format
+// clang-format off
 #define GFX_FORMAT(x) \
     x( Unknown, 0, 0) \
     \
@@ -303,17 +324,20 @@ class IShaderProgram: public ISlangUnknown
     \
     x(R64_UINT, 8, 1) \
     \
-    x(R64_SINT, 8, 1) \
-    \
+    x(R64_SINT, 8, 1)
+// clang-format on
+
 // TODO: This should be generated from above
 // TODO: enum class should be explicitly uint32_t or whatever's appropriate
 /// Different formats of things like pixels or elements of vertices
-/// NOTE! Any change to this type (adding, removing, changing order) - must also be reflected in changes GFX_FORMAT
+/// NOTE! Any change to this type (adding, removing, changing order) - must also be reflected in
+/// changes to GFX_FORMAT
 enum class Format
 {
     // D3D formats omitted: 19-22, 44-47, 65-66, 68-70, 73, 76, 79, 82, 88-89, 92-94, 97, 100-114
-    // These formats are omitted due to lack of a corresponding Vulkan format. D24_UNORM_S8_UINT (DXGI_FORMAT 45)
-    // has a matching Vulkan format but is also omitted as it is only supported by Nvidia.
+    // These formats are omitted due to lack of a corresponding Vulkan format. D24_UNORM_S8_UINT
+    // (DXGI_FORMAT 45) has a matching Vulkan format but is also omitted as it is only supported by
+    // Nvidia.
     Unknown,
 
     R32G32B32A32_TYPELESS,
@@ -417,7 +441,7 @@ enum class Format
     BC7_UNORM_SRGB,
 
     R64_UINT,
-    
+
     R64_SINT,
 
     _Count,
@@ -430,27 +454,32 @@ enum class Format
 // TODO: Width/Height/Depth/whatever should not be used. We should use extentX, extentY, etc.
 struct FormatInfo
 {
-    GfxCount channelCount;         ///< The amount of channels in the format. Only set if the channelType is set
-    uint8_t channelType;           ///< One of SlangScalarType None if type isn't made up of elements of type. TODO: Change to uint32_t?
+    GfxCount
+        channelCount; ///< The amount of channels in the format. Only set if the channelType is set
+    uint8_t channelType; ///< One of SlangScalarType None if type isn't made up of elements of type.
+                         ///< TODO: Change to uint32_t?
 
-    Size blockSizeInBytes;         ///< The size of a block in bytes.
-    GfxCount pixelsPerBlock;       ///< The number of pixels contained in a block.
-    GfxCount blockWidth;           ///< The width of a block in pixels.
-    GfxCount blockHeight;          ///< The height of a block in pixels.
+    Size blockSizeInBytes;   ///< The size of a block in bytes.
+    GfxCount pixelsPerBlock; ///< The number of pixels contained in a block.
+    GfxCount blockWidth;     ///< The width of a block in pixels.
+    GfxCount blockHeight;    ///< The height of a block in pixels.
 };
 
 enum class InputSlotClass
 {
-    PerVertex, PerInstance
+    PerVertex,
+    PerInstance
 };
 
 struct InputElementDesc
 {
-    char const* semanticName;      ///< The name of the corresponding parameter in shader code.
-    GfxIndex semanticIndex;        ///< The index of the corresponding parameter in shader code. Only needed if multiple parameters share a semantic name.
-    Format format;                 ///< The format of the data being fetched for this element.
-    Offset offset;                 ///< The offset in bytes of this element from the start of the corresponding chunk of vertex stream data.
-    GfxIndex bufferSlotIndex;      ///< The index of the vertex stream to fetch this element's data from.
+    char const* semanticName; ///< The name of the corresponding parameter in shader code.
+    GfxIndex semanticIndex;   ///< The index of the corresponding parameter in shader code. Only
+                              ///< needed if multiple parameters share a semantic name.
+    Format format;            ///< The format of the data being fetched for this element.
+    Offset offset; ///< The offset in bytes of this element from the start of the corresponding
+                   ///< chunk of vertex stream data.
+    GfxIndex bufferSlotIndex; ///< The index of the vertex stream to fetch this element's data from.
 };
 
 struct VertexStreamDesc
@@ -462,12 +491,19 @@ struct VertexStreamDesc
 
 enum class PrimitiveType
 {
-    Point, Line, Triangle, Patch
+    Point,
+    Line,
+    Triangle,
+    Patch
 };
 
 enum class PrimitiveTopology
 {
-    TriangleList, TriangleStrip, PointList, LineList, LineStrip
+    TriangleList,
+    TriangleStrip,
+    PointList,
+    LineList,
+    LineStrip
 };
 
 enum class ResourceState
@@ -501,18 +537,24 @@ struct ResourceStateSet
 {
 public:
     void add(ResourceState state) { m_bitFields |= (1LL << (uint32_t)state); }
-    template <typename... TResourceState> void add(ResourceState s, TResourceState... states)
+    template<typename... TResourceState>
+    void add(ResourceState s, TResourceState... states)
     {
         add(s);
         add(states...);
     }
-    bool contains(ResourceState state) const { return (m_bitFields & (1LL << (uint32_t)state)) != 0; }
+    bool contains(ResourceState state) const
+    {
+        return (m_bitFields & (1LL << (uint32_t)state)) != 0;
+    }
     ResourceStateSet()
         : m_bitFields(0)
-    {}
+    {
+    }
     ResourceStateSet(const ResourceStateSet& other) = default;
     ResourceStateSet(ResourceState state) { add(state); }
-    template <typename... TResourceState> ResourceStateSet(TResourceState... states)
+    template<typename... TResourceState>
+    ResourceStateSet(TResourceState... states)
     {
         add(states...);
     }
@@ -541,14 +583,14 @@ enum class MemoryType
 enum class InteropHandleAPI
 {
     Unknown,
-    D3D12, // A D3D12 object pointer.
-    Vulkan, // A general Vulkan object handle.
-    CUDA, // A general CUDA object handle.
-    Win32, // A general Win32 HANDLE.
-    FileDescriptor, // A file descriptor.
-    DeviceAddress, // A device address.
+    D3D12,                    // A D3D12 object pointer.
+    Vulkan,                   // A general Vulkan object handle.
+    CUDA,                     // A general CUDA object handle.
+    Win32,                    // A general Win32 HANDLE.
+    FileDescriptor,           // A file descriptor.
+    DeviceAddress,            // A device address.
     D3D12CpuDescriptorHandle, // A D3D12_CPU_DESCRIPTOR_HANDLE value.
-    Metal, // A general Metal object handle.
+    Metal,                    // A general Metal object handle.
 };
 
 struct InteropHandle
@@ -569,28 +611,32 @@ class IInputLayout : public ISlangUnknown
         GfxCount vertexStreamCount = 0;
     };
 };
-#define SLANG_UUID_IInputLayout                                                         \
-    {                                                                                  \
-        0x45223711, 0xa84b, 0x455c, { 0xbe, 0xfa, 0x49, 0x37, 0x42, 0x1e, 0x8e, 0x2e } \
+#define SLANG_UUID_IInputLayout                            \
+    {                                                      \
+        0x45223711, 0xa84b, 0x455c,                        \
+        {                                                  \
+            0xbe, 0xfa, 0x49, 0x37, 0x42, 0x1e, 0x8e, 0x2e \
+        }                                                  \
     }
 
-class IResource: public ISlangUnknown
+class IResource : public ISlangUnknown
 {
 public:
-        /// The type of resource.
-        /// NOTE! The order needs to be such that all texture types are at or after Texture1D (otherwise isTexture won't work correctly)
+    /// The type of resource.
+    /// NOTE! The order needs to be such that all texture types are at or after Texture1D (otherwise
+    /// isTexture won't work correctly)
     enum class Type
     {
-        Unknown,            ///< Unknown
-        Buffer,             ///< A buffer (like a constant/index/vertex buffer)
-        Texture1D,          ///< A 1d texture
-        Texture2D,          ///< A 2d texture
-        Texture3D,          ///< A 3d texture
-        TextureCube,        ///< A cubemap consists of 6 Texture2D like faces
+        Unknown,     ///< Unknown
+        Buffer,      ///< A buffer (like a constant/index/vertex buffer)
+        Texture1D,   ///< A 1d texture
+        Texture2D,   ///< A 2d texture
+        Texture3D,   ///< A 3d texture
+        TextureCube, ///< A cubemap consists of 6 Texture2D like faces
         _Count,
     };
 
-        /// Base class for Descs
+    /// Base class for Descs
     struct DescBase
     {
         Type type = Type::Unknown;
@@ -607,11 +653,13 @@ class IResource: public ISlangUnknown
 
     virtual SLANG_NO_THROW Result SLANG_MCALL setDebugName(const char* name) = 0;
     virtual SLANG_NO_THROW const char* SLANG_MCALL getDebugName() = 0;
-
 };
-#define SLANG_UUID_IResource                                                           \
-    {                                                                                  \
-        0xa0e39f34, 0x8398, 0x4522, { 0x95, 0xc2, 0xeb, 0xc0, 0xf9, 0x84, 0xef, 0x3f } \
+#define SLANG_UUID_IResource                               \
+    {                                                      \
+        0xa0e39f34, 0x8398, 0x4522,                        \
+        {                                                  \
+            0x95, 0xc2, 0xeb, 0xc0, 0xf9, 0x84, 0xef, 0x3f \
+        }                                                  \
     }
 
 struct MemoryRange
@@ -621,13 +669,13 @@ struct MemoryRange
     uint64_t size;
 };
 
-class IBufferResource: public IResource
+class IBufferResource : public IResource
 {
 public:
-    struct Desc: public DescBase
+    struct Desc : public DescBase
     {
-        Size sizeInBytes = 0;        ///< Total size in bytes
-        Size elementSize = 0;        ///< Get the element stride. If > 0, this is a structured buffer
+        Size sizeInBytes = 0; ///< Total size in bytes
+        Size elementSize = 0; ///< Get the element stride. If > 0, this is a structured buffer
         Format format = Format::Unknown;
     };
 
@@ -636,9 +684,12 @@ class IBufferResource: public IResource
     virtual SLANG_NO_THROW Result SLANG_MCALL map(MemoryRange* rangeToRead, void** outPointer) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL unmap(MemoryRange* writtenRange) = 0;
 };
-#define SLANG_UUID_IBufferResource                                                     \
-    {                                                                                  \
-        0x1b274efe, 0x5e37, 0x492b, { 0x82, 0x6e, 0x7e, 0xe7, 0xe8, 0xf5, 0xa4, 0x9b } \
+#define SLANG_UUID_IBufferResource                         \
+    {                                                      \
+        0x1b274efe, 0x5e37, 0x492b,                        \
+        {                                                  \
+            0x82, 0x6e, 0x7e, 0xe7, 0xe8, 0xf5, 0xa4, 0x9b \
+        }                                                  \
     }
 
 struct DepthStencilClearValue
@@ -659,8 +710,8 @@ struct ClearValue
 
 struct BufferRange
 {
-    Offset offset;  ///< Offset in bytes.
-    Size size;      ///< Size in bytes.
+    Offset offset; ///< Offset in bytes.
+    Size size;     ///< Size in bytes.
 };
 
 enum class TextureAspect : uint32_t
@@ -683,10 +734,10 @@ struct SubresourceRange
     GfxIndex mipLevel;
     GfxCount mipLevelCount;
     GfxIndex baseArrayLayer; // For Texture3D, this is WSlice.
-    GfxCount layerCount; // For cube maps, this is a multiple of 6.
+    GfxCount layerCount;     // For cube maps, this is a multiple of 6.
 };
 
-class ITextureResource: public IResource
+class ITextureResource : public IResource
 {
 public:
     static const GfxCount kRemainingTextureSize = 0xffffffff;
@@ -696,97 +747,103 @@ class ITextureResource: public IResource
         GfxIndex y = 0;
         GfxIndex z = 0;
         Offset3D() = default;
-        Offset3D(GfxIndex _x, GfxIndex _y, GfxIndex _z) :x(_x), y(_y), z(_z) {}
+        Offset3D(GfxIndex _x, GfxIndex _y, GfxIndex _z)
+            : x(_x), y(_y), z(_z)
+        {
+        }
     };
 
     struct SampleDesc
     {
-        GfxCount numSamples = 1;                ///< Number of samples per pixel
-        int quality = 0;                        ///< The quality measure for the samples
+        GfxCount numSamples = 1; ///< Number of samples per pixel
+        int quality = 0;         ///< The quality measure for the samples
     };
 
     struct Extents
     {
-        GfxCount width = 0;              ///< Width in pixels
-        GfxCount height = 0;             ///< Height in pixels (if 2d or 3d)
-        GfxCount depth = 0;              ///< Depth (if 3d)
+        GfxCount width = 0;  ///< Width in pixels
+        GfxCount height = 0; ///< Height in pixels (if 2d or 3d)
+        GfxCount depth = 0;  ///< Depth (if 3d)
     };
 
-    struct Desc: public DescBase
+    struct Desc : public DescBase
     {
         Extents size;
 
-        GfxCount arraySize = 0;          ///< Array size
+        GfxCount arraySize = 0; ///< Array size
 
-        GfxCount numMipLevels = 0;       ///< Number of mip levels - if 0 will create all mip levels
-        Format format;                   ///< The resources format
-        SampleDesc sampleDesc;           ///< How the resource is sampled
+        GfxCount numMipLevels = 0; ///< Number of mip levels - if 0 will create all mip levels
+        Format format;             ///< The resources format
+        SampleDesc sampleDesc;     ///< How the resource is sampled
         ClearValue* optimalClearValue = nullptr;
     };
 
-        /// Data for a single subresource of a texture.
-        ///
-        /// Each subresource is a tensor with `1 <= rank <= 3`,
-        /// where the rank is deterined by the base shape of the
-        /// texture (Buffer, 1D, 2D, 3D, or Cube). For the common
-        /// case of a 2D texture, `rank == 2` and each subresource
-        /// is a 2D image.
-        ///
-        /// Subresource tensors must be stored in a row-major layout,
-        /// so that the X axis strides over texels, the Y axis strides
-        /// over 1D rows of texels, and the Z axis strides over 2D
-        /// "layers" of texels.
-        ///
-        /// For a texture with multiple mip levels or array elements,
-        /// each mip level and array element is stores as a distinct
-        /// subresource. When indexing into an array of subresources,
-        /// the index of a subresoruce for mip level `m` and array
-        /// index `a` is `m + a*mipLevelCount`.
-        ///
+    /// Data for a single subresource of a texture.
+    ///
+    /// Each subresource is a tensor with `1 <= rank <= 3`,
+    /// where the rank is determined by the base shape of the
+    /// texture (Buffer, 1D, 2D, 3D, or Cube). For the common
+    /// case of a 2D texture, `rank == 2` and each subresource
+    /// is a 2D image.
+    ///
+    /// Subresource tensors must be stored in a row-major layout,
+    /// so that the X axis strides over texels, the Y axis strides
+    /// over 1D rows of texels, and the Z axis strides over 2D
+    /// "layers" of texels.
+    ///
+    /// For a texture with multiple mip levels or array elements,
+    /// each mip level and array element is stored as a distinct
+    /// subresource. When indexing into an array of subresources,
+    /// the index of a subresource for mip level `m` and array
+    /// index `a` is `m + a*mipLevelCount`.
+    ///
     struct SubresourceData
     {
-            /// Pointer to texel data for the subresource tensor.
+        /// Pointer to texel data for the subresource tensor.
         void const* data;
 
-            /// Stride in bytes between rows of the subresource tensor.
-            ///
-            /// This is the number of bytes to add to a pointer to a texel
-            /// at (X,Y,Z) to get to a texel at (X,Y+1,Z).
-            ///
-            /// Devices may not support all possible values for `strideY`.
-            /// In particular, they may only support strictly positive strides.
-            ///
+        /// Stride in bytes between rows of the subresource tensor.
+        ///
+        /// This is the number of bytes to add to a pointer to a texel
+        /// at (X,Y,Z) to get to a texel at (X,Y+1,Z).
+        ///
+        /// Devices may not support all possible values for `strideY`.
+        /// In particular, they may only support strictly positive strides.
+        ///
         gfx::Size strideY;
 
-            /// Stride in bytes between layers of the subresource tensor.
-            ///
-            /// This is the number of bytes to add to a pointer to a texel
-            /// at (X,Y,Z) to get to a texel at (X,Y,Z+1).
-            ///
-            /// Devices may not support all possible values for `strideZ`.
-            /// In particular, they may only support strictly positive strides.
-            ///
+        /// Stride in bytes between layers of the subresource tensor.
+        ///
+        /// This is the number of bytes to add to a pointer to a texel
+        /// at (X,Y,Z) to get to a texel at (X,Y,Z+1).
+        ///
+        /// Devices may not support all possible values for `strideZ`.
+        /// In particular, they may only support strictly positive strides.
+        ///
         gfx::Size strideZ;
     };
 
     virtual SLANG_NO_THROW Desc* SLANG_MCALL getDesc() = 0;
 };
-#define SLANG_UUID_ITextureResource                                                    \
-    {                                                                                  \
-        0xcf88a31c, 0x6187, 0x46c5, { 0xa4, 0xb7, 0xeb, 0x58, 0xc7, 0x33, 0x40, 0x17 } \
+#define SLANG_UUID_ITextureResource                        \
+    {                                                      \
+        0xcf88a31c, 0x6187, 0x46c5,                        \
+        {                                                  \
+            0xa4, 0xb7, 0xeb, 0x58, 0xc7, 0x33, 0x40, 0x17 \
+        }                                                  \
     }
 
 
 enum class ComparisonFunc : uint8_t
 {
-    Never           = 0x0,
-    Less            = 0x1,
-    Equal           = 0x2,
-    LessEqual       = 0x3,
-    Greater         = 0x4,
-    NotEqual        = 0x5,
-    GreaterEqual    = 0x6,
-    Always          = 0x7,
+    Never = 0x0,
+    Less = 0x1,
+    Equal = 0x2,
+    LessEqual = 0x3,
+    Greater = 0x4,
+    NotEqual = 0x5,
+    GreaterEqual = 0x6,
+    Always = 0x7,
 };
 
 enum class TextureFilteringMode
@@ -817,19 +874,19 @@ class ISamplerState : public ISlangUnknown
 public:
     struct Desc
     {
-        TextureFilteringMode    minFilter       = TextureFilteringMode::Linear;
-        TextureFilteringMode    magFilter       = TextureFilteringMode::Linear;
-        TextureFilteringMode    mipFilter       = TextureFilteringMode::Linear;
-        TextureReductionOp      reductionOp     = TextureReductionOp::Average;
-        TextureAddressingMode   addressU        = TextureAddressingMode::Wrap;
-        TextureAddressingMode   addressV        = TextureAddressingMode::Wrap;
-        TextureAddressingMode   addressW        = TextureAddressingMode::Wrap;
-        float                   mipLODBias      = 0.0f;
-        uint32_t                maxAnisotropy   = 1;
-        ComparisonFunc          comparisonFunc  = ComparisonFunc::Never;
-        float                   borderColor[4]  = { 1.0f, 1.0f, 1.0f, 1.0f };
-        float                   minLOD          = -FLT_MAX;
-        float                   maxLOD          = FLT_MAX;
+        TextureFilteringMode minFilter = TextureFilteringMode::Linear;
+        TextureFilteringMode magFilter = TextureFilteringMode::Linear;
+        TextureFilteringMode mipFilter = TextureFilteringMode::Linear;
+        TextureReductionOp reductionOp = TextureReductionOp::Average;
+        TextureAddressingMode addressU = TextureAddressingMode::Wrap;
+        TextureAddressingMode addressV = TextureAddressingMode::Wrap;
+        TextureAddressingMode addressW = TextureAddressingMode::Wrap;
+        float mipLODBias = 0.0f;
+        uint32_t maxAnisotropy = 1;
+        ComparisonFunc comparisonFunc = ComparisonFunc::Never;
+        float borderColor[4] = {1.0f, 1.0f, 1.0f, 1.0f};
+        float minLOD = -FLT_MAX;
+        float maxLOD = FLT_MAX;
     };
 
     /// Returns a native API handle representing this sampler state object.
@@ -837,9 +894,12 @@ class ISamplerState : public ISlangUnknown
     /// When using Vulkan, this will be a VkSampler.
     virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outNativeHandle) = 0;
 };
-#define SLANG_UUID_ISamplerState                                                        \
-    {                                                                                  \
-        0x8b8055df, 0x9377, 0x401d, { 0x91, 0xff, 0x3f, 0xa3, 0xbf, 0x66, 0x64, 0xf4 } \
+#define SLANG_UUID_ISamplerState                           \
+    {                                                      \
+        0x8b8055df, 0x9377, 0x401d,                        \
+        {                                                  \
+            0x91, 0xff, 0x3f, 0xa3, 0xbf, 0x66, 0x64, 0xf4 \
+        }                                                  \
     }
 
 class IResourceView : public ISlangUnknown
@@ -866,12 +926,13 @@ class IResourceView : public ISlangUnknown
 
     struct Desc
     {
-        Type    type;
-        Format  format;
+        Type type;
+        Format format;
 
         // Required fields for `RenderTarget` and `DepthStencil` views.
         RenderTargetDesc renderTarget;
-        // Specifies the range of a texture resource for a ShaderRsource/UnorderedAccess/RenderTarget/DepthStencil view.
+        // Specifies the range of a texture resource for a
+        // ShaderResource/UnorderedAccess/RenderTarget/DepthStencil view.
         SubresourceRange subresourceRange;
         // Specifies the range of a buffer resource for a ShaderResource/UnorderedAccess view.
         BufferRange bufferRange;
@@ -879,15 +940,18 @@ class IResourceView : public ISlangUnknown
     virtual SLANG_NO_THROW Desc* SLANG_MCALL getViewDesc() = 0;
 
     /// Returns a native API handle representing this resource view object.
-    /// When using D3D12, this will be a D3D12_CPU_DESCRIPTOR_HANDLE or a buffer device address depending
-    /// on the type of the resource view.
-    /// When using Vulkan, this will be a VkImageView, VkBufferView, VkAccelerationStructure or a VkBuffer
-    /// depending on the type of the resource view.
+    /// When using D3D12, this will be a D3D12_CPU_DESCRIPTOR_HANDLE or a buffer device address
+    /// depending on the type of the resource view. When using Vulkan, this will be a VkImageView,
+    /// VkBufferView, VkAccelerationStructure or a VkBuffer depending on the type of the resource
+    /// view.
     virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outNativeHandle) = 0;
 };
-#define SLANG_UUID_IResourceView                                                      \
-    {                                                                                 \
-        0x7b6c4926, 0x884, 0x408c, { 0xad, 0x8a, 0x50, 0x3a, 0x8e, 0x23, 0x98, 0xa4 } \
+#define SLANG_UUID_IResourceView                           \
+    {                                                      \
+        0x7b6c4926, 0x884, 0x408c,                         \
+        {                                                  \
+            0xad, 0x8a, 0x50, 0x3a, 0x8e, 0x23, 0x98, 0xa4 \
+        }                                                  \
     }
 
 class IAccelerationStructure : public IResourceView
@@ -917,7 +981,8 @@ class IAccelerationStructure : public IResourceView
 
     enum class GeometryType
     {
-        Triangles, ProcedurePrimitives
+        Triangles,
+        ProcedurePrimitives
     };
 
     struct GeometryFlags
@@ -1046,9 +1111,12 @@ class IAccelerationStructure : public IResourceView
 
     virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() = 0;
 };
-#define SLANG_UUID_IAccelerationStructure                                             \
-    {                                                                                 \
-        0xa5cdda3c, 0x1d4e, 0x4df7, { 0x8e, 0xf2, 0xb7, 0x3f, 0xce, 0x4, 0xde, 0x3b } \
+#define SLANG_UUID_IAccelerationStructure                 \
+    {                                                     \
+        0xa5cdda3c, 0x1d4e, 0x4df7,                       \
+        {                                                 \
+            0x8e, 0xf2, 0xb7, 0x3f, 0xce, 0x4, 0xde, 0x3b \
+        }                                                 \
     }
 
 class IFence : public ISlangUnknown
@@ -1069,9 +1137,12 @@ class IFence : public ISlangUnknown
     virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outNativeHandle) = 0;
 };
-#define SLANG_UUID_IFence                                                             \
-    {                                                                                 \
-        0x7fe1c283, 0xd3f4, 0x48ed, { 0xaa, 0xf3, 0x1, 0x51, 0x96, 0x4e, 0x7c, 0xb5 } \
+#define SLANG_UUID_IFence                                 \
+    {                                                     \
+        0x7fe1c283, 0xd3f4, 0x48ed,                       \
+        {                                                 \
+            0xaa, 0xf3, 0x1, 0x51, 0x96, 0x4e, 0x7c, 0xb5 \
+        }                                                 \
     }
 
 struct ShaderOffset
@@ -1085,14 +1156,11 @@ struct ShaderOffset
     }
     bool operator==(const ShaderOffset& other) const
     {
-        return uniformOffset == other.uniformOffset
-            && bindingRangeIndex == other.bindingRangeIndex
-            && bindingArrayIndex == other.bindingArrayIndex;
-    }
-    bool operator!=(const ShaderOffset& other) const
-    {
-        return !this->operator==(other);
+        return uniformOffset == other.uniformOffset &&
+               bindingRangeIndex == other.bindingRangeIndex &&
+               bindingArrayIndex == other.bindingArrayIndex;
     }
+    bool operator!=(const ShaderOffset& other) const { return !this->operator==(other); }
     bool operator<(const ShaderOffset& other) const
     {
         if (bindingRangeIndex < other.bindingRangeIndex)
@@ -1112,7 +1180,9 @@ struct ShaderOffset
 
 enum class ShaderObjectContainerType
 {
-    None, Array, StructuredBuffer
+    None,
+    Array,
+    StructuredBuffer
 };
 
 class IShaderObject : public ISlangUnknown
@@ -1122,38 +1192,40 @@ class IShaderObject : public ISlangUnknown
     virtual SLANG_NO_THROW ShaderObjectContainerType SLANG_MCALL getContainerType() = 0;
     virtual SLANG_NO_THROW GfxCount SLANG_MCALL getEntryPointCount() = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        getEntryPoint(GfxIndex index, IShaderObject** entryPoint) = 0;
+    getEntryPoint(GfxIndex index, IShaderObject** entryPoint) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        setData(ShaderOffset const& offset, void const* data, Size size) = 0;
+    setData(ShaderOffset const& offset, void const* data, Size size) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        getObject(ShaderOffset const& offset, IShaderObject** object) = 0;
+    getObject(ShaderOffset const& offset, IShaderObject** object) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        setObject(ShaderOffset const& offset, IShaderObject* object) = 0;
+    setObject(ShaderOffset const& offset, IShaderObject* object) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        setResource(ShaderOffset const& offset, IResourceView* resourceView) = 0;
+    setResource(ShaderOffset const& offset, IResourceView* resourceView) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        setSampler(ShaderOffset const& offset, ISamplerState* sampler) = 0;
+    setSampler(ShaderOffset const& offset, ISamplerState* sampler) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL setCombinedTextureSampler(
-        ShaderOffset const& offset, IResourceView* textureView, ISamplerState* sampler) = 0;
+        ShaderOffset const& offset,
+        IResourceView* textureView,
+        ISamplerState* sampler) = 0;
 
-        /// Manually overrides the specialization argument for the sub-object binding at `offset`.
-        /// Specialization arguments are passed to the shader compiler to specialize the type
-        /// of interface-typed shader parameters.
+    /// Manually overrides the specialization argument for the sub-object binding at `offset`.
+    /// Specialization arguments are passed to the shader compiler to specialize the type
+    /// of interface-typed shader parameters.
     virtual SLANG_NO_THROW Result SLANG_MCALL setSpecializationArgs(
         ShaderOffset const& offset,
         const slang::SpecializationArg* args,
         GfxCount count) = 0;
 
-    virtual SLANG_NO_THROW Result SLANG_MCALL getCurrentVersion(
-        ITransientResourceHeap* transientHeap,
-        IShaderObject** outObject) = 0;
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    getCurrentVersion(ITransientResourceHeap* transientHeap, IShaderObject** outObject) = 0;
 
     virtual SLANG_NO_THROW const void* SLANG_MCALL getRawData() = 0;
 
     virtual SLANG_NO_THROW Size SLANG_MCALL getSize() = 0;
 
-        /// Use the provided constant buffer instead of the internally created one.
-    virtual SLANG_NO_THROW Result SLANG_MCALL setConstantBufferOverride(IBufferResource* constantBuffer) = 0;
+    /// Use the provided constant buffer instead of the internally created one.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    setConstantBufferOverride(IBufferResource* constantBuffer) = 0;
 
 
     inline ComPtr<IShaderObject> getObject(ShaderOffset const& offset)
@@ -1169,9 +1241,12 @@ class IShaderObject : public ISlangUnknown
         return entryPoint;
     }
 };
-#define SLANG_UUID_IShaderObject                                                       \
-    {                                                                                 \
-        0xc1fa997e, 0x5ca2, 0x45ae, { 0x9b, 0xcb, 0xc4, 0x35, 0x9e, 0x85, 0x5, 0x85 } \
+#define SLANG_UUID_IShaderObject                          \
+    {                                                     \
+        0xc1fa997e, 0x5ca2, 0x45ae,                       \
+        {                                                 \
+            0x9b, 0xcb, 0xc4, 0x35, 0x9e, 0x85, 0x5, 0x85 \
+        }                                                 \
     }
 
 enum class StencilOp : uint8_t
@@ -1207,23 +1282,23 @@ enum class FrontFaceMode : uint8_t
 
 struct DepthStencilOpDesc
 {
-    StencilOp       stencilFailOp       = StencilOp::Keep;
-    StencilOp       stencilDepthFailOp  = StencilOp::Keep;
-    StencilOp       stencilPassOp       = StencilOp::Keep;
-    ComparisonFunc  stencilFunc         = ComparisonFunc::Always;
+    StencilOp stencilFailOp = StencilOp::Keep;
+    StencilOp stencilDepthFailOp = StencilOp::Keep;
+    StencilOp stencilPassOp = StencilOp::Keep;
+    ComparisonFunc stencilFunc = ComparisonFunc::Always;
 };
 
 struct DepthStencilDesc
 {
-    bool            depthTestEnable     = false;
-    bool            depthWriteEnable    = true;
-    ComparisonFunc  depthFunc           = ComparisonFunc::Less;
+    bool depthTestEnable = false;
+    bool depthWriteEnable = true;
+    ComparisonFunc depthFunc = ComparisonFunc::Less;
 
-    bool                stencilEnable       = false;
-    uint32_t            stencilReadMask     = 0xFFFFFFFF;
-    uint32_t            stencilWriteMask    = 0xFFFFFFFF;
-    DepthStencilOpDesc  frontFace;
-    DepthStencilOpDesc  backFace;
+    bool stencilEnable = false;
+    uint32_t stencilReadMask = 0xFFFFFFFF;
+    uint32_t stencilWriteMask = 0xFFFFFFFF;
+    DepthStencilOpDesc frontFace;
+    DepthStencilOpDesc backFace;
 
     uint32_t stencilRef = 0; // TODO: this should be removed
 };
@@ -1281,24 +1356,24 @@ enum class BlendFactor
 
 namespace RenderTargetWriteMask
 {
-    typedef uint8_t Type;
-    enum
-    {
-        EnableNone  = 0,
-        EnableRed   = 0x01,
-        EnableGreen = 0x02,
-        EnableBlue  = 0x04,
-        EnableAlpha = 0x08,
-        EnableAll   = 0x0F,
-    };
+typedef uint8_t Type;
+enum
+{
+    EnableNone = 0,
+    EnableRed = 0x01,
+    EnableGreen = 0x02,
+    EnableBlue = 0x04,
+    EnableAlpha = 0x08,
+    EnableAll = 0x0F,
 };
+}; // namespace RenderTargetWriteMask
 typedef RenderTargetWriteMask::Type RenderTargetWriteMaskT;
 
 struct AspectBlendDesc
 {
-    BlendFactor     srcFactor   = BlendFactor::One;
-    BlendFactor     dstFactor   = BlendFactor::Zero;
-    BlendOp         op          = BlendOp::Add;
+    BlendFactor srcFactor = BlendFactor::One;
+    BlendFactor dstFactor = BlendFactor::Zero;
+    BlendOp op = BlendOp::Add;
 };
 
 struct TargetBlendDesc
@@ -1306,16 +1381,16 @@ struct TargetBlendDesc
     AspectBlendDesc color;
     AspectBlendDesc alpha;
     bool enableBlend = false;
-    LogicOp                 logicOp     = LogicOp::NoOp;
-    RenderTargetWriteMaskT  writeMask   = RenderTargetWriteMask::EnableAll;
+    LogicOp logicOp = LogicOp::NoOp;
+    RenderTargetWriteMaskT writeMask = RenderTargetWriteMask::EnableAll;
 };
 
 struct BlendDesc
 {
-    TargetBlendDesc         targets[kMaxRenderTargetCount];
-    GfxCount                targetCount = 0;
+    TargetBlendDesc targets[kMaxRenderTargetCount];
+    GfxCount targetCount = 0;
 
-    bool alphaToCoverageEnable  = false;
+    bool alphaToCoverageEnable = false;
 };
 
 class IFramebufferLayout : public ISlangUnknown
@@ -1333,26 +1408,29 @@ class IFramebufferLayout : public ISlangUnknown
         TargetLayout* depthStencil = nullptr;
     };
 };
-#define SLANG_UUID_IFramebufferLayout                                                \
-    {                                                                                \
-        0xa838785, 0xc13a, 0x4832, { 0xad, 0x88, 0x64, 0x6, 0xb5, 0x4b, 0x5e, 0xba } \
+#define SLANG_UUID_IFramebufferLayout                     \
+    {                                                     \
+        0xa838785, 0xc13a, 0x4832,                        \
+        {                                                 \
+            0xad, 0x88, 0x64, 0x6, 0xb5, 0x4b, 0x5e, 0xba \
+        }                                                 \
     }
 
 struct GraphicsPipelineStateDesc
 {
-    IShaderProgram*      program = nullptr;
+    IShaderProgram* program = nullptr;
 
-    IInputLayout*       inputLayout = nullptr;
+    IInputLayout* inputLayout = nullptr;
     IFramebufferLayout* framebufferLayout = nullptr;
-    PrimitiveType       primitiveType = PrimitiveType::Triangle;
-    DepthStencilDesc    depthStencil;
-    RasterizerDesc      rasterizer;
-    BlendDesc           blend;
+    PrimitiveType primitiveType = PrimitiveType::Triangle;
+    DepthStencilDesc depthStencil;
+    RasterizerDesc rasterizer;
+    BlendDesc blend;
 };
 
 struct ComputePipelineStateDesc
 {
-    IShaderProgram*  program = nullptr;
+    IShaderProgram* program = nullptr;
     void* d3d12RootSignatureOverride = nullptr;
 };
 
@@ -1391,8 +1469,8 @@ class IShaderTable : public ISlangUnknown
     // Specifies the bytes to overwrite into a record in the shader table.
     struct ShaderRecordOverwrite
     {
-        Offset offset; // Offset within the shader record.
-        Size size; // Number of bytes to overwrite.
+        Offset offset;   // Offset within the shader record.
+        Size size;       // Number of bytes to overwrite.
         uint8_t data[8]; // Content to overwrite.
     };
 
@@ -1417,9 +1495,12 @@ class IShaderTable : public ISlangUnknown
         IShaderProgram* program;
     };
 };
-#define SLANG_UUID_IShaderTable                                                        \
-    {                                                                                  \
-        0xa721522c, 0xdf31, 0x4c2f, { 0xa5, 0xe7, 0x3b, 0xe0, 0x12, 0x4b, 0x31, 0x78 } \
+#define SLANG_UUID_IShaderTable                            \
+    {                                                      \
+        0xa721522c, 0xdf31, 0x4c2f,                        \
+        {                                                  \
+            0xa5, 0xe7, 0x3b, 0xe0, 0x12, 0x4b, 0x31, 0x78 \
+        }                                                  \
     }
 
 class IPipelineState : public ISlangUnknown
@@ -1427,9 +1508,12 @@ class IPipelineState : public ISlangUnknown
 public:
     virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) = 0;
 };
-#define SLANG_UUID_IPipelineState                                                      \
-    {                                                                                 \
-        0xca7e57d, 0x8a90, 0x44f3, { 0xbd, 0xb1, 0xfe, 0x9b, 0x35, 0x3f, 0x5a, 0x72 } \
+#define SLANG_UUID_IPipelineState                          \
+    {                                                      \
+        0xca7e57d, 0x8a90, 0x44f3,                         \
+        {                                                  \
+            0xbd, 0xb1, 0xfe, 0x9b, 0x35, 0x3f, 0x5a, 0x72 \
+        }                                                  \
     }
 
 
@@ -1447,8 +1531,8 @@ struct Viewport
     float originY = 0.0f;
     float extentX = 0.0f;
     float extentY = 0.0f;
-    float minZ    = 0.0f;
-    float maxZ    = 1.0f;
+    float minZ = 0.0f;
+    float maxZ = 1.0f;
 };
 
 class IFramebuffer : public ISlangUnknown
@@ -1462,9 +1546,12 @@ class IFramebuffer : public ISlangUnknown
         IFramebufferLayout* layout;
     };
 };
-#define SLANG_UUID_IFrameBuffer                                                       \
-    {                                                                                 \
-        0xf0c0d9a, 0x4ef3, 0x4e18, { 0x9b, 0xa9, 0x34, 0x60, 0xea, 0x69, 0x87, 0x95 } \
+#define SLANG_UUID_IFrameBuffer                            \
+    {                                                      \
+        0xf0c0d9a, 0x4ef3, 0x4e18,                         \
+        {                                                  \
+            0x9b, 0xa9, 0x34, 0x60, 0xea, 0x69, 0x87, 0x95 \
+        }                                                  \
     }
 
 struct WindowHandle
@@ -1506,7 +1593,8 @@ struct FaceMask
 {
     enum Enum
     {
-        Front = 1, Back = 2
+        Front = 1,
+        Back = 2
     };
 };
 
@@ -1515,11 +1603,14 @@ class IRenderPassLayout : public ISlangUnknown
 public:
     enum class TargetLoadOp
     {
-        Load, Clear, DontCare
+        Load,
+        Clear,
+        DontCare
     };
     enum class TargetStoreOp
     {
-        Store, DontCare
+        Store,
+        DontCare
     };
     struct TargetAccessDesc
     {
@@ -1538,9 +1629,12 @@ class IRenderPassLayout : public ISlangUnknown
         TargetAccessDesc* depthStencilAccess = nullptr;
     };
 };
-#define SLANG_UUID_IRenderPassLayout                                                   \
-    {                                                                                  \
-        0xdaab0b1a, 0xf45d, 0x4ae9, { 0xbf, 0x2c, 0xe0, 0xbb, 0x76, 0x7d, 0xfa, 0xd1 } \
+#define SLANG_UUID_IRenderPassLayout                       \
+    {                                                      \
+        0xdaab0b1a, 0xf45d, 0x4ae9,                        \
+        {                                                  \
+            0xbf, 0x2c, 0xe0, 0xbb, 0x76, 0x7d, 0xfa, 0xd1 \
+        }                                                  \
     }
 
 enum class QueryType
@@ -1559,20 +1653,33 @@ class IQueryPool : public ISlangUnknown
         QueryType type;
         GfxCount count;
     };
+
 public:
-    virtual SLANG_NO_THROW Result SLANG_MCALL getResult(GfxIndex queryIndex, GfxCount count, uint64_t* data) = 0;
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    getResult(GfxIndex queryIndex, GfxCount count, uint64_t* data) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL reset() = 0;
 };
-#define SLANG_UUID_IQueryPool                                                         \
-    { 0xc2cc3784, 0x12da, 0x480a, { 0xa8, 0x74, 0x8b, 0x31, 0x96, 0x1c, 0xa4, 0x36 } }
+#define SLANG_UUID_IQueryPool                              \
+    {                                                      \
+        0xc2cc3784, 0x12da, 0x480a,                        \
+        {                                                  \
+            0xa8, 0x74, 0x8b, 0x31, 0x96, 0x1c, 0xa4, 0x36 \
+        }                                                  \
+    }
 
 
 class ICommandEncoder : public ISlangUnknown
 {
-    SLANG_COM_INTERFACE( 0x77ea6383, 0xbe3d, 0x40aa, { 0x8b, 0x45, 0xfd, 0xf0, 0xd7, 0x5b, 0xfa, 0x34 });
+    SLANG_COM_INTERFACE(
+        0x77ea6383,
+        0xbe3d,
+        0x40aa,
+        {0x8b, 0x45, 0xfd, 0xf0, 0xd7, 0x5b, 0xfa, 0x34});
+
 public:
     virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() = 0;
-    virtual SLANG_NO_THROW void SLANG_MCALL writeTimestamp(IQueryPool* queryPool, GfxIndex queryIndex) = 0;
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    writeTimestamp(IQueryPool* queryPool, GfxIndex queryIndex) = 0;
 };
 
 struct IndirectDispatchArguments
@@ -1619,7 +1726,11 @@ struct ClearResourceViewFlags
 class IResourceCommandEncoder : public ICommandEncoder
 {
     // {F99A00E9-ED50-4088-8A0E-3B26755031EA}
-    SLANG_COM_INTERFACE(0xf99a00e9, 0xed50, 0x4088, { 0x8a, 0xe, 0x3b, 0x26, 0x75, 0x50, 0x31, 0xea });
+    SLANG_COM_INTERFACE(
+        0xf99a00e9,
+        0xed50,
+        0x4088,
+        {0x8a, 0xe, 0x3b, 0x26, 0x75, 0x50, 0x31, 0xea});
 
 public:
     virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer(
@@ -1662,18 +1773,26 @@ class IResourceCommandEncoder : public ICommandEncoder
         ITextureResource::SubresourceData* subResourceData,
         GfxCount subResourceDataCount) = 0;
     virtual SLANG_NO_THROW void SLANG_MCALL
-        uploadBufferData(IBufferResource* dst, Offset offset, Size size, void* data) = 0;
+    uploadBufferData(IBufferResource* dst, Offset offset, Size size, void* data) = 0;
     virtual SLANG_NO_THROW void SLANG_MCALL textureBarrier(
-        GfxCount count, ITextureResource* const* textures, ResourceState src, ResourceState dst) = 0;
+        GfxCount count,
+        ITextureResource* const* textures,
+        ResourceState src,
+        ResourceState dst) = 0;
     virtual SLANG_NO_THROW void SLANG_MCALL textureSubresourceBarrier(
         ITextureResource* texture,
         SubresourceRange subresourceRange,
         ResourceState src,
         ResourceState dst) = 0;
     virtual SLANG_NO_THROW void SLANG_MCALL bufferBarrier(
-        GfxCount count, IBufferResource* const* buffers, ResourceState src, ResourceState dst) = 0;
+        GfxCount count,
+        IBufferResource* const* buffers,
+        ResourceState src,
+        ResourceState dst) = 0;
     virtual SLANG_NO_THROW void SLANG_MCALL clearResourceView(
-        IResourceView* view, ClearValue* clearValue, ClearResourceViewFlags::Enum flags) = 0;
+        IResourceView* view,
+        ClearValue* clearValue,
+        ClearResourceViewFlags::Enum flags) = 0;
     virtual SLANG_NO_THROW void SLANG_MCALL resolveResource(
         ITextureResource* source,
         ResourceState sourceState,
@@ -1687,7 +1806,8 @@ class IResourceCommandEncoder : public ICommandEncoder
         GfxCount count,
         IBufferResource* buffer,
         Offset offset) = 0;
-    virtual SLANG_NO_THROW void SLANG_MCALL beginDebugEvent(const char* name, float rgbColor[3]) = 0;
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    beginDebugEvent(const char* name, float rgbColor[3]) = 0;
     virtual SLANG_NO_THROW void SLANG_MCALL endDebugEvent() = 0;
     inline void textureBarrier(ITextureResource* texture, ResourceState src, ResourceState dst)
     {
@@ -1702,7 +1822,11 @@ class IResourceCommandEncoder : public ICommandEncoder
 class IRenderCommandEncoder : public IResourceCommandEncoder
 {
     // {7A8D56D0-53E6-4AD6-85F7-D14DC110FDCE}
-    SLANG_COM_INTERFACE(0x7a8d56d0, 0x53e6, 0x4ad6, { 0x85, 0xf7, 0xd1, 0x4d, 0xc1, 0x10, 0xfd, 0xce })
+    SLANG_COM_INTERFACE(
+        0x7a8d56d0,
+        0x53e6,
+        0x4ad6,
+        {0x85, 0xf7, 0xd1, 0x4d, 0xc1, 0x10, 0xfd, 0xce})
 public:
     // Sets the current pipeline state. This method returns a transient shader object for
     // writing shader parameters. This shader object will not retain any resources or
@@ -1710,7 +1834,7 @@ class IRenderCommandEncoder : public IResourceCommandEncoder
     // resources or shader objects that is set into `outRootShaderObject` stays alive during
     // the execution of the command buffer.
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        bindPipeline(IPipelineState* state, IShaderObject** outRootShaderObject) = 0;
+    bindPipeline(IPipelineState* state, IShaderObject** outRootShaderObject) = 0;
     inline IShaderObject* bindPipeline(IPipelineState* state)
     {
         IShaderObject* rootObject = nullptr;
@@ -1720,12 +1844,12 @@ class IRenderCommandEncoder : public IResourceCommandEncoder
 
     // Sets the current pipeline state along with a pre-created mutable root shader object.
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) = 0;
+    bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) = 0;
 
-    virtual SLANG_NO_THROW void
-        SLANG_MCALL setViewports(GfxCount count, const Viewport* viewports) = 0;
-    virtual SLANG_NO_THROW void
-        SLANG_MCALL setScissorRects(GfxCount count, const ScissorRect* scissors) = 0;
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setViewports(GfxCount count, const Viewport* viewports) = 0;
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setScissorRects(GfxCount count, const ScissorRect* scissors) = 0;
 
     /// Sets the viewport, and sets the scissor rect to match the viewport.
     inline void setViewportAndScissor(Viewport const& viewport)
@@ -1743,18 +1867,17 @@ class IRenderCommandEncoder : public IResourceCommandEncoder
         GfxCount slotCount,
         IBufferResource* const* buffers,
         const Offset* offsets) = 0;
-    inline void setVertexBuffer(
-        GfxIndex slot, IBufferResource* buffer, Offset offset = 0)
+    inline void setVertexBuffer(GfxIndex slot, IBufferResource* buffer, Offset offset = 0)
     {
         setVertexBuffers(slot, 1, &buffer, &offset);
     }
 
     virtual SLANG_NO_THROW void SLANG_MCALL
-        setIndexBuffer(IBufferResource* buffer, Format indexFormat, Offset offset = 0) = 0;
+    setIndexBuffer(IBufferResource* buffer, Format indexFormat, Offset offset = 0) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        draw(GfxCount vertexCount, GfxIndex startVertex = 0) = 0;
+    draw(GfxCount vertexCount, GfxIndex startVertex = 0) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        drawIndexed(GfxCount indexCount, GfxIndex startIndex = 0, GfxIndex baseVertex = 0) = 0;
+    drawIndexed(GfxCount indexCount, GfxIndex startIndex = 0, GfxIndex baseVertex = 0) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL drawIndirect(
         GfxCount maxDrawCount,
         IBufferResource* argBuffer,
@@ -1769,7 +1892,9 @@ class IRenderCommandEncoder : public IResourceCommandEncoder
         Offset countOffset = 0) = 0;
     virtual SLANG_NO_THROW void SLANG_MCALL setStencilReference(uint32_t referenceValue) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL setSamplePositions(
-        GfxCount samplesPerPixel, GfxCount pixelCount, const SamplePosition* samplePositions) = 0;
+        GfxCount samplesPerPixel,
+        GfxCount pixelCount,
+        const SamplePosition* samplePositions) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL drawInstanced(
         GfxCount vertexCount,
         GfxCount instanceCount,
@@ -1781,14 +1906,17 @@ class IRenderCommandEncoder : public IResourceCommandEncoder
         GfxIndex startIndexLocation,
         GfxIndex baseVertexLocation,
         GfxIndex startInstanceLocation) = 0;
-    virtual SLANG_NO_THROW Result SLANG_MCALL
-        drawMeshTasks(int x, int y, int z) = 0;
+    virtual SLANG_NO_THROW Result SLANG_MCALL drawMeshTasks(int x, int y, int z) = 0;
 };
 
 class IComputeCommandEncoder : public IResourceCommandEncoder
 {
     // {88AA9322-82F7-4FE6-A68A-29C7FE798737}
-    SLANG_COM_INTERFACE(0x88aa9322, 0x82f7, 0x4fe6, { 0xa6, 0x8a, 0x29, 0xc7, 0xfe, 0x79, 0x87, 0x37 })
+    SLANG_COM_INTERFACE(
+        0x88aa9322,
+        0x82f7,
+        0x4fe6,
+        {0xa6, 0x8a, 0x29, 0xc7, 0xfe, 0x79, 0x87, 0x37})
 
 public:
     // Sets the current pipeline state. This method returns a transient shader object for
@@ -1797,7 +1925,7 @@ class IComputeCommandEncoder : public IResourceCommandEncoder
     // resources or shader objects that is set into `outRooShaderObject` stays alive during
     // the execution of the command buffer.
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        bindPipeline(IPipelineState* state, IShaderObject** outRootShaderObject) = 0;
+    bindPipeline(IPipelineState* state, IShaderObject** outRootShaderObject) = 0;
     inline IShaderObject* bindPipeline(IPipelineState* state)
     {
         IShaderObject* rootObject = nullptr;
@@ -1806,14 +1934,16 @@ class IComputeCommandEncoder : public IResourceCommandEncoder
     }
     // Sets the current pipeline state along with a pre-created mutable root shader object.
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) = 0;
+    bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL dispatchCompute(int x, int y, int z) = 0;
-    virtual SLANG_NO_THROW Result SLANG_MCALL dispatchComputeIndirect(IBufferResource* cmdBuffer, Offset offset) = 0;
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    dispatchComputeIndirect(IBufferResource* cmdBuffer, Offset offset) = 0;
 };
 
 enum class AccelerationStructureCopyMode
 {
-    Clone, Compact
+    Clone,
+    Compact
 };
 
 struct AccelerationStructureQueryDesc
@@ -1827,7 +1957,11 @@ struct AccelerationStructureQueryDesc
 
 class IRayTracingCommandEncoder : public IResourceCommandEncoder
 {
-    SLANG_COM_INTERFACE(0x9a672b87, 0x5035, 0x45e3, { 0x96, 0x7c, 0x1f, 0x85, 0xcd, 0xb3, 0x63, 0x4f })
+    SLANG_COM_INTERFACE(
+        0x9a672b87,
+        0x5035,
+        0x45e3,
+        {0x96, 0x7c, 0x1f, 0x85, 0xcd, 0xb3, 0x63, 0x4f})
 public:
     virtual SLANG_NO_THROW void SLANG_MCALL buildAccelerationStructure(
         const IAccelerationStructure::BuildDesc& desc,
@@ -1843,18 +1977,19 @@ class IRayTracingCommandEncoder : public IResourceCommandEncoder
         GfxCount queryCount,
         AccelerationStructureQueryDesc* queryDescs) = 0;
     virtual SLANG_NO_THROW void SLANG_MCALL
-        serializeAccelerationStructure(DeviceAddress dest, IAccelerationStructure* source) = 0;
+    serializeAccelerationStructure(DeviceAddress dest, IAccelerationStructure* source) = 0;
     virtual SLANG_NO_THROW void SLANG_MCALL
-        deserializeAccelerationStructure(IAccelerationStructure* dest, DeviceAddress source) = 0;
+    deserializeAccelerationStructure(IAccelerationStructure* dest, DeviceAddress source) = 0;
 
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        bindPipeline(IPipelineState* state, IShaderObject** outRootObject) = 0;
+    bindPipeline(IPipelineState* state, IShaderObject** outRootObject) = 0;
     // Sets the current pipeline state along with a pre-created mutable root shader object.
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) = 0;
+    bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) = 0;
 
     /// Issues a dispatch command to start ray tracing workload with a ray tracing pipeline.
-    /// `rayGenShaderIndex` specifies the index into the shader table that identifies the ray generation shader.
+    /// `rayGenShaderIndex` specifies the index into the shader table that identifies the ray
+    /// generation shader.
     virtual SLANG_NO_THROW Result SLANG_MCALL dispatchRays(
         GfxIndex rayGenShaderIndex,
         IShaderTable* shaderTable,
@@ -1875,8 +2010,9 @@ class ICommandBuffer : public ISlangUnknown
         IRenderPassLayout* renderPass,
         IFramebuffer* framebuffer,
         IRenderCommandEncoder** outEncoder) = 0;
-    inline IRenderCommandEncoder*
-        encodeRenderCommands(IRenderPassLayout* renderPass, IFramebuffer* framebuffer)
+    inline IRenderCommandEncoder* encodeRenderCommands(
+        IRenderPassLayout* renderPass,
+        IFramebuffer* framebuffer)
     {
         IRenderCommandEncoder* result;
         encodeRenderCommands(renderPass, framebuffer, &result);
@@ -1884,7 +2020,7 @@ class ICommandBuffer : public ISlangUnknown
     }
 
     virtual SLANG_NO_THROW void SLANG_MCALL
-        encodeComputeCommands(IComputeCommandEncoder** outEncoder) = 0;
+    encodeComputeCommands(IComputeCommandEncoder** outEncoder) = 0;
     inline IComputeCommandEncoder* encodeComputeCommands()
     {
         IComputeCommandEncoder* result;
@@ -1893,7 +2029,7 @@ class ICommandBuffer : public ISlangUnknown
     }
 
     virtual SLANG_NO_THROW void SLANG_MCALL
-        encodeResourceCommands(IResourceCommandEncoder** outEncoder) = 0;
+    encodeResourceCommands(IResourceCommandEncoder** outEncoder) = 0;
     inline IResourceCommandEncoder* encodeResourceCommands()
     {
         IResourceCommandEncoder* result;
@@ -1902,7 +2038,7 @@ class ICommandBuffer : public ISlangUnknown
     }
 
     virtual SLANG_NO_THROW void SLANG_MCALL
-        encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) = 0;
+    encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) = 0;
     inline IRayTracingCommandEncoder* encodeRayTracingCommands()
     {
         IRayTracingCommandEncoder* result;
@@ -1914,9 +2050,12 @@ class ICommandBuffer : public ISlangUnknown
 
     virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) = 0;
 };
-#define SLANG_UUID_ICommandBuffer                                                      \
-    {                                                                                  \
-        0x5d56063f, 0x91d4, 0x4723, { 0xa7, 0xa7, 0x7a, 0x15, 0xaf, 0x93, 0xeb, 0x48 } \
+#define SLANG_UUID_ICommandBuffer                          \
+    {                                                      \
+        0x5d56063f, 0x91d4, 0x4723,                        \
+        {                                                  \
+            0xa7, 0xa7, 0x7a, 0x15, 0xaf, 0x93, 0xeb, 0x48 \
+        }                                                  \
     }
 
 class ICommandBufferD3D12 : public ICommandBuffer
@@ -1925,9 +2064,12 @@ class ICommandBufferD3D12 : public ICommandBuffer
     virtual SLANG_NO_THROW void SLANG_MCALL invalidateDescriptorHeapBinding() = 0;
     virtual SLANG_NO_THROW void SLANG_MCALL ensureInternalDescriptorHeapsBound() = 0;
 };
-#define SLANG_UUID_ICommandBufferD3D12                                                 \
-    {                                                                                  \
-        0xd56b7616, 0x6c14, 0x4841, { 0x9d, 0x9c, 0x7b, 0x7f, 0xdb, 0x9f, 0xd9, 0xb8 } \
+#define SLANG_UUID_ICommandBufferD3D12                     \
+    {                                                      \
+        0xd56b7616, 0x6c14, 0x4841,                        \
+        {                                                  \
+            0x9d, 0x9c, 0x7b, 0x7f, 0xdb, 0x9f, 0xd9, 0xb8 \
+        }                                                  \
     }
 
 class ICommandQueue : public ISlangUnknown
@@ -1953,7 +2095,9 @@ class ICommandQueue : public ISlangUnknown
         IFence* fenceToSignal,
         uint64_t newFenceValue) = 0;
     inline void executeCommandBuffer(
-        ICommandBuffer* commandBuffer, IFence* fenceToSignal = nullptr, uint64_t newFenceValue = 0)
+        ICommandBuffer* commandBuffer,
+        IFence* fenceToSignal = nullptr,
+        uint64_t newFenceValue = 0)
     {
         executeCommandBuffers(1, &commandBuffer, fenceToSignal, newFenceValue);
     }
@@ -1964,11 +2108,14 @@ class ICommandQueue : public ISlangUnknown
 
     /// Queues a device side wait for the given fences.
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        waitForFenceValuesOnDevice(GfxCount fenceCount, IFence** fences, uint64_t* waitValues) = 0;
+    waitForFenceValuesOnDevice(GfxCount fenceCount, IFence** fences, uint64_t* waitValues) = 0;
 };
-#define SLANG_UUID_ICommandQueue                                                    \
-    {                                                                               \
-        0x14e2bed0, 0xad0, 0x4dc8, { 0xb3, 0x41, 0x6, 0x3f, 0xe7, 0x2d, 0xbf, 0xe } \
+#define SLANG_UUID_ICommandQueue                         \
+    {                                                    \
+        0x14e2bed0, 0xad0, 0x4dc8,                       \
+        {                                                \
+            0xb3, 0x41, 0x6, 0x3f, 0xe7, 0x2d, 0xbf, 0xe \
+        }                                                \
     }
 
 class ITransientResourceHeap : public ISlangUnknown
@@ -1999,8 +2146,8 @@ class ITransientResourceHeap : public ISlangUnknown
     // In most situations this method should be called at the beginning of each frame.
     virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() = 0;
 
-    // Must be called when the application has done using this heap to issue commands. In most situations
-    // this method should be called at the end of each frame.
+    // Must be called when the application is done using this heap to issue commands. In most
+    // situations this method should be called at the end of each frame.
     virtual SLANG_NO_THROW Result SLANG_MCALL finish() = 0;
 
     // Command buffers are one-time use. Once it is submitted to the queue via
@@ -2009,7 +2156,7 @@ class ITransientResourceHeap : public ISlangUnknown
     // that only one command buffer maybe recorded at a time. User must finish recording a command
     // buffer before creating another command buffer.
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        createCommandBuffer(ICommandBuffer** outCommandBuffer) = 0;
+    createCommandBuffer(ICommandBuffer** outCommandBuffer) = 0;
     inline ComPtr<ICommandBuffer> createCommandBuffer()
     {
         ComPtr<ICommandBuffer> result;
@@ -2017,9 +2164,12 @@ class ITransientResourceHeap : public ISlangUnknown
         return result;
     }
 };
-#define SLANG_UUID_ITransientResourceHeap                                             \
-    {                                                                                 \
-        0xcd48bd29, 0xee72, 0x41b8, { 0xbc, 0xff, 0xa, 0x2b, 0x3a, 0xaa, 0x6d, 0xeb } \
+#define SLANG_UUID_ITransientResourceHeap                 \
+    {                                                     \
+        0xcd48bd29, 0xee72, 0x41b8,                       \
+        {                                                 \
+            0xbc, 0xff, 0xa, 0x2b, 0x3a, 0xaa, 0x6d, 0xeb \
+        }                                                 \
     }
 
 class ITransientResourceHeapD3D12 : public ISlangUnknown
@@ -2027,7 +2177,8 @@ class ITransientResourceHeapD3D12 : public ISlangUnknown
 public:
     enum class DescriptorType
     {
-        ResourceView, Sampler
+        ResourceView,
+        Sampler
     };
     virtual SLANG_NO_THROW Result SLANG_MCALL allocateTransientDescriptorTable(
         DescriptorType type,
@@ -2035,9 +2186,12 @@ class ITransientResourceHeapD3D12 : public ISlangUnknown
         Offset& outDescriptorOffset,
         void** outD3DDescriptorHeapHandle) = 0;
 };
-#define SLANG_UUID_ITransientResourceHeapD3D12                                             \
-    {                                                                                  \
-        0x9bc6a8bc, 0x5f7a, 0x454a, { 0x93, 0xef, 0x3b, 0x10, 0x5b, 0xb7, 0x63, 0x7e } \
+#define SLANG_UUID_ITransientResourceHeapD3D12             \
+    {                                                      \
+        0x9bc6a8bc, 0x5f7a, 0x454a,                        \
+        {                                                  \
+            0x93, 0xef, 0x3b, 0x10, 0x5b, 0xb7, 0x63, 0x7e \
+        }                                                  \
     }
 
 class ISwapchain : public ISlangUnknown
@@ -2055,7 +2209,7 @@ class ISwapchain : public ISlangUnknown
 
     /// Returns the back buffer image at `index`.
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        getImage(GfxIndex index, ITextureResource** outResource) = 0;
+    getImage(GfxIndex index, ITextureResource** outResource) = 0;
 
     /// Present the next image in the swapchain.
     virtual SLANG_NO_THROW Result SLANG_MCALL present() = 0;
@@ -2074,9 +2228,12 @@ class ISwapchain : public ISlangUnknown
     // Toggle full screen mode.
     virtual SLANG_NO_THROW Result SLANG_MCALL setFullScreenMode(bool mode) = 0;
 };
-#define SLANG_UUID_ISwapchain                                                        \
-    {                                                                                \
-        0xbe91ba6c, 0x784, 0x4308, { 0xa1, 0x0, 0x19, 0xc3, 0x66, 0x83, 0x44, 0xb2 } \
+#define SLANG_UUID_ISwapchain                             \
+    {                                                     \
+        0xbe91ba6c, 0x784, 0x4308,                        \
+        {                                                 \
+            0xa1, 0x0, 0x19, 0xc3, 0x66, 0x83, 0x44, 0xb2 \
+        }                                                 \
     }
 
 struct AdapterLUID
@@ -2090,10 +2247,7 @@ struct AdapterLUID
                 return false;
         return true;
     }
-    bool operator!=(const AdapterLUID& other) const
-    {
-        return !this->operator==(other);
-    }
+    bool operator!=(const AdapterLUID& other) const { return !this->operator==(other); }
 };
 
 struct AdapterInfo
@@ -2104,7 +2258,8 @@ struct AdapterInfo
     // Unique identifier for the vendor (only available for D3D and Vulkan).
     uint32_t vendorID;
 
-    // Unique identifier for the physical device among devices from the vendor (only available for D3D and Vulkan)
+    // Unique identifier for the physical device among devices from the vendor (only available for
+    // D3D and Vulkan)
     uint32_t deviceID;
 
     // Logically unique identifier of the adapter.
@@ -2114,7 +2269,10 @@ struct AdapterInfo
 class AdapterList
 {
 public:
-    AdapterList(ISlangBlob* blob) : m_blob(blob) {}
+    AdapterList(ISlangBlob* blob)
+        : m_blob(blob)
+    {
+    }
 
     const AdapterInfo* getAdapters() const
     {
@@ -2196,17 +2354,21 @@ struct DeviceInfo
 
 enum class DebugMessageType
 {
-    Info, Warning, Error
+    Info,
+    Warning,
+    Error
 };
 enum class DebugMessageSource
 {
-    Layer, Driver, Slang
+    Layer,
+    Driver,
+    Slang
 };
 class IDebugCallback
 {
 public:
     virtual SLANG_NO_THROW void SLANG_MCALL
-        handleMessage(DebugMessageType type, DebugMessageSource source, const char* message) = 0;
+    handleMessage(DebugMessageType type, DebugMessageSource source, const char* message) = 0;
 };
 
 class IDevice : public ISlangUnknown
@@ -2214,21 +2376,23 @@ class IDevice : public ISlangUnknown
 public:
     struct SlangDesc
     {
-        slang::IGlobalSession* slangGlobalSession = nullptr; // (optional) A slang global session object. If null will create automatically.
+        slang::IGlobalSession* slangGlobalSession =
+            nullptr; // (optional) A slang global session object. If null will create automatically.
 
         SlangMatrixLayoutMode defaultMatrixLayoutMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR;
 
         char const* const* searchPaths = nullptr;
-        GfxCount           searchPathCount = 0;
+        GfxCount searchPathCount = 0;
 
         slang::PreprocessorMacroDesc const* preprocessorMacros = nullptr;
-        GfxCount                            preprocessorMacroCount = 0;
+        GfxCount preprocessorMacroCount = 0;
 
-        const char* targetProfile = nullptr; // (optional) Target shader profile. If null this will be set to platform dependent default.
+        const char* targetProfile = nullptr; // (optional) Target shader profile. If null this will
+                                             // be set to platform dependent default.
         SlangFloatingPointMode floatingPointMode = SLANG_FLOATING_POINT_MODE_DEFAULT;
         SlangOptimizationLevel optimizationLevel = SLANG_OPTIMIZATION_LEVEL_DEFAULT;
         SlangTargetFlags targetFlags = kDefaultTargetFlags;
-        SlangLineDirectiveMode lineDirectiveMode = SLANG_LINE_DIRECTIVE_MODE_DEFAULT;\
+        SlangLineDirectiveMode lineDirectiveMode = SLANG_LINE_DIRECTIVE_MODE_DEFAULT;
     };
 
     struct ShaderCacheDesc
@@ -2248,9 +2412,10 @@ class IDevice : public ISlangUnknown
     {
         // The underlying API/Platform of the device.
         DeviceType deviceType = DeviceType::Default;
-        // The device's handles (if they exist) and their associated API. For D3D12, this contains a single InteropHandle
-        // for the ID3D12Device. For Vulkan, the first InteropHandle is the VkInstance, the second is the VkPhysicalDevice,
-        // and the third is the VkDevice. For CUDA, this only contains a single value for the CUDADevice.
+        // The device's handles (if they exist) and their associated API. For D3D12, this contains a
+        // single InteropHandle for the ID3D12Device. For Vulkan, the first InteropHandle is the
+        // VkInstance, the second is the VkPhysicalDevice, and the third is the VkDevice. For CUDA,
+        // this only contains a single value for the CUDADevice.
         InteropHandles existingDeviceHandles;
         // LUID of the adapter to use. Use getGfxAdapters() to get a list of available adapters.
         const AdapterLUID* adapterLUID = nullptr;
@@ -2271,16 +2436,20 @@ class IDevice : public ISlangUnknown
         void** extendedDescs = nullptr;
     };
 
-    virtual SLANG_NO_THROW Result SLANG_MCALL getNativeDeviceHandles(InteropHandles* outHandles) = 0;
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    getNativeDeviceHandles(InteropHandles* outHandles) = 0;
 
     virtual SLANG_NO_THROW bool SLANG_MCALL hasFeature(const char* feature) = 0;
 
-        /// Returns a list of features supported by the renderer.
-    virtual SLANG_NO_THROW Result SLANG_MCALL getFeatures(const char** outFeatures, Size bufferSize, GfxCount* outFeatureCount) = 0;
+    /// Returns a list of features supported by the renderer.
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    getFeatures(const char** outFeatures, Size bufferSize, GfxCount* outFeatureCount) = 0;
 
-    virtual SLANG_NO_THROW Result SLANG_MCALL getFormatSupportedResourceStates(Format format, ResourceStateSet* outStates) = 0;
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    getFormatSupportedResourceStates(Format format, ResourceStateSet* outStates) = 0;
 
-    virtual SLANG_NO_THROW Result SLANG_MCALL getSlangSession(slang::ISession** outSlangSession) = 0;
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    getSlangSession(slang::ISession** outSlangSession) = 0;
 
     inline ComPtr<slang::ISession> getSlangSession()
     {
@@ -2300,26 +2469,27 @@ class IDevice : public ISlangUnknown
         return result;
     }
 
-        /// Create a texture resource.
-        ///
-        /// If `initData` is non-null, then it must point to an array of
-        /// `ITextureResource::SubresourceData` with one element for each
-        /// subresource of the texture being created.
-        ///
-        /// The number of subresources in a texture is:
-        ///
-        ///     effectiveElementCount * mipLevelCount
-        ///
-        /// where the effective element count is computed as:
-        ///
-        ///     effectiveElementCount = (isArray ? arrayElementCount : 1) * (isCube ? 6 : 1);
-        ///
+    /// Create a texture resource.
+    ///
+    /// If `initData` is non-null, then it must point to an array of
+    /// `ITextureResource::SubresourceData` with one element for each
+    /// subresource of the texture being created.
+    ///
+    /// The number of subresources in a texture is:
+    ///
+    ///     effectiveElementCount * mipLevelCount
+    ///
+    /// where the effective element count is computed as:
+    ///
+    ///     effectiveElementCount = (isArray ? arrayElementCount : 1) * (isCube ? 6 : 1);
+    ///
     virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource(
         const ITextureResource::Desc& desc,
         const ITextureResource::SubresourceData* initData,
         ITextureResource** outResource) = 0;
 
-        /// Create a texture resource. initData holds the initialize data to set the contents of the texture when constructed.
+    /// Create a texture resource. `initData` holds the initial data used to set the contents of
+    /// the texture when it is constructed.
     inline SLANG_NO_THROW ComPtr<ITextureResource> createTextureResource(
         const ITextureResource::Desc& desc,
         const ITextureResource::SubresourceData* initData = nullptr)
@@ -2340,7 +2510,7 @@ class IDevice : public ISlangUnknown
         const Size size,
         ITextureResource** outResource) = 0;
 
-        /// Create a buffer resource
+    /// Create a buffer resource
     virtual SLANG_NO_THROW Result SLANG_MCALL createBufferResource(
         const IBufferResource::Desc& desc,
         const void* initData,
@@ -2366,7 +2536,7 @@ class IDevice : public ISlangUnknown
         IBufferResource** outResource) = 0;
 
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) = 0;
+    createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) = 0;
 
     inline ComPtr<ISamplerState> createSamplerState(ISamplerState::Desc const& desc)
     {
@@ -2376,9 +2546,13 @@ class IDevice : public ISlangUnknown
     }
 
     virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView(
-        ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) = 0;
+        ITextureResource* texture,
+        IResourceView::Desc const& desc,
+        IResourceView** outView) = 0;
 
-    inline ComPtr<IResourceView> createTextureView(ITextureResource* texture, IResourceView::Desc const& desc)
+    inline ComPtr<IResourceView> createTextureView(
+        ITextureResource* texture,
+        IResourceView::Desc const& desc)
     {
         ComPtr<IResourceView> view;
         SLANG_RETURN_NULL_ON_FAIL(createTextureView(texture, desc, view.writeRef()));
@@ -2392,15 +2566,18 @@ class IDevice : public ISlangUnknown
         IResourceView** outView) = 0;
 
     inline ComPtr<IResourceView> createBufferView(
-        IBufferResource* buffer, IBufferResource* counterBuffer, IResourceView::Desc const& desc)
+        IBufferResource* buffer,
+        IBufferResource* counterBuffer,
+        IResourceView::Desc const& desc)
     {
         ComPtr<IResourceView> view;
         SLANG_RETURN_NULL_ON_FAIL(createBufferView(buffer, counterBuffer, desc, view.writeRef()));
         return view;
     }
 
-    virtual SLANG_NO_THROW Result SLANG_MCALL
-        createFramebufferLayout(IFramebufferLayout::Desc const& desc, IFramebufferLayout** outFrameBuffer) = 0;
+    virtual SLANG_NO_THROW Result SLANG_MCALL createFramebufferLayout(
+        IFramebufferLayout::Desc const& desc,
+        IFramebufferLayout** outFrameBuffer) = 0;
     inline ComPtr<IFramebufferLayout> createFramebufferLayout(IFramebufferLayout::Desc const& desc)
     {
         ComPtr<IFramebufferLayout> fb;
@@ -2409,7 +2586,7 @@ class IDevice : public ISlangUnknown
     }
 
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFrameBuffer) = 0;
+    createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFrameBuffer) = 0;
     inline ComPtr<IFramebuffer> createFramebuffer(IFramebuffer::Desc const& desc)
     {
         ComPtr<IFramebuffer> fb;
@@ -2428,7 +2605,9 @@ class IDevice : public ISlangUnknown
     }
 
     virtual SLANG_NO_THROW Result SLANG_MCALL createSwapchain(
-        ISwapchain::Desc const& desc, WindowHandle window, ISwapchain** outSwapchain) = 0;
+        ISwapchain::Desc const& desc,
+        WindowHandle window,
+        ISwapchain** outSwapchain) = 0;
     inline ComPtr<ISwapchain> createSwapchain(ISwapchain::Desc const& desc, WindowHandle window)
     {
         ComPtr<ISwapchain> swapchain;
@@ -2436,8 +2615,8 @@ class IDevice : public ISlangUnknown
         return swapchain;
     }
 
-    virtual SLANG_NO_THROW Result SLANG_MCALL createInputLayout(
-        IInputLayout::Desc const& desc, IInputLayout** outLayout) = 0;
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    createInputLayout(IInputLayout::Desc const& desc, IInputLayout** outLayout) = 0;
 
     inline ComPtr<IInputLayout> createInputLayout(IInputLayout::Desc const& desc)
     {
@@ -2446,9 +2625,13 @@ class IDevice : public ISlangUnknown
         return layout;
     }
 
-    inline Result createInputLayout(Size vertexSize, InputElementDesc const* inputElements, GfxCount inputElementCount, IInputLayout** outLayout)
+    inline Result createInputLayout(
+        Size vertexSize,
+        InputElementDesc const* inputElements,
+        GfxCount inputElementCount,
+        IInputLayout** outLayout)
     {
-        VertexStreamDesc streamDesc = { vertexSize, InputSlotClass::PerVertex, 0 };
+        VertexStreamDesc streamDesc = {vertexSize, InputSlotClass::PerVertex, 0};
 
         IInputLayout::Desc inputLayoutDesc = {};
         inputLayoutDesc.inputElementCount = inputElementCount;
@@ -2458,15 +2641,19 @@ class IDevice : public ISlangUnknown
         return createInputLayout(inputLayoutDesc, outLayout);
     }
 
-    inline ComPtr<IInputLayout> createInputLayout(Size vertexSize, InputElementDesc const* inputElements, GfxCount inputElementCount)
+    inline ComPtr<IInputLayout> createInputLayout(
+        Size vertexSize,
+        InputElementDesc const* inputElements,
+        GfxCount inputElementCount)
     {
         ComPtr<IInputLayout> layout;
-        SLANG_RETURN_NULL_ON_FAIL(createInputLayout(vertexSize, inputElements, inputElementCount, layout.writeRef()));
+        SLANG_RETURN_NULL_ON_FAIL(
+            createInputLayout(vertexSize, inputElements, inputElementCount, layout.writeRef()));
         return layout;
     }
 
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) = 0;
+    createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) = 0;
     inline ComPtr<ICommandQueue> createCommandQueue(const ICommandQueue::Desc& desc)
     {
         ComPtr<ICommandQueue> queue;
@@ -2482,7 +2669,8 @@ class IDevice : public ISlangUnknown
     inline ComPtr<IShaderObject> createShaderObject(slang::TypeReflection* type)
     {
         ComPtr<IShaderObject> object;
-        SLANG_RETURN_NULL_ON_FAIL(createShaderObject(type, ShaderObjectContainerType::None, object.writeRef()));
+        SLANG_RETURN_NULL_ON_FAIL(
+            createShaderObject(type, ShaderObjectContainerType::None, object.writeRef()));
         return object;
     }
 
@@ -2492,17 +2680,18 @@ class IDevice : public ISlangUnknown
         IShaderObject** outObject) = 0;
 
     virtual SLANG_NO_THROW Result SLANG_MCALL createShaderObjectFromTypeLayout(
-        slang::TypeLayoutReflection* typeLayout, IShaderObject** outObject) = 0;
+        slang::TypeLayoutReflection* typeLayout,
+        IShaderObject** outObject) = 0;
 
     virtual SLANG_NO_THROW Result SLANG_MCALL createMutableShaderObjectFromTypeLayout(
-        slang::TypeLayoutReflection* typeLayout, IShaderObject** outObject) = 0;
-
-    virtual SLANG_NO_THROW Result SLANG_MCALL createMutableRootShaderObject(
-        IShaderProgram* program,
+        slang::TypeLayoutReflection* typeLayout,
         IShaderObject** outObject) = 0;
 
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        createShaderTable(const IShaderTable::Desc& desc, IShaderTable** outTable) = 0;
+    createMutableRootShaderObject(IShaderProgram* program, IShaderObject** outObject) = 0;
+
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    createShaderTable(const IShaderTable::Desc& desc, IShaderTable** outTable) = 0;
 
     virtual SLANG_NO_THROW Result SLANG_MCALL createProgram(
         const IShaderProgram::Desc& desc,
@@ -2522,23 +2711,20 @@ class IDevice : public ISlangUnknown
         ISlangBlob** outDiagnosticBlob = nullptr) = 0;
 
     virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState(
-        const GraphicsPipelineStateDesc&    desc,
-        IPipelineState**                    outState) = 0;
+        const GraphicsPipelineStateDesc& desc,
+        IPipelineState** outState) = 0;
 
-    inline ComPtr<IPipelineState> createGraphicsPipelineState(
-        const GraphicsPipelineStateDesc& desc)
+    inline ComPtr<IPipelineState> createGraphicsPipelineState(const GraphicsPipelineStateDesc& desc)
     {
         ComPtr<IPipelineState> state;
         SLANG_RETURN_NULL_ON_FAIL(createGraphicsPipelineState(desc, state.writeRef()));
         return state;
     }
 
-    virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState(
-        const ComputePipelineStateDesc&    desc,
-        IPipelineState**                     outState) = 0;
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    createComputePipelineState(const ComputePipelineStateDesc& desc, IPipelineState** outState) = 0;
 
-    inline ComPtr<IPipelineState> createComputePipelineState(
-        const ComputePipelineStateDesc& desc)
+    inline ComPtr<IPipelineState> createComputePipelineState(const ComputePipelineStateDesc& desc)
     {
         ComPtr<IPipelineState> state;
         SLANG_RETURN_NULL_ON_FAIL(createComputePipelineState(desc, state.writeRef()));
@@ -2546,9 +2732,10 @@ class IDevice : public ISlangUnknown
     }
 
     virtual SLANG_NO_THROW Result SLANG_MCALL createRayTracingPipelineState(
-        const RayTracingPipelineStateDesc& desc, IPipelineState** outState) = 0;
+        const RayTracingPipelineStateDesc& desc,
+        IPipelineState** outState) = 0;
 
-        /// Read back texture resource and stores the result in `outBlob`.
+    /// Read back texture resource and stores the result in `outBlob`.
     virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource(
         ITextureResource* resource,
         ResourceState state,
@@ -2556,17 +2743,14 @@ class IDevice : public ISlangUnknown
         Size* outRowPitch,
         Size* outPixelSize) = 0;
 
-    virtual SLANG_NO_THROW SlangResult SLANG_MCALL readBufferResource(
-        IBufferResource* buffer,
-        Offset offset,
-        Size size,
-        ISlangBlob** outBlob) = 0;
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    readBufferResource(IBufferResource* buffer, Offset offset, Size size, ISlangBlob** outBlob) = 0;
 
-        /// Get the type of this renderer
+    /// Get the type of this renderer
     virtual SLANG_NO_THROW const DeviceInfo& SLANG_MCALL getDeviceInfo() const = 0;
 
-    virtual SLANG_NO_THROW Result SLANG_MCALL createQueryPool(
-        const IQueryPool::Desc& desc, IQueryPool** outPool) = 0;
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+    createQueryPool(const IQueryPool::Desc& desc, IQueryPool** outPool) = 0;
 
 
     virtual SLANG_NO_THROW Result SLANG_MCALL getAccelerationStructurePrebuildInfo(
@@ -2578,7 +2762,7 @@ class IDevice : public ISlangUnknown
         IAccelerationStructure** outView) = 0;
 
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        createFence(const IFence::Desc& desc, IFence** outFence) = 0;
+    createFence(const IFence::Desc& desc, IFence** outFence) = 0;
 
     /// Wait on the host for the fences to signals.
     /// `timeout` is in nanoseconds, can be set to `kTimeoutInfinite`.
@@ -2590,7 +2774,9 @@ class IDevice : public ISlangUnknown
         uint64_t timeout) = 0;
 
     virtual SLANG_NO_THROW Result SLANG_MCALL getTextureAllocationInfo(
-        const ITextureResource::Desc& desc, Size* outSize, Size* outAlignment) = 0;
+        const ITextureResource::Desc& desc,
+        Size* outSize,
+        Size* outAlignment) = 0;
 
     virtual SLANG_NO_THROW Result SLANG_MCALL getTextureRowAlignment(Size* outAlignment) = 0;
 
@@ -2607,9 +2793,12 @@ class IDevice : public ISlangUnknown
         IShaderObject** outObject) = 0;
 };
 
-#define SLANG_UUID_IDevice                                                               \
-    {                                                                                    \
-          0x715bdf26, 0x5135, 0x11eb, { 0xAE, 0x93, 0x02, 0x42, 0xAC, 0x13, 0x00, 0x02 } \
+#define SLANG_UUID_IDevice                                 \
+    {                                                      \
+        0x715bdf26, 0x5135, 0x11eb,                        \
+        {                                                  \
+            0xAE, 0x93, 0x02, 0x42, 0xAC, 0x13, 0x00, 0x02 \
+        }                                                  \
     }
 
 struct ShaderCacheStats
@@ -2631,9 +2820,12 @@ class IShaderCache : public ISlangUnknown
     virtual SLANG_NO_THROW Result SLANG_MCALL resetShaderCacheStats() = 0;
 };
 
-#define SLANG_UUID_IShaderCache                                                          \
-    {                                                                                    \
-          0x8eccc8ec, 0x5c04, 0x4a51, { 0x99, 0x75, 0x13, 0xf8, 0xfe, 0xa1, 0x59, 0xf3 } \
+#define SLANG_UUID_IShaderCache                            \
+    {                                                      \
+        0x8eccc8ec, 0x5c04, 0x4a51,                        \
+        {                                                  \
+            0x99, 0x75, 0x13, 0xf8, 0xfe, 0xa1, 0x59, 0xf3 \
+        }                                                  \
     }
 
 class IPipelineCreationAPIDispatcher : public ISlangUnknown
@@ -2655,18 +2847,24 @@ class IPipelineCreationAPIDispatcher : public ISlangUnknown
         void* pipelineDesc,
         void** outPipelineState) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        beforeCreateRayTracingState(IDevice* device, slang::IComponentType* program) = 0;
+    beforeCreateRayTracingState(IDevice* device, slang::IComponentType* program) = 0;
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        afterCreateRayTracingState(IDevice* device, slang::IComponentType* program) = 0;
+    afterCreateRayTracingState(IDevice* device, slang::IComponentType* program) = 0;
 };
-#define SLANG_UUID_IPipelineCreationAPIDispatcher                                     \
-    {                                                                                 \
-        0xc3d5f782, 0xeae1, 0x4da6, { 0xab, 0x40, 0x75, 0x32, 0x31, 0x2, 0xb7, 0xdc } \
+#define SLANG_UUID_IPipelineCreationAPIDispatcher         \
+    {                                                     \
+        0xc3d5f782, 0xeae1, 0x4da6,                       \
+        {                                                 \
+            0xab, 0x40, 0x75, 0x32, 0x31, 0x2, 0xb7, 0xdc \
+        }                                                 \
     }
 
-#define SLANG_UUID_IVulkanPipelineCreationAPIDispatcher                                 \
-    {                                                                                   \
-        0x4fcf1274, 0x8752, 0x4743, { 0xb3, 0x51, 0x47, 0xcb, 0x83, 0x71, 0xef, 0x99 }  \
+#define SLANG_UUID_IVulkanPipelineCreationAPIDispatcher    \
+    {                                                      \
+        0x4fcf1274, 0x8752, 0x4743,                        \
+        {                                                  \
+            0xb3, 0x51, 0x47, 0xcb, 0x83, 0x71, 0xef, 0x99 \
+        }                                                  \
     }
 
 // Global public functions
@@ -2683,11 +2881,12 @@ extern "C"
     SLANG_GFX_API SlangResult SLANG_MCALL gfxGetFormatInfo(Format format, FormatInfo* outInfo);
 
     /// Gets a list of available adapters for a given device type
-    SLANG_GFX_API SlangResult SLANG_MCALL gfxGetAdapters(DeviceType type, ISlangBlob** outAdaptersBlob);
+    SLANG_GFX_API SlangResult SLANG_MCALL
+    gfxGetAdapters(DeviceType type, ISlangBlob** outAdaptersBlob);
 
     /// Given a type returns a function that can construct it, or nullptr if there isn't one
     SLANG_GFX_API SlangResult SLANG_MCALL
-        gfxCreateDevice(const IDevice::Desc* desc, IDevice** outDevice);
+    gfxCreateDevice(const IDevice::Desc* desc, IDevice** outDevice);
 
     /// Reports current set of live objects in gfx.
     /// Currently this only calls D3D's ReportLiveObjects.
@@ -2696,10 +2895,10 @@ extern "C"
     /// Sets a callback for receiving debug messages.
     /// The layer does not hold a strong reference to the callback object.
     /// The user is responsible for holding the callback object alive.
-    SLANG_GFX_API SlangResult SLANG_MCALL
-        gfxSetDebugCallback(IDebugCallback* callback);
+    SLANG_GFX_API SlangResult SLANG_MCALL gfxSetDebugCallback(IDebugCallback* callback);
 
-    /// Enables debug layer. The debug layer will check all `gfx` calls and verify that uses are valid.
+    /// Enables debug layer. The debug layer will check all `gfx` calls and verify that uses are
+    /// valid.
     SLANG_GFX_API void SLANG_MCALL gfxEnableDebugLayer();
 
     SLANG_GFX_API const char* SLANG_MCALL gfxGetDeviceTypeName(DeviceType type);
@@ -2738,4 +2937,12 @@ struct SlangSessionExtendedDesc
     slang::CompilerOptionEntry* compilerOptionEntries = nullptr;
 };
 
-}
+/// Whether to enable ray tracing validation (currently only Vulkan - D3D requires app layer to use
+/// NVAPI)
+struct RayTracingValidationDesc
+{
+    StructType structType = StructType::RayTracingValidationDesc;
+    bool enableRaytracingValidation = false;
+};
+
+} // namespace gfx
diff --git a/external/slang/include/slang-hlsl-prelude.h b/external/slang/include/slang-hlsl-prelude.h
new file mode 100644
index 00000000..8e77201f
--- /dev/null
+++ b/external/slang/include/slang-hlsl-prelude.h
@@ -0,0 +1,8 @@
+#ifdef SLANG_HLSL_ENABLE_NVAPI
+#include "nvHLSLExtns.h"
+#endif
+
+#ifndef __DXC_VERSION_MAJOR
+// warning X3557: loop doesn't seem to do anything, forcing loop to unroll
+#pragma warning(disable : 3557)
+#endif
diff --git a/external/slang/include/slang-image-format-defs.h b/external/slang/include/slang-image-format-defs.h
new file mode 100644
index 00000000..5e7922f4
--- /dev/null
+++ b/external/slang/include/slang-image-format-defs.h
@@ -0,0 +1,50 @@
+// slang-image-format-defs.h
+#ifndef SLANG_FORMAT
+    #error Must define SLANG_FORMAT macro before including image-format-defs.h
+#endif
+
+SLANG_FORMAT(unknown, (NONE, 0, 0))
+SLANG_FORMAT(rgba32f, (FLOAT32, 4, sizeof(float) * 4))
+SLANG_FORMAT(rgba16f, (FLOAT16, 4, sizeof(uint16_t) * 4))
+SLANG_FORMAT(rg32f, (FLOAT32, 2, sizeof(float) * 2))
+SLANG_FORMAT(rg16f, (FLOAT16, 2, sizeof(uint16_t) * 2))
+SLANG_FORMAT(r11f_g11f_b10f, (NONE, 3, sizeof(uint32_t)))
+SLANG_FORMAT(r32f, (FLOAT32, 1, sizeof(float)))
+SLANG_FORMAT(r16f, (FLOAT16, 1, sizeof(uint16_t)))
+SLANG_FORMAT(rgba16, (UINT16, 4, sizeof(uint16_t) * 4))
+SLANG_FORMAT(rgb10_a2, (NONE, 4, sizeof(uint32_t)))
+SLANG_FORMAT(rgba8, (UINT8, 4, sizeof(uint32_t)))
+SLANG_FORMAT(rg16, (UINT16, 2, sizeof(uint16_t) * 2))
+SLANG_FORMAT(rg8, (UINT8, 2, sizeof(char) * 2))
+SLANG_FORMAT(r16, (UINT16, 1, sizeof(uint16_t)))
+SLANG_FORMAT(r8, (UINT8, 1, sizeof(uint8_t)))
+SLANG_FORMAT(rgba16_snorm, (UINT16, 4, sizeof(uint16_t) * 4))
+SLANG_FORMAT(rgba8_snorm, (UINT8, 4, sizeof(uint8_t) * 4))
+SLANG_FORMAT(rg16_snorm, (UINT16, 2, sizeof(uint16_t) * 2))
+SLANG_FORMAT(rg8_snorm, (UINT8, 2, sizeof(uint8_t) * 2))
+SLANG_FORMAT(r16_snorm, (UINT16, 1, sizeof(uint16_t)))
+SLANG_FORMAT(r8_snorm, (UINT8, 1, sizeof(uint8_t)))
+SLANG_FORMAT(rgba32i, (INT32, 4, sizeof(int32_t) * 4))
+SLANG_FORMAT(rgba16i, (INT16, 4, sizeof(int16_t) * 4))
+SLANG_FORMAT(rgba8i, (INT8, 4, sizeof(int8_t) * 4))
+SLANG_FORMAT(rg32i, (INT32, 2, sizeof(int32_t) * 2))
+SLANG_FORMAT(rg16i, (INT16, 2, sizeof(int16_t) * 2))
+SLANG_FORMAT(rg8i, (INT8, 2, sizeof(int8_t) * 2))
+SLANG_FORMAT(r32i, (INT32, 1, sizeof(int32_t)))
+SLANG_FORMAT(r16i, (INT16, 1, sizeof(int16_t)))
+SLANG_FORMAT(r8i, (INT8, 1, sizeof(int8_t)))
+SLANG_FORMAT(rgba32ui, (UINT32, 4, sizeof(uint32_t) * 4))
+SLANG_FORMAT(rgba16ui, (UINT16, 4, sizeof(uint16_t) * 4))
+SLANG_FORMAT(rgb10_a2ui, (NONE, 4, sizeof(uint32_t)))
+SLANG_FORMAT(rgba8ui, (UINT8, 4, sizeof(uint8_t) * 4))
+SLANG_FORMAT(rg32ui, (UINT32, 2, sizeof(uint32_t) * 2))
+SLANG_FORMAT(rg16ui, (UINT16, 2, sizeof(uint16_t) * 2))
+SLANG_FORMAT(rg8ui, (UINT8, 2, sizeof(uint8_t) * 2))
+SLANG_FORMAT(r32ui, (UINT32, 1, sizeof(uint32_t)))
+SLANG_FORMAT(r16ui, (UINT16, 1, sizeof(uint16_t)))
+SLANG_FORMAT(r8ui, (UINT8, 1, sizeof(uint8_t)))
+SLANG_FORMAT(r64ui, (UINT64, 1, sizeof(uint64_t)))
+SLANG_FORMAT(r64i, (INT64, 1, sizeof(int64_t)))
+SLANG_FORMAT(bgra8, (UINT8, 4, sizeof(uint32_t)))
+
+#undef SLANG_FORMAT
diff --git a/external/slang/prelude/slang-llvm.h b/external/slang/include/slang-llvm.h
similarity index 51%
rename from external/slang/prelude/slang-llvm.h
rename to external/slang/include/slang-llvm.h
index b4138058..e0bbbd14 100644
--- a/external/slang/prelude/slang-llvm.h
+++ b/external/slang/include/slang-llvm.h
@@ -1,46 +1,54 @@
 #ifndef SLANG_LLVM_H
 #define SLANG_LLVM_H
 
-// TODO(JS): 
+// TODO(JS):
 // Disable exception declspecs, as not supported on LLVM without some extra options.
 // We could enable with `-fms-extensions`
 #define SLANG_DISABLE_EXCEPTIONS 1
 
 #ifndef SLANG_PRELUDE_ASSERT
-#   ifdef SLANG_PRELUDE_ENABLE_ASSERT
+#ifdef SLANG_PRELUDE_ENABLE_ASSERT
 extern "C" void assertFailure(const char* msg);
-#       define SLANG_PRELUDE_EXPECT(VALUE, MSG) if(VALUE) {} else assertFailure("assertion failed: '" MSG "'")
-#       define SLANG_PRELUDE_ASSERT(VALUE) SLANG_PRELUDE_EXPECT(VALUE, #VALUE)
-#   else // SLANG_PRELUDE_ENABLE_ASSERT
-#       define SLANG_PRELUDE_EXPECT(VALUE, MSG)
-#       define SLANG_PRELUDE_ASSERT(x) 
-#   endif // SLANG_PRELUDE_ENABLE_ASSERT
+#define SLANG_PRELUDE_EXPECT(VALUE, MSG) \
+    if (VALUE)                           \
+    {                                    \
+    }                                    \
+    else                                 \
+        assertFailure("assertion failed: '" MSG "'")
+#define SLANG_PRELUDE_ASSERT(VALUE) SLANG_PRELUDE_EXPECT(VALUE, #VALUE)
+#else // SLANG_PRELUDE_ENABLE_ASSERT
+#define SLANG_PRELUDE_EXPECT(VALUE, MSG)
+#define SLANG_PRELUDE_ASSERT(x)
+#endif // SLANG_PRELUDE_ENABLE_ASSERT
 #endif
 
 /*
-Taken from stddef.h 
+Taken from stddef.h
 */
 
 typedef __PTRDIFF_TYPE__ ptrdiff_t;
 typedef __SIZE_TYPE__ size_t;
 typedef __SIZE_TYPE__ rsize_t;
 
-//typedef __WCHAR_TYPE__ wchar_t;
+// typedef __WCHAR_TYPE__ wchar_t;
 
 #if defined(__need_NULL)
 #undef NULL
 #ifdef __cplusplus
-#  if !defined(__MINGW32__) && !defined(_MSC_VER)
-#    define NULL __null
-#  else
-#    define NULL 0
-#  endif
+#if !defined(__MINGW32__) && !defined(_MSC_VER)
+#define NULL __null
 #else
-#  define NULL ((void*)0)
+#define NULL 0
+#endif
+#else
+#define NULL ((void*)0)
 #endif
 #ifdef __cplusplus
 #if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
-namespace std { typedef decltype(nullptr) nullptr_t; }
+namespace std
+{
+typedef decltype(nullptr) nullptr_t;
+}
 using ::std::nullptr_t;
 #endif
 #endif
@@ -49,18 +57,18 @@ using ::std::nullptr_t;
 
 
 /*
-The following are taken verbatim from stdint.h from Clang in LLVM. Only 8/16/32/64 types are needed. 
+The following are taken verbatim from stdint.h from Clang in LLVM. Only 8/16/32/64 types are needed.
 */
 
 // LLVM/Clang types such that we can use LLVM/Clang without headers for C++ output from Slang
 
 #ifdef __INT64_TYPE__
-# ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/
+#ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/
 typedef __INT64_TYPE__ int64_t;
-# endif /* __int8_t_defined */
+#endif /* __int8_t_defined */
 typedef __UINT64_TYPE__ uint64_t;
-# define __int_least64_t int64_t
-# define __uint_least64_t uint64_t
+#define __int_least64_t int64_t
+#define __uint_least64_t uint64_t
 #endif /* __INT64_TYPE__ */
 
 #ifdef __int_least64_t
@@ -72,17 +80,17 @@ typedef __uint_least64_t uint_fast64_t;
 
 #ifdef __INT32_TYPE__
 
-# ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/
+#ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/
 typedef __INT32_TYPE__ int32_t;
-# endif /* __int8_t_defined */
+#endif /* __int8_t_defined */
 
-# ifndef __uint32_t_defined  /* more glibc compatibility */
-# define __uint32_t_defined
+#ifndef __uint32_t_defined /* more glibc compatibility */
+#define __uint32_t_defined
 typedef __UINT32_TYPE__ uint32_t;
-# endif /* __uint32_t_defined */
+#endif /* __uint32_t_defined */
 
-# define __int_least32_t int32_t
-# define __uint_least32_t uint32_t
+#define __int_least32_t int32_t
+#define __uint_least32_t uint32_t
 #endif /* __INT32_TYPE__ */
 
 #ifdef __int_least32_t
@@ -97,8 +105,8 @@ typedef __uint_least32_t uint_fast32_t;
 typedef __INT16_TYPE__ int16_t;
 #endif /* __int8_t_defined */
 typedef __UINT16_TYPE__ uint16_t;
-# define __int_least16_t int16_t
-# define __uint_least16_t uint16_t
+#define __int_least16_t int16_t
+#define __uint_least16_t uint16_t
 #endif /* __INT16_TYPE__ */
 
 #ifdef __int_least16_t
@@ -109,12 +117,12 @@ typedef __uint_least16_t uint_fast16_t;
 #endif /* __int_least16_t */
 
 #ifdef __INT8_TYPE__
-#ifndef __int8_t_defined  /* glibc sys/types.h also defines int8_t*/
+#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/
 typedef __INT8_TYPE__ int8_t;
 #endif /* __int8_t_defined */
 typedef __UINT8_TYPE__ uint8_t;
-# define __int_least8_t int8_t
-# define __uint_least8_t uint8_t
+#define __int_least8_t int8_t
+#define __uint_least8_t uint8_t
 #endif /* __INT8_TYPE__ */
 
 #ifdef __int_least8_t
@@ -126,12 +134,12 @@ typedef __uint_least8_t uint_fast8_t;
 
 /* prevent glibc sys/types.h from defining conflicting types */
 #ifndef __int8_t_defined
-# define __int8_t_defined
+#define __int8_t_defined
 #endif /* __int8_t_defined */
 
 /* C99 7.18.1.4 Integer types capable of holding object pointers.
  */
-#define __stdint_join3(a,b,c) a ## b ## c
+#define __stdint_join3(a, b, c) a##b##c
 
 #ifndef _INTPTR_T
 #ifndef __intptr_t_defined
@@ -148,7 +156,7 @@ typedef __UINTPTR_TYPE__ uintptr_t;
 
 /* C99 7.18.1.5 Greatest-width integer types.
  */
-typedef __INTMAX_TYPE__  intmax_t;
+typedef __INTMAX_TYPE__ intmax_t;
 typedef __UINTMAX_TYPE__ uintmax_t;
 
 /* C99 7.18.4 Macros for minimum-width integer constants.
@@ -168,82 +176,82 @@ typedef __UINTMAX_TYPE__ uintmax_t;
  * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).
  */
 
-#define __int_c_join(a, b) a ## b
+#define __int_c_join(a, b) a##b
 #define __int_c(v, suffix) __int_c_join(v, suffix)
 #define __uint_c(v, suffix) __int_c_join(v##U, suffix)
 
 #ifdef __INT64_TYPE__
-# ifdef __INT64_C_SUFFIX__
-#  define __int64_c_suffix __INT64_C_SUFFIX__
-# else
-#  undef __int64_c_suffix
-# endif /* __INT64_C_SUFFIX__ */
+#ifdef __INT64_C_SUFFIX__
+#define __int64_c_suffix __INT64_C_SUFFIX__
+#else
+#undef __int64_c_suffix
+#endif /* __INT64_C_SUFFIX__ */
 #endif /* __INT64_TYPE__ */
 
 #ifdef __int_least64_t
-# ifdef __int64_c_suffix
-#  define INT64_C(v) __int_c(v, __int64_c_suffix)
-#  define UINT64_C(v) __uint_c(v, __int64_c_suffix)
-# else
-#  define INT64_C(v) v
-#  define UINT64_C(v) v ## U
-# endif /* __int64_c_suffix */
+#ifdef __int64_c_suffix
+#define INT64_C(v) __int_c(v, __int64_c_suffix)
+#define UINT64_C(v) __uint_c(v, __int64_c_suffix)
+#else
+#define INT64_C(v) v
+#define UINT64_C(v) v##U
+#endif /* __int64_c_suffix */
 #endif /* __int_least64_t */
 
 
 #ifdef __INT32_TYPE__
-# ifdef __INT32_C_SUFFIX__
-#  define __int32_c_suffix __INT32_C_SUFFIX__
+#ifdef __INT32_C_SUFFIX__
+#define __int32_c_suffix __INT32_C_SUFFIX__
 #else
-#  undef __int32_c_suffix
-# endif /* __INT32_C_SUFFIX__ */
+#undef __int32_c_suffix
+#endif /* __INT32_C_SUFFIX__ */
 #endif /* __INT32_TYPE__ */
 
 #ifdef __int_least32_t
-# ifdef __int32_c_suffix
-#  define INT32_C(v) __int_c(v, __int32_c_suffix)
-#  define UINT32_C(v) __uint_c(v, __int32_c_suffix)
-# else
-#  define INT32_C(v) v
-#  define UINT32_C(v) v ## U
-# endif /* __int32_c_suffix */
+#ifdef __int32_c_suffix
+#define INT32_C(v) __int_c(v, __int32_c_suffix)
+#define UINT32_C(v) __uint_c(v, __int32_c_suffix)
+#else
+#define INT32_C(v) v
+#define UINT32_C(v) v##U
+#endif /* __int32_c_suffix */
 #endif /* __int_least32_t */
 
 #ifdef __INT16_TYPE__
-# ifdef __INT16_C_SUFFIX__
-#  define __int16_c_suffix __INT16_C_SUFFIX__
+#ifdef __INT16_C_SUFFIX__
+#define __int16_c_suffix __INT16_C_SUFFIX__
 #else
-#  undef __int16_c_suffix
-# endif /* __INT16_C_SUFFIX__ */
+#undef __int16_c_suffix
+#endif /* __INT16_C_SUFFIX__ */
 #endif /* __INT16_TYPE__ */
 
 #ifdef __int_least16_t
-# ifdef __int16_c_suffix
-#  define INT16_C(v) __int_c(v, __int16_c_suffix)
-#  define UINT16_C(v) __uint_c(v, __int16_c_suffix)
-# else
-#  define INT16_C(v) v
-#  define UINT16_C(v) v ## U
-# endif /* __int16_c_suffix */
+#ifdef __int16_c_suffix
+#define INT16_C(v) __int_c(v, __int16_c_suffix)
+#define UINT16_C(v) __uint_c(v, __int16_c_suffix)
+#else
+#define INT16_C(v) v
+#define UINT16_C(v) v##U
+#endif /* __int16_c_suffix */
 #endif /* __int_least16_t */
 
 
 #ifdef __INT8_TYPE__
-# ifdef __INT8_C_SUFFIX__
-#  define __int8_c_suffix __INT8_C_SUFFIX__
+#ifdef __INT8_C_SUFFIX__
+#define __int8_c_suffix __INT8_C_SUFFIX__
 #else
-#  undef  __int8_c_suffix
-# endif /* __INT8_C_SUFFIX__ */
+#undef __int8_c_suffix
+#endif /* __INT8_C_SUFFIX__ */
 #endif /* __INT8_TYPE__ */
 
 #ifdef __int_least8_t
-# ifdef __int8_c_suffix
-#  define INT8_C(v) __int_c(v, __int8_c_suffix)
-#  define UINT8_C(v) __uint_c(v, __int8_c_suffix)
-# else
-#  define INT8_C(v) v
-#  define UINT8_C(v) v ## U
-# endif /* __int8_c_suffix */
+#ifdef __int8_c_suffix
+#define INT8_C(v) __int_c(v, __int8_c_suffix)
+#define UINT8_C(v) __uint_c(v, __int8_c_suffix)
+#else
+#define INT8_C(v) v
+#define UINT8_C(v) v##U
+#endif /* __int8_c_suffix */
 #endif /* __int_least8_t */
 
 /* C99 7.18.2.1 Limits of exact-width integer types.
@@ -266,133 +274,131 @@ typedef __UINTMAX_TYPE__ uintmax_t;
  */
 
 #ifdef __INT64_TYPE__
-# define INT64_MAX           INT64_C( 9223372036854775807)
-# define INT64_MIN         (-INT64_C( 9223372036854775807)-1)
-# define UINT64_MAX         UINT64_C(18446744073709551615)
-# define __INT_LEAST64_MIN   INT64_MIN
-# define __INT_LEAST64_MAX   INT64_MAX
-# define __UINT_LEAST64_MAX UINT64_MAX
+#define INT64_MAX INT64_C(9223372036854775807)
+#define INT64_MIN (-INT64_C(9223372036854775807) - 1)
+#define UINT64_MAX UINT64_C(18446744073709551615)
+#define __INT_LEAST64_MIN INT64_MIN
+#define __INT_LEAST64_MAX INT64_MAX
+#define __UINT_LEAST64_MAX UINT64_MAX
 #endif /* __INT64_TYPE__ */
 
 #ifdef __INT_LEAST64_MIN
-# define INT_LEAST64_MIN   __INT_LEAST64_MIN
-# define INT_LEAST64_MAX   __INT_LEAST64_MAX
-# define UINT_LEAST64_MAX __UINT_LEAST64_MAX
-# define INT_FAST64_MIN    __INT_LEAST64_MIN
-# define INT_FAST64_MAX    __INT_LEAST64_MAX
-# define UINT_FAST64_MAX  __UINT_LEAST64_MAX
+#define INT_LEAST64_MIN __INT_LEAST64_MIN
+#define INT_LEAST64_MAX __INT_LEAST64_MAX
+#define UINT_LEAST64_MAX __UINT_LEAST64_MAX
+#define INT_FAST64_MIN __INT_LEAST64_MIN
+#define INT_FAST64_MAX __INT_LEAST64_MAX
+#define UINT_FAST64_MAX __UINT_LEAST64_MAX
 #endif /* __INT_LEAST64_MIN */
 
 #ifdef __INT32_TYPE__
-# define INT32_MAX           INT32_C(2147483647)
-# define INT32_MIN         (-INT32_C(2147483647)-1)
-# define UINT32_MAX         UINT32_C(4294967295)
-# define __INT_LEAST32_MIN   INT32_MIN
-# define __INT_LEAST32_MAX   INT32_MAX
-# define __UINT_LEAST32_MAX UINT32_MAX
+#define INT32_MAX INT32_C(2147483647)
+#define INT32_MIN (-INT32_C(2147483647) - 1)
+#define UINT32_MAX UINT32_C(4294967295)
+#define __INT_LEAST32_MIN INT32_MIN
+#define __INT_LEAST32_MAX INT32_MAX
+#define __UINT_LEAST32_MAX UINT32_MAX
 #endif /* __INT32_TYPE__ */
 
 #ifdef __INT_LEAST32_MIN
-# define INT_LEAST32_MIN   __INT_LEAST32_MIN
-# define INT_LEAST32_MAX   __INT_LEAST32_MAX
-# define UINT_LEAST32_MAX __UINT_LEAST32_MAX
-# define INT_FAST32_MIN    __INT_LEAST32_MIN
-# define INT_FAST32_MAX    __INT_LEAST32_MAX
-# define UINT_FAST32_MAX  __UINT_LEAST32_MAX
+#define INT_LEAST32_MIN __INT_LEAST32_MIN
+#define INT_LEAST32_MAX __INT_LEAST32_MAX
+#define UINT_LEAST32_MAX __UINT_LEAST32_MAX
+#define INT_FAST32_MIN __INT_LEAST32_MIN
+#define INT_FAST32_MAX __INT_LEAST32_MAX
+#define UINT_FAST32_MAX __UINT_LEAST32_MAX
 #endif /* __INT_LEAST32_MIN */
 
 #ifdef __INT16_TYPE__
-#define INT16_MAX            INT16_C(32767)
-#define INT16_MIN          (-INT16_C(32767)-1)
-#define UINT16_MAX          UINT16_C(65535)
-# define __INT_LEAST16_MIN   INT16_MIN
-# define __INT_LEAST16_MAX   INT16_MAX
-# define __UINT_LEAST16_MAX UINT16_MAX
+#define INT16_MAX INT16_C(32767)
+#define INT16_MIN (-INT16_C(32767) - 1)
+#define UINT16_MAX UINT16_C(65535)
+#define __INT_LEAST16_MIN INT16_MIN
+#define __INT_LEAST16_MAX INT16_MAX
+#define __UINT_LEAST16_MAX UINT16_MAX
 #endif /* __INT16_TYPE__ */
 
 #ifdef __INT_LEAST16_MIN
-# define INT_LEAST16_MIN   __INT_LEAST16_MIN
-# define INT_LEAST16_MAX   __INT_LEAST16_MAX
-# define UINT_LEAST16_MAX __UINT_LEAST16_MAX
-# define INT_FAST16_MIN    __INT_LEAST16_MIN
-# define INT_FAST16_MAX    __INT_LEAST16_MAX
-# define UINT_FAST16_MAX  __UINT_LEAST16_MAX
+#define INT_LEAST16_MIN __INT_LEAST16_MIN
+#define INT_LEAST16_MAX __INT_LEAST16_MAX
+#define UINT_LEAST16_MAX __UINT_LEAST16_MAX
+#define INT_FAST16_MIN __INT_LEAST16_MIN
+#define INT_FAST16_MAX __INT_LEAST16_MAX
+#define UINT_FAST16_MAX __UINT_LEAST16_MAX
 #endif /* __INT_LEAST16_MIN */
 
 
 #ifdef __INT8_TYPE__
-# define INT8_MAX            INT8_C(127)
-# define INT8_MIN          (-INT8_C(127)-1)
-# define UINT8_MAX          UINT8_C(255)
-# define __INT_LEAST8_MIN    INT8_MIN
-# define __INT_LEAST8_MAX    INT8_MAX
-# define __UINT_LEAST8_MAX  UINT8_MAX
+#define INT8_MAX INT8_C(127)
+#define INT8_MIN (-INT8_C(127) - 1)
+#define UINT8_MAX UINT8_C(255)
+#define __INT_LEAST8_MIN INT8_MIN
+#define __INT_LEAST8_MAX INT8_MAX
+#define __UINT_LEAST8_MAX UINT8_MAX
 #endif /* __INT8_TYPE__ */
 
 #ifdef __INT_LEAST8_MIN
-# define INT_LEAST8_MIN   __INT_LEAST8_MIN
-# define INT_LEAST8_MAX   __INT_LEAST8_MAX
-# define UINT_LEAST8_MAX __UINT_LEAST8_MAX
-# define INT_FAST8_MIN    __INT_LEAST8_MIN
-# define INT_FAST8_MAX    __INT_LEAST8_MAX
-# define UINT_FAST8_MAX  __UINT_LEAST8_MAX
+#define INT_LEAST8_MIN __INT_LEAST8_MIN
+#define INT_LEAST8_MAX __INT_LEAST8_MAX
+#define UINT_LEAST8_MAX __UINT_LEAST8_MAX
+#define INT_FAST8_MIN __INT_LEAST8_MIN
+#define INT_FAST8_MAX __INT_LEAST8_MAX
+#define UINT_FAST8_MAX __UINT_LEAST8_MAX
 #endif /* __INT_LEAST8_MIN */
 
 /* Some utility macros */
-#define  __INTN_MIN(n)  __stdint_join3( INT, n, _MIN)
-#define  __INTN_MAX(n)  __stdint_join3( INT, n, _MAX)
-#define __UINTN_MAX(n)  __stdint_join3(UINT, n, _MAX)
-#define  __INTN_C(n, v) __stdint_join3( INT, n, _C(v))
+#define __INTN_MIN(n) __stdint_join3(INT, n, _MIN)
+#define __INTN_MAX(n) __stdint_join3(INT, n, _MAX)
+#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX)
+#define __INTN_C(n, v) __stdint_join3(INT, n, _C(v))
 #define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v))
 
 /* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */
 /* C99 7.18.3 Limits of other integer types. */
 
-#define  INTPTR_MIN  (-__INTPTR_MAX__-1)
-#define  INTPTR_MAX    __INTPTR_MAX__
-#define UINTPTR_MAX   __UINTPTR_MAX__
-#define PTRDIFF_MIN (-__PTRDIFF_MAX__-1)
-#define PTRDIFF_MAX   __PTRDIFF_MAX__
-#define    SIZE_MAX      __SIZE_MAX__
+#define INTPTR_MIN (-__INTPTR_MAX__ - 1)
+#define INTPTR_MAX __INTPTR_MAX__
+#define UINTPTR_MAX __UINTPTR_MAX__
+#define PTRDIFF_MIN (-__PTRDIFF_MAX__ - 1)
+#define PTRDIFF_MAX __PTRDIFF_MAX__
+#define SIZE_MAX __SIZE_MAX__
 
 /* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__
  * is enabled. */
 #if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1
-#define   RSIZE_MAX            (SIZE_MAX >> 1)
+#define RSIZE_MAX (SIZE_MAX >> 1)
 #endif
 
 /* C99 7.18.2.5 Limits of greatest-width integer types. */
-#define  INTMAX_MIN (-__INTMAX_MAX__-1)
-#define  INTMAX_MAX   __INTMAX_MAX__
-#define UINTMAX_MAX  __UINTMAX_MAX__
+#define INTMAX_MIN (-__INTMAX_MAX__ - 1)
+#define INTMAX_MAX __INTMAX_MAX__
+#define UINTMAX_MAX __UINTMAX_MAX__
 
 /* C99 7.18.3 Limits of other integer types. */
 #define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__)
 #define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__)
 #ifdef __WINT_UNSIGNED__
-# define WINT_MIN       __UINTN_C(__WINT_WIDTH__, 0)
-# define WINT_MAX       __UINTN_MAX(__WINT_WIDTH__)
+#define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0)
+#define WINT_MAX __UINTN_MAX(__WINT_WIDTH__)
 #else
-# define WINT_MIN       __INTN_MIN(__WINT_WIDTH__)
-# define WINT_MAX       __INTN_MAX(__WINT_WIDTH__)
+#define WINT_MIN __INTN_MIN(__WINT_WIDTH__)
+#define WINT_MAX __INTN_MAX(__WINT_WIDTH__)
 #endif
 
 #ifndef WCHAR_MAX
-# define WCHAR_MAX __WCHAR_MAX__
+#define WCHAR_MAX __WCHAR_MAX__
 #endif
 #ifndef WCHAR_MIN
-# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)
-#  define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)
-# else
-#  define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)
-# endif
+#if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)
+#define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)
+#else
+#define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)
+#endif
 #endif
 
 /* 7.18.4.2 Macros for greatest-width integer constants. */
-#define  INTMAX_C(v) __int_c(v,  __INTMAX_C_SUFFIX__)
+#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)
 #define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)
 
 
 #endif // SLANG_LLVM_H
-
-
diff --git a/external/slang/include/slang-tag-version.h b/external/slang/include/slang-tag-version.h
new file mode 100644
index 00000000..b8186e17
--- /dev/null
+++ b/external/slang/include/slang-tag-version.h
@@ -0,0 +1 @@
+#define SLANG_TAG_VERSION "2025.6.3"
diff --git a/external/slang/prelude/slang-torch-prelude.h b/external/slang/include/slang-torch-prelude.h
similarity index 59%
rename from external/slang/prelude/slang-torch-prelude.h
rename to external/slang/include/slang-torch-prelude.h
index bdba620f..8ece877b 100644
--- a/external/slang/prelude/slang-torch-prelude.h
+++ b/external/slang/include/slang-torch-prelude.h
@@ -1,64 +1,70 @@
 // Prelude for PyTorch cpp binding.
 
+// clang-format off
 #include <torch/extension.h>
+// clang-format on
+
 #include <ATen/cuda/CUDAContext.h>
 #include <ATen/cuda/CUDAUtils.h>
-#include <vector>
 #include <stdexcept>
 #include <string>
+#include <vector>
 
 #ifdef SLANG_LLVM
 #include "slang-llvm.h"
 #else // SLANG_LLVM
-#   if SLANG_GCC_FAMILY && __GNUC__ < 6
-#       include <cmath>
-#       define SLANG_PRELUDE_STD std::
-#   else
-#       include <math.h>
-#       define SLANG_PRELUDE_STD
-#   endif
-
-#   include <assert.h>
-#   include <stdlib.h>
-#   include <string.h>
-#   include <stdint.h>
+#if SLANG_GCC_FAMILY && __GNUC__ < 6
+#include <cmath>
+#define SLANG_PRELUDE_STD std::
+#else
+#include <math.h>
+#define SLANG_PRELUDE_STD
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
 #endif // SLANG_LLVM
 
 #include "../source/core/slang-string.h"
 
 #if defined(_MSC_VER)
-#   define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
 #else
-#   define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
-//#   define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport)) __attribute__((__visibility__("default")))
-#endif    
-
-#ifdef __cplusplus    
-#   define SLANG_PRELUDE_EXTERN_C extern "C"
-#   define SLANG_PRELUDE_EXTERN_C_START extern "C" {
-#   define SLANG_PRELUDE_EXTERN_C_END }
+#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
+// #   define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
+// __attribute__((__visibility__("default")))
+#endif
+
+#ifdef __cplusplus
+#define SLANG_PRELUDE_EXTERN_C extern "C"
+#define SLANG_PRELUDE_EXTERN_C_START \
+    extern "C"                       \
+    {
+#define SLANG_PRELUDE_EXTERN_C_END }
 #else
-#   define SLANG_PRELUDE_EXTERN_C 
-#   define SLANG_PRELUDE_EXTERN_C_START
-#   define SLANG_PRELUDE_EXTERN_C_END 
-#endif    
+#define SLANG_PRELUDE_EXTERN_C
+#define SLANG_PRELUDE_EXTERN_C_START
+#define SLANG_PRELUDE_EXTERN_C_END
+#endif
 
 #define SLANG_PRELUDE_NAMESPACE
 
 #ifndef SLANG_NO_THROW
-#   define SLANG_NO_THROW
+#define SLANG_NO_THROW
 #endif
 #ifndef SLANG_STDCALL
-#   define SLANG_STDCALL
+#define SLANG_STDCALL
 #endif
 #ifndef SLANG_MCALL
-#   define SLANG_MCALL SLANG_STDCALL
+#define SLANG_MCALL SLANG_STDCALL
 #endif
 #ifndef SLANG_FORCE_INLINE
-#    define SLANG_FORCE_INLINE inline
+#define SLANG_FORCE_INLINE inline
 #endif
-#include "slang-cpp-types-core.h"
 #include "slang-cpp-scalar-intrinsics.h"
+#include "slang-cpp-types-core.h"
 
 
 static const int kSlangTorchTensorMaxDim = 5;
@@ -72,20 +78,26 @@ struct TensorView
 };
 
 
-TensorView make_tensor_view(torch::Tensor val, const char* name, torch::ScalarType targetScalarType, bool requireContiguous)
+TensorView make_tensor_view(
+    torch::Tensor val,
+    const char* name,
+    torch::ScalarType targetScalarType,
+    bool requireContiguous)
 {
     // We're currently not trying to implicitly cast or transfer to device for two reasons:
     // 1. There appears to be a bug with .to() where successive calls after the first one fail.
-    // 2. Silent casts like this can cause large memory allocations & unexpected overheads. 
+    // 2. Silent casts like this can cause large memory allocations & unexpected overheads.
     //    It's better to be explicit.
 
     // Expect tensors to be on CUDA device
     if (!val.device().is_cuda())
-        throw std::runtime_error(std::string(name).append(": tensor is not on CUDA device.").c_str());
+        throw std::runtime_error(
+            std::string(name).append(": tensor is not on CUDA device.").c_str());
 
     // Expect tensors to be the right type.
     if (val.dtype() != targetScalarType)
-        throw std::runtime_error(std::string(name).append(": tensor is not of the expected type.").c_str());
+        throw std::runtime_error(
+            std::string(name).append(": tensor is not of the expected type.").c_str());
 
     // Check that the tensor is contiguous
     if (requireContiguous && !val.is_contiguous())
@@ -107,6 +119,10 @@ TensorView make_tensor_view(torch::Tensor val, const char* name, torch::ScalarTy
         elementSize = 2;
         res.data = (uint8_t*)val.data_ptr<torch::BFloat16>();
         break;
+    case torch::kFloat16:
+        elementSize = 2;
+        res.data = (uint8_t*)val.data_ptr<at::Half>();
+        break;
     case torch::kInt16:
         elementSize = 2;
         res.data = (uint8_t*)val.data_ptr<int16_t>();
@@ -134,12 +150,23 @@ TensorView make_tensor_view(torch::Tensor val, const char* name, torch::ScalarTy
     }
 
     if (val.dim() > kSlangTorchTensorMaxDim)
-        throw std::runtime_error(std::string(name).append(": number of dimensions exceeds limit (").append(std::to_string(kSlangTorchTensorMaxDim)).append(")").c_str());
+        throw std::runtime_error(std::string(name)
+                                     .append(": number of dimensions exceeds limit (")
+                                     .append(std::to_string(kSlangTorchTensorMaxDim))
+                                     .append(")")
+                                     .c_str());
 
     bool isEmpty = true;
     for (int i = 0; i < val.dim(); ++i)
     {
         res.strides[i] = val.stride(i) * elementSize;
+        if (res.strides[i] == 0)
+            throw std::runtime_error(
+                std::string(name)
+                    .append(": tensors with broadcasted dimensions are not supported (use "
+                            "tensor.contiguous() to make tensor whole)")
+                    .c_str());
+
         res.sizes[i] = val.size(i);
         if (res.sizes[i] > 0)
             isEmpty = false;
diff --git a/external/slang/include/slang.h b/external/slang/include/slang.h
new file mode 100644
index 00000000..27fa9598
--- /dev/null
+++ b/external/slang/include/slang.h
@@ -0,0 +1,4599 @@
+#ifndef SLANG_H
+#define SLANG_H
+
+#ifdef SLANG_USER_CONFIG
+    #include SLANG_USER_CONFIG
+#endif
+
+/** \file slang.h
+
+The Slang API provides services to compile, reflect, and specialize code
+written in the Slang shading language.
+*/
+
+/*
+The following section attempts to detect the compiler and version in use.
+
+If an application defines `SLANG_COMPILER` before including this header,
+they take responsibility for setting any compiler-dependent macros
+used later in the file.
+
+Most applications should not need to touch this section.
+*/
+#ifndef SLANG_COMPILER
+    #define SLANG_COMPILER
+
+    /*
+    Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/
+    NOTE that SLANG_VC holds the compiler version - not just 1 or 0
+    */
+    #if defined(_MSC_VER)
+        #if _MSC_VER >= 1900
+            #define SLANG_VC 14
+        #elif _MSC_VER >= 1800
+            #define SLANG_VC 12
+        #elif _MSC_VER >= 1700
+            #define SLANG_VC 11
+        #elif _MSC_VER >= 1600
+            #define SLANG_VC 10
+        #elif _MSC_VER >= 1500
+            #define SLANG_VC 9
+        #else
+            #error "unknown version of Visual C++ compiler"
+        #endif
+    #elif defined(__clang__)
+        #define SLANG_CLANG 1
+    #elif defined(__SNC__)
+        #define SLANG_SNC 1
+    #elif defined(__ghs__)
+        #define SLANG_GHS 1
+    #elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */
+        #define SLANG_GCC 1
+    #else
+        #error "unknown compiler"
+    #endif
+    /*
+    Any compilers not detected by the above logic are now explicitly zeroed out.
+    */
+    #ifndef SLANG_VC
+        #define SLANG_VC 0
+    #endif
+    #ifndef SLANG_CLANG
+        #define SLANG_CLANG 0
+    #endif
+    #ifndef SLANG_SNC
+        #define SLANG_SNC 0
+    #endif
+    #ifndef SLANG_GHS
+        #define SLANG_GHS 0
+    #endif
+    #ifndef SLANG_GCC
+        #define SLANG_GCC 0
+    #endif
+#endif /* SLANG_COMPILER */
+
+/*
+The following section attempts to detect the target platform being compiled for.
+
+If an application defines `SLANG_PLATFORM` before including this header,
+they take responsibility for setting any platform-dependent macros
+used later in the file.
+
+Most applications should not need to touch this section.
+*/
+#ifndef SLANG_PLATFORM
+    #define SLANG_PLATFORM
+    /**
+    Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSystems/
+    */
+    #if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_PARTITION_APP
+        #define SLANG_WINRT 1 /* Windows Runtime, either on Windows RT or Windows 8 */
+    #elif defined(XBOXONE)
+        #define SLANG_XBOXONE 1
+    #elif defined(_WIN64) /* note: XBOXONE implies _WIN64 */
+        #define SLANG_WIN64 1
+    #elif defined(_M_PPC)
+        #define SLANG_X360 1
+    #elif defined(_WIN32) /* note: _M_PPC implies _WIN32 */
+        #define SLANG_WIN32 1
+    #elif defined(__ANDROID__)
+        #define SLANG_ANDROID 1
+    #elif defined(__linux__) || defined(__CYGWIN__) /* note: __ANDROID__ implies __linux__ */
+        #define SLANG_LINUX 1
+    #elif defined(__APPLE__)
+        #include "TargetConditionals.h"
+        #if TARGET_OS_MAC
+            #define SLANG_OSX 1
+        #else
+            #define SLANG_IOS 1
+        #endif
+    #elif defined(__CELLOS_LV2__)
+        #define SLANG_PS3 1
+    #elif defined(__ORBIS__)
+        #define SLANG_PS4 1
+    #elif defined(__SNC__) && defined(__arm__)
+        #define SLANG_PSP2 1
+    #elif defined(__ghs__)
+        #define SLANG_WIIU 1
+    #elif defined(__EMSCRIPTEN__)
+        #define SLANG_WASM 1
+    #else
+        #error "unknown target platform"
+    #endif
+    /*
+    Any platforms not detected by the above logic are now explicitly zeroed out.
+    */
+    #ifndef SLANG_WINRT
+        #define SLANG_WINRT 0
+    #endif
+    #ifndef SLANG_XBOXONE
+        #define SLANG_XBOXONE 0
+    #endif
+    #ifndef SLANG_WIN64
+        #define SLANG_WIN64 0
+    #endif
+    #ifndef SLANG_X360
+        #define SLANG_X360 0
+    #endif
+    #ifndef SLANG_WIN32
+        #define SLANG_WIN32 0
+    #endif
+    #ifndef SLANG_ANDROID
+        #define SLANG_ANDROID 0
+    #endif
+    #ifndef SLANG_LINUX
+        #define SLANG_LINUX 0
+    #endif
+    #ifndef SLANG_IOS
+        #define SLANG_IOS 0
+    #endif
+    #ifndef SLANG_OSX
+        #define SLANG_OSX 0
+    #endif
+    #ifndef SLANG_PS3
+        #define SLANG_PS3 0
+    #endif
+    #ifndef SLANG_PS4
+        #define SLANG_PS4 0
+    #endif
+    #ifndef SLANG_PSP2
+        #define SLANG_PSP2 0
+    #endif
+    #ifndef SLANG_WIIU
+        #define SLANG_WIIU 0
+    #endif
+#endif /* SLANG_PLATFORM */
+
+/* Shorthands for "families" of compilers/platforms */
+#define SLANG_GCC_FAMILY (SLANG_CLANG || SLANG_SNC || SLANG_GHS || SLANG_GCC)
+#define SLANG_WINDOWS_FAMILY (SLANG_WINRT || SLANG_WIN32 || SLANG_WIN64)
+#define SLANG_MICROSOFT_FAMILY (SLANG_XBOXONE || SLANG_X360 || SLANG_WINDOWS_FAMILY)
+#define SLANG_LINUX_FAMILY (SLANG_LINUX || SLANG_ANDROID)
+#define SLANG_APPLE_FAMILY (SLANG_IOS || SLANG_OSX) /* equivalent to #if __APPLE__ */
+#define SLANG_UNIX_FAMILY \
+    (SLANG_LINUX_FAMILY || SLANG_APPLE_FAMILY) /* shortcut for unix/posix platforms */
+
+/* Macros concerning DirectX */
+#if !defined(SLANG_CONFIG_DX_ON_VK) || !SLANG_CONFIG_DX_ON_VK
+    #define SLANG_ENABLE_DXVK 0
+    #define SLANG_ENABLE_VKD3D 0
+#else
+    #define SLANG_ENABLE_DXVK 1
+    #define SLANG_ENABLE_VKD3D 1
+#endif
+
+#if SLANG_WINDOWS_FAMILY
+    #define SLANG_ENABLE_DIRECTX 1
+    #define SLANG_ENABLE_DXGI_DEBUG 1
+    #define SLANG_ENABLE_DXBC_SUPPORT 1
+    #define SLANG_ENABLE_PIX 1
+#elif SLANG_LINUX_FAMILY
+    #define SLANG_ENABLE_DIRECTX (SLANG_ENABLE_DXVK || SLANG_ENABLE_VKD3D)
+    #define SLANG_ENABLE_DXGI_DEBUG 0
+    #define SLANG_ENABLE_DXBC_SUPPORT 0
+    #define SLANG_ENABLE_PIX 0
+#else
+    #define SLANG_ENABLE_DIRECTX 0
+    #define SLANG_ENABLE_DXGI_DEBUG 0
+    #define SLANG_ENABLE_DXBC_SUPPORT 0
+    #define SLANG_ENABLE_PIX 0
+#endif
+
+/* Macro for declaring that a method does not throw. Should be placed before the return type. */
+#ifndef SLANG_NO_THROW
+    #if SLANG_WINDOWS_FAMILY && !defined(SLANG_DISABLE_EXCEPTIONS)
+        #define SLANG_NO_THROW __declspec(nothrow)
+    #endif
+#endif
+#ifndef SLANG_NO_THROW
+    #define SLANG_NO_THROW
+#endif
+
+/* The `SLANG_STDCALL` and `SLANG_MCALL` defines are used to set the calling
+convention for interface methods.
+*/
+#ifndef SLANG_STDCALL
+    #if SLANG_MICROSOFT_FAMILY
+        #define SLANG_STDCALL __stdcall
+    #else
+        #define SLANG_STDCALL
+    #endif
+#endif
+#ifndef SLANG_MCALL
+    #define SLANG_MCALL SLANG_STDCALL
+#endif
+
+
+#if !defined(SLANG_STATIC) && !defined(SLANG_DYNAMIC)
+    #define SLANG_DYNAMIC
+#endif
+
+#if defined(_MSC_VER)
+    #define SLANG_DLL_EXPORT __declspec(dllexport)
+#else
+    #if SLANG_WINDOWS_FAMILY
+        #define SLANG_DLL_EXPORT \
+            __attribute__((dllexport)) __attribute__((__visibility__("default")))
+    #else
+        #define SLANG_DLL_EXPORT __attribute__((__visibility__("default")))
+    #endif
+#endif
+
+#if defined(SLANG_DYNAMIC)
+    #if defined(_MSC_VER)
+        #ifdef SLANG_DYNAMIC_EXPORT
+            #define SLANG_API SLANG_DLL_EXPORT
+        #else
+            #define SLANG_API __declspec(dllimport)
+        #endif
+    #else
+        // TODO: need to consider compiler capabilities
+        // #     ifdef SLANG_DYNAMIC_EXPORT
+        #define SLANG_API SLANG_DLL_EXPORT
+    // #     endif
+    #endif
+#endif
+
+#ifndef SLANG_API
+    #define SLANG_API
+#endif
+
+// GCC Specific
+#if SLANG_GCC_FAMILY
+    #define SLANG_NO_INLINE __attribute__((noinline))
+    #define SLANG_FORCE_INLINE inline __attribute__((always_inline))
+    #define SLANG_BREAKPOINT(id) __builtin_trap();
+    #define SLANG_ALIGN_OF(T) __alignof__(T)
+#endif // SLANG_GCC_FAMILY
+
+#if SLANG_GCC_FAMILY || defined(__clang__)
+    // Use the builtin directly so we don't need to have an include of stddef.h
+    #define SLANG_OFFSET_OF(T, ELEMENT) __builtin_offsetof(T, ELEMENT)
+#endif
+
+#ifndef SLANG_OFFSET_OF
+    #define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1)
+#endif
+
+// Microsoft VC specific
+#if SLANG_VC
+    #define SLANG_NO_INLINE __declspec(noinline)
+    #define SLANG_FORCE_INLINE __forceinline
+    #define SLANG_BREAKPOINT(id) __debugbreak();
+    #define SLANG_ALIGN_OF(T) __alignof(T)
+
+    #define SLANG_INT64(x) (x##i64)
+    #define SLANG_UINT64(x) (x##ui64)
+#endif // SLANG_VC
+
+#ifndef SLANG_FORCE_INLINE
+    #define SLANG_FORCE_INLINE inline
+#endif
+#ifndef SLANG_NO_INLINE
+    #define SLANG_NO_INLINE
+#endif
+
+#ifndef SLANG_COMPILE_TIME_ASSERT
+    #define SLANG_COMPILE_TIME_ASSERT(x) static_assert(x)
+#endif
+
+#ifndef SLANG_BREAKPOINT
+    // Make it crash with a write to 0!
+    #define SLANG_BREAKPOINT(id) (*((int*)0) = int(id));
+#endif
+
+// Use for getting the number of elements of a standard C array (result is cast to SlangSSizeT).
+// Use 0[x] here to catch the case where x has an overloaded subscript operator
+#define SLANG_COUNT_OF(x) (SlangSSizeT(sizeof(x) / sizeof(0 [x])))
+/// SLANG_INLINE exists to have a way to inline consistent with SLANG_FORCE_INLINE (the original note said SLANG_ALWAYS_INLINE, which this section does not define)
+#define SLANG_INLINE inline
+
+// If explicitly disabled and not set, set to not available
+#if !defined(SLANG_HAS_EXCEPTIONS) && defined(SLANG_DISABLE_EXCEPTIONS)
+    #define SLANG_HAS_EXCEPTIONS 0
+#endif
+
+// If not set, the default is exceptions are available
+#ifndef SLANG_HAS_EXCEPTIONS
+    #define SLANG_HAS_EXCEPTIONS 1
+#endif
+
+// Other defines. The two-level helpers let macro arguments expand before # / ## is applied.
+#define SLANG_STRINGIZE_HELPER(X) #X
+#define SLANG_STRINGIZE(X) SLANG_STRINGIZE_HELPER(X)
+
+#define SLANG_CONCAT_HELPER(X, Y) X##Y
+#define SLANG_CONCAT(X, Y) SLANG_CONCAT_HELPER(X, Y)
+
+#ifndef SLANG_UNUSED // Note: the expansion already ends with ';'.
+    #define SLANG_UNUSED(v) (void)v;
+#endif
+
+#if defined(__llvm__) // Only when __llvm__ is defined; otherwise the macro expands to nothing.
+    #define SLANG_MAYBE_UNUSED [[maybe_unused]]
+#else
+    #define SLANG_MAYBE_UNUSED
+#endif
+
+// Used for doing 64-bit constant literals (generic suffixes, unless already defined earlier)
+#ifndef SLANG_INT64
+    #define SLANG_INT64(x) (x##ll)
+#endif
+#ifndef SLANG_UINT64
+    #define SLANG_UINT64(x) (x##ull)
+#endif
+
+
+#ifdef __cplusplus // SLANG_EXTERN_C: C linkage specifier in C++, nothing in C.
+    #define SLANG_EXTERN_C extern "C"
+#else
+    #define SLANG_EXTERN_C
+#endif
+
+#ifdef __cplusplus
+    // C++ specific macros: detect support for move semantics, enum class and `override`
+    // Clang (version is encoded as major * 10 + minor, so 33 means 3.3)
+    #if SLANG_CLANG
+        #if (__clang_major__ * 10 + __clang_minor__) >= 33
+            #define SLANG_HAS_MOVE_SEMANTICS 1
+            #define SLANG_HAS_ENUM_CLASS 1
+            #define SLANG_OVERRIDE override
+        #endif
+
+    // Gcc (version is encoded as major * 100 + minor, so 405 means 4.5)
+    #elif SLANG_GCC_FAMILY
+        // Check for C++11
+        #if (__cplusplus >= 201103L)
+            #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405
+                #define SLANG_HAS_MOVE_SEMANTICS 1
+            #endif
+            #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406
+                #define SLANG_HAS_ENUM_CLASS 1
+            #endif
+            #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407
+                #define SLANG_OVERRIDE override
+            #endif
+        #endif
+    #endif // SLANG_CLANG / SLANG_GCC_FAMILY
+
+    // Visual Studio (checked separately from the Clang/GCC chain above)
+
+    #if SLANG_VC
+        // C4481: nonstandard extension used: override specifier 'override'
+        #if _MSC_VER < 1700
+            #pragma warning(disable : 4481)
+        #endif
+        #define SLANG_OVERRIDE override
+        #if _MSC_VER >= 1600
+            #define SLANG_HAS_MOVE_SEMANTICS 1
+        #endif
+        #if _MSC_VER >= 1700
+            #define SLANG_HAS_ENUM_CLASS 1
+        #endif
+    #endif // SLANG_VC
+
+    // Defaults for anything not set above: empty SLANG_OVERRIDE, feature flags 0
+    #ifndef SLANG_OVERRIDE
+        #define SLANG_OVERRIDE
+    #endif
+    #ifndef SLANG_HAS_ENUM_CLASS
+        #define SLANG_HAS_ENUM_CLASS 0
+    #endif
+    #ifndef SLANG_HAS_MOVE_SEMANTICS
+        #define SLANG_HAS_MOVE_SEMANTICS 0
+    #endif
+
+#endif // __cplusplus
+
+/* Macros for detecting processor: at most one SLANG_PROCESSOR_* is set to 1 by this chain */
+#if defined(_M_ARM) || defined(__ARM_EABI__)
+    // This is special case for nVidia tegra
+    #define SLANG_PROCESSOR_ARM 1
+#elif defined(__i386__) || defined(_M_IX86)
+    #define SLANG_PROCESSOR_X86 1
+#elif defined(_M_AMD64) || defined(_M_X64) || defined(__amd64) || defined(__x86_64)
+    #define SLANG_PROCESSOR_X86_64 1
+#elif defined(_PPC_) || defined(__ppc__) || defined(__POWERPC__) || defined(_M_PPC)
+    #if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) || \
+        defined(__64BIT__) || defined(_LP64) || defined(__LP64__)
+        #define SLANG_PROCESSOR_POWER_PC_64 1
+    #else
+        #define SLANG_PROCESSOR_POWER_PC 1
+    #endif
+#elif defined(__arm__)
+    #define SLANG_PROCESSOR_ARM 1
+#elif defined(_M_ARM64) || defined(__aarch64__)
+    #define SLANG_PROCESSOR_ARM_64 1
+#elif defined(__EMSCRIPTEN__)
+    #define SLANG_PROCESSOR_WASM 1
+#endif
+
+#ifndef SLANG_PROCESSOR_ARM // Every processor macro not set above defaults to 0 (WASM gets no default here).
+    #define SLANG_PROCESSOR_ARM 0
+#endif
+
+#ifndef SLANG_PROCESSOR_ARM_64
+    #define SLANG_PROCESSOR_ARM_64 0
+#endif
+
+#ifndef SLANG_PROCESSOR_X86
+    #define SLANG_PROCESSOR_X86 0
+#endif
+
+#ifndef SLANG_PROCESSOR_X86_64
+    #define SLANG_PROCESSOR_X86_64 0
+#endif
+
+#ifndef SLANG_PROCESSOR_POWER_PC
+    #define SLANG_PROCESSOR_POWER_PC 0
+#endif
+
+#ifndef SLANG_PROCESSOR_POWER_PC_64
+    #define SLANG_PROCESSOR_POWER_PC_64 0
+#endif
+
+// Processor families (1 when either the 32-bit or the 64-bit variant is detected)
+
+#define SLANG_PROCESSOR_FAMILY_X86 (SLANG_PROCESSOR_X86_64 | SLANG_PROCESSOR_X86)
+#define SLANG_PROCESSOR_FAMILY_ARM (SLANG_PROCESSOR_ARM | SLANG_PROCESSOR_ARM_64)
+#define SLANG_PROCESSOR_FAMILY_POWER_PC (SLANG_PROCESSOR_POWER_PC_64 | SLANG_PROCESSOR_POWER_PC)
+
+// Pointer size: IS_64 is 1 when a 64-bit processor macro is set; IS_32 is its complement
+#define SLANG_PTR_IS_64 \
+    (SLANG_PROCESSOR_ARM_64 | SLANG_PROCESSOR_X86_64 | SLANG_PROCESSOR_POWER_PC_64)
+#define SLANG_PTR_IS_32 (SLANG_PTR_IS_64 ^ 1)
+
+// Processor features (endianness, and unaligned access on x86 only)
+#if SLANG_PROCESSOR_FAMILY_X86
+    #define SLANG_LITTLE_ENDIAN 1
+    #define SLANG_UNALIGNED_ACCESS 1
+#elif SLANG_PROCESSOR_FAMILY_ARM
+    #if defined(__ARMEB__)
+        #define SLANG_BIG_ENDIAN 1
+    #else
+        #define SLANG_LITTLE_ENDIAN 1
+    #endif
+#elif SLANG_PROCESSOR_FAMILY_POWER_PC
+    #define SLANG_BIG_ENDIAN 1
+#elif SLANG_WASM // NOTE(review): tests SLANG_WASM, not SLANG_PROCESSOR_WASM from above; presumably set by earlier platform detection - confirm.
+    #define SLANG_LITTLE_ENDIAN 1
+#endif
+
+#ifndef SLANG_LITTLE_ENDIAN
+    #define SLANG_LITTLE_ENDIAN 0
+#endif
+
+#ifndef SLANG_BIG_ENDIAN
+    #define SLANG_BIG_ENDIAN 0
+#endif
+
+#ifndef SLANG_UNALIGNED_ACCESS
+    #define SLANG_UNALIGNED_ACCESS 0
+#endif
+
+// One endianness must be set; otherwise compilation stops here
+#if ((SLANG_BIG_ENDIAN | SLANG_LITTLE_ENDIAN) == 0)
+    #error "Couldn't determine endianness"
+#endif
+
+#ifndef SLANG_NO_INTTYPES
+    #include <inttypes.h>
+#endif // ! SLANG_NO_INTTYPES
+
+#ifndef SLANG_NO_STDDEF
+    #include <stddef.h>
+#endif // ! SLANG_NO_STDDEF
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+    /*!
+    @mainpage Introduction
+
+    API Reference: slang.h
+
+    @file slang.h
+    */
+
+    typedef uint32_t SlangUInt32; ///< Fixed-width 32-bit unsigned integer.
+    typedef int32_t SlangInt32;   ///< Fixed-width 32-bit signed integer.
+
+    // Use SLANG_PTR_ macros to determine SlangInt/SlangUInt types.
+    // This is used instead of size_t/ptrdiff_t/intptr_t/uintptr_t, because on some targets
+    // these types are distinct from their uint_t/int_t equivalents and so produce ambiguity with
+    // function overloading.
+    //
+    // SlangSizeT is helpful as on some compilers size_t is distinct from a regular integer type and
+    // so overloading doesn't work. Casting to SlangSizeT works around this.
+#if SLANG_PTR_IS_64 // 64-bit pointer targets: all four aliases are 64 bits wide.
+    typedef int64_t SlangInt;
+    typedef uint64_t SlangUInt;
+
+    typedef int64_t SlangSSizeT;
+    typedef uint64_t SlangSizeT;
+#else // Otherwise all four aliases are 32 bits wide.
+typedef int32_t SlangInt;
+typedef uint32_t SlangUInt;
+
+typedef int32_t SlangSSizeT;
+typedef uint32_t SlangSizeT;
+#endif
+
+    typedef bool SlangBool; ///< Alias for the built-in `bool`.
+
+
+    /*!
+    @brief Severity of a diagnostic generated by the compiler.
+    Values come from the enum below, with higher values representing more severe
+    conditions, and all values >= SLANG_SEVERITY_ERROR indicating compilation
+    failure.
+    */
+    typedef int SlangSeverityIntegral; ///< Underlying integer type of SlangSeverity.
+    enum SlangSeverity : SlangSeverityIntegral
+    {
+        SLANG_SEVERITY_DISABLED = 0, /**< A message that is disabled, filtered out. */
+        SLANG_SEVERITY_NOTE,         /**< An informative message. */
+        SLANG_SEVERITY_WARNING,      /**< A warning, which indicates a possible problem. */
+        SLANG_SEVERITY_ERROR,        /**< An error, indicating that compilation failed. */
+        SLANG_SEVERITY_FATAL,    /**< An unrecoverable error, which forced compilation to abort. */
+        SLANG_SEVERITY_INTERNAL, /**< An internal error, indicating a logic error in the compiler.
+                                  */
+    };
+
+    typedef int SlangDiagnosticFlags; ///< Holds SLANG_DIAGNOSTIC_FLAG_* bit values.
+    enum
+    {
+        SLANG_DIAGNOSTIC_FLAG_VERBOSE_PATHS = 0x01,           ///< Bit 0.
+        SLANG_DIAGNOSTIC_FLAG_TREAT_WARNINGS_AS_ERRORS = 0x02 ///< Bit 1.
+    };
+
+    typedef int SlangBindableResourceIntegral; ///< Underlying integer type of SlangBindableResourceType.
+    enum SlangBindableResourceType : SlangBindableResourceIntegral
+    {
+        SLANG_NON_BINDABLE = 0, ///< Zero value: not a bindable resource.
+        SLANG_TEXTURE,
+        SLANG_SAMPLER,
+        SLANG_UNIFORM_BUFFER,
+        SLANG_STORAGE_BUFFER,
+    };
+
+    /* NOTE! To keep binary compatibility care is needed with this enum!
+
+    * To add a value, only add at the bottom (before COUNT_OF)
+    * To remove a value, add _DEPRECATED as a suffix, but leave in the list
+
+    This will make the enum values stable, and compatible with libraries that might not use the
+    latest enum values.
+    */
+    typedef int SlangCompileTargetIntegral; ///< Underlying integer type of SlangCompileTarget.
+    enum SlangCompileTarget : SlangCompileTargetIntegral
+    {
+        SLANG_TARGET_UNKNOWN,
+        SLANG_TARGET_NONE,
+        SLANG_GLSL,
+        SLANG_GLSL_VULKAN_DEPRECATED,          //< deprecated and removed: just use `SLANG_GLSL`.
+        SLANG_GLSL_VULKAN_ONE_DESC_DEPRECATED, //< deprecated and removed.
+        SLANG_HLSL,
+        SLANG_SPIRV,
+        SLANG_SPIRV_ASM,
+        SLANG_DXBC,
+        SLANG_DXBC_ASM,
+        SLANG_DXIL,
+        SLANG_DXIL_ASM,
+        SLANG_C_SOURCE,              ///< The C language
+        SLANG_CPP_SOURCE,            ///< C++ code for shader kernels.
+        SLANG_HOST_EXECUTABLE,       ///< Standalone binary executable (for hosting CPU/OS)
+        SLANG_SHADER_SHARED_LIBRARY, ///< A shared library/Dll for shader kernels (for hosting
+                                     ///< CPU/OS)
+        SLANG_SHADER_HOST_CALLABLE,  ///< A CPU target that makes the compiled shader code available
+                                     ///< to be run immediately
+        SLANG_CUDA_SOURCE,           ///< Cuda source
+        SLANG_PTX,                   ///< PTX
+        SLANG_CUDA_OBJECT_CODE,      ///< Object code that contains CUDA functions.
+        SLANG_OBJECT_CODE,           ///< Object code that can be used for later linking
+        SLANG_HOST_CPP_SOURCE,       ///< C++ code for host library or executable.
+        SLANG_HOST_HOST_CALLABLE,    ///< Host callable host code (ie non kernel/shader)
+        SLANG_CPP_PYTORCH_BINDING,   ///< C++ PyTorch binding code.
+        SLANG_METAL,                 ///< Metal shading language
+        SLANG_METAL_LIB,             ///< Metal library
+        SLANG_METAL_LIB_ASM,         ///< Metal library assembly
+        SLANG_HOST_SHARED_LIBRARY,   ///< A shared library/Dll for host code (for hosting CPU/OS)
+        SLANG_WGSL,                  ///< WebGPU shading language
+        SLANG_WGSL_SPIRV_ASM,        ///< SPIR-V assembly via WebGPU shading language
+        SLANG_WGSL_SPIRV,            ///< SPIR-V via WebGPU shading language
+        SLANG_TARGET_COUNT_OF,       ///< Number of enumerators above; new values go before this.
+    };
+
+    /* A "container format" describes the way that the outputs
+    for multiple files, entry points, targets, etc. should be
+    combined into a single artifact for output. */
+    typedef int SlangContainerFormatIntegral; ///< Underlying integer type of SlangContainerFormat.
+    enum SlangContainerFormat : SlangContainerFormatIntegral
+    {
+        /* Don't generate a container. */
+        SLANG_CONTAINER_FORMAT_NONE,
+
+        /* Generate a container in the `.slang-module` format,
+        which includes reflection information, compiled kernels, etc. */
+        SLANG_CONTAINER_FORMAT_SLANG_MODULE,
+    };
+
+    typedef int SlangPassThroughIntegral; ///< Underlying integer type of SlangPassThrough.
+    enum SlangPassThrough : SlangPassThroughIntegral ///< Names a compiler or tool; see per-value notes.
+    {
+        SLANG_PASS_THROUGH_NONE,
+        SLANG_PASS_THROUGH_FXC,
+        SLANG_PASS_THROUGH_DXC,
+        SLANG_PASS_THROUGH_GLSLANG,
+        SLANG_PASS_THROUGH_SPIRV_DIS,
+        SLANG_PASS_THROUGH_CLANG,         ///< Clang C/C++ compiler
+        SLANG_PASS_THROUGH_VISUAL_STUDIO, ///< Visual studio C/C++ compiler
+        SLANG_PASS_THROUGH_GCC,           ///< GCC C/C++ compiler
+        SLANG_PASS_THROUGH_GENERIC_C_CPP, ///< Generic C or C++ compiler, which is decided by the
+                                          ///< source type
+        SLANG_PASS_THROUGH_NVRTC,         ///< NVRTC Cuda compiler
+        SLANG_PASS_THROUGH_LLVM,          ///< LLVM 'compiler' - includes LLVM and Clang
+        SLANG_PASS_THROUGH_SPIRV_OPT,     ///< SPIRV-opt
+        SLANG_PASS_THROUGH_METAL,         ///< Metal compiler
+        SLANG_PASS_THROUGH_TINT,          ///< Tint WGSL compiler
+        SLANG_PASS_THROUGH_SPIRV_LINK,    ///< SPIRV-link
+        SLANG_PASS_THROUGH_COUNT_OF,      ///< Number of enumerators above.
+    };
+
+    /* Defines an archive type used to hold a 'file system' type structure. */
+    typedef int SlangArchiveTypeIntegral; ///< Underlying integer type of SlangArchiveType.
+    enum SlangArchiveType : SlangArchiveTypeIntegral
+    {
+        SLANG_ARCHIVE_TYPE_UNDEFINED,
+        SLANG_ARCHIVE_TYPE_ZIP,
+        SLANG_ARCHIVE_TYPE_RIFF, ///< Riff container with no compression
+        SLANG_ARCHIVE_TYPE_RIFF_DEFLATE,
+        SLANG_ARCHIVE_TYPE_RIFF_LZ4,
+        SLANG_ARCHIVE_TYPE_COUNT_OF, ///< Number of enumerators above.
+    };
+
+    /*!
+    Flags to control compilation behavior.
+    */
+    typedef unsigned int SlangCompileFlags; ///< Holds SLANG_COMPILE_FLAG_* bit values.
+    enum
+    {
+        /* Do as little mangling of names as possible, to try to preserve original names */
+        SLANG_COMPILE_FLAG_NO_MANGLING = 1 << 3,
+
+        /* Skip code generation step, just check the code and generate layout */
+        SLANG_COMPILE_FLAG_NO_CODEGEN = 1 << 4,
+
+        /* Obfuscate shader names on release products */
+        SLANG_COMPILE_FLAG_OBFUSCATE = 1 << 5,
+
+        /* Deprecated flags: kept around to allow existing applications to
+        compile. Note that the relevant features will still be left in
+        their default state. Both are 0, so they set no bits. */
+        SLANG_COMPILE_FLAG_NO_CHECKING = 0,
+        SLANG_COMPILE_FLAG_SPLIT_MIXED_TYPES = 0,
+    };
+
+    /*!
+    @brief Flags to control code generation behavior of a compilation target */
+    typedef unsigned int SlangTargetFlags; ///< Holds SLANG_TARGET_FLAG_* bit values.
+    enum
+    {
+        /* When compiling for a D3D Shader Model 5.1 or higher target, allocate
+           distinct register spaces for parameter blocks.
+
+           @deprecated This behavior is now enabled unconditionally.
+        */
+        SLANG_TARGET_FLAG_PARAMETER_BLOCKS_USE_REGISTER_SPACES = 1 << 4,
+
+        /* When set, will generate target code that contains all entrypoints defined
+           in the input source or specified via the `spAddEntryPoint` function in a
+           single output module (library/source file).
+        */
+        SLANG_TARGET_FLAG_GENERATE_WHOLE_PROGRAM = 1 << 8,
+
+        /* When set, will dump out the IR between intermediate compilation steps.*/
+        SLANG_TARGET_FLAG_DUMP_IR = 1 << 9,
+
+        /* When set, will generate SPIRV directly rather than via glslang. */
+        // This flag will be deprecated, use CompilerOption instead.
+        SLANG_TARGET_FLAG_GENERATE_SPIRV_DIRECTLY = 1 << 10,
+    };
+    constexpr static SlangTargetFlags kDefaultTargetFlags =
+        SLANG_TARGET_FLAG_GENERATE_SPIRV_DIRECTLY; ///< Default flag set: only direct SPIRV generation.
+
+    /*!
+    @brief Options to control floating-point precision guarantees for a target.
+    */
+    typedef unsigned int SlangFloatingPointModeIntegral; ///< Underlying integer type of the enum.
+    enum SlangFloatingPointMode : SlangFloatingPointModeIntegral
+    {
+        SLANG_FLOATING_POINT_MODE_DEFAULT = 0,
+        SLANG_FLOATING_POINT_MODE_FAST,
+        SLANG_FLOATING_POINT_MODE_PRECISE,
+    };
+
+    /*!
+    @brief Options to control emission of `#line` directives
+    */
+    typedef unsigned int SlangLineDirectiveModeIntegral; ///< Underlying integer type of the enum.
+    enum SlangLineDirectiveMode : SlangLineDirectiveModeIntegral
+    {
+        SLANG_LINE_DIRECTIVE_MODE_DEFAULT =
+            0,                              /**< Default behavior: pick behavior based on target. */
+        SLANG_LINE_DIRECTIVE_MODE_NONE,     /**< Don't emit line directives at all. */
+        SLANG_LINE_DIRECTIVE_MODE_STANDARD, /**< Emit standard C-style `#line` directives. */
+        SLANG_LINE_DIRECTIVE_MODE_GLSL, /**< Emit GLSL-style directives with file *number* instead
+                                           of name */
+        SLANG_LINE_DIRECTIVE_MODE_SOURCE_MAP, /**< Use a source map to track line mappings (ie no
+                                                 #line will appear in emitting source) */
+    };
+
+    typedef int SlangSourceLanguageIntegral; ///< Underlying integer type of SlangSourceLanguage.
+    enum SlangSourceLanguage : SlangSourceLanguageIntegral
+    {
+        SLANG_SOURCE_LANGUAGE_UNKNOWN,
+        SLANG_SOURCE_LANGUAGE_SLANG,
+        SLANG_SOURCE_LANGUAGE_HLSL,
+        SLANG_SOURCE_LANGUAGE_GLSL,
+        SLANG_SOURCE_LANGUAGE_C,
+        SLANG_SOURCE_LANGUAGE_CPP,
+        SLANG_SOURCE_LANGUAGE_CUDA,
+        SLANG_SOURCE_LANGUAGE_SPIRV,
+        SLANG_SOURCE_LANGUAGE_METAL,
+        SLANG_SOURCE_LANGUAGE_WGSL,
+        SLANG_SOURCE_LANGUAGE_COUNT_OF, ///< Number of enumerators above.
+    };
+
+    typedef unsigned int SlangProfileIDIntegral; ///< Underlying integer type of SlangProfileID.
+    enum SlangProfileID : SlangProfileIDIntegral
+    {
+        SLANG_PROFILE_UNKNOWN, ///< Only named value; other IDs are presumably obtained at runtime - TODO confirm.
+    };
+
+
+    typedef SlangInt32 SlangCapabilityIDIntegral; ///< Underlying integer type of SlangCapabilityID.
+    enum SlangCapabilityID : SlangCapabilityIDIntegral
+    {
+        SLANG_CAPABILITY_UNKNOWN = 0, ///< Only named value; other IDs are presumably obtained at runtime - TODO confirm.
+    };
+
+    typedef unsigned int SlangMatrixLayoutModeIntegral; ///< Underlying integer type of the enum.
+    enum SlangMatrixLayoutMode : SlangMatrixLayoutModeIntegral
+    {
+        SLANG_MATRIX_LAYOUT_MODE_UNKNOWN = 0,
+        SLANG_MATRIX_LAYOUT_ROW_MAJOR,
+        SLANG_MATRIX_LAYOUT_COLUMN_MAJOR,
+    };
+
+    typedef SlangUInt32 SlangStageIntegral; ///< Underlying integer type of SlangStage.
+    enum SlangStage : SlangStageIntegral
+    {
+        SLANG_STAGE_NONE,
+        SLANG_STAGE_VERTEX,
+        SLANG_STAGE_HULL,
+        SLANG_STAGE_DOMAIN,
+        SLANG_STAGE_GEOMETRY,
+        SLANG_STAGE_FRAGMENT,
+        SLANG_STAGE_COMPUTE,
+        SLANG_STAGE_RAY_GENERATION,
+        SLANG_STAGE_INTERSECTION,
+        SLANG_STAGE_ANY_HIT,
+        SLANG_STAGE_CLOSEST_HIT,
+        SLANG_STAGE_MISS,
+        SLANG_STAGE_CALLABLE,
+        SLANG_STAGE_MESH,
+        SLANG_STAGE_AMPLIFICATION,
+        // Number of enumerators above (including SLANG_STAGE_NONE).
+        SLANG_STAGE_COUNT,
+
+        // alias: same value as SLANG_STAGE_FRAGMENT
+        SLANG_STAGE_PIXEL = SLANG_STAGE_FRAGMENT,
+    };
+
+    typedef SlangUInt32 SlangDebugInfoLevelIntegral; ///< Underlying integer type of SlangDebugInfoLevel.
+    enum SlangDebugInfoLevel : SlangDebugInfoLevelIntegral
+    {
+        SLANG_DEBUG_INFO_LEVEL_NONE = 0, /**< Don't emit debug information at all. */
+        SLANG_DEBUG_INFO_LEVEL_MINIMAL,  /**< Emit as little debug information as possible, while
+                                            still supporting stack trackers. */
+        SLANG_DEBUG_INFO_LEVEL_STANDARD, /**< Emit whatever is the standard level of debug
+                                            information for each target. */
+        SLANG_DEBUG_INFO_LEVEL_MAXIMAL,  /**< Emit as much debug information as possible for each
+                                            target. */
+    };
+
+    /* Describes the debugging information format produced during a compilation. */
+    typedef SlangUInt32 SlangDebugInfoFormatIntegral; ///< Underlying integer type of the enum.
+    enum SlangDebugInfoFormat : SlangDebugInfoFormatIntegral
+    {
+        SLANG_DEBUG_INFO_FORMAT_DEFAULT, ///< Use the default debugging format for the target
+        SLANG_DEBUG_INFO_FORMAT_C7,  ///< CodeView C7 format (typically means debugging information
+                                     ///< is embedded in the binary)
+        SLANG_DEBUG_INFO_FORMAT_PDB, ///< Program database
+
+        SLANG_DEBUG_INFO_FORMAT_STABS, ///< Stabs
+        SLANG_DEBUG_INFO_FORMAT_COFF,  ///< COFF debug info
+        SLANG_DEBUG_INFO_FORMAT_DWARF, ///< DWARF debug info (we may want to support specifying the
+                                       ///< version)
+
+        SLANG_DEBUG_INFO_FORMAT_COUNT_OF, ///< Number of enumerators above.
+    };
+
+    typedef SlangUInt32 SlangOptimizationLevelIntegral; ///< Underlying integer type of the enum.
+    enum SlangOptimizationLevel : SlangOptimizationLevelIntegral
+    {
+        SLANG_OPTIMIZATION_LEVEL_NONE = 0, /**< Don't optimize at all. */
+        SLANG_OPTIMIZATION_LEVEL_DEFAULT,  /**< Default optimization level: balance code quality and
+                                              compilation time. */
+        SLANG_OPTIMIZATION_LEVEL_HIGH,     /**< Optimize aggressively. */
+        SLANG_OPTIMIZATION_LEVEL_MAXIMAL, /**< Include optimizations that may take a very long time,
+                                             or may involve severe space-vs-speed tradeoffs */
+    };
+
+    enum SlangEmitSpirvMethod ///< Value type of the slang::CompilerOptionName::EmitSpirvMethod option.
+    {
+        SLANG_EMIT_SPIRV_DEFAULT = 0,
+        SLANG_EMIT_SPIRV_VIA_GLSL,
+        SLANG_EMIT_SPIRV_DIRECTLY,
+    };
+
+    // All compiler option names supported by Slang.
+    namespace slang
+    {
+    enum class CompilerOptionName // Per-entry notes say which CompilerOptionValue fields carry the setting.
+    {
+        MacroDefine, // stringValue0: macro name;  stringValue1: macro value
+        DepFile,
+        EntryPointName,
+        Specialize,
+        Help,
+        HelpStyle,
+        Include, // stringValue: additional include path.
+        Language,
+        MatrixLayoutColumn,         // bool
+        MatrixLayoutRow,            // bool
+        ZeroInitialize,             // bool
+        IgnoreCapabilities,         // bool
+        RestrictiveCapabilityCheck, // bool
+        ModuleName,                 // stringValue0: module name.
+        Output,
+        Profile, // intValue0: profile
+        Stage,   // intValue0: stage
+        Target,  // intValue0: CodeGenTarget
+        Version,
+        WarningsAsErrors, // stringValue0: "all" or comma separated list of warning codes or names.
+        DisableWarnings,  // stringValue0: comma separated list of warning codes or names.
+        EnableWarning,    // stringValue0: warning code or name.
+        DisableWarning,   // stringValue0: warning code or name.
+        DumpWarningDiagnostics,
+        InputFilesRemain,
+        EmitIr,                        // bool
+        ReportDownstreamTime,          // bool
+        ReportPerfBenchmark,           // bool
+        ReportCheckpointIntermediates, // bool
+        SkipSPIRVValidation,           // bool
+        SourceEmbedStyle,
+        SourceEmbedName,
+        SourceEmbedLanguage,
+        DisableShortCircuit,            // bool
+        MinimumSlangOptimization,       // bool
+        DisableNonEssentialValidations, // bool
+        DisableSourceMap,               // bool
+        UnscopedEnum,                   // bool
+        PreserveParameters, // bool: preserve all resource parameters in the output code.
+
+        // Target
+
+        Capability,                // intValue0: CapabilityName
+        DefaultImageFormatUnknown, // bool
+        DisableDynamicDispatch,    // bool
+        DisableSpecialization,     // bool
+        FloatingPointMode,         // intValue0: FloatingPointMode
+        DebugInformation,          // intValue0: DebugInfoLevel
+        LineDirectiveMode,
+        Optimization, // intValue0: OptimizationLevel
+        Obfuscate,    // bool
+
+        VulkanBindShift, // intValue0 (higher 8 bits): kind; intValue0(lower bits): set; intValue1:
+                         // shift
+        VulkanBindGlobals,       // intValue0: index; intValue1: set
+        VulkanInvertY,           // bool
+        VulkanUseDxPositionW,    // bool
+        VulkanUseEntryPointName, // bool
+        VulkanUseGLLayout,       // bool
+        VulkanEmitReflection,    // bool
+
+        GLSLForceScalarLayout,   // bool
+        EnableEffectAnnotations, // bool
+
+        EmitSpirvViaGLSL,     // bool (will be deprecated)
+        EmitSpirvDirectly,    // bool (will be deprecated)
+        SPIRVCoreGrammarJSON, // stringValue0: json path
+        IncompleteLibrary,    // bool, when set, will not issue an error when the linked program has
+                              // unresolved extern function symbols.
+
+        // Downstream
+
+        CompilerPath,
+        DefaultDownstreamCompiler,
+        DownstreamArgs, // stringValue0: downstream compiler name. stringValue1: argument list, one
+                        // per line.
+        PassThrough,
+
+        // Repro
+
+        DumpRepro,
+        DumpReproOnError,
+        ExtractRepro,
+        LoadRepro,
+        LoadReproDirectory,
+        ReproFallbackDirectory,
+
+        // Debugging
+
+        DumpAst,
+        DumpIntermediatePrefix,
+        DumpIntermediates, // bool
+        DumpIr,            // bool
+        DumpIrIds,
+        PreprocessorOutput,
+        OutputIncludes,
+        ReproFileSystem,
+        SerialIr,    // bool
+        SkipCodeGen, // bool
+        ValidateIr,  // bool
+        VerbosePaths,
+        VerifyDebugSerialIr,
+        NoCodeGen, // Not used.
+
+        // Experimental
+
+        FileSystem,
+        Heterogeneous,
+        NoMangle,
+        NoHLSLBinding,
+        NoHLSLPackConstantBufferElements,
+        ValidateUniformity,
+        AllowGLSL,
+        EnableExperimentalPasses,
+        BindlessSpaceIndex, // int
+
+        // Internal
+
+        ArchiveType,
+        CompileCoreModule,
+        Doc,
+        IrCompression,
+        LoadCoreModule,
+        ReferenceModule,
+        SaveCoreModule,
+        SaveCoreModuleBinSource,
+        TrackLiveness,
+        LoopInversion, // bool, enable loop inversion optimization
+
+        // Deprecated
+        ParameterBlocksUseRegisterSpaces,
+
+        CountOfParsableOptions, // Number of enumerators above this point.
+
+        // Used in parsed options only.
+        DebugInformationFormat,  // intValue0: DebugInfoFormat
+        VulkanBindShiftAll,      // intValue0: kind; intValue1: shift
+        GenerateWholeProgram,    // bool
+        UseUpToDateBinaryModule, // bool, when set, will only load
+                                 // precompiled modules if it is up-to-date with its source.
+        EmbedDownstreamIR,       // bool
+        ForceDXLayout,           // bool
+
+        // Add this new option to the end of the list to avoid breaking ABI as much as possible.
+        // Setting of EmitSpirvDirectly or EmitSpirvViaGLSL will turn into this option internally.
+        EmitSpirvMethod, // enum SlangEmitSpirvMethod
+
+        EmitReflectionJSON, // bool
+        SaveGLSLModuleBinSource,
+
+        SkipDownstreamLinking, // bool, experimental
+        DumpModule,
+        CountOf, // Number of enumerators above (excluding CountOf itself).
+    };
+
+    enum class CompilerOptionValueKind // Type of CompilerOptionValue::kind.
+    {
+        Int,
+        String
+    };
+
+    struct CompilerOptionValue // One option payload: two ints and two strings, defaulting to Int / 0 / nullptr.
+    {
+        CompilerOptionValueKind kind = CompilerOptionValueKind::Int; // presumably selects int vs string fields - confirm
+        int32_t intValue0 = 0;
+        int32_t intValue1 = 0;
+        const char* stringValue0 = nullptr;
+        const char* stringValue1 = nullptr;
+    };
+
+    struct CompilerOptionEntry // Pairs an option name with its value.
+    {
+        CompilerOptionName name;
+        CompilerOptionValue value;
+    };
+    } // namespace slang
+
+    /** A result code for a Slang API operation.
+
+    This type is generally compatible with the Windows API `HRESULT` type. In particular, negative
+    values indicate failure results, while zero or positive results indicate success.
+
+    In general, Slang APIs always return a zero result on success, unless documented otherwise.
+    Strictly speaking a negative value indicates an error, a positive (or 0) value indicates
+    success. This can be tested for with the macros SLANG_SUCCEEDED(x) or SLANG_FAILED(x).
+
+    It can represent if the call was successful or not. It can also specify in an extensible manner
+    what facility produced the result (as the integral 'facility') as well as what caused it (as an
+    integral 'code'). Under the covers SlangResult is represented as an int32_t.
+
+    SlangResult is designed to be compatible with COM HRESULT.
+
+    Its layout in bits is as follows
+
+    Severity | Facility | Code
+    ---------|----------|-----
+    31       |    30-16 | 15-0
+
+    Severity - 1 fail, 0 is success - as SlangResult is signed 32 bits, means negative number
+    indicates failure. Facility is where the error originated from. Code is the code specific to the
+    facility.
+
+    Result codes have the following styles,
+    1) SLANG_name
+    2) SLANG_s_f_name
+    3) SLANG_s_name
+
+    where s is S for success, E for error
+    f is the short version of the facility name
+
+    Style 1 is reserved for SLANG_OK and SLANG_FAIL as they are so commonly used.
+
+    It is acceptable to expand 'f' to a longer name to differentiate a name or drop if unique
+    without it. ie for a facility 'DRIVER' it might make sense to have an error of the form
+    SLANG_E_DRIVER_OUT_OF_MEMORY
+    */
+
+    typedef int32_t SlangResult;
+
+    //! Use to test if a result was failure. Never use result != SLANG_OK to test for failure, as
+    //! there may be successful codes != SLANG_OK.
+#define SLANG_FAILED(status) ((status) < 0)
+    //! Use to test if a result succeeded. Never use result == SLANG_OK to test for success, as will
+    //! detect other successful codes as a failure.
+#define SLANG_SUCCEEDED(status) ((status) >= 0)
+
+    //! Get the facility the result is associated with (bits 30-16)
+#define SLANG_GET_RESULT_FACILITY(r) ((int32_t)(((r) >> 16) & 0x7fff))
+    //! Get the result code for the facility (bits 15-0)
+#define SLANG_GET_RESULT_CODE(r) ((int32_t)((r) & 0xffff))
+
+#define SLANG_MAKE_ERROR(fac, code) \
+    ((((int32_t)(fac)) << 16) | ((int32_t)(code)) | int32_t(0x80000000))
+#define SLANG_MAKE_SUCCESS(fac, code) ((((int32_t)(fac)) << 16) | ((int32_t)(code)))
+
+    /*************************** Facilities ************************************/
+
+    //! Facilities compatible with windows COM - only use if known code is compatible
+#define SLANG_FACILITY_WIN_GENERAL 0
+#define SLANG_FACILITY_WIN_INTERFACE 4
+#define SLANG_FACILITY_WIN_API 7
+
+    //! Base facility -> so as to not clash with HRESULT values (values in 0x200 range do not appear
+    //! used)
+#define SLANG_FACILITY_BASE 0x200
+
+    /*! Facility numbers must be unique across a project to make the resulting result a unique
+    number. It can be useful to have a consistent short name for a facility, as used in the name
+    prefix */
+#define SLANG_FACILITY_CORE SLANG_FACILITY_BASE
+    /* Facility for codes, that are not uniquely defined/protected. Can be used to pass back a
+    specific error without requiring system wide facility uniqueness. Codes should never be part of
+    a public API. Parenthesized so the macro stays a single value inside larger expressions. */
+#define SLANG_FACILITY_INTERNAL (SLANG_FACILITY_BASE + 1)
+
+    /// Base for external facilities. Facilities should be unique across modules.
+#define SLANG_FACILITY_EXTERNAL_BASE 0x210
+
+    /* ************************ Win COM compatible Results ******************************/
+    // https://msdn.microsoft.com/en-us/library/windows/desktop/aa378137(v=vs.85).aspx
+
+    //! SLANG_OK indicates success, and is equivalent to
+    //! SLANG_MAKE_SUCCESS(SLANG_FACILITY_WIN_GENERAL, 0)
+#define SLANG_OK 0
+    //! SLANG_FAIL is the generic failure code - meaning a serious error occurred and the call
+    //! couldn't complete
+#define SLANG_FAIL SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_GENERAL, 0x4005)
+
+#define SLANG_MAKE_WIN_GENERAL_ERROR(code) SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_GENERAL, code)
+
+    //! Functionality is not implemented
+#define SLANG_E_NOT_IMPLEMENTED SLANG_MAKE_WIN_GENERAL_ERROR(0x4001)
+    //! Interface not be found
+#define SLANG_E_NO_INTERFACE SLANG_MAKE_WIN_GENERAL_ERROR(0x4002)
+    //! Operation was aborted (did not correctly complete)
+#define SLANG_E_ABORT SLANG_MAKE_WIN_GENERAL_ERROR(0x4004)
+
+    //! Indicates that a handle passed in as parameter to a method is invalid.
+#define SLANG_E_INVALID_HANDLE SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_API, 6)
+    //! Indicates that an argument passed in as parameter to a method is invalid.
+#define SLANG_E_INVALID_ARG SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_API, 0x57)
+    //! Operation could not complete - ran out of memory
+#define SLANG_E_OUT_OF_MEMORY SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_API, 0xe)
+
+    /* *************************** other Results **************************************/
+
+#define SLANG_MAKE_CORE_ERROR(code) SLANG_MAKE_ERROR(SLANG_FACILITY_CORE, code)
+
+    // Supplied buffer is too small to be able to complete
+#define SLANG_E_BUFFER_TOO_SMALL SLANG_MAKE_CORE_ERROR(1)
+    //! Used to identify a Result that has yet to be initialized.
+    //! It defaults to failure so that incorrect use fails, similar in concept to
+    //! using an uninitialized variable.
+#define SLANG_E_UNINITIALIZED SLANG_MAKE_CORE_ERROR(2)
+    //! Returned from an async method meaning the output is invalid (thus an error), but a result
+    //! for the request is pending, and will be returned on a subsequent call with the async handle.
+#define SLANG_E_PENDING SLANG_MAKE_CORE_ERROR(3)
+    //! Indicates a file/resource could not be opened
+#define SLANG_E_CANNOT_OPEN SLANG_MAKE_CORE_ERROR(4)
+    //! Indicates a file/resource could not be found
+#define SLANG_E_NOT_FOUND SLANG_MAKE_CORE_ERROR(5)
+    //! An unhandled internal failure (typically from unhandled exception)
+#define SLANG_E_INTERNAL_FAIL SLANG_MAKE_CORE_ERROR(6)
+    //! Could not complete because some underlying feature (hardware or software) was not available
+#define SLANG_E_NOT_AVAILABLE SLANG_MAKE_CORE_ERROR(7)
+    //! Could not complete because the operation timed out.
+#define SLANG_E_TIME_OUT SLANG_MAKE_CORE_ERROR(8)
+
+    /** A "Universally Unique Identifier" (UUID)
+
+    The Slang API uses UUIDs to identify interfaces when
+    using `queryInterface`.
+
+    This type is compatible with the `GUID` type defined
+    by the Component Object Model (COM), but Slang is
+    not dependent on COM.
+    */
+    struct SlangUUID
+    {
+        uint32_t data1;   ///< 32-bit first group; argument `a` of SLANG_COM_INTERFACE / SLANG_CLASS_GUID.
+        uint16_t data2;   ///< 16-bit second group; argument `b` of those macros.
+        uint16_t data3;   ///< 16-bit third group; argument `c` of those macros.
+        uint8_t data4[8]; ///< Final eight bytes; arguments `d0`..`d7` of those macros.
+    };
+
+// Place at the start of an interface declaration to give it a GUID.
+// Specify the GUID as SLANG_COM_INTERFACE(0x00000000, 0x0000, 0x0000, { 0xC0, 0x00, 0x00,
+// 0x00, 0x00, 0x00, 0x00, 0x46 }). NOTE: this is the typical GUID struct initializer without
+// the surrounding {}. Passing the fields as separate macro parameters is not strictly necessary
+// (the GUID could be wrapped in parens instead), but this is simple.
+// The macro also switches the member access level to `public:`.
+#define SLANG_COM_INTERFACE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+public:                                                              \
+    SLANG_FORCE_INLINE constexpr static SlangUUID getTypeGuid()      \
+    {                                                                \
+        return {a, b, c, d0, d1, d2, d3, d4, d5, d6, d7};            \
+    }
+
+// Sometimes it's useful to associate a guid with a class to identify it. This macro can be used
+// for this, and the guid extracted via the getTypeGuid() function defined in the type.
+#define SLANG_CLASS_GUID(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+    SLANG_FORCE_INLINE constexpr static SlangUUID getTypeGuid()   \
+    {                                                             \
+        return {a, b, c, d0, d1, d2, d3, d4, d5, d6, d7};         \
+    }
+
+// Helper to fill in pairs of GUIDs and return pointers. This ensures that the
+// type of the GUID passed matches the pointer type, and that it is derived
+// from ISlangUnknown.
+// TODO(c++20): would is_derived_from be more appropriate here for private inheritance of
+// ISlangUnknown?
+//
+// with     : void createFoo(SlangUUID, void**);
+//            Slang::ComPtr<Bar> myBar;
+// call with: createFoo(SLANG_IID_PPV_ARGS(myBar.writeRef()))
+// to call  : createFoo(Bar::getTypeGuid(), (void**)(myBar.writeRef()))
+#define SLANG_IID_PPV_ARGS(ppType)                                                         \
+    std::decay_t<decltype(**(ppType))>::getTypeGuid(),                                     \
+        (                                                                                  \
+            (void)[] {                                                                     \
+                static_assert(                                                             \
+                    std::is_base_of_v<ISlangUnknown, std::decay_t<decltype(**(ppType))>>); \
+            },                                                                             \
+            reinterpret_cast<void**>(ppType))
+
+
+    /** Base interface for components exchanged through the API.
+
+    This interface definition is compatible with the COM `IUnknown`,
+    and uses the same UUID, but Slang does not require applications
+    to use or initialize COM.
+    */
+    struct ISlangUnknown
+    {
+        SLANG_COM_INTERFACE(
+            0x00000000,
+            0x0000,
+            0x0000,
+            {0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46})
+
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+        queryInterface(SlangUUID const& uuid, void** outObject) = 0;
+        virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() = 0;
+        virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() = 0;
+
+        /*
+        Inline methods are provided to allow the above operations to be called
+        using their traditional COM names/signatures:
+        */
+        SlangResult QueryInterface(struct _GUID const& uuid, void** outObject)
+        {
+            return queryInterface(*(SlangUUID const*)&uuid, outObject);
+        }
+        uint32_t AddRef() { return addRef(); }
+        uint32_t Release() { return release(); }
+    };
+#define SLANG_UUID_ISlangUnknown ISlangUnknown::getTypeGuid()
+
+
+    /* An interface to provide a mechanism to cast, that doesn't require ref counting
+    and doesn't have to return a pointer to an ISlangUnknown derived class */
+    class ISlangCastable : public ISlangUnknown
+    {
+        SLANG_COM_INTERFACE(
+            0x87ede0e1,
+            0x4852,
+            0x44b0,
+            {0x8b, 0xf2, 0xcb, 0x31, 0x87, 0x4d, 0xe2, 0x39});
+
+        /// Can be used to cast to interfaces without reference counting.
+        /// Also provides access to internal implementations, when they provide a guid.
+        /// Can simulate a 'generated' interface as long as the object cast from is kept in scope.
+        virtual SLANG_NO_THROW void* SLANG_MCALL castAs(const SlangUUID& guid) = 0;
+    };
+
+    class ISlangClonable : public ISlangCastable
+    {
+        SLANG_COM_INTERFACE(
+            0x1ec36168,
+            0xe9f4,
+            0x430d,
+            {0xbb, 0x17, 0x4, 0x8a, 0x80, 0x46, 0xb3, 0x1f});
+
+        /// Note the use of guid is for the desired interface/object.
+        /// The object is returned *not* ref counted. Any type that implements this interface
+        /// derives from ICastable, and so (notwithstanding some other issue) will always return
+        /// an ICastable interface from which other interfaces/types are accessible via castAs.
+        SLANG_NO_THROW virtual void* SLANG_MCALL clone(const SlangUUID& guid) = 0;
+    };
+
+    /** A "blob" of binary data.
+
+    This interface definition is compatible with the `ID3DBlob` and `ID3D10Blob` interfaces.
+    */
+    struct ISlangBlob : public ISlangUnknown
+    {
+        SLANG_COM_INTERFACE(
+            0x8BA5FB08,
+            0x5195,
+            0x40e2,
+            {0xAC, 0x58, 0x0D, 0x98, 0x9C, 0x3A, 0x01, 0x02})
+
+        virtual SLANG_NO_THROW void const* SLANG_MCALL getBufferPointer() = 0;
+        virtual SLANG_NO_THROW size_t SLANG_MCALL getBufferSize() = 0;
+    };
+#define SLANG_UUID_ISlangBlob ISlangBlob::getTypeGuid()
+
+    /* Can be requested from ISlangCastable cast to indicate the contained chars are null
+     * terminated.
+     */
+    struct SlangTerminatedChars
+    {
+        SLANG_CLASS_GUID(
+            0xbe0db1a8,
+            0x3594,
+            0x4603,
+            {0xa7, 0x8b, 0xc4, 0x86, 0x84, 0x30, 0xdf, 0xbb});
+        operator const char*() const { return chars; }
+        char chars[1];
+    };
+
+    /** A (real or virtual) file system.
+
+    Slang can make use of this interface whenever it would otherwise try to load files
+    from disk, allowing applications to hook and/or override filesystem access from
+    the compiler.
+
+    It is the responsibility of
+    the caller of any method that returns a ISlangBlob to release the blob when it is no
+    longer used (using 'release').
+    */
+
+    struct ISlangFileSystem : public ISlangCastable
+    {
+        SLANG_COM_INTERFACE(
+            0x003A09FC,
+            0x3A4D,
+            0x4BA0,
+            {0xAD, 0x60, 0x1F, 0xD8, 0x63, 0xA9, 0x15, 0xAB})
+
+        /** Load a file from `path` and return a blob of its contents
+        @param path The path to load from, as a null-terminated UTF-8 string.
+        @param outBlob A destination pointer to receive the blob of the file contents.
+        @returns A `SlangResult` to indicate success or failure in loading the file.
+
+        NOTE! This is a *binary* load - the blob should contain the exact same bytes
+        as are found in the backing file.
+
+        If load is successful, the implementation should create a blob to hold
+        the file's content, store it to `outBlob`, and return 0.
+        If the load fails, the implementation should return a failure status
+        (any negative value will do).
+        */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+        loadFile(char const* path, ISlangBlob** outBlob) = 0;
+    };
+#define SLANG_UUID_ISlangFileSystem ISlangFileSystem::getTypeGuid()
+
+
+    typedef void (*SlangFuncPtr)(void);
+
+    /**
+    (DEPRECATED) ISlangSharedLibrary
+    */
+    struct ISlangSharedLibrary_Dep1 : public ISlangUnknown
+    {
+        SLANG_COM_INTERFACE(
+            0x9c9d5bc5,
+            0xeb61,
+            0x496f,
+            {0x80, 0xd7, 0xd1, 0x47, 0xc4, 0xa2, 0x37, 0x30})
+
+        virtual SLANG_NO_THROW void* SLANG_MCALL findSymbolAddressByName(char const* name) = 0;
+    };
+#define SLANG_UUID_ISlangSharedLibrary_Dep1 ISlangSharedLibrary_Dep1::getTypeGuid()
+
+    /** An interface that can be used to encapsulate access to a shared library. An implementation
+    does not have to implement the library as a shared library
+    */
+    struct ISlangSharedLibrary : public ISlangCastable
+    {
+        SLANG_COM_INTERFACE(
+            0x70dbc7c4,
+            0xdc3b,
+            0x4a07,
+            {0xae, 0x7e, 0x75, 0x2a, 0xf6, 0xa8, 0x15, 0x55})
+
+        /** Get a function by name. If the library is unloaded will only return nullptr.
+        @param name The name of the function
+        @return The function pointer related to the name or nullptr if not found
+        */
+        SLANG_FORCE_INLINE SlangFuncPtr findFuncByName(char const* name)
+        {
+            return (SlangFuncPtr)findSymbolAddressByName(name);
+        }
+
+        /** Get a symbol by name. If the library is unloaded will only return nullptr.
+        @param name The name of the symbol
+        @return The pointer related to the name or nullptr if not found
+        */
+        virtual SLANG_NO_THROW void* SLANG_MCALL findSymbolAddressByName(char const* name) = 0;
+    };
+#define SLANG_UUID_ISlangSharedLibrary ISlangSharedLibrary::getTypeGuid()
+
+    struct ISlangSharedLibraryLoader : public ISlangUnknown
+    {
+        SLANG_COM_INTERFACE(
+            0x6264ab2b,
+            0xa3e8,
+            0x4a06,
+            {0x97, 0xf1, 0x49, 0xbc, 0x2d, 0x2a, 0xb1, 0x4d})
+
+        /** Load a shared library. In typical usage the library name should *not* contain any
+        platform specific elements. For example on windows a dll name should *not* be passed with a
+        '.dll' extension, and similarly on linux a shared library should *not* be passed with the
+        'lib' prefix and '.so' extension
+        @param path The unadorned filename and/or path for the shared library
+        @param sharedLibraryOut Holds the shared library if successfully loaded */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+        loadSharedLibrary(const char* path, ISlangSharedLibrary** sharedLibraryOut) = 0;
+    };
+#define SLANG_UUID_ISlangSharedLibraryLoader ISlangSharedLibraryLoader::getTypeGuid()
+
+    /* Type that identifies how a path should be interpreted */
+    typedef unsigned int SlangPathTypeIntegral;
+    enum SlangPathType : SlangPathTypeIntegral
+    {
+        SLANG_PATH_TYPE_DIRECTORY, /**< Path specified specifies a directory. */
+        SLANG_PATH_TYPE_FILE,      /**< Path specified is to a file. */
+    };
+
+    /* Callback to enumerate the contents of a directory in an ISlangFileSystemExt.
+    The name is the name of a file system object (directory/file) in the specified path (ie it is
+    without a path) */
+    typedef void (
+        *FileSystemContentsCallBack)(SlangPathType pathType, const char* name, void* userData);
+
+    /* Determines how paths map to files on the OS file system */
+    enum class OSPathKind : uint8_t
+    {
+        None,            ///< Paths do not map to the file system
+        Direct,          ///< Paths map directly to the file system
+        OperatingSystem, ///< Only paths gained via PathKind::OperatingSystem map to the operating
+                         ///< system file system
+    };
+
+    /* Used to determine what kind of path is required from an input path */
+    enum class PathKind
+    {
+        /// Given a path, returns a simplified version of that path.
+        /// This typically means removing '..' and/or '.' from the path.
+        /// A simplified path must point to the same object as the original.
+        Simplified,
+
+        /// Given a path, returns a 'canonical path' to the item.
+        /// This may be the operating system 'canonical path' that is the unique path to the item.
+        ///
+        /// If the item exists the returned canonical path should always be usable to access the
+        /// item.
+        ///
+        /// If the item the path specifies doesn't exist, the canonical path may not be returnable
+        /// or be a path simplification.
+        /// Not all file systems support canonical paths.
+        Canonical,
+
+        /// Given a path returns a path such that it is suitable to be displayed to the user.
+        ///
+        /// For example if the file system is a zip file - it might include the path to the zip
+        /// container as well as the path to the specific file.
+        ///
+        /// NOTE! The display path won't necessarily work on the file system to access the item
+        Display,
+
+        /// Get the path to the item on the *operating system* file system, if available.
+        OperatingSystem,
+
+        CountOf,
+    };
+
+    /** An extended file system abstraction.
+
+    Implementing and using this interface over ISlangFileSystem gives much more control over how
+    paths are managed, as well as how it is determined if two files 'are the same'.
+
+    All paths as input char*, or output as ISlangBlobs are always encoded as UTF-8 strings.
+    Blobs that contain strings are always zero terminated.
+    */
+    struct ISlangFileSystemExt : public ISlangFileSystem
+    {
+        SLANG_COM_INTERFACE(
+            0x5fb632d2,
+            0x979d,
+            0x4481,
+            {0x9f, 0xee, 0x66, 0x3c, 0x3f, 0x14, 0x49, 0xe1})
+
+        /** Get a uniqueIdentity which uniquely identifies an object of the file system.
+
+        Given a path, returns a 'uniqueIdentity' which ideally is the same value for the same object
+        on the file system.
+
+        The uniqueIdentity is used to compare if two paths are the same - which amongst other things
+        allows Slang to cache source contents internally. It is also used for #pragma once
+        functionality.
+
+        A *requirement* for any implementation is that two paths can only return the same
+        uniqueIdentity if the contents of the two files are *identical*. If an implementation breaks
+        this constraint it can produce incorrect compilation. If an implementation cannot *strictly*
+        identify *the same* files, this will only have an effect on #pragma once behavior.
+
+        The string for the uniqueIdentity is held zero terminated in the ISlangBlob of
+        outUniqueIdentity.
+
+        Note that there are many ways a uniqueIdentity may be generated for a file. For example it
+        could be the 'canonical path' - assuming it is available and unambiguous for a file system.
+        Another possible mechanism could be to store the filename combined with the file date time
+        to uniquely identify it.
+
+        The client must ensure the blob be released when no longer used, otherwise memory will leak.
+
+        NOTE! Ideally this method would be called 'getPathUniqueIdentity' but for historical reasons
+        and backward compatibility its name remains with 'File' even though an implementation
+        should be made to work with directories too.
+
+        @param path
+        @param outUniqueIdentity
+        @returns A `SlangResult` to indicate success or failure getting the uniqueIdentity.
+        */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+        getFileUniqueIdentity(const char* path, ISlangBlob** outUniqueIdentity) = 0;
+
+        /** Calculate a path combining the 'fromPath' with 'path'
+
+        The client must ensure the blob be released when no longer used, otherwise memory will leak.
+
+        @param fromPathType How to interpret the from path - as a file or a directory.
+        @param fromPath The from path.
+        @param path Path to be determined relative to the fromPath
+        @param pathOut Holds the string which is the relative path. The string is held in the blob
+        zero terminated.
+        @returns A `SlangResult` to indicate success or failure in calculating the combined path.
+        */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL calcCombinedPath(
+            SlangPathType fromPathType,
+            const char* fromPath,
+            const char* path,
+            ISlangBlob** pathOut) = 0;
+
+        /** Gets the type of path that path is on the file system.
+        @param path
+        @param pathTypeOut
+        @returns SLANG_OK if located and type is known, else an error. SLANG_E_NOT_FOUND if not
+        found.
+        */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+        getPathType(const char* path, SlangPathType* pathTypeOut) = 0;
+
+        /** Get a path based on the kind.
+
+        @param kind The kind of path wanted
+        @param path The input path
+        @param outPath The output path held in a blob
+        @returns SLANG_OK if the path of the requested kind was produced (SLANG_E_NOT_IMPLEMENTED if
+        not implemented, or some other error code)
+        */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+        getPath(PathKind kind, const char* path, ISlangBlob** outPath) = 0;
+
+        /** Clears any cached information */
+        virtual SLANG_NO_THROW void SLANG_MCALL clearCache() = 0;
+
+        /** Enumerate the contents of the path
+
+        Note that for normal Slang operation it isn't necessary to enumerate contents, so this
+        can return SLANG_E_NOT_IMPLEMENTED.
+
+        @param path The path to enumerate
+        @param callback This callback is called for each entry in the path.
+        @param userData This is passed to the callback
+        @returns SLANG_OK if successful
+        */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL enumeratePathContents(
+            const char* path,
+            FileSystemContentsCallBack callback,
+            void* userData) = 0;
+
+        /** Returns how paths map to the OS file system
+
+        @returns OSPathKind that describes how paths map to the Operating System file system
+        */
+        virtual SLANG_NO_THROW OSPathKind SLANG_MCALL getOSPathKind() = 0;
+    };
+
+#define SLANG_UUID_ISlangFileSystemExt ISlangFileSystemExt::getTypeGuid()
+
+    struct ISlangMutableFileSystem : public ISlangFileSystemExt
+    {
+        SLANG_COM_INTERFACE(
+            0xa058675c,
+            0x1d65,
+            0x452a,
+            {0x84, 0x58, 0xcc, 0xde, 0xd1, 0x42, 0x71, 0x5})
+
+        /** Write data to the specified path.
+
+        @param path The path for data to be saved to
+        @param data The data to be saved
+        @param size The size of the data in bytes
+        @returns SLANG_OK if successful (SLANG_E_NOT_IMPLEMENTED if not implemented, or some other
+        error code)
+        */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+        saveFile(const char* path, const void* data, size_t size) = 0;
+
+        /** Write data in the form of a blob to the specified path.
+
+        Depending on the implementation writing a blob might be faster/use less memory. It is
+        assumed the blob is *immutable* and that an implementation can reference count it.
+
+        It is not guaranteed loading the same file will return the *same* blob - just a blob with
+        same contents.
+
+        @param path The path for data to be saved to
+        @param dataBlob The data to be saved
+        @returns SLANG_OK if successful (SLANG_E_NOT_IMPLEMENTED if not implemented, or some other
+        error code)
+        */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+        saveFileBlob(const char* path, ISlangBlob* dataBlob) = 0;
+
+        /** Remove the entry in the path (directory or file). Will only delete an empty directory,
+        if not empty will return an error.
+
+        @param path The path to remove
+        @returns SLANG_OK if successful
+        */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL remove(const char* path) = 0;
+
+        /** Create a directory.
+
+        The parent of the directory to create must already exist.
+
+        @param path To the directory to create. The parent path *must* exist otherwise will return
+        an error.
+        @returns SLANG_OK if successful
+        */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL createDirectory(const char* path) = 0;
+    };
+
+#define SLANG_UUID_ISlangMutableFileSystem ISlangMutableFileSystem::getTypeGuid()
+
+    /* Identifies different types of writer target*/
+    typedef unsigned int SlangWriterChannelIntegral;
+    enum SlangWriterChannel : SlangWriterChannelIntegral
+    {
+        SLANG_WRITER_CHANNEL_DIAGNOSTIC,
+        SLANG_WRITER_CHANNEL_STD_OUTPUT,
+        SLANG_WRITER_CHANNEL_STD_ERROR,
+        SLANG_WRITER_CHANNEL_COUNT_OF,
+    };
+
+    typedef unsigned int SlangWriterModeIntegral;
+    enum SlangWriterMode : SlangWriterModeIntegral
+    {
+        SLANG_WRITER_MODE_TEXT,
+        SLANG_WRITER_MODE_BINARY,
+    };
+
+    /** A stream typically of text, used for outputting diagnostic as well as other information.
+     */
+    struct ISlangWriter : public ISlangUnknown
+    {
+        SLANG_COM_INTERFACE(
+            0xec457f0e,
+            0x9add,
+            0x4e6b,
+            {0x85, 0x1c, 0xd7, 0xfa, 0x71, 0x6d, 0x15, 0xfd})
+
+        /** Begin an append buffer.
+        NOTE! Only one append buffer can be active at any time.
+        @param maxNumChars The maximum of chars that will be appended
+        @returns The start of the buffer for appending to. */
+        virtual SLANG_NO_THROW char* SLANG_MCALL beginAppendBuffer(size_t maxNumChars) = 0;
+        /** Ends the append buffer, and is equivalent to a write of the append buffer.
+        NOTE! An endAppendBuffer is not necessary if there are no characters to write.
+        @param buffer is the start of the data to append and must be identical to last value
+        returned from beginAppendBuffer
+        @param numChars must be less than or equal to the maxNumChars passed to the last call to
+        beginAppendBuffer
+        @returns Result, will be SLANG_OK on success */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+        endAppendBuffer(char* buffer, size_t numChars) = 0;
+        /** Write text to the writer
+        @param chars The characters to write out
+        @param numChars The amount of characters
+        @returns SLANG_OK on success */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+        write(const char* chars, size_t numChars) = 0;
+        /** Flushes any content to the output */
+        virtual SLANG_NO_THROW void SLANG_MCALL flush() = 0;
+        /** Determines if the writer stream is to the console, and can be used to alter the output
+        @returns Returns true if is a console writer */
+        virtual SLANG_NO_THROW SlangBool SLANG_MCALL isConsole() = 0;
+        /** Set the mode for the writer to use
+        @param mode The mode to use
+        @returns SLANG_OK on success */
+        virtual SLANG_NO_THROW SlangResult SLANG_MCALL setMode(SlangWriterMode mode) = 0;
+    };
+
+#define SLANG_UUID_ISlangWriter ISlangWriter::getTypeGuid()
+
+    struct ISlangProfiler : public ISlangUnknown
+    {
+        SLANG_COM_INTERFACE(
+            0x197772c7,
+            0x0155,
+            0x4b91,
+            {0x84, 0xe8, 0x66, 0x68, 0xba, 0xff, 0x06, 0x19})
+        virtual SLANG_NO_THROW size_t SLANG_MCALL getEntryCount() = 0;
+        virtual SLANG_NO_THROW const char* SLANG_MCALL getEntryName(uint32_t index) = 0;
+        virtual SLANG_NO_THROW long SLANG_MCALL getEntryTimeMS(uint32_t index) = 0;
+        virtual SLANG_NO_THROW uint32_t SLANG_MCALL getEntryInvocationTimes(uint32_t index) = 0;
+    };
+#define SLANG_UUID_ISlangProfiler ISlangProfiler::getTypeGuid()
+
+    namespace slang
+    {
+    struct IGlobalSession;
+    struct ICompileRequest;
+
+    } // namespace slang
+
+    /*!
+    @brief An instance of the Slang library.
+    */
+    typedef slang::IGlobalSession SlangSession;
+
+
+    typedef struct SlangProgramLayout SlangProgramLayout;
+
+    /*!
+    @brief A request for one or more compilation actions to be performed.
+    */
+    typedef struct slang::ICompileRequest SlangCompileRequest;
+
+
+    /*!
+    @brief Callback type used for diagnostic output.
+    */
+    typedef void (*SlangDiagnosticCallback)(char const* message, void* userData);
+
+    /*!
+    @brief Get the build version 'tag' string. The string is the same as
+    produced via `git describe --tags --match v*` for the project. If such a
+    version could not be determined at build time then the contents will be
+    0.0.0-unknown. Any string can be set by passing
+    -DSLANG_VERSION_FULL=whatever during the cmake invocation.
+
+    This function will return exactly the same result as the method
+    getBuildTagString on IGlobalSession.
+
+    An advantage of using this function over the method is that doing so does
+    not require the creation of a session, which can be a fairly costly
+    operation.
+
+    @return The build tag string
+    */
+    SLANG_API const char* spGetBuildTagString();
+
+    /*
+    Forward declarations of types used in the reflection interface;
+    */
+
+    typedef struct SlangProgramLayout SlangProgramLayout;
+    typedef struct SlangEntryPoint SlangEntryPoint;
+    typedef struct SlangEntryPointLayout SlangEntryPointLayout;
+
+    typedef struct SlangReflectionDecl SlangReflectionDecl;
+    typedef struct SlangReflectionModifier SlangReflectionModifier;
+    typedef struct SlangReflectionType SlangReflectionType;
+    typedef struct SlangReflectionTypeLayout SlangReflectionTypeLayout;
+    typedef struct SlangReflectionVariable SlangReflectionVariable;
+    typedef struct SlangReflectionVariableLayout SlangReflectionVariableLayout;
+    typedef struct SlangReflectionTypeParameter SlangReflectionTypeParameter;
+    typedef struct SlangReflectionUserAttribute SlangReflectionUserAttribute;
+    typedef SlangReflectionUserAttribute SlangReflectionAttribute;
+    typedef struct SlangReflectionFunction SlangReflectionFunction;
+    typedef struct SlangReflectionGeneric SlangReflectionGeneric;
+
+    union SlangReflectionGenericArg
+    {
+        SlangReflectionType* typeVal;
+        int64_t intVal;
+        bool boolVal;
+    };
+
+    enum SlangReflectionGenericArgType
+    {
+        SLANG_GENERIC_ARG_TYPE = 0,
+        SLANG_GENERIC_ARG_INT = 1,
+        SLANG_GENERIC_ARG_BOOL = 2
+    };
+
+    /*
+    Type aliases to maintain backward compatibility.
+    */
+    typedef SlangProgramLayout SlangReflection;
+    typedef SlangEntryPointLayout SlangReflectionEntryPoint;
+
+    // type reflection
+
+    typedef unsigned int SlangTypeKindIntegral;
+    enum SlangTypeKind : SlangTypeKindIntegral
+    {
+        SLANG_TYPE_KIND_NONE,
+        SLANG_TYPE_KIND_STRUCT,
+        SLANG_TYPE_KIND_ARRAY,
+        SLANG_TYPE_KIND_MATRIX,
+        SLANG_TYPE_KIND_VECTOR,
+        SLANG_TYPE_KIND_SCALAR,
+        SLANG_TYPE_KIND_CONSTANT_BUFFER,
+        SLANG_TYPE_KIND_RESOURCE,
+        SLANG_TYPE_KIND_SAMPLER_STATE,
+        SLANG_TYPE_KIND_TEXTURE_BUFFER,
+        SLANG_TYPE_KIND_SHADER_STORAGE_BUFFER,
+        SLANG_TYPE_KIND_PARAMETER_BLOCK,
+        SLANG_TYPE_KIND_GENERIC_TYPE_PARAMETER,
+        SLANG_TYPE_KIND_INTERFACE,
+        SLANG_TYPE_KIND_OUTPUT_STREAM,
+        SLANG_TYPE_KIND_MESH_OUTPUT,
+        SLANG_TYPE_KIND_SPECIALIZED,
+        SLANG_TYPE_KIND_FEEDBACK,
+        SLANG_TYPE_KIND_POINTER,
+        SLANG_TYPE_KIND_DYNAMIC_RESOURCE,
+        SLANG_TYPE_KIND_COUNT,
+    };
+
+    typedef unsigned int SlangScalarTypeIntegral;
+    enum SlangScalarType : SlangScalarTypeIntegral
+    {
+        SLANG_SCALAR_TYPE_NONE,
+        SLANG_SCALAR_TYPE_VOID,
+        SLANG_SCALAR_TYPE_BOOL,
+        SLANG_SCALAR_TYPE_INT32,
+        SLANG_SCALAR_TYPE_UINT32,
+        SLANG_SCALAR_TYPE_INT64,
+        SLANG_SCALAR_TYPE_UINT64,
+        SLANG_SCALAR_TYPE_FLOAT16,
+        SLANG_SCALAR_TYPE_FLOAT32,
+        SLANG_SCALAR_TYPE_FLOAT64,
+        SLANG_SCALAR_TYPE_INT8,
+        SLANG_SCALAR_TYPE_UINT8,
+        SLANG_SCALAR_TYPE_INT16,
+        SLANG_SCALAR_TYPE_UINT16,
+        SLANG_SCALAR_TYPE_INTPTR,
+        SLANG_SCALAR_TYPE_UINTPTR
+    };
+
+    // abstract decl reflection
+    typedef unsigned int SlangDeclKindIntegral;
+    enum SlangDeclKind : SlangDeclKindIntegral
+    {
+        SLANG_DECL_KIND_UNSUPPORTED_FOR_REFLECTION,
+        SLANG_DECL_KIND_STRUCT,
+        SLANG_DECL_KIND_FUNC,
+        SLANG_DECL_KIND_MODULE,
+        SLANG_DECL_KIND_GENERIC,
+        SLANG_DECL_KIND_VARIABLE,
+        SLANG_DECL_KIND_NAMESPACE
+    };
+
+#ifndef SLANG_RESOURCE_SHAPE
+    #define SLANG_RESOURCE_SHAPE
+    typedef unsigned int SlangResourceShapeIntegral;
+    enum SlangResourceShape : SlangResourceShapeIntegral
+    { // A base shape in the low 4 bits, optionally OR-ed with modifier flags in the high 4 bits.
+        SLANG_RESOURCE_BASE_SHAPE_MASK = 0x0F, ///< Mask covering the base-shape values (0x00-0x0A).
+
+        SLANG_RESOURCE_NONE = 0x00,
+        // Base shapes: texture kinds.
+        SLANG_TEXTURE_1D = 0x01,
+        SLANG_TEXTURE_2D = 0x02,
+        SLANG_TEXTURE_3D = 0x03,
+        SLANG_TEXTURE_CUBE = 0x04,
+        SLANG_TEXTURE_BUFFER = 0x05,
+        // Remaining base shapes (buffers, unknown, acceleration structure, subpass).
+        SLANG_STRUCTURED_BUFFER = 0x06,
+        SLANG_BYTE_ADDRESS_BUFFER = 0x07,
+        SLANG_RESOURCE_UNKNOWN = 0x08,
+        SLANG_ACCELERATION_STRUCTURE = 0x09,
+        SLANG_TEXTURE_SUBPASS = 0x0A,
+
+        SLANG_RESOURCE_EXT_SHAPE_MASK = 0xF0, ///< Mask covering the four flag bits below.
+        // Modifier flags; each occupies one bit inside SLANG_RESOURCE_EXT_SHAPE_MASK.
+        SLANG_TEXTURE_FEEDBACK_FLAG = 0x10,
+        SLANG_TEXTURE_SHADOW_FLAG = 0x20,
+        SLANG_TEXTURE_ARRAY_FLAG = 0x40,
+        SLANG_TEXTURE_MULTISAMPLE_FLAG = 0x80,
+        // Predefined combinations of one base shape with one or more flags.
+        SLANG_TEXTURE_1D_ARRAY = SLANG_TEXTURE_1D | SLANG_TEXTURE_ARRAY_FLAG,
+        SLANG_TEXTURE_2D_ARRAY = SLANG_TEXTURE_2D | SLANG_TEXTURE_ARRAY_FLAG,
+        SLANG_TEXTURE_CUBE_ARRAY = SLANG_TEXTURE_CUBE | SLANG_TEXTURE_ARRAY_FLAG,
+
+        SLANG_TEXTURE_2D_MULTISAMPLE = SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG,
+        SLANG_TEXTURE_2D_MULTISAMPLE_ARRAY =
+            SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG | SLANG_TEXTURE_ARRAY_FLAG,
+        SLANG_TEXTURE_SUBPASS_MULTISAMPLE = SLANG_TEXTURE_SUBPASS | SLANG_TEXTURE_MULTISAMPLE_FLAG,
+    };
+#endif
+    typedef unsigned int SlangResourceAccessIntegral; // underlying integer type of SlangResourceAccess
+    enum SlangResourceAccess : SlangResourceAccessIntegral // access mode of a resource type; returned by spReflectionType_GetResourceAccess
+    {
+        SLANG_RESOURCE_ACCESS_NONE, // implicit value 0; the following enumerators count up from here
+        SLANG_RESOURCE_ACCESS_READ,
+        SLANG_RESOURCE_ACCESS_READ_WRITE,
+        SLANG_RESOURCE_ACCESS_RASTER_ORDERED,
+        SLANG_RESOURCE_ACCESS_APPEND,
+        SLANG_RESOURCE_ACCESS_CONSUME,
+        SLANG_RESOURCE_ACCESS_WRITE,
+        SLANG_RESOURCE_ACCESS_FEEDBACK, // implicit value 7
+        SLANG_RESOURCE_ACCESS_UNKNOWN = 0x7FFFFFFF, // explicit sentinel, far outside the consecutive range above
+    };
+
+    typedef unsigned int SlangParameterCategoryIntegral; // underlying integer type of SlangParameterCategory
+    enum SlangParameterCategory : SlangParameterCategoryIntegral // enumerators up to SLANG_PARAMETER_CATEGORY_COUNT take consecutive implicit values starting at 0
+    {
+        SLANG_PARAMETER_CATEGORY_NONE,
+        SLANG_PARAMETER_CATEGORY_MIXED,
+        SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER,
+        SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE,
+        SLANG_PARAMETER_CATEGORY_UNORDERED_ACCESS,
+        SLANG_PARAMETER_CATEGORY_VARYING_INPUT,
+        SLANG_PARAMETER_CATEGORY_VARYING_OUTPUT,
+        SLANG_PARAMETER_CATEGORY_SAMPLER_STATE,
+        SLANG_PARAMETER_CATEGORY_UNIFORM,
+        SLANG_PARAMETER_CATEGORY_DESCRIPTOR_TABLE_SLOT,
+        SLANG_PARAMETER_CATEGORY_SPECIALIZATION_CONSTANT,
+        SLANG_PARAMETER_CATEGORY_PUSH_CONSTANT_BUFFER,
+
+        // HLSL register `space`, Vulkan GLSL `set`
+        SLANG_PARAMETER_CATEGORY_REGISTER_SPACE,
+
+        // TODO: Ellie, Both APIs treat mesh outputs as more or less varying output,
+        // Does it deserve to be represented here??
+
+        // A parameter whose type is to be specialized by a global generic type argument
+        SLANG_PARAMETER_CATEGORY_GENERIC,
+
+        SLANG_PARAMETER_CATEGORY_RAY_PAYLOAD,
+        SLANG_PARAMETER_CATEGORY_HIT_ATTRIBUTES,
+        SLANG_PARAMETER_CATEGORY_CALLABLE_PAYLOAD,
+        SLANG_PARAMETER_CATEGORY_SHADER_RECORD,
+
+        // An existential type parameter represents a "hole" that
+        // needs to be filled with a concrete type to enable
+        // generation of specialized code.
+        //
+        // Consider this example:
+        //
+        //      struct MyParams
+        //      {
+        //          IMaterial material;
+        //          ILight lights[3];
+        //      };
+        //
+        // This `MyParams` type introduces two existential type parameters:
+        // one for `material` and one for `lights`. Even though `lights`
+        // is an array, it only introduces one type parameter, because
+        // we need to have a *single* concrete type for all the array
+        // elements to be able to generate specialized code.
+        //
+        SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM,
+
+        // An existential object parameter represents a value
+        // that needs to be passed in to provide data for some
+        // interface-type shader parameter.
+        //
+        // Consider this example:
+        //
+        //      struct MyParams
+        //      {
+        //          IMaterial material;
+        //          ILight lights[3];
+        //      };
+        //
+        // This `MyParams` type introduces four existential object parameters:
+        // one for `material` and three for `lights` (one for each array
+        // element). This is consistent with the number of interface-type
+        // "objects" that are being passed through to the shader.
+        //
+        SLANG_PARAMETER_CATEGORY_EXISTENTIAL_OBJECT_PARAM,
+
+        // The register space offset for the sub-elements that occupy register spaces.
+        SLANG_PARAMETER_CATEGORY_SUB_ELEMENT_REGISTER_SPACE,
+
+        // The input_attachment_index subpass occupancy tracker
+        SLANG_PARAMETER_CATEGORY_SUBPASS,
+
+        // Metal tier-1 argument buffer element [[id]].
+        SLANG_PARAMETER_CATEGORY_METAL_ARGUMENT_BUFFER_ELEMENT,
+
+        // Metal [[attribute]] inputs.
+        SLANG_PARAMETER_CATEGORY_METAL_ATTRIBUTE,
+
+        // Metal [[payload]] inputs
+        SLANG_PARAMETER_CATEGORY_METAL_PAYLOAD,
+
+        // Number of distinct categories declared above; every enumerator after this one is an alias.
+        SLANG_PARAMETER_CATEGORY_COUNT,
+
+        // Aliases for Metal-specific categories.
+        SLANG_PARAMETER_CATEGORY_METAL_BUFFER = SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER,
+        SLANG_PARAMETER_CATEGORY_METAL_TEXTURE = SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE,
+        SLANG_PARAMETER_CATEGORY_METAL_SAMPLER = SLANG_PARAMETER_CATEGORY_SAMPLER_STATE,
+
+        // DEPRECATED:
+        SLANG_PARAMETER_CATEGORY_VERTEX_INPUT = SLANG_PARAMETER_CATEGORY_VARYING_INPUT,
+        SLANG_PARAMETER_CATEGORY_FRAGMENT_OUTPUT = SLANG_PARAMETER_CATEGORY_VARYING_OUTPUT,
+        SLANG_PARAMETER_CATEGORY_COUNT_V1 = SLANG_PARAMETER_CATEGORY_SUBPASS, // equals the number of categories declared before SLANG_PARAMETER_CATEGORY_SUBPASS
+    };
+
+    /** Types of API-managed bindings that a parameter might use.
+
+    `SlangBindingType` represents the distinct types of binding ranges that might be
+    understood by an underlying graphics API or cross-API abstraction layer.
+    Several of the enumeration cases here correspond to cases of `VkDescriptorType`
+    defined by the Vulkan API. Note however that the values of this enumeration
+    are not the same as those of any particular API.
+
+    The `SlangBindingType` enumeration is distinct from `SlangParameterCategory`
+    because `SlangParameterCategory` differentiates the types of parameters for
+    the purposes of layout, where the layout rules of some targets will treat
+    parameters of different types as occupying the same binding space for layout
+    (e.g., in SPIR-V both a `Texture2D` and `SamplerState` use the same space of
+    `binding` indices, and are not allowed to overlap), while those same types
+    map to different types of bindings in the API (e.g., textures and samplers
+    use different `VkDescriptorType` values).
+
+    When you want to answer "what register/binding did this parameter use?" you
+    should use `SlangParameterCategory`.
+
+    When you want to answer "what type of descriptor range should this parameter use?"
+    you should use `SlangBindingType`.
+    */
+    typedef SlangUInt32 SlangBindingTypeIntegral; // underlying integer type of SlangBindingType
+    enum SlangBindingType : SlangBindingTypeIntegral
+    {
+        SLANG_BINDING_TYPE_UNKNOWN = 0,
+        // Base binding types: consecutive implicit values starting at 1.
+        SLANG_BINDING_TYPE_SAMPLER,
+        SLANG_BINDING_TYPE_TEXTURE,
+        SLANG_BINDING_TYPE_CONSTANT_BUFFER,
+        SLANG_BINDING_TYPE_PARAMETER_BLOCK,
+        SLANG_BINDING_TYPE_TYPED_BUFFER,
+        SLANG_BINDING_TYPE_RAW_BUFFER,
+        SLANG_BINDING_TYPE_COMBINED_TEXTURE_SAMPLER,
+        SLANG_BINDING_TYPE_INPUT_RENDER_TARGET,
+        SLANG_BINDING_TYPE_INLINE_UNIFORM_DATA,
+        SLANG_BINDING_TYPE_RAY_TRACING_ACCELERATION_STRUCTURE,
+
+        SLANG_BINDING_TYPE_VARYING_INPUT,
+        SLANG_BINDING_TYPE_VARYING_OUTPUT,
+
+        SLANG_BINDING_TYPE_EXISTENTIAL_VALUE,
+        SLANG_BINDING_TYPE_PUSH_CONSTANT,
+
+        SLANG_BINDING_TYPE_MUTABLE_FLAG = 0x100, // single bit above the base range; OR'ed onto a base type below
+        // NOTE: "TETURE" (sic) is the identifier's actual spelling; slang::BindingType::MutableTexture refers to it by this name.
+        SLANG_BINDING_TYPE_MUTABLE_TETURE =
+            SLANG_BINDING_TYPE_TEXTURE | SLANG_BINDING_TYPE_MUTABLE_FLAG,
+        SLANG_BINDING_TYPE_MUTABLE_TYPED_BUFFER =
+            SLANG_BINDING_TYPE_TYPED_BUFFER | SLANG_BINDING_TYPE_MUTABLE_FLAG,
+        SLANG_BINDING_TYPE_MUTABLE_RAW_BUFFER =
+            SLANG_BINDING_TYPE_RAW_BUFFER | SLANG_BINDING_TYPE_MUTABLE_FLAG,
+
+        SLANG_BINDING_TYPE_BASE_MASK = 0x00FF, // low byte: the base binding type
+        SLANG_BINDING_TYPE_EXT_MASK = 0xFF00, // high byte: extension bits such as SLANG_BINDING_TYPE_MUTABLE_FLAG
+    };
+
+    typedef SlangUInt32 SlangLayoutRulesIntegral; // underlying integer type of SlangLayoutRules
+    enum SlangLayoutRules : SlangLayoutRulesIntegral // selects a layout rule set; how it is consumed is not visible in this part of the header
+    {
+        SLANG_LAYOUT_RULES_DEFAULT, // implicit value 0
+        SLANG_LAYOUT_RULES_METAL_ARGUMENT_BUFFER_TIER_2, // implicit value 1
+    };
+
+    typedef SlangUInt32 SlangModifierIDIntegral; // underlying integer type of SlangModifierID
+    enum SlangModifierID : SlangModifierIDIntegral // modifier identifiers; mirrored one-to-one by slang::Modifier::ID and passed to spReflectionVariable_FindModifier
+    {
+        SLANG_MODIFIER_SHARED, // implicit value 0; the rest count up consecutively
+        SLANG_MODIFIER_NO_DIFF,
+        SLANG_MODIFIER_STATIC,
+        SLANG_MODIFIER_CONST,
+        SLANG_MODIFIER_EXPORT,
+        SLANG_MODIFIER_EXTERN,
+        SLANG_MODIFIER_DIFFERENTIABLE,
+        SLANG_MODIFIER_MUTATING,
+        SLANG_MODIFIER_IN,
+        SLANG_MODIFIER_OUT,
+        SLANG_MODIFIER_INOUT
+    };
+
+    typedef SlangUInt32 SlangImageFormatIntegral; // underlying integer type of SlangImageFormat
+    enum SlangImageFormat : SlangImageFormatIntegral // enumerators are generated: SLANG_FORMAT(NAME, DESC) expands to `SLANG_IMAGE_FORMAT_NAME,` (DESC is ignored here)
+    { // presumably slang-image-format-defs.h consists of SLANG_FORMAT(...) entries -- its content is not visible from this file
+#define SLANG_FORMAT(NAME, DESC) SLANG_IMAGE_FORMAT_##NAME,
+#include "slang-image-format-defs.h"
+#undef SLANG_FORMAT
+    };
+
+#define SLANG_UNBOUNDED_SIZE (~size_t(0))
+    // SLANG_UNBOUNDED_SIZE above is the all-ones size_t value, i.e. the largest value size_t can hold.
+    // Shader Parameter Reflection
+
+    typedef SlangReflectionVariableLayout SlangReflectionParameter; // alias: a "parameter" is the same type as a variable layout
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef __cplusplus
+namespace slang
+{
+struct ISession; // forward declaration only; made visible to C++ translation units before slang-deprecated.h is included
+}
+#endif
+
+#include "slang-deprecated.h"
+
+#ifdef __cplusplus
+
+/* Helper interfaces for C++ users */
+namespace slang
+{
+struct BufferReflection; // forward declarations, so the wrapper types can name each other in pointer signatures before they are defined
+struct DeclReflection;
+struct TypeLayoutReflection;
+struct TypeReflection;
+struct VariableLayoutReflection;
+struct VariableReflection;
+struct FunctionReflection;
+struct GenericReflection;
+
+union GenericArgReflection // untagged union: the three members share storage and nothing here records which one is active
+{
+    TypeReflection* typeVal; // argument expressed as a type
+    int64_t intVal; // argument expressed as a 64-bit signed integer
+    bool boolVal; // argument expressed as a boolean
+};
+
+struct Attribute
+{
+    // Every member reinterprets `this` as the C API's SlangReflectionAttribute handle.
+    char const* getName()
+    {
+        return spReflectionUserAttribute_GetName(reinterpret_cast<SlangReflectionAttribute*>(this));
+    }
+    uint32_t getArgumentCount()
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionAttribute*>(this);
+        return static_cast<uint32_t>(spReflectionUserAttribute_GetArgumentCount(raw));
+    }
+    TypeReflection* getArgumentType(uint32_t index)
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionAttribute*>(this);
+        auto* const argumentType = spReflectionUserAttribute_GetArgumentType(raw, index);
+        return reinterpret_cast<TypeReflection*>(argumentType);
+    }
+    // Passes `index` and `value` straight to the C API and returns its
+    // SlangResult unchanged.
+    SlangResult getArgumentValueInt(uint32_t index, int* value)
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionAttribute*>(this);
+        return spReflectionUserAttribute_GetArgumentValueInt(raw, index, value);
+    }
+    // Float counterpart of getArgumentValueInt.
+    SlangResult getArgumentValueFloat(uint32_t index, float* value)
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionAttribute*>(this);
+        return spReflectionUserAttribute_GetArgumentValueFloat(raw, index, value);
+    }
+    // String counterpart: returns the C API's pointer as-is; `outSize` is
+    // handed to the C API untouched.
+    const char* getArgumentValueString(uint32_t index, size_t* outSize)
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionAttribute*>(this);
+        return spReflectionUserAttribute_GetArgumentValueString(raw, index, outSize);
+    }
+};
+
+using UserAttribute = Attribute; // alternate name for Attribute; both spellings denote the same type
+
+struct TypeReflection
+{
+    enum class Kind // scoped mirror of the SLANG_TYPE_KIND_* constants
+    {
+        None = SLANG_TYPE_KIND_NONE,
+        Struct = SLANG_TYPE_KIND_STRUCT,
+        Array = SLANG_TYPE_KIND_ARRAY,
+        Matrix = SLANG_TYPE_KIND_MATRIX,
+        Vector = SLANG_TYPE_KIND_VECTOR,
+        Scalar = SLANG_TYPE_KIND_SCALAR,
+        ConstantBuffer = SLANG_TYPE_KIND_CONSTANT_BUFFER,
+        Resource = SLANG_TYPE_KIND_RESOURCE,
+        SamplerState = SLANG_TYPE_KIND_SAMPLER_STATE,
+        TextureBuffer = SLANG_TYPE_KIND_TEXTURE_BUFFER,
+        ShaderStorageBuffer = SLANG_TYPE_KIND_SHADER_STORAGE_BUFFER,
+        ParameterBlock = SLANG_TYPE_KIND_PARAMETER_BLOCK,
+        GenericTypeParameter = SLANG_TYPE_KIND_GENERIC_TYPE_PARAMETER,
+        Interface = SLANG_TYPE_KIND_INTERFACE,
+        OutputStream = SLANG_TYPE_KIND_OUTPUT_STREAM,
+        Specialized = SLANG_TYPE_KIND_SPECIALIZED,
+        Feedback = SLANG_TYPE_KIND_FEEDBACK,
+        Pointer = SLANG_TYPE_KIND_POINTER,
+        DynamicResource = SLANG_TYPE_KIND_DYNAMIC_RESOURCE,
+    };
+
+    enum ScalarType : SlangScalarTypeIntegral // unscoped mirror of the SLANG_SCALAR_TYPE_* constants
+    {
+        None = SLANG_SCALAR_TYPE_NONE,
+        Void = SLANG_SCALAR_TYPE_VOID,
+        Bool = SLANG_SCALAR_TYPE_BOOL,
+        Int32 = SLANG_SCALAR_TYPE_INT32,
+        UInt32 = SLANG_SCALAR_TYPE_UINT32,
+        Int64 = SLANG_SCALAR_TYPE_INT64,
+        UInt64 = SLANG_SCALAR_TYPE_UINT64,
+        Float16 = SLANG_SCALAR_TYPE_FLOAT16,
+        Float32 = SLANG_SCALAR_TYPE_FLOAT32,
+        Float64 = SLANG_SCALAR_TYPE_FLOAT64,
+        Int8 = SLANG_SCALAR_TYPE_INT8,
+        UInt8 = SLANG_SCALAR_TYPE_UINT8,
+        Int16 = SLANG_SCALAR_TYPE_INT16,
+        UInt16 = SLANG_SCALAR_TYPE_UINT16,
+    };
+    // Every member below reinterprets `this` as the C API's SlangReflectionType handle.
+    Kind getKind() { return static_cast<Kind>(spReflectionType_GetKind(reinterpret_cast<SlangReflectionType*>(this))); }
+
+    // Meaningful only when `getKind() == Kind::Struct`.
+    unsigned int getFieldCount()
+    {
+        return spReflectionType_GetFieldCount(reinterpret_cast<SlangReflectionType*>(this));
+    }
+    // Field at `index`, re-typed from the C handle to the C++ wrapper.
+    VariableReflection* getFieldByIndex(unsigned int index)
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionType*>(this);
+        return reinterpret_cast<VariableReflection*>(spReflectionType_GetFieldByIndex(raw, index));
+    }
+    // True exactly when the kind is Kind::Array.
+    bool isArray() { return TypeReflection::Kind::Array == getKind(); }
+    // Follows getElementType() until the result is no longer an array.
+    TypeReflection* unwrapArray()
+    {
+        // A non-array type is returned unchanged.
+        TypeReflection* innermost = this;
+        for (; innermost->isArray(); innermost = innermost->getElementType())
+        {
+        }
+        return innermost;
+    }
+
+    // Meaningful only when `getKind() == Kind::Array`.
+    size_t getElementCount()
+    {
+        return spReflectionType_GetElementCount(reinterpret_cast<SlangReflectionType*>(this));
+    }
+    // Product of the element counts of all nested array levels; 0 for non-arrays.
+    size_t getTotalArrayElementCount()
+    {
+        // Anything that is not an array reports zero elements.
+        if (!isArray())
+            return 0;
+
+        // Multiply the element counts of every nested array level,
+        // stopping at the first element type that is not an array.
+        size_t product = 1;
+        for (TypeReflection* level = this; level->isArray(); level = level->getElementType())
+        {
+            product *= level->getElementCount();
+        }
+        return product;
+    }
+    // Element type as reported by spReflectionType_GetElementType.
+    TypeReflection* getElementType()
+    {
+        return reinterpret_cast<TypeReflection*>(spReflectionType_GetElementType(reinterpret_cast<SlangReflectionType*>(this)));
+    }
+    // Row / column counts as reported by the C API.
+    unsigned getRowCount() { return spReflectionType_GetRowCount(reinterpret_cast<SlangReflectionType*>(this)); }
+
+    unsigned getColumnCount()
+    {
+        return spReflectionType_GetColumnCount(reinterpret_cast<SlangReflectionType*>(this));
+    }
+    // Scalar type, converted from the C enum to the nested ScalarType enum.
+    ScalarType getScalarType()
+    {
+        return static_cast<ScalarType>(spReflectionType_GetScalarType(reinterpret_cast<SlangReflectionType*>(this)));
+    }
+    // Result type of a resource, as reported by spReflectionType_GetResourceResultType.
+    TypeReflection* getResourceResultType()
+    {
+        return reinterpret_cast<TypeReflection*>(spReflectionType_GetResourceResultType(reinterpret_cast<SlangReflectionType*>(this)));
+    }
+    // Shape and access are returned as the C enums, without conversion.
+    SlangResourceShape getResourceShape()
+    {
+        return spReflectionType_GetResourceShape(reinterpret_cast<SlangReflectionType*>(this));
+    }
+
+    SlangResourceAccess getResourceAccess()
+    {
+        return spReflectionType_GetResourceAccess(reinterpret_cast<SlangReflectionType*>(this));
+    }
+    // Name string owned by the C API side; returned as-is.
+    char const* getName() { return spReflectionType_GetName(reinterpret_cast<SlangReflectionType*>(this)); }
+    // Full name is delivered through `outNameBlob`; the SlangResult is returned unchanged.
+    SlangResult getFullName(ISlangBlob** outNameBlob)
+    {
+        return spReflectionType_GetFullName(reinterpret_cast<SlangReflectionType*>(this), outNameBlob);
+    }
+    // User attribute enumeration: count, then access by index.
+    unsigned int getUserAttributeCount()
+    {
+        return spReflectionType_GetUserAttributeCount(reinterpret_cast<SlangReflectionType*>(this));
+    }
+
+    UserAttribute* getUserAttributeByIndex(unsigned int index)
+    {
+        return reinterpret_cast<UserAttribute*>(spReflectionType_GetUserAttribute(reinterpret_cast<SlangReflectionType*>(this), index));
+    }
+    // Looks an attribute up by name via spReflectionType_FindUserAttributeByName.
+    UserAttribute* findAttributeByName(char const* name)
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionType*>(this);
+        auto* const attribute = spReflectionType_FindUserAttributeByName(raw, name);
+        return reinterpret_cast<UserAttribute*>(attribute);
+    }
+    // Second spelling of findAttributeByName; simply delegates to it.
+    UserAttribute* findUserAttributeByName(char const* name) { return this->findAttributeByName(name); }
+    // Forwards this type and `generic` to spReflectionType_applySpecializations.
+    TypeReflection* applySpecializations(GenericReflection* generic)
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionType*>(this);
+        auto* const rawGeneric = reinterpret_cast<SlangReflectionGeneric*>(generic);
+        return reinterpret_cast<TypeReflection*>(spReflectionType_applySpecializations(raw, rawGeneric));
+    }
+    // Generic container as reported by spReflectionType_GetGenericContainer.
+    GenericReflection* getGenericContainer()
+    {
+        return reinterpret_cast<GenericReflection*>(spReflectionType_GetGenericContainer(reinterpret_cast<SlangReflectionType*>(this)));
+    }
+};
+
+enum ParameterCategory : SlangParameterCategoryIntegral // C++ names for the SLANG_PARAMETER_CATEGORY_* constants
+{
+    // TODO: this enum is unscoped, so every enumerator below is injected directly into namespace slang.
+    None = SLANG_PARAMETER_CATEGORY_NONE,
+    Mixed = SLANG_PARAMETER_CATEGORY_MIXED,
+    ConstantBuffer = SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER,
+    ShaderResource = SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE,
+    UnorderedAccess = SLANG_PARAMETER_CATEGORY_UNORDERED_ACCESS,
+    VaryingInput = SLANG_PARAMETER_CATEGORY_VARYING_INPUT,
+    VaryingOutput = SLANG_PARAMETER_CATEGORY_VARYING_OUTPUT,
+    SamplerState = SLANG_PARAMETER_CATEGORY_SAMPLER_STATE,
+    Uniform = SLANG_PARAMETER_CATEGORY_UNIFORM,
+    DescriptorTableSlot = SLANG_PARAMETER_CATEGORY_DESCRIPTOR_TABLE_SLOT,
+    SpecializationConstant = SLANG_PARAMETER_CATEGORY_SPECIALIZATION_CONSTANT,
+    PushConstantBuffer = SLANG_PARAMETER_CATEGORY_PUSH_CONSTANT_BUFFER,
+    RegisterSpace = SLANG_PARAMETER_CATEGORY_REGISTER_SPACE,
+    GenericResource = SLANG_PARAMETER_CATEGORY_GENERIC, // note: the C++ name differs from the C constant's suffix
+
+    RayPayload = SLANG_PARAMETER_CATEGORY_RAY_PAYLOAD,
+    HitAttributes = SLANG_PARAMETER_CATEGORY_HIT_ATTRIBUTES,
+    CallablePayload = SLANG_PARAMETER_CATEGORY_CALLABLE_PAYLOAD,
+
+    ShaderRecord = SLANG_PARAMETER_CATEGORY_SHADER_RECORD,
+
+    ExistentialTypeParam = SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM,
+    ExistentialObjectParam = SLANG_PARAMETER_CATEGORY_EXISTENTIAL_OBJECT_PARAM,
+
+    SubElementRegisterSpace = SLANG_PARAMETER_CATEGORY_SUB_ELEMENT_REGISTER_SPACE,
+
+    InputAttachmentIndex = SLANG_PARAMETER_CATEGORY_SUBPASS, // note: the C++ name differs from the C constant's suffix
+    // Metal names. MetalBuffer and MetalTexture go through the C-level Metal aliases, so both follow those aliases if they are ever retargeted.
+    MetalBuffer = SLANG_PARAMETER_CATEGORY_METAL_BUFFER, // same value as ConstantBuffer (the C alias is defined as SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER)
+    MetalTexture = SLANG_PARAMETER_CATEGORY_METAL_TEXTURE,
+    MetalArgumentBufferElement = SLANG_PARAMETER_CATEGORY_METAL_ARGUMENT_BUFFER_ELEMENT,
+    MetalAttribute = SLANG_PARAMETER_CATEGORY_METAL_ATTRIBUTE,
+    MetalPayload = SLANG_PARAMETER_CATEGORY_METAL_PAYLOAD,
+
+    // DEPRECATED:
+    VertexInput = SLANG_PARAMETER_CATEGORY_VERTEX_INPUT,
+    FragmentOutput = SLANG_PARAMETER_CATEGORY_FRAGMENT_OUTPUT,
+};
+
+enum class BindingType : SlangBindingTypeIntegral // scoped C++ mirror of SlangBindingType; each enumerator takes its value from the matching SLANG_BINDING_TYPE_* constant
+{
+    Unknown = SLANG_BINDING_TYPE_UNKNOWN,
+    // Base binding types.
+    Sampler = SLANG_BINDING_TYPE_SAMPLER,
+    Texture = SLANG_BINDING_TYPE_TEXTURE,
+    ConstantBuffer = SLANG_BINDING_TYPE_CONSTANT_BUFFER,
+    ParameterBlock = SLANG_BINDING_TYPE_PARAMETER_BLOCK,
+    TypedBuffer = SLANG_BINDING_TYPE_TYPED_BUFFER,
+    RawBuffer = SLANG_BINDING_TYPE_RAW_BUFFER,
+    CombinedTextureSampler = SLANG_BINDING_TYPE_COMBINED_TEXTURE_SAMPLER,
+    InputRenderTarget = SLANG_BINDING_TYPE_INPUT_RENDER_TARGET,
+    InlineUniformData = SLANG_BINDING_TYPE_INLINE_UNIFORM_DATA,
+    RayTracingAccelerationStructure = SLANG_BINDING_TYPE_RAY_TRACING_ACCELERATION_STRUCTURE,
+    VaryingInput = SLANG_BINDING_TYPE_VARYING_INPUT,
+    VaryingOutput = SLANG_BINDING_TYPE_VARYING_OUTPUT,
+    ExistentialValue = SLANG_BINDING_TYPE_EXISTENTIAL_VALUE,
+    PushConstant = SLANG_BINDING_TYPE_PUSH_CONSTANT,
+
+    MutableFlag = SLANG_BINDING_TYPE_MUTABLE_FLAG,
+    // Base types combined with the mutable flag.
+    MutableTexture = SLANG_BINDING_TYPE_MUTABLE_TETURE, // "TETURE" (sic) is how the C constant is spelled
+    MutableTypedBuffer = SLANG_BINDING_TYPE_MUTABLE_TYPED_BUFFER,
+    MutableRawBuffer = SLANG_BINDING_TYPE_MUTABLE_RAW_BUFFER,
+    // Masks, not binding types.
+    BaseMask = SLANG_BINDING_TYPE_BASE_MASK,
+    ExtMask = SLANG_BINDING_TYPE_EXT_MASK,
+};
+
+struct TypeLayoutReflection // C++ view of a SlangReflectionTypeLayout handle: every member casts `this` to that handle and forwards to the C API or to getType()
+{
+    TypeReflection* getType() // result of spReflectionTypeLayout_GetType, re-typed to the C++ wrapper
+    {
+        return (TypeReflection*)spReflectionTypeLayout_GetType((SlangReflectionTypeLayout*)this);
+    }
+    // Kind as reported by the layout itself (spReflectionTypeLayout_getKind), not via getType().
+    TypeReflection::Kind getKind()
+    {
+        return (TypeReflection::Kind)spReflectionTypeLayout_getKind(
+            (SlangReflectionTypeLayout*)this);
+    }
+    // Size / stride / alignment for an explicit C-API category. No default argument on these overloads.
+    size_t getSize(SlangParameterCategory category)
+    {
+        return spReflectionTypeLayout_GetSize((SlangReflectionTypeLayout*)this, category);
+    }
+
+    size_t getStride(SlangParameterCategory category)
+    {
+        return spReflectionTypeLayout_GetStride((SlangReflectionTypeLayout*)this, category);
+    }
+
+    int32_t getAlignment(SlangParameterCategory category)
+    {
+        return spReflectionTypeLayout_getAlignment((SlangReflectionTypeLayout*)this, category);
+    }
+    // Same three queries taking the C++ enum; `category` defaults to Uniform, so a call with no argument selects these overloads.
+    size_t getSize(slang::ParameterCategory category = slang::ParameterCategory::Uniform)
+    {
+        return spReflectionTypeLayout_GetSize(
+            (SlangReflectionTypeLayout*)this,
+            (SlangParameterCategory)category);
+    }
+
+    size_t getStride(slang::ParameterCategory category = slang::ParameterCategory::Uniform)
+    {
+        return spReflectionTypeLayout_GetStride(
+            (SlangReflectionTypeLayout*)this,
+            (SlangParameterCategory)category);
+    }
+
+    int32_t getAlignment(slang::ParameterCategory category = slang::ParameterCategory::Uniform)
+    {
+        return spReflectionTypeLayout_getAlignment(
+            (SlangReflectionTypeLayout*)this,
+            (SlangParameterCategory)category);
+    }
+
+    // Field enumeration on the layout: count, access by index, and lookup of an index by name.
+    unsigned int getFieldCount()
+    {
+        return spReflectionTypeLayout_GetFieldCount((SlangReflectionTypeLayout*)this);
+    }
+
+    VariableLayoutReflection* getFieldByIndex(unsigned int index)
+    {
+        return (VariableLayoutReflection*)spReflectionTypeLayout_GetFieldByIndex(
+            (SlangReflectionTypeLayout*)this,
+            index);
+    }
+    // `nameEnd` defaults to nullptr; presumably the name is then treated as NUL-terminated -- confirm against the C API.
+    SlangInt findFieldIndexByName(char const* nameBegin, char const* nameEnd = nullptr)
+    {
+        return spReflectionTypeLayout_findFieldIndexByName(
+            (SlangReflectionTypeLayout*)this,
+            nameBegin,
+            nameEnd);
+    }
+    // Variable layout returned by spReflectionTypeLayout_GetExplicitCounter.
+    VariableLayoutReflection* getExplicitCounter()
+    {
+        return (VariableLayoutReflection*)spReflectionTypeLayout_GetExplicitCounter(
+            (SlangReflectionTypeLayout*)this);
+    }
+    // Array-ness is decided by the underlying type, not by the layout.
+    bool isArray() { return getType()->isArray(); }
+    // Follows getElementTypeLayout() until the layout's type is no longer an array.
+    TypeLayoutReflection* unwrapArray()
+    {
+        TypeLayoutReflection* typeLayout = this;
+        while (typeLayout->isArray())
+        {
+            typeLayout = typeLayout->getElementTypeLayout();
+        }
+        return typeLayout;
+    }
+
+    // only useful if `getKind() == Kind::Array`
+    size_t getElementCount() { return getType()->getElementCount(); }
+    // Delegates to TypeReflection::getTotalArrayElementCount on the underlying type.
+    size_t getTotalArrayElementCount() { return getType()->getTotalArrayElementCount(); }
+    // Element stride for the given C-API category.
+    size_t getElementStride(SlangParameterCategory category)
+    {
+        return spReflectionTypeLayout_GetElementStride((SlangReflectionTypeLayout*)this, category);
+    }
+    // Layout of the element type, as reported by the C API.
+    TypeLayoutReflection* getElementTypeLayout()
+    {
+        return (TypeLayoutReflection*)spReflectionTypeLayout_GetElementTypeLayout(
+            (SlangReflectionTypeLayout*)this);
+    }
+    // Element and container variable layouts, each a direct forward to the same-named C function.
+    VariableLayoutReflection* getElementVarLayout()
+    {
+        return (VariableLayoutReflection*)spReflectionTypeLayout_GetElementVarLayout(
+            (SlangReflectionTypeLayout*)this);
+    }
+
+    VariableLayoutReflection* getContainerVarLayout()
+    {
+        return (VariableLayoutReflection*)spReflectionTypeLayout_getContainerVarLayout(
+            (SlangReflectionTypeLayout*)this);
+    }
+
+    // How is this type supposed to be bound?
+    ParameterCategory getParameterCategory()
+    {
+        return (ParameterCategory)spReflectionTypeLayout_GetParameterCategory(
+            (SlangReflectionTypeLayout*)this);
+    }
+    // Category enumeration: count, then access by index.
+    unsigned int getCategoryCount()
+    {
+        return spReflectionTypeLayout_GetCategoryCount((SlangReflectionTypeLayout*)this);
+    }
+
+    ParameterCategory getCategoryByIndex(unsigned int index)
+    {
+        return (ParameterCategory)spReflectionTypeLayout_GetCategoryByIndex(
+            (SlangReflectionTypeLayout*)this,
+            index);
+    }
+    // The next seven accessors simply delegate to the same-named member of getType().
+    unsigned getRowCount() { return getType()->getRowCount(); }
+
+    unsigned getColumnCount() { return getType()->getColumnCount(); }
+
+    TypeReflection::ScalarType getScalarType() { return getType()->getScalarType(); }
+
+    TypeReflection* getResourceResultType() { return getType()->getResourceResultType(); }
+
+    SlangResourceShape getResourceShape() { return getType()->getResourceShape(); }
+
+    SlangResourceAccess getResourceAccess() { return getType()->getResourceAccess(); }
+
+    char const* getName() { return getType()->getName(); }
+    // Matrix layout mode, returned as the C enum without conversion.
+    SlangMatrixLayoutMode getMatrixLayoutMode()
+    {
+        return spReflectionTypeLayout_GetMatrixLayoutMode((SlangReflectionTypeLayout*)this);
+    }
+    // Generic parameter index as reported by the C API.
+    int getGenericParamIndex()
+    {
+        return spReflectionTypeLayout_getGenericParamIndex((SlangReflectionTypeLayout*)this);
+    }
+    // "Pending data" layouts: direct forwards; their semantics are defined by the C API, not here.
+    TypeLayoutReflection* getPendingDataTypeLayout()
+    {
+        return (TypeLayoutReflection*)spReflectionTypeLayout_getPendingDataTypeLayout(
+            (SlangReflectionTypeLayout*)this);
+    }
+
+    VariableLayoutReflection* getSpecializedTypePendingDataVarLayout()
+    {
+        return (VariableLayoutReflection*)
+            spReflectionTypeLayout_getSpecializedTypePendingDataVarLayout(
+                (SlangReflectionTypeLayout*)this);
+    }
+    // Binding ranges: a count, then per-range queries taking a range `index`.
+    SlangInt getBindingRangeCount()
+    {
+        return spReflectionTypeLayout_getBindingRangeCount((SlangReflectionTypeLayout*)this);
+    }
+    // Range type, converted from the C enum to the scoped BindingType.
+    BindingType getBindingRangeType(SlangInt index)
+    {
+        return (BindingType)spReflectionTypeLayout_getBindingRangeType(
+            (SlangReflectionTypeLayout*)this,
+            index);
+    }
+    // The C result is converted to bool with an explicit cast.
+    bool isBindingRangeSpecializable(SlangInt index)
+    {
+        return (bool)spReflectionTypeLayout_isBindingRangeSpecializable(
+            (SlangReflectionTypeLayout*)this,
+            index);
+    }
+
+    SlangInt getBindingRangeBindingCount(SlangInt index)
+    {
+        return spReflectionTypeLayout_getBindingRangeBindingCount(
+            (SlangReflectionTypeLayout*)this,
+            index);
+    }
+    // The two accessors below are commented out and therefore not part of the compiled interface.
+    /*
+    SlangInt getBindingRangeIndexOffset(SlangInt index)
+    {
+        return spReflectionTypeLayout_getBindingRangeIndexOffset(
+            (SlangReflectionTypeLayout*) this,
+            index);
+    }
+
+    SlangInt getBindingRangeSpaceOffset(SlangInt index)
+    {
+        return spReflectionTypeLayout_getBindingRangeSpaceOffset(
+            (SlangReflectionTypeLayout*) this,
+            index);
+    }
+    */
+    // Binding-range offset for the field at `fieldIndex`.
+    SlangInt getFieldBindingRangeOffset(SlangInt fieldIndex)
+    {
+        return spReflectionTypeLayout_getFieldBindingRangeOffset(
+            (SlangReflectionTypeLayout*)this,
+            fieldIndex);
+    }
+    // Binding-range offset of the explicit counter (see getExplicitCounter).
+    SlangInt getExplicitCounterBindingRangeOffset()
+    {
+        return spReflectionTypeLayout_getExplicitCounterBindingRangeOffset(
+            (SlangReflectionTypeLayout*)this);
+    }
+    // Leaf type layout / leaf variable of the binding range at `index`.
+    TypeLayoutReflection* getBindingRangeLeafTypeLayout(SlangInt index)
+    {
+        return (TypeLayoutReflection*)spReflectionTypeLayout_getBindingRangeLeafTypeLayout(
+            (SlangReflectionTypeLayout*)this,
+            index);
+    }
+
+    VariableReflection* getBindingRangeLeafVariable(SlangInt index)
+    {
+        return (VariableReflection*)spReflectionTypeLayout_getBindingRangeLeafVariable(
+            (SlangReflectionTypeLayout*)this,
+            index);
+    }
+    // Image format of the binding range, returned as the C enum.
+    SlangImageFormat getBindingRangeImageFormat(SlangInt index)
+    {
+        return spReflectionTypeLayout_getBindingRangeImageFormat(
+            (SlangReflectionTypeLayout*)this,
+            index);
+    }
+    // Mapping from a binding range to descriptor sets / descriptor ranges.
+    SlangInt getBindingRangeDescriptorSetIndex(SlangInt index)
+    {
+        return spReflectionTypeLayout_getBindingRangeDescriptorSetIndex(
+            (SlangReflectionTypeLayout*)this,
+            index);
+    }
+
+    SlangInt getBindingRangeFirstDescriptorRangeIndex(SlangInt index)
+    {
+        return spReflectionTypeLayout_getBindingRangeFirstDescriptorRangeIndex(
+            (SlangReflectionTypeLayout*)this,
+            index);
+    }
+
+    SlangInt getBindingRangeDescriptorRangeCount(SlangInt index)
+    {
+        return spReflectionTypeLayout_getBindingRangeDescriptorRangeCount(
+            (SlangReflectionTypeLayout*)this,
+            index);
+    }
+    // Descriptor sets: a count, then per-set queries taking `setIndex` (and `rangeIndex` for per-range data).
+    SlangInt getDescriptorSetCount()
+    {
+        return spReflectionTypeLayout_getDescriptorSetCount((SlangReflectionTypeLayout*)this);
+    }
+
+    SlangInt getDescriptorSetSpaceOffset(SlangInt setIndex)
+    {
+        return spReflectionTypeLayout_getDescriptorSetSpaceOffset(
+            (SlangReflectionTypeLayout*)this,
+            setIndex);
+    }
+
+    SlangInt getDescriptorSetDescriptorRangeCount(SlangInt setIndex)
+    {
+        return spReflectionTypeLayout_getDescriptorSetDescriptorRangeCount(
+            (SlangReflectionTypeLayout*)this,
+            setIndex);
+    }
+
+    SlangInt getDescriptorSetDescriptorRangeIndexOffset(SlangInt setIndex, SlangInt rangeIndex)
+    {
+        return spReflectionTypeLayout_getDescriptorSetDescriptorRangeIndexOffset(
+            (SlangReflectionTypeLayout*)this,
+            setIndex,
+            rangeIndex);
+    }
+
+    SlangInt getDescriptorSetDescriptorRangeDescriptorCount(SlangInt setIndex, SlangInt rangeIndex)
+    {
+        return spReflectionTypeLayout_getDescriptorSetDescriptorRangeDescriptorCount(
+            (SlangReflectionTypeLayout*)this,
+            setIndex,
+            rangeIndex);
+    }
+    // Descriptor range type, converted to the scoped BindingType.
+    BindingType getDescriptorSetDescriptorRangeType(SlangInt setIndex, SlangInt rangeIndex)
+    {
+        return (BindingType)spReflectionTypeLayout_getDescriptorSetDescriptorRangeType(
+            (SlangReflectionTypeLayout*)this,
+            setIndex,
+            rangeIndex);
+    }
+    // Descriptor range category, converted to the C++ ParameterCategory.
+    ParameterCategory getDescriptorSetDescriptorRangeCategory(
+        SlangInt setIndex,
+        SlangInt rangeIndex)
+    {
+        return (ParameterCategory)spReflectionTypeLayout_getDescriptorSetDescriptorRangeCategory(
+            (SlangReflectionTypeLayout*)this,
+            setIndex,
+            rangeIndex);
+    }
+    // Sub-object ranges: a count, then per-range queries taking `subObjectRangeIndex`.
+    SlangInt getSubObjectRangeCount()
+    {
+        return spReflectionTypeLayout_getSubObjectRangeCount((SlangReflectionTypeLayout*)this);
+    }
+
+    SlangInt getSubObjectRangeBindingRangeIndex(SlangInt subObjectRangeIndex)
+    {
+        return spReflectionTypeLayout_getSubObjectRangeBindingRangeIndex(
+            (SlangReflectionTypeLayout*)this,
+            subObjectRangeIndex);
+    }
+
+    SlangInt getSubObjectRangeSpaceOffset(SlangInt subObjectRangeIndex)
+    {
+        return spReflectionTypeLayout_getSubObjectRangeSpaceOffset(
+            (SlangReflectionTypeLayout*)this,
+            subObjectRangeIndex);
+    }
+    // Unlike the integer offsets above, this offset is returned as a variable layout.
+    VariableLayoutReflection* getSubObjectRangeOffset(SlangInt subObjectRangeIndex)
+    {
+        return (VariableLayoutReflection*)spReflectionTypeLayout_getSubObjectRangeOffset(
+            (SlangReflectionTypeLayout*)this,
+            subObjectRangeIndex);
+    }
+};
+
+struct Modifier // has no data members or methods; serves as the pointee type of findModifier() results and as the scope for the ID enum
+{
+    enum ID : SlangModifierIDIntegral // C++ names for the SLANG_MODIFIER_* constants, with identical values
+    {
+        Shared = SLANG_MODIFIER_SHARED,
+        NoDiff = SLANG_MODIFIER_NO_DIFF,
+        Static = SLANG_MODIFIER_STATIC,
+        Const = SLANG_MODIFIER_CONST,
+        Export = SLANG_MODIFIER_EXPORT,
+        Extern = SLANG_MODIFIER_EXTERN,
+        Differentiable = SLANG_MODIFIER_DIFFERENTIABLE,
+        Mutating = SLANG_MODIFIER_MUTATING,
+        In = SLANG_MODIFIER_IN,
+        Out = SLANG_MODIFIER_OUT,
+        InOut = SLANG_MODIFIER_INOUT
+    };
+};
+
+struct VariableReflection
+{
+    char const* getName() { return spReflectionVariable_GetName(reinterpret_cast<SlangReflectionVariable*>(this)); }
+    // Every member reinterprets `this` as the C API's SlangReflectionVariable handle.
+    TypeReflection* getType()
+    {
+        return reinterpret_cast<TypeReflection*>(spReflectionVariable_GetType(reinterpret_cast<SlangReflectionVariable*>(this)));
+    }
+    // Looks up the modifier `id`; the C handle is re-typed to Modifier*.
+    Modifier* findModifier(Modifier::ID id)
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionVariable*>(this);
+        auto* const found = spReflectionVariable_FindModifier(raw, static_cast<SlangModifierID>(id));
+        return reinterpret_cast<Modifier*>(found);
+    }
+    // User attribute enumeration: count, then access by index.
+    unsigned int getUserAttributeCount()
+    {
+        return spReflectionVariable_GetUserAttributeCount(reinterpret_cast<SlangReflectionVariable*>(this));
+    }
+
+    Attribute* getUserAttributeByIndex(unsigned int index)
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionVariable*>(this);
+        auto* const attribute = spReflectionVariable_GetUserAttribute(raw, index);
+        return reinterpret_cast<UserAttribute*>(attribute);
+    }
+    // Name lookup; unlike the TypeReflection variant this one also takes the global session.
+    Attribute* findAttributeByName(SlangSession* globalSession, char const* name)
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionVariable*>(this);
+        auto* const attribute =
+            spReflectionVariable_FindUserAttributeByName(raw, globalSession, name);
+        return reinterpret_cast<UserAttribute*>(attribute);
+    }
+    // Second spelling of findAttributeByName; simply delegates to it.
+    Attribute* findUserAttributeByName(SlangSession* globalSession, char const* name)
+    {
+        return this->findAttributeByName(globalSession, name);
+    }
+    // Whether the C API reports a default value for this variable.
+    bool hasDefaultValue()
+    {
+        return spReflectionVariable_HasDefaultValue(reinterpret_cast<SlangReflectionVariable*>(this));
+    }
+    // Passes `value` straight to the C API and returns its SlangResult unchanged.
+    SlangResult getDefaultValueInt(int64_t* value)
+    {
+        return spReflectionVariable_GetDefaultValueInt(reinterpret_cast<SlangReflectionVariable*>(this), value);
+    }
+    // Generic container as reported by spReflectionVariable_GetGenericContainer.
+    GenericReflection* getGenericContainer()
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionVariable*>(this);
+        return reinterpret_cast<GenericReflection*>(spReflectionVariable_GetGenericContainer(raw));
+    }
+    // Forwards this variable and `generic` to spReflectionVariable_applySpecializations.
+    VariableReflection* applySpecializations(GenericReflection* generic)
+    {
+        auto* const raw = reinterpret_cast<SlangReflectionVariable*>(this);
+        auto* const rawGeneric = reinterpret_cast<SlangReflectionGeneric*>(generic);
+        return reinterpret_cast<VariableReflection*>(spReflectionVariable_applySpecializations(raw, rawGeneric));
+    }
+};
+
+/** C++ view of an opaque `SlangReflectionVariableLayout`.
+
+No data members are declared. Methods either forward to the C reflection API
+with `this` reinterpreted as the handle, or delegate to the object returned by
+getVariable() / getTypeLayout().
+*/
+struct VariableLayoutReflection
+{
+    // Forwards to `spReflectionVariableLayout_GetVariable`.
+    VariableReflection* getVariable()
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        return (VariableReflection*)spReflectionVariableLayout_GetVariable(handle);
+    }
+
+    // Name of the variable returned by getVariable().
+    char const* getName()
+    {
+        VariableReflection* variable = getVariable();
+        return variable->getName();
+    }
+
+    // Modifier lookup, delegated to the variable returned by getVariable().
+    Modifier* findModifier(Modifier::ID id)
+    {
+        VariableReflection* variable = getVariable();
+        return variable->findModifier(id);
+    }
+
+    // Forwards to `spReflectionVariableLayout_GetTypeLayout`.
+    TypeLayoutReflection* getTypeLayout()
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        return (TypeLayoutReflection*)spReflectionVariableLayout_GetTypeLayout(handle);
+    }
+
+    // Parameter category of the type layout.
+    ParameterCategory getCategory()
+    {
+        TypeLayoutReflection* layout = getTypeLayout();
+        return layout->getParameterCategory();
+    }
+
+    // Category count of the type layout.
+    unsigned int getCategoryCount()
+    {
+        TypeLayoutReflection* layout = getTypeLayout();
+        return layout->getCategoryCount();
+    }
+
+    // Category at `index`, as reported by the type layout.
+    ParameterCategory getCategoryByIndex(unsigned int index)
+    {
+        TypeLayoutReflection* layout = getTypeLayout();
+        return layout->getCategoryByIndex(index);
+    }
+
+    // Forwards to `spReflectionVariableLayout_GetOffset` for a C-API category value.
+    size_t getOffset(SlangParameterCategory category)
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        return spReflectionVariableLayout_GetOffset(handle, category);
+    }
+
+    // Same query for the scoped enum; `category` defaults to ParameterCategory::Uniform.
+    size_t getOffset(slang::ParameterCategory category = slang::ParameterCategory::Uniform)
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        auto const rawCategory = (SlangParameterCategory)category;
+        return spReflectionVariableLayout_GetOffset(handle, rawCategory);
+    }
+
+    // Type of the variable returned by getVariable().
+    TypeReflection* getType()
+    {
+        VariableReflection* variable = getVariable();
+        return variable->getType();
+    }
+
+    // Forwards to `spReflectionParameter_GetBindingIndex`.
+    unsigned getBindingIndex()
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        return spReflectionParameter_GetBindingIndex(handle);
+    }
+
+    // Forwards to `spReflectionParameter_GetBindingSpace`.
+    unsigned getBindingSpace()
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        return spReflectionParameter_GetBindingSpace(handle);
+    }
+
+    // Forwards to `spReflectionVariableLayout_GetSpace` for a C-API category value.
+    size_t getBindingSpace(SlangParameterCategory category)
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        return spReflectionVariableLayout_GetSpace(handle, category);
+    }
+
+    // Same query for the scoped enum.
+    size_t getBindingSpace(slang::ParameterCategory category)
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        auto const rawCategory = (SlangParameterCategory)category;
+        return spReflectionVariableLayout_GetSpace(handle, rawCategory);
+    }
+
+    // Forwards to `spReflectionVariableLayout_GetImageFormat`.
+    SlangImageFormat getImageFormat()
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        return spReflectionVariableLayout_GetImageFormat(handle);
+    }
+
+    // Forwards to `spReflectionVariableLayout_GetSemanticName`.
+    char const* getSemanticName()
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        return spReflectionVariableLayout_GetSemanticName(handle);
+    }
+
+    // Forwards to `spReflectionVariableLayout_GetSemanticIndex`.
+    size_t getSemanticIndex()
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        return spReflectionVariableLayout_GetSemanticIndex(handle);
+    }
+
+    // Forwards to `spReflectionVariableLayout_getStage`.
+    SlangStage getStage()
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        return spReflectionVariableLayout_getStage(handle);
+    }
+
+    // Forwards to `spReflectionVariableLayout_getPendingDataLayout`.
+    VariableLayoutReflection* getPendingDataLayout()
+    {
+        auto* handle = (SlangReflectionVariableLayout*)this;
+        return (VariableLayoutReflection*)spReflectionVariableLayout_getPendingDataLayout(handle);
+    }
+};
+
+/** C++ view of an opaque `SlangReflectionFunction`.
+
+No data members are declared; every method reinterprets `this` as the C handle
+and forwards to the matching `spReflectionFunction_*` function.
+*/
+struct FunctionReflection
+{
+    // Forwards to `spReflectionFunction_GetName`.
+    char const* getName()
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        return spReflectionFunction_GetName(handle);
+    }
+
+    // Forwards to `spReflectionFunction_GetResultType`.
+    TypeReflection* getReturnType()
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        return (TypeReflection*)spReflectionFunction_GetResultType(handle);
+    }
+
+    // Forwards to `spReflectionFunction_GetParameterCount`.
+    unsigned int getParameterCount()
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        return spReflectionFunction_GetParameterCount(handle);
+    }
+
+    // Forwards to `spReflectionFunction_GetParameter` for the given `index`.
+    VariableReflection* getParameterByIndex(unsigned int index)
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        return (VariableReflection*)spReflectionFunction_GetParameter(handle, index);
+    }
+
+    // Forwards to `spReflectionFunction_GetUserAttributeCount`.
+    unsigned int getUserAttributeCount()
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        return spReflectionFunction_GetUserAttributeCount(handle);
+    }
+
+    // Forwards to `spReflectionFunction_GetUserAttribute` for the given `index`.
+    Attribute* getUserAttributeByIndex(unsigned int index)
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        return (Attribute*)spReflectionFunction_GetUserAttribute(handle, index);
+    }
+
+    // Forwards to `spReflectionFunction_FindUserAttributeByName`.
+    Attribute* findAttributeByName(SlangSession* globalSession, char const* name)
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        return (Attribute*)
+            spReflectionFunction_FindUserAttributeByName(handle, globalSession, name);
+    }
+
+    // Same lookup as findAttributeByName(), offered under a second name.
+    Attribute* findUserAttributeByName(SlangSession* globalSession, char const* name)
+    {
+        Attribute* found = findAttributeByName(globalSession, name);
+        return found;
+    }
+
+    // Forwards to `spReflectionFunction_FindModifier`, passing `id` as a SlangModifierID.
+    Modifier* findModifier(Modifier::ID id)
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        auto const rawId = (SlangModifierID)id;
+        return (Modifier*)spReflectionFunction_FindModifier(handle, rawId);
+    }
+
+    // Forwards to `spReflectionFunction_GetGenericContainer`.
+    GenericReflection* getGenericContainer()
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        return (GenericReflection*)spReflectionFunction_GetGenericContainer(handle);
+    }
+
+    // Forwards to `spReflectionFunction_applySpecializations` with `generic` as the argument.
+    FunctionReflection* applySpecializations(GenericReflection* generic)
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        auto* rawGeneric = (SlangReflectionGeneric*)generic;
+        return (FunctionReflection*)spReflectionFunction_applySpecializations(handle, rawGeneric);
+    }
+
+    // Forwards to `spReflectionFunction_specializeWithArgTypes`; `types` is passed as an
+    // array of `argCount` C type handles.
+    FunctionReflection* specializeWithArgTypes(unsigned int argCount, TypeReflection* const* types)
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        auto* rawTypes = (SlangReflectionType* const*)types;
+        return (FunctionReflection*)
+            spReflectionFunction_specializeWithArgTypes(handle, argCount, rawTypes);
+    }
+
+    // Forwards to `spReflectionFunction_isOverloaded`.
+    bool isOverloaded()
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        return spReflectionFunction_isOverloaded(handle);
+    }
+
+    // Forwards to `spReflectionFunction_getOverloadCount`.
+    unsigned int getOverloadCount()
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        return spReflectionFunction_getOverloadCount(handle);
+    }
+
+    // Forwards to `spReflectionFunction_getOverload` for the given `index`.
+    FunctionReflection* getOverload(unsigned int index)
+    {
+        auto* handle = (SlangReflectionFunction*)this;
+        return (FunctionReflection*)spReflectionFunction_getOverload(handle, index);
+    }
+};
+
+/** C++ view of an opaque `SlangReflectionGeneric`.
+
+No data members are declared; every method reinterprets `this` as the C handle
+and forwards to the matching `spReflectionGeneric_*` function.
+*/
+struct GenericReflection
+{
+    // Forwards to `spReflectionGeneric_asDecl`.
+    DeclReflection* asDecl()
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        return (DeclReflection*)spReflectionGeneric_asDecl(handle);
+    }
+
+    // Forwards to `spReflectionGeneric_GetName`.
+    char const* getName()
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        return spReflectionGeneric_GetName(handle);
+    }
+
+    // Forwards to `spReflectionGeneric_GetTypeParameterCount`.
+    unsigned int getTypeParameterCount()
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        return spReflectionGeneric_GetTypeParameterCount(handle);
+    }
+
+    // Forwards to `spReflectionGeneric_GetTypeParameter` for the given `index`.
+    VariableReflection* getTypeParameter(unsigned index)
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        return (VariableReflection*)spReflectionGeneric_GetTypeParameter(handle, index);
+    }
+
+    // Forwards to `spReflectionGeneric_GetValueParameterCount`.
+    unsigned int getValueParameterCount()
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        return spReflectionGeneric_GetValueParameterCount(handle);
+    }
+
+    // Forwards to `spReflectionGeneric_GetValueParameter` for the given `index`.
+    VariableReflection* getValueParameter(unsigned index)
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        return (VariableReflection*)spReflectionGeneric_GetValueParameter(handle, index);
+    }
+
+    // Forwards to `spReflectionGeneric_GetTypeParameterConstraintCount` for `typeParam`.
+    unsigned int getTypeParameterConstraintCount(VariableReflection* typeParam)
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        auto* rawParam = (SlangReflectionVariable*)typeParam;
+        return spReflectionGeneric_GetTypeParameterConstraintCount(handle, rawParam);
+    }
+
+    // Forwards to `spReflectionGeneric_GetTypeParameterConstraintType` for `typeParam`
+    // and constraint `index`.
+    TypeReflection* getTypeParameterConstraintType(VariableReflection* typeParam, unsigned index)
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        auto* rawParam = (SlangReflectionVariable*)typeParam;
+        return (TypeReflection*)
+            spReflectionGeneric_GetTypeParameterConstraintType(handle, rawParam, index);
+    }
+
+    // Forwards to `spReflectionGeneric_GetInnerDecl`.
+    DeclReflection* getInnerDecl()
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        return (DeclReflection*)spReflectionGeneric_GetInnerDecl(handle);
+    }
+
+    // Forwards to `spReflectionGeneric_GetInnerKind`.
+    SlangDeclKind getInnerKind()
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        return spReflectionGeneric_GetInnerKind(handle);
+    }
+
+    // Forwards to `spReflectionGeneric_GetOuterGenericContainer`.
+    GenericReflection* getOuterGenericContainer()
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        return (GenericReflection*)spReflectionGeneric_GetOuterGenericContainer(handle);
+    }
+
+    // Forwards to `spReflectionGeneric_GetConcreteType` for `typeParam`.
+    TypeReflection* getConcreteType(VariableReflection* typeParam)
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        auto* rawParam = (SlangReflectionVariable*)typeParam;
+        return (TypeReflection*)spReflectionGeneric_GetConcreteType(handle, rawParam);
+    }
+
+    // Forwards to `spReflectionGeneric_GetConcreteIntVal` for `valueParam`.
+    int64_t getConcreteIntVal(VariableReflection* valueParam)
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        auto* rawParam = (SlangReflectionVariable*)valueParam;
+        return spReflectionGeneric_GetConcreteIntVal(handle, rawParam);
+    }
+
+    // Forwards to `spReflectionGeneric_applySpecializations` with `generic` as the argument.
+    GenericReflection* applySpecializations(GenericReflection* generic)
+    {
+        auto* handle = (SlangReflectionGeneric*)this;
+        auto* rawGeneric = (SlangReflectionGeneric*)generic;
+        return (GenericReflection*)spReflectionGeneric_applySpecializations(handle, rawGeneric);
+    }
+};
+
+/** C++ view of an opaque `SlangReflectionEntryPoint`.
+
+No data members are declared; methods reinterpret `this` as the C handle and
+forward to the matching `spReflectionEntryPoint_*` function.
+*/
+struct EntryPointReflection
+{
+    // Forwards to `spReflectionEntryPoint_getName`.
+    char const* getName()
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        return spReflectionEntryPoint_getName(handle);
+    }
+
+    // Forwards to `spReflectionEntryPoint_getNameOverride`.
+    char const* getNameOverride()
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        return spReflectionEntryPoint_getNameOverride(handle);
+    }
+
+    // Forwards to `spReflectionEntryPoint_getParameterCount`.
+    unsigned getParameterCount()
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        return spReflectionEntryPoint_getParameterCount(handle);
+    }
+
+    // Forwards to `spReflectionEntryPoint_getFunction`.
+    FunctionReflection* getFunction()
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        return (FunctionReflection*)spReflectionEntryPoint_getFunction(handle);
+    }
+
+    // Forwards to `spReflectionEntryPoint_getParameterByIndex` for the given `index`.
+    VariableLayoutReflection* getParameterByIndex(unsigned index)
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        return (VariableLayoutReflection*)spReflectionEntryPoint_getParameterByIndex(handle, index);
+    }
+
+    // Forwards to `spReflectionEntryPoint_getStage`.
+    SlangStage getStage()
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        return spReflectionEntryPoint_getStage(handle);
+    }
+
+    // Forwards to `spReflectionEntryPoint_getComputeThreadGroupSize`; results are written
+    // by the C call through `outSizeAlongAxis`.
+    void getComputeThreadGroupSize(SlangUInt axisCount, SlangUInt* outSizeAlongAxis)
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        spReflectionEntryPoint_getComputeThreadGroupSize(handle, axisCount, outSizeAlongAxis);
+    }
+
+    // Forwards to `spReflectionEntryPoint_getComputeWaveSize`; the result is written by the
+    // C call through `outWaveSize`.
+    void getComputeWaveSize(SlangUInt* outWaveSize)
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        spReflectionEntryPoint_getComputeWaveSize(handle, outWaveSize);
+    }
+
+    // True when `spReflectionEntryPoint_usesAnySampleRateInput` returns a non-zero value.
+    bool usesAnySampleRateInput()
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        return spReflectionEntryPoint_usesAnySampleRateInput(handle) != 0;
+    }
+
+    // Forwards to `spReflectionEntryPoint_getVarLayout`.
+    VariableLayoutReflection* getVarLayout()
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        return (VariableLayoutReflection*)spReflectionEntryPoint_getVarLayout(handle);
+    }
+
+    // Type layout of the variable layout returned by getVarLayout().
+    TypeLayoutReflection* getTypeLayout()
+    {
+        VariableLayoutReflection* varLayout = getVarLayout();
+        return varLayout->getTypeLayout();
+    }
+
+    // Forwards to `spReflectionEntryPoint_getResultVarLayout`.
+    VariableLayoutReflection* getResultVarLayout()
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        return (VariableLayoutReflection*)spReflectionEntryPoint_getResultVarLayout(handle);
+    }
+
+    // True when `spReflectionEntryPoint_hasDefaultConstantBuffer` returns a non-zero value.
+    bool hasDefaultConstantBuffer()
+    {
+        auto* handle = (SlangReflectionEntryPoint*)this;
+        auto const rawResult = spReflectionEntryPoint_hasDefaultConstantBuffer(handle);
+        return rawResult != 0;
+    }
+};
+
+// Alternate name for EntryPointReflection.
+using EntryPointLayout = EntryPointReflection;
+
+/** C++ view of an opaque `SlangReflectionTypeParameter`.
+
+No data members are declared; every method reinterprets `this` as the C handle
+and forwards to the matching `spReflectionTypeParameter_*` function.
+*/
+struct TypeParameterReflection
+{
+    // Forwards to `spReflectionTypeParameter_GetName`.
+    char const* getName()
+    {
+        auto* handle = (SlangReflectionTypeParameter*)this;
+        return spReflectionTypeParameter_GetName(handle);
+    }
+
+    // Forwards to `spReflectionTypeParameter_GetIndex`.
+    unsigned getIndex()
+    {
+        auto* handle = (SlangReflectionTypeParameter*)this;
+        return spReflectionTypeParameter_GetIndex(handle);
+    }
+
+    // Forwards to `spReflectionTypeParameter_GetConstraintCount`.
+    unsigned getConstraintCount()
+    {
+        auto* handle = (SlangReflectionTypeParameter*)this;
+        return spReflectionTypeParameter_GetConstraintCount(handle);
+    }
+
+    // Forwards to `spReflectionTypeParameter_GetConstraintByIndex` for the given `index`.
+    TypeReflection* getConstraintByIndex(int index)
+    {
+        auto* handle = (SlangReflectionTypeParameter*)this;
+        return (TypeReflection*)spReflectionTypeParameter_GetConstraintByIndex(handle, index);
+    }
+};
+
+enum class LayoutRules : SlangLayoutRulesIntegral // scoped counterparts of SLANG_LAYOUT_RULES_*
+{
+    Default = SLANG_LAYOUT_RULES_DEFAULT, // also the default argument of getTypeLayout()
+    MetalArgumentBufferTier2 = SLANG_LAYOUT_RULES_METAL_ARGUMENT_BUFFER_TIER_2,
+};
+
+typedef struct ShaderReflection ProgramLayout; // ProgramLayout names the struct defined below
+typedef enum SlangReflectionGenericArgType GenericArgType; // shorter name for the C enum
+
+/** C++ view of an opaque `SlangReflection` (also reachable as `ProgramLayout`).
+
+No data members are declared; every non-static method reinterprets `this` as
+the C handle and forwards to the matching `spReflection_*` function.
+*/
+struct ShaderReflection
+{
+    // Forwards to `spReflection_GetParameterCount`.
+    unsigned getParameterCount()
+    {
+        auto* handle = (SlangReflection*)this;
+        return spReflection_GetParameterCount(handle);
+    }
+
+    // Forwards to `spReflection_GetTypeParameterCount`.
+    unsigned getTypeParameterCount()
+    {
+        auto* handle = (SlangReflection*)this;
+        return spReflection_GetTypeParameterCount(handle);
+    }
+
+    // Forwards to `spReflection_GetSession`.
+    slang::ISession* getSession()
+    {
+        auto* handle = (SlangReflection*)this;
+        return spReflection_GetSession(handle);
+    }
+
+    // Forwards to `spReflection_GetTypeParameterByIndex` for the given `index`.
+    TypeParameterReflection* getTypeParameterByIndex(unsigned index)
+    {
+        auto* handle = (SlangReflection*)this;
+        return (TypeParameterReflection*)spReflection_GetTypeParameterByIndex(handle, index);
+    }
+
+    // Forwards to `spReflection_FindTypeParameter` for the given `name`.
+    TypeParameterReflection* findTypeParameter(char const* name)
+    {
+        auto* handle = (SlangReflection*)this;
+        return (TypeParameterReflection*)spReflection_FindTypeParameter(handle, name);
+    }
+
+    // Forwards to `spReflection_GetParameterByIndex` for the given `index`.
+    VariableLayoutReflection* getParameterByIndex(unsigned index)
+    {
+        auto* handle = (SlangReflection*)this;
+        return (VariableLayoutReflection*)spReflection_GetParameterByIndex(handle, index);
+    }
+
+    // Wraps the result of `spGetReflection(request)` as a ProgramLayout.
+    static ProgramLayout* get(SlangCompileRequest* request)
+    {
+        auto* reflection = spGetReflection(request);
+        return (ProgramLayout*)reflection;
+    }
+
+    // Forwards to `spReflection_getEntryPointCount`.
+    SlangUInt getEntryPointCount()
+    {
+        auto* handle = (SlangReflection*)this;
+        return spReflection_getEntryPointCount(handle);
+    }
+
+    // Forwards to `spReflection_getEntryPointByIndex` for the given `index`.
+    EntryPointReflection* getEntryPointByIndex(SlangUInt index)
+    {
+        auto* handle = (SlangReflection*)this;
+        return (EntryPointReflection*)spReflection_getEntryPointByIndex(handle, index);
+    }
+
+    // Forwards to `spReflection_getGlobalConstantBufferBinding`.
+    SlangUInt getGlobalConstantBufferBinding()
+    {
+        auto* handle = (SlangReflection*)this;
+        return spReflection_getGlobalConstantBufferBinding(handle);
+    }
+
+    // Forwards to `spReflection_getGlobalConstantBufferSize`.
+    size_t getGlobalConstantBufferSize()
+    {
+        auto* handle = (SlangReflection*)this;
+        return spReflection_getGlobalConstantBufferSize(handle);
+    }
+
+    // Forwards to `spReflection_FindTypeByName` for the given `name`.
+    TypeReflection* findTypeByName(const char* name)
+    {
+        auto* handle = (SlangReflection*)this;
+        return (TypeReflection*)spReflection_FindTypeByName(handle, name);
+    }
+
+    // Forwards to `spReflection_FindFunctionByName` for the given `name`.
+    FunctionReflection* findFunctionByName(const char* name)
+    {
+        auto* handle = (SlangReflection*)this;
+        return (FunctionReflection*)spReflection_FindFunctionByName(handle, name);
+    }
+
+    // Forwards to `spReflection_FindFunctionByNameInType` for `type` and `name`.
+    FunctionReflection* findFunctionByNameInType(TypeReflection* type, const char* name)
+    {
+        auto* handle = (SlangReflection*)this;
+        auto* rawType = (SlangReflectionType*)type;
+        return (FunctionReflection*)spReflection_FindFunctionByNameInType(handle, rawType, name);
+    }
+
+    // Forwards to `spReflection_FindVarByNameInType` for `type` and `name`.
+    VariableReflection* findVarByNameInType(TypeReflection* type, const char* name)
+    {
+        auto* handle = (SlangReflection*)this;
+        auto* rawType = (SlangReflectionType*)type;
+        return (VariableReflection*)spReflection_FindVarByNameInType(handle, rawType, name);
+    }
+
+    // Forwards to `spReflection_GetTypeLayout`; `rules` defaults to LayoutRules::Default and
+    // is converted to SlangLayoutRules for the C call.
+    TypeLayoutReflection* getTypeLayout(
+        TypeReflection* type,
+        LayoutRules rules = LayoutRules::Default)
+    {
+        auto* handle = (SlangReflection*)this;
+        auto* rawType = (SlangReflectionType*)type;
+        auto const rawRules = SlangLayoutRules(rules);
+        return (TypeLayoutReflection*)spReflection_GetTypeLayout(handle, rawType, rawRules);
+    }
+
+    // Forwards to `spReflection_findEntryPointByName` for the given `name`.
+    EntryPointReflection* findEntryPointByName(const char* name)
+    {
+        auto* handle = (SlangReflection*)this;
+        return (EntryPointReflection*)spReflection_findEntryPointByName(handle, name);
+    }
+
+    // Forwards to `spReflection_specializeType`; `outDiagnostics` is handed through untouched.
+    TypeReflection* specializeType(
+        TypeReflection* type,
+        SlangInt specializationArgCount,
+        TypeReflection* const* specializationArgs,
+        ISlangBlob** outDiagnostics)
+    {
+        auto* handle = (SlangReflection*)this;
+        auto* rawType = (SlangReflectionType*)type;
+        auto* rawArgs = (SlangReflectionType* const*)specializationArgs;
+        return (TypeReflection*)spReflection_specializeType(
+            handle,
+            rawType,
+            specializationArgCount,
+            rawArgs,
+            outDiagnostics);
+    }
+
+    // Forwards to `spReflection_specializeGeneric`; the argument-type and argument-value
+    // arrays are passed as their C-API pointer types, `outDiagnostics` untouched.
+    GenericReflection* specializeGeneric(
+        GenericReflection* generic,
+        SlangInt specializationArgCount,
+        GenericArgType const* specializationArgTypes,
+        GenericArgReflection const* specializationArgVals,
+        ISlangBlob** outDiagnostics)
+    {
+        auto* handle = (SlangReflection*)this;
+        auto* rawGeneric = (SlangReflectionGeneric*)generic;
+        auto* rawArgTypes = (SlangReflectionGenericArgType const*)specializationArgTypes;
+        auto* rawArgVals = (SlangReflectionGenericArg const*)specializationArgVals;
+        return (GenericReflection*)spReflection_specializeGeneric(
+            handle,
+            rawGeneric,
+            specializationArgCount,
+            rawArgTypes,
+            rawArgVals,
+            outDiagnostics);
+    }
+
+    // Forwards to `spReflection_isSubType` for the pair (`subType`, `superType`).
+    bool isSubType(TypeReflection* subType, TypeReflection* superType)
+    {
+        auto* handle = (SlangReflection*)this;
+        auto* rawSub = (SlangReflectionType*)subType;
+        auto* rawSuper = (SlangReflectionType*)superType;
+        return spReflection_isSubType(handle, rawSub, rawSuper);
+    }
+
+    // Forwards to `spReflection_getHashedStringCount`.
+    SlangUInt getHashedStringCount() const
+    {
+        auto* handle = (SlangReflection*)this;
+        return spReflection_getHashedStringCount(handle);
+    }
+
+    // Forwards to `spReflection_getHashedString`; `outCount` is handed through to the C call.
+    const char* getHashedString(SlangUInt index, size_t* outCount) const
+    {
+        auto* handle = (SlangReflection*)this;
+        return spReflection_getHashedString(handle, index, outCount);
+    }
+
+    // Forwards to `spReflection_getGlobalParamsTypeLayout`.
+    TypeLayoutReflection* getGlobalParamsTypeLayout()
+    {
+        auto* handle = (SlangReflection*)this;
+        return (TypeLayoutReflection*)spReflection_getGlobalParamsTypeLayout(handle);
+    }
+
+    // Forwards to `spReflection_getGlobalParamsVarLayout`.
+    VariableLayoutReflection* getGlobalParamsVarLayout()
+    {
+        auto* handle = (SlangReflection*)this;
+        return (VariableLayoutReflection*)spReflection_getGlobalParamsVarLayout(handle);
+    }
+
+    // Forwards to `spReflection_ToJson`, always passing nullptr as its second argument.
+    SlangResult toJson(ISlangBlob** outBlob)
+    {
+        auto* handle = (SlangReflection*)this;
+        return spReflection_ToJson(handle, nullptr, outBlob);
+    }
+};
+
+
+/** C++ view of an opaque `SlangReflectionDecl`.
+
+No data members are declared; methods reinterpret `this` as the C handle and
+forward to the `spReflectionDecl_*` functions. Two small range types allow the
+children to be walked with a range-based `for`.
+*/
+struct DeclReflection
+{
+    // Scoped counterparts of the SLANG_DECL_KIND_* constants.
+    enum class Kind
+    {
+        Unsupported = SLANG_DECL_KIND_UNSUPPORTED_FOR_REFLECTION,
+        Struct = SLANG_DECL_KIND_STRUCT,
+        Func = SLANG_DECL_KIND_FUNC,
+        Module = SLANG_DECL_KIND_MODULE,
+        Generic = SLANG_DECL_KIND_GENERIC,
+        Variable = SLANG_DECL_KIND_VARIABLE,
+        Namespace = SLANG_DECL_KIND_NAMESPACE,
+    };
+
+    // Forwards to `spReflectionDecl_getName`.
+    char const* getName()
+    {
+        auto* handle = (SlangReflectionDecl*)this;
+        return spReflectionDecl_getName(handle);
+    }
+
+    // Forwards to `spReflectionDecl_getKind`, converting the result to Kind.
+    Kind getKind()
+    {
+        auto* handle = (SlangReflectionDecl*)this;
+        return (Kind)spReflectionDecl_getKind(handle);
+    }
+
+    // Forwards to `spReflectionDecl_getChildrenCount`.
+    unsigned int getChildrenCount()
+    {
+        auto* handle = (SlangReflectionDecl*)this;
+        return spReflectionDecl_getChildrenCount(handle);
+    }
+
+    // Forwards to `spReflectionDecl_getChild` for the given `index`.
+    DeclReflection* getChild(unsigned int index)
+    {
+        auto* handle = (SlangReflectionDecl*)this;
+        return (DeclReflection*)spReflectionDecl_getChild(handle, index);
+    }
+
+    // Forwards to `spReflection_getTypeFromDecl`.
+    TypeReflection* getType()
+    {
+        auto* handle = (SlangReflectionDecl*)this;
+        return (TypeReflection*)spReflection_getTypeFromDecl(handle);
+    }
+
+    // Forwards to `spReflectionDecl_castToVariable`.
+    VariableReflection* asVariable()
+    {
+        auto* handle = (SlangReflectionDecl*)this;
+        return (VariableReflection*)spReflectionDecl_castToVariable(handle);
+    }
+
+    // Forwards to `spReflectionDecl_castToFunction`.
+    FunctionReflection* asFunction()
+    {
+        auto* handle = (SlangReflectionDecl*)this;
+        return (FunctionReflection*)spReflectionDecl_castToFunction(handle);
+    }
+
+    // Forwards to `spReflectionDecl_castToGeneric`.
+    GenericReflection* asGeneric()
+    {
+        auto* handle = (SlangReflectionDecl*)this;
+        return (GenericReflection*)spReflectionDecl_castToGeneric(handle);
+    }
+
+    // Forwards to `spReflectionDecl_getParent`.
+    DeclReflection* getParent()
+    {
+        auto* handle = (SlangReflectionDecl*)this;
+        return (DeclReflection*)spReflectionDecl_getParent(handle);
+    }
+
+    // Range over the children of `parent` whose kind equals `K`.
+    template<Kind K>
+    struct FilteredList
+    {
+        unsigned int count;
+        DeclReflection* parent;
+
+        // Cursor into the child list; advancing skips children of any other kind.
+        struct FilteredIterator
+        {
+            DeclReflection* parent;
+            unsigned int count;
+            unsigned int index;
+
+            DeclReflection* operator*()
+            {
+                return parent->getChild(index);
+            }
+
+            void operator++()
+            {
+                // Step at least once, then keep stepping until a child of kind K
+                // is found or the end position (`count`) is reached.
+                do
+                {
+                    ++index;
+                } while (index < count && parent->getChild(index)->getKind() != K);
+            }
+
+            // Only the position is compared.
+            bool operator!=(FilteredIterator const& other)
+            {
+                return index != other.index;
+            }
+        };
+
+        // Iterator positioned on the first child of kind K (or at the end if none exists).
+        FilteredIterator begin()
+        {
+            unsigned int first = 0;
+            for (; first < count; ++first)
+            {
+                if (parent->getChild(first)->getKind() == K)
+                {
+                    break;
+                }
+            }
+            return FilteredIterator{parent, count, first};
+        }
+
+        // One-past-the-last position.
+        FilteredIterator end()
+        {
+            return FilteredIterator{parent, count, count};
+        }
+    };
+
+    // Builds a FilteredList over this declaration's children for kind `K`.
+    template<Kind K>
+    FilteredList<K> getChildrenOfKind()
+    {
+        FilteredList<K> matches{getChildrenCount(), this};
+        return matches;
+    }
+
+    // Range over all children of `parent`, without any filtering.
+    struct IteratedList
+    {
+        unsigned int count;
+        DeclReflection* parent;
+
+        // Plain index cursor into the child list.
+        struct Iterator
+        {
+            DeclReflection* parent;
+            unsigned int count;
+            unsigned int index;
+
+            DeclReflection* operator*()
+            {
+                return parent->getChild(index);
+            }
+
+            void operator++()
+            {
+                ++index;
+            }
+
+            // Only the position is compared.
+            bool operator!=(Iterator const& other)
+            {
+                return index != other.index;
+            }
+        };
+
+        // begin/end for range-based for over every child
+        IteratedList::Iterator begin()
+        {
+            return IteratedList::Iterator{parent, count, 0};
+        }
+
+        IteratedList::Iterator end()
+        {
+            return IteratedList::Iterator{parent, count, count};
+        }
+    };
+
+    // Builds an IteratedList over this declaration's children.
+    IteratedList getChildren()
+    {
+        IteratedList everyChild{getChildrenCount(), this};
+        return everyChild;
+    }
+};
+
+// Integral type of the `flags` argument taken by compileCoreModule / compileBuiltinModule.
+using CompileCoreModuleFlags = uint32_t;
+
+// Scope holding the individual flag values.
+struct CompileCoreModuleFlag
+{
+    enum Enum : CompileCoreModuleFlags
+    {
+        WriteDocumentation = 1u << 0,
+    };
+};
+
+typedef ISlangBlob IBlob; // IBlob is another name for ISlangBlob
+
+struct IComponentType; // forward declarations of interface types
+struct ITypeConformance;
+struct IGlobalSession;
+struct IModule;
+
+struct SessionDesc; // forward declarations of descriptor structs
+struct SpecializationArg;
+struct TargetDesc;
+
+enum class BuiltinModuleName // argument of the compile/load/saveBuiltinModule methods
+{
+    Core,
+    GLSL
+};
+
+/** A global session for interaction with the Slang library.
+
+An application may create and re-use a single global session across
+multiple sessions, in order to amortize startup costs (in current
+Slang this is mostly the cost of loading the Slang standard library).
+
+The global session is currently *not* thread-safe and objects created from
+a single global session should only be used from a single thread at
+a time.
+*/
+struct IGlobalSession : public ISlangUnknown
+{
+    SLANG_COM_INTERFACE(0xc140b5fd, 0xc78, 0x452e, {0xba, 0x7c, 0x1a, 0x1e, 0x70, 0xc7, 0xf7, 0x1c})
+
+    /** Create a new session for loading and compiling code.
+     */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    createSession(SessionDesc const& desc, ISession** outSession) = 0;
+
+    /** Look up the internal ID of a profile by its `name`.
+
+    Profile IDs are *not* guaranteed to be stable across versions
+    of the Slang library, so clients are expected to look up
+    profiles by name at runtime.
+    */
+    virtual SLANG_NO_THROW SlangProfileID SLANG_MCALL findProfile(char const* name) = 0;
+
+    /** Set the path that downstream compilers (aka back end compilers) will
+    be looked up from.
+    @param passThrough Identifies the downstream compiler
+    @param path The path to find the downstream compiler (shared library/dll/executable)
+
+    For back ends that are dlls/shared libraries, it will mean the path will
+    be prefixed with the path when calls are made out to ISlangSharedLibraryLoader.
+    For executables - it will look for executables along the path */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setDownstreamCompilerPath(SlangPassThrough passThrough, char const* path) = 0;
+
+    /** DEPRECATED: Use setLanguagePrelude
+
+    Set the 'prelude' for generated code for a 'downstream compiler'.
+    @param passThrough The downstream compiler for generated code that will have the prelude applied
+    to it.
+    @param preludeText The text added pre-pended verbatim before the generated source
+
+    Note that for pass-through usage the prelude is not pre-pended; preludes are for code
+    generation only.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setDownstreamCompilerPrelude(SlangPassThrough passThrough, const char* preludeText) = 0;
+
+    /** DEPRECATED: Use getLanguagePrelude
+
+    Get the 'prelude' for generated code for a 'downstream compiler'.
+    @param passThrough The downstream compiler for generated code that will have the prelude applied
+    to it.
+    @param outPrelude  On exit holds a blob that holds the string of the prelude.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    getDownstreamCompilerPrelude(SlangPassThrough passThrough, ISlangBlob** outPrelude) = 0;
+
+    /** Get the build version 'tag' string. The string is the same as produced via `git describe
+    --tags` for the project. If Slang is built separately from the automated build scripts the
+    contents will by default be 'unknown'. Any string can be set by changing the contents of
+    'slang-tag-version.h' file and recompiling the project.
+
+    This method will return exactly the same result as the free function spGetBuildTagString.
+
+    @return The build tag string
+    */
+    virtual SLANG_NO_THROW const char* SLANG_MCALL getBuildTagString() = 0;
+
+    /* For a given source language set the default compiler.
+    If a default cannot be chosen (for example the target cannot be achieved by the default),
+    the default will not be used.
+
+    @param sourceLanguage the source language
+    @param defaultCompiler the default compiler for that language
+    @return A SlangResult status code
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL setDefaultDownstreamCompiler(
+        SlangSourceLanguage sourceLanguage,
+        SlangPassThrough defaultCompiler) = 0;
+
+    /* For a source type get the default compiler
+
+    @param sourceLanguage the source language
+    @return The downstream compiler for that source language
+
+    NOTE(review): unlike the other methods of this interface, this declaration carries no
+    SLANG_NO_THROW - confirm that this is intentional. */
+    virtual SlangPassThrough SLANG_MCALL
+    getDefaultDownstreamCompiler(SlangSourceLanguage sourceLanguage) = 0;
+
+    /* Set the 'prelude' placed before generated code for a specific language type.
+
+    @param sourceLanguage The language the prelude should be inserted on.
+    @param preludeText The text added pre-pended verbatim before the generated source
+
+    Note! That for pass-through usage, prelude is not pre-pended, preludes are for code generation
+    only.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setLanguagePrelude(SlangSourceLanguage sourceLanguage, const char* preludeText) = 0;
+
+    /** Get the 'prelude' associated with a specific source language.
+    @param sourceLanguage The language the prelude should be inserted on.
+    @param outPrelude  On exit holds a blob that holds the string of the prelude.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    getLanguagePrelude(SlangSourceLanguage sourceLanguage, ISlangBlob** outPrelude) = 0;
+
+    /** Create a compile request.
+     */
+    [[deprecated]] virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    createCompileRequest(slang::ICompileRequest** outCompileRequest) = 0;
+
+    /** Add new builtin declarations to be used in subsequent compiles.
+     */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    addBuiltins(char const* sourcePath, char const* sourceString) = 0;
+
+    /** Set the session shared library loader. If this changes the loader, it may cause shared
+    libraries to be unloaded
+    @param loader The loader to set. Setting nullptr sets the default loader.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    setSharedLibraryLoader(ISlangSharedLibraryLoader* loader) = 0;
+
+    /** Gets the currently set shared library loader
+    @return Gets the currently set loader. If returns nullptr, it's the default loader
+    */
+    virtual SLANG_NO_THROW ISlangSharedLibraryLoader* SLANG_MCALL getSharedLibraryLoader() = 0;
+
+    /** Returns SLANG_OK if the compilation target is supported for this session
+
+    @param target The compilation target to test
+    @return SLANG_OK if the target is available
+    SLANG_E_NOT_IMPLEMENTED if not implemented in this build
+    SLANG_E_NOT_FOUND if other resources (such as shared libraries) required to make target work
+    could not be found SLANG_FAIL other kinds of failures */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    checkCompileTargetSupport(SlangCompileTarget target) = 0;
+
+    /** Returns SLANG_OK if the pass through support is supported for this session
+    @param passThrough The pass-through (downstream) compiler to test
+    @return SLANG_OK if the pass-through compiler is available
+    SLANG_E_NOT_IMPLEMENTED if not implemented in this build
+    SLANG_E_NOT_FOUND if other resources (such as shared libraries) required to make it work
+    could not be found SLANG_FAIL other kinds of failures */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    checkPassThroughSupport(SlangPassThrough passThrough) = 0;
+
+    /** Compile from (embedded source) the core module on the session.
+    Will return a failure if there is already a core module available
+    NOTE! API is experimental and not ready for production code
+    @param flags to control compilation
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    compileCoreModule(CompileCoreModuleFlags flags) = 0;
+
+    /** Load the core module. Currently loads modules from the file system.
+    @param coreModule Start address of the serialized core module
+    @param coreModuleSizeInBytes The size in bytes of the serialized core module
+
+    NOTE(review): the parameters describe an in-memory serialized module, which seems at odds
+    with "from the file system" above - confirm which is accurate.
+
+    NOTE! API is experimental and not ready for production code
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    loadCoreModule(const void* coreModule, size_t coreModuleSizeInBytes) = 0;
+
+    /** Save the core module to the file system
+    @param archiveType The type of archive used to hold the core module
+    @param outBlob The serialized blob containing the core module
+
+    NOTE! API is experimental and not ready for production code  */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    saveCoreModule(SlangArchiveType archiveType, ISlangBlob** outBlob) = 0;
+
+    /** Look up the internal ID of a capability by its `name`.
+
+    Capability IDs are *not* guaranteed to be stable across versions
+    of the Slang library, so clients are expected to look up
+    capabilities by name at runtime.
+    */
+    virtual SLANG_NO_THROW SlangCapabilityID SLANG_MCALL findCapability(char const* name) = 0;
+
+    /** Set the downstream/pass through compiler to be used for a transition from the source type to
+    the target type
+    @param source The source 'code gen target'
+    @param target The target 'code gen target'
+    @param compiler The compiler/pass through to use for the transition from source to target
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerForTransition(
+        SlangCompileTarget source,
+        SlangCompileTarget target,
+        SlangPassThrough compiler) = 0;
+
+    /** Get the downstream/pass through compiler for a transition specified by source and target
+    @param source The source 'code gen target'
+    @param target The target 'code gen target'
+    @return The compiler that is used for the transition. Returns SLANG_PASS_THROUGH_NONE if it is
+    not defined
+    */
+    virtual SLANG_NO_THROW SlangPassThrough SLANG_MCALL
+    getDownstreamCompilerForTransition(SlangCompileTarget source, SlangCompileTarget target) = 0;
+
+    /** Get the time in seconds spent in the slang and downstream compiler.
+     */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    getCompilerElapsedTime(double* outTotalTime, double* outDownstreamTime) = 0;
+
+    /** Specify a spirv.core.grammar.json file to load and use when
+     * parsing and checking any SPIR-V code
+     */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL setSPIRVCoreGrammar(char const* jsonPath) = 0;
+
+    /** Parse slangc command line options into a SessionDesc that can be used to create a session
+     *   with all the compiler options specified in the command line.
+     *   @param argc The number of command line arguments.
+     *   @param argv An input array of command line arguments to parse.
+     *   @param outSessionDesc A pointer to a SessionDesc struct to receive parsed session desc.
+     *   @param outAuxAllocation Auxiliary memory allocated to hold data used in the session desc.
+     */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL parseCommandLineArguments(
+        int argc,
+        const char* const* argv,
+        SessionDesc* outSessionDesc,
+        ISlangUnknown** outAuxAllocation) = 0;
+
+    /** Computes a digest that uniquely identifies the session description.
+     */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getSessionDescDigest(SessionDesc* sessionDesc, ISlangBlob** outBlob) = 0;
+
+    /** Compile from (embedded source) the builtin module on the session.
+    Will return a failure if there is already a builtin module available.
+    NOTE! API is experimental and not ready for production code.
+    @param module The builtin module name.
+    @param flags to control compilation
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    compileBuiltinModule(BuiltinModuleName module, CompileCoreModuleFlags flags) = 0;
+
+    /** Load a builtin module. Currently loads modules from the file system.
+    @param module The builtin module name
+    @param moduleData Start address of the serialized builtin module
+    @param sizeInBytes The size in bytes of the serialized builtin module
+
+    NOTE! API is experimental and not ready for production code
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    loadBuiltinModule(BuiltinModuleName module, const void* moduleData, size_t sizeInBytes) = 0;
+
+    /** Save the builtin module to the file system
+    @param module The builtin module name
+    @param archiveType The type of archive used to hold the builtin module
+    @param outBlob The serialized blob containing the builtin module
+
+    NOTE! API is experimental and not ready for production code  */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL saveBuiltinModule(
+        BuiltinModuleName module,
+        SlangArchiveType archiveType,
+        ISlangBlob** outBlob) = 0;
+};
+
+    #define SLANG_UUID_IGlobalSession IGlobalSession::getTypeGuid()
+
+/** Description of a code generation target.
+ */
+struct TargetDesc
+{
+    /** The size of this structure, in bytes.
+     */
+    size_t structureSize = sizeof(TargetDesc);
+
+    /** The target format to generate code for (e.g., SPIR-V, DXIL, etc.)
+     */
+    SlangCompileTarget format = SLANG_TARGET_UNKNOWN;
+
+    /** The compilation profile supported by the target (e.g., "Shader Model 5.1")
+     */
+    SlangProfileID profile = SLANG_PROFILE_UNKNOWN;
+
+    /** Flags for the code generation target. Currently unused. */
+    SlangTargetFlags flags = kDefaultTargetFlags;
+
+    /** Default mode to use for floating-point operations on the target.
+     */
+    SlangFloatingPointMode floatingPointMode = SLANG_FLOATING_POINT_MODE_DEFAULT;
+
+    /** The line directive mode for output source code.
+     */
+    SlangLineDirectiveMode lineDirectiveMode = SLANG_LINE_DIRECTIVE_MODE_DEFAULT;
+
+    /** Whether to force `scalar` layout for glsl shader storage buffers.
+     */
+    bool forceGLSLScalarBufferLayout = false;
+
+    /** Pointer to an array of compiler option entries, whose size is compilerOptionEntryCount.
+     */
+    CompilerOptionEntry* compilerOptionEntries = nullptr;
+
+    /** Number of additional compiler option entries.
+     */
+    uint32_t compilerOptionEntryCount = 0;
+};
+
+typedef uint32_t SessionFlags;
+enum
+{
+    kSessionFlags_None = 0
+};
+
+struct PreprocessorMacroDesc
+{
+    const char* name;
+    const char* value;
+};
+
+struct SessionDesc
+{
+    /** The size of this structure, in bytes.
+     */
+    size_t structureSize = sizeof(SessionDesc);
+
+    /** Code generation targets to include in the session.
+     */
+    TargetDesc const* targets = nullptr;
+    SlangInt targetCount = 0;
+
+    /** Flags to configure the session.
+     */
+    SessionFlags flags = kSessionFlags_None;
+
+    /** Default layout to assume for variables with matrix types.
+     */
+    SlangMatrixLayoutMode defaultMatrixLayoutMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR;
+
+    /** Paths to use when searching for `#include`d or `import`ed files.
+     */
+    char const* const* searchPaths = nullptr;
+    SlangInt searchPathCount = 0;
+
+    PreprocessorMacroDesc const* preprocessorMacros = nullptr;
+    SlangInt preprocessorMacroCount = 0;
+
+    ISlangFileSystem* fileSystem = nullptr;
+
+    bool enableEffectAnnotations = false;
+    bool allowGLSLSyntax = false;
+
+    /** Pointer to an array of compiler option entries, whose size is compilerOptionEntryCount.
+     */
+    CompilerOptionEntry* compilerOptionEntries = nullptr;
+
+    /** Number of additional compiler option entries.
+     */
+    uint32_t compilerOptionEntryCount = 0;
+};
+
+enum class ContainerType
+{
+    None,
+    UnsizedArray,
+    StructuredBuffer,
+    ConstantBuffer,
+    ParameterBlock
+};
+
+/** A session provides a scope for code that is loaded.
+
+A session can be used to load modules of Slang source code,
+and to request target-specific compiled binaries and layout
+information.
+
+In order to be able to load code, the session owns a set
+of active "search paths" for resolving `#include` directives
+and `import` declarations, as well as a set of global
+preprocessor definitions that will be used for all code
+that gets `import`ed in the session.
+
+If multiple user shaders are loaded in the same session,
+and import the same module (e.g., two source files do `import X`)
+then there will only be one copy of `X` loaded within the session.
+
+In order to be able to generate target code, the session
+owns a list of available compilation targets, which specify
+code generation options.
+
+Code loaded and compiled within a session is owned by the session
+and will remain resident in memory until the session is released.
+Applications wishing to control the memory usage for compiled
+and loaded code should use multiple sessions.
+*/
+struct ISession : public ISlangUnknown
+{
+    SLANG_COM_INTERFACE(0x67618701, 0xd116, 0x468f, {0xab, 0x3b, 0x47, 0x4b, 0xed, 0xce, 0xe, 0x3d})
+
+    /** Get the global session that was used to create this session.
+     */
+    virtual SLANG_NO_THROW IGlobalSession* SLANG_MCALL getGlobalSession() = 0;
+
+    /** Load a module as it would be by code using `import`.
+     */
+    virtual SLANG_NO_THROW IModule* SLANG_MCALL
+    loadModule(const char* moduleName, IBlob** outDiagnostics = nullptr) = 0;
+
+    /** Load a module from Slang source code.
+     */
+    virtual SLANG_NO_THROW IModule* SLANG_MCALL loadModuleFromSource(
+        const char* moduleName,
+        const char* path,
+        slang::IBlob* source,
+        slang::IBlob** outDiagnostics = nullptr) = 0;
+
+    /** Combine multiple component types to create a composite component type.
+
+    The `componentTypes` array must contain `componentTypeCount` pointers
+    to component types that were loaded or created using the same session.
+
+    The shader parameters and specialization parameters of the composite will
+    be the union of those in `componentTypes`. The relative order of child
+    component types is significant, and will affect the order in which
+    parameters are reflected and laid out.
+
+    The entry-point functions of the composite will be the union of those in
+    `componentTypes`, and will follow the ordering of `componentTypes`.
+
+    The requirements of the composite component type will be a subset of
+    those in `componentTypes`. If an entry in `componentTypes` has a requirement
+    that can be satisfied by another entry, then the composition will
+    satisfy the requirement and it will not appear as a requirement of
+    the composite. If multiple entries in `componentTypes` have a requirement
+    for the same type, then only the first such requirement will be retained
+    on the composite. The relative ordering of requirements on the composite
+    will otherwise match that of `componentTypes`.
+
+    If any diagnostics are generated during creation of the composite, they
+    will be written to `outDiagnostics`. If an error is encountered, the
+    function will return null.
+
+    It is an error to create a composite component type that recursively
+    aggregates a single module more than once.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL createCompositeComponentType(
+        IComponentType* const* componentTypes,
+        SlangInt componentTypeCount,
+        IComponentType** outCompositeComponentType,
+        ISlangBlob** outDiagnostics = nullptr) = 0;
+
+    /** Specialize a type based on type arguments.
+     */
+    virtual SLANG_NO_THROW TypeReflection* SLANG_MCALL specializeType(
+        TypeReflection* type,
+        SpecializationArg const* specializationArgs,
+        SlangInt specializationArgCount,
+        ISlangBlob** outDiagnostics = nullptr) = 0;
+
+
+    /** Get the layout of `type` for the target at `targetIndex`.
+     */
+    virtual SLANG_NO_THROW TypeLayoutReflection* SLANG_MCALL getTypeLayout(
+        TypeReflection* type,
+        SlangInt targetIndex = 0,
+        LayoutRules rules = LayoutRules::Default,
+        ISlangBlob** outDiagnostics = nullptr) = 0;
+
+    /** Get a container type from `elementType`. For example, given type `T`, returns
+        a type that represents `StructuredBuffer<T>`.
+
+        @param `elementType`: the element type to wrap around.
+        @param `containerType`: the type of the container to wrap `elementType` in.
+        @param `outDiagnostics`: a blob to receive diagnostic messages.
+    */
+    virtual SLANG_NO_THROW TypeReflection* SLANG_MCALL getContainerType(
+        TypeReflection* elementType,
+        ContainerType containerType,
+        ISlangBlob** outDiagnostics = nullptr) = 0;
+
+    /** Return a `TypeReflection` that represents the `__Dynamic` type.
+        This type can be used as a specialization argument to indicate using
+        dynamic dispatch.
+    */
+    virtual SLANG_NO_THROW TypeReflection* SLANG_MCALL getDynamicType() = 0;
+
+    /** Get the mangled name for a type RTTI object.
+     */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getTypeRTTIMangledName(TypeReflection* type, ISlangBlob** outNameBlob) = 0;
+
+    /** Get the mangled name for a type witness.
+     */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTypeConformanceWitnessMangledName(
+        TypeReflection* type,
+        TypeReflection* interfaceType,
+        ISlangBlob** outNameBlob) = 0;
+
+    /** Get the sequential ID used to identify a type witness in a dynamic object.
+     */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTypeConformanceWitnessSequentialID(
+        slang::TypeReflection* type,
+        slang::TypeReflection* interfaceType,
+        uint32_t* outId) = 0;
+
+    /** Create a request to load/compile front-end code.
+     */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    createCompileRequest(SlangCompileRequest** outCompileRequest) = 0;
+
+
+    /** Creates a `IComponentType` that represents a type's conformance to an interface.
+        The retrieved `ITypeConformance` objects can be included in a composite `IComponentType`
+        to explicitly specify which implementation types should be included in the final compiled
+        code. For example, if a module defines `IMaterial` interface and `AMaterial`,
+        `BMaterial`, `CMaterial` types that implement the interface, the user can exclude
+        `CMaterial` implementation from the resulting shader code by explicitly adding
+        `AMaterial:IMaterial` and `BMaterial:IMaterial` conformances to a composite
+        `IComponentType` and get entry point code from it. The resulting code will not have
+        anything related to `CMaterial` in the dynamic dispatch logic. If the user does not
+        explicitly include any `TypeConformances` to an interface type, all implementations to
+        that interface will be included by default. By linking a `ITypeConformance`, the user is
+        also given the opportunity to specify the dispatch ID of the implementation type. If
+        `conformanceIdOverride` is -1, there will be no override behavior and Slang will
+        automatically assign IDs to implementation types. The automatically assigned IDs can be
+        queried via `ISession::getTypeConformanceWitnessSequentialID`.
+
+        Returns SLANG_OK if succeeds, or SLANG_FAIL if `type` does not conform to `interfaceType`.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL createTypeConformanceComponentType(
+        slang::TypeReflection* type,
+        slang::TypeReflection* interfaceType,
+        ITypeConformance** outConformance,
+        SlangInt conformanceIdOverride,
+        ISlangBlob** outDiagnostics) = 0;
+
+    /** Load a module from a Slang module blob.
+     */
+    virtual SLANG_NO_THROW IModule* SLANG_MCALL loadModuleFromIRBlob(
+        const char* moduleName,
+        const char* path,
+        slang::IBlob* source,
+        slang::IBlob** outDiagnostics = nullptr) = 0;
+
+    virtual SLANG_NO_THROW SlangInt SLANG_MCALL getLoadedModuleCount() = 0;
+    virtual SLANG_NO_THROW IModule* SLANG_MCALL getLoadedModule(SlangInt index) = 0;
+
+    /** Checks if a precompiled binary module is up-to-date with the current compiler
+     *   option settings and the source file contents.
+     */
+    virtual SLANG_NO_THROW bool SLANG_MCALL
+    isBinaryModuleUpToDate(const char* modulePath, slang::IBlob* binaryModuleBlob) = 0;
+
+    /** Load a module from a string.
+     */
+    virtual SLANG_NO_THROW IModule* SLANG_MCALL loadModuleFromSourceString(
+        const char* moduleName,
+        const char* path,
+        const char* string,
+        slang::IBlob** outDiagnostics = nullptr) = 0;
+};
+
+    #define SLANG_UUID_ISession ISession::getTypeGuid()
+
+struct IMetadata : public ISlangCastable
+{
+    SLANG_COM_INTERFACE(0x8044a8a3, 0xddc0, 0x4b7f, {0xaf, 0x8e, 0x2, 0x6e, 0x90, 0x5d, 0x73, 0x32})
+
+    /*
+    Returns whether a resource parameter at the specified binding location is actually being used
+    in the compiled shader.
+    */
+    virtual SlangResult isParameterLocationUsed(
+        SlangParameterCategory category, // is this a `t` register? `s` register?
+        SlangUInt spaceIndex,            // `space` for D3D12, `set` for Vulkan
+        SlangUInt registerIndex,         // `register` for D3D12, `binding` for Vulkan
+        bool& outUsed) = 0;
+};
+    #define SLANG_UUID_IMetadata IMetadata::getTypeGuid()
+
+/** A component type is a unit of shader code layout, reflection, and linking.
+
+A component type is a unit of shader code that can be included into
+a linked and compiled shader program. Each component type may have:
+
+* Zero or more uniform shader parameters, representing textures,
+  buffers, etc. that the code in the component depends on.
+
+* Zero or more *specialization* parameters, which are type or
+  value parameters that can be used to synthesize specialized
+  versions of the component type.
+
+* Zero or more entry points, which are the individually invocable
+  kernels that can have final code generated.
+
+* Zero or more *requirements*, which are other component
+  types on which the component type depends.
+
+One example of a component type is a module of Slang code:
+
+* The global-scope shader parameters declared in the module are
+  the parameters when considered as a component type.
+
+* Any global-scope generic or interface type parameters introduce
+  specialization parameters for the module.
+
+* A module does not by default include any entry points when
+  considered as a component type (although the code of the
+  module might *declare* some entry points).
+
+* Any other modules that are `import`ed in the source code
+  become requirements of the module, when considered as a
+  component type.
+
+An entry point is another example of a component type:
+
+* The `uniform` parameters of the entry point function are
+  its shader parameters when considered as a component type.
+
+* Any generic or interface-type parameters of the entry point
+  introduce specialization parameters.
+
+* An entry point component type exposes a single entry point (itself).
+
+* An entry point has one requirement for the module in which
+  it was defined.
+
+Component types can be manipulated in a few ways:
+
+* Multiple component types can be combined into a composite, which
+  combines all of their code, parameters, etc.
+
+* A component type can be specialized, by "plugging in" types and
+  values for its specialization parameters.
+
+* A component type can be laid out for a particular target, giving
+  offsets/bindings to the shader parameters it contains.
+
+* Generated kernel code can be requested for entry points.
+
+*/
+struct IComponentType : public ISlangUnknown
+{
+    SLANG_COM_INTERFACE(0x5bc42be8, 0x5c50, 0x4929, {0x9e, 0x5e, 0xd1, 0x5e, 0x7c, 0x24, 0x1, 0x5f})
+
+    /** Get the runtime session that this component type belongs to.
+     */
+    virtual SLANG_NO_THROW ISession* SLANG_MCALL getSession() = 0;
+
+    /** Get the layout for this program for the chosen `targetIndex`.
+
+    The resulting layout will establish offsets/bindings for all
+    of the global and entry-point shader parameters in the
+    component type.
+
+    If this component type has specialization parameters (that is,
+    it is not fully specialized), then the resulting layout may
+    be incomplete, and plugging in arguments for generic specialization
+    parameters may result in a component type that doesn't have
+    a compatible layout. If the component type only uses
+    interface-type specialization parameters, then the layout
+    for a specialization should be compatible with an unspecialized
+    layout (all parameters in the unspecialized layout will have
+    the same offset/binding in the specialized layout).
+
+    If this component type is combined into a composite, then
+    the absolute offsets/bindings of parameters may not stay the same.
+    If the shader parameters in a component type don't make
+    use of explicit binding annotations (e.g., `register(...)`),
+    then the *relative* offset of shader parameters will stay
+    the same when it is used in a composition.
+    */
+    virtual SLANG_NO_THROW ProgramLayout* SLANG_MCALL
+    getLayout(SlangInt targetIndex = 0, IBlob** outDiagnostics = nullptr) = 0;
+
+    /** Get the number of (unspecialized) specialization parameters for the component type.
+     */
+    virtual SLANG_NO_THROW SlangInt SLANG_MCALL getSpecializationParamCount() = 0;
+
+    /** Get the compiled code for the entry point at `entryPointIndex` for the chosen `targetIndex`
+
+    Entry point code can only be computed for a component type that
+    has no specialization parameters (it must be fully specialized)
+    and that has no requirements (it must be fully linked).
+
+    If code has not already been generated for the given entry point and target,
+    then a compilation error may be detected, in which case `outDiagnostics`
+    (if non-null) will be filled in with a blob of messages diagnosing the error.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointCode(
+        SlangInt entryPointIndex,
+        SlangInt targetIndex,
+        IBlob** outCode,
+        IBlob** outDiagnostics = nullptr) = 0;
+
+    /** Get the compilation result as a file system.
+
+    Has the same requirements as getEntryPointCode.
+
+    The result is not written to the actual OS file system, but is made available as an
+    in memory representation.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getResultAsFileSystem(
+        SlangInt entryPointIndex,
+        SlangInt targetIndex,
+        ISlangMutableFileSystem** outFileSystem) = 0;
+
+    /** Compute a hash for the entry point at `entryPointIndex` for the chosen `targetIndex`.
+
+    This computes a hash based on all the dependencies for this component type as well as the
+    target settings affecting the compiler backend. The computed hash is used as a key for caching
+    the output of the compiler backend to implement shader caching.
+    */
+    virtual SLANG_NO_THROW void SLANG_MCALL
+    getEntryPointHash(SlangInt entryPointIndex, SlangInt targetIndex, IBlob** outHash) = 0;
+
+    /** Specialize the component by binding its specialization parameters to concrete arguments.
+
+    The `specializationArgs` array must have `specializationArgCount` entries, and
+    this must match the number of specialization parameters on this component type.
+
+    If any diagnostics (error or warnings) are produced, they will be written to `outDiagnostics`.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL specialize(
+        SpecializationArg const* specializationArgs,
+        SlangInt specializationArgCount,
+        IComponentType** outSpecializedComponentType,
+        ISlangBlob** outDiagnostics = nullptr) = 0;
+
+    /** Link this component type against all of its unsatisfied dependencies.
+
+    A component type may have unsatisfied dependencies. For example, a module
+    depends on any other modules it `import`s, and an entry point depends
+    on the module that defined it.
+
+    A user can manually satisfy dependencies by creating a composite
+    component type, and when doing so they retain full control over
+    the relative ordering of shader parameters in the resulting layout.
+
+    It is an error to try to generate/access compiled kernel code for
+    a component type with unresolved dependencies, so if dependencies
+    remain after whatever manual composition steps an application
+    cares to perform, the `link()` function can be used to automatically
+    compose in any remaining dependencies. The order of parameters
+    (and hence the global layout) that results will be deterministic,
+    but is not currently documented.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    link(IComponentType** outLinkedComponentType, ISlangBlob** outDiagnostics = nullptr) = 0;
+
+    /** Get entry point 'callable' functions accessible through the ISlangSharedLibrary interface.
+
+    The functions remain in scope as long as the ISlangSharedLibrary interface is in scope.
+
+    NOTE! Requires a compilation target of SLANG_HOST_CALLABLE.
+
+    @param entryPointIndex  The index of the entry point to get code for.
+    @param targetIndex      The index of the target to get code for (default: zero).
+    @param outSharedLibrary A pointer to a ISharedLibrary interface which functions can be queried
+    on.
+    @returns                A `SlangResult` to indicate success or failure.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointHostCallable(
+        int entryPointIndex,
+        int targetIndex,
+        ISlangSharedLibrary** outSharedLibrary,
+        slang::IBlob** outDiagnostics = 0) = 0;
+
+    /** Get a new ComponentType object that represents a renamed entry point.
+
+    The current object must be a single EntryPoint, or a CompositeComponentType or
+    SpecializedComponentType that contains one EntryPoint component.
+    */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    renameEntryPoint(const char* newName, IComponentType** outEntryPoint) = 0;
+
+    /** Link and specify additional compiler options when generating code
+     *   from the linked program.
+     */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL linkWithOptions(
+        IComponentType** outLinkedComponentType,
+        uint32_t compilerOptionEntryCount,
+        CompilerOptionEntry* compilerOptionEntries,
+        ISlangBlob** outDiagnostics = nullptr) = 0;
+
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getTargetCode(SlangInt targetIndex, IBlob** outCode, IBlob** outDiagnostics = nullptr) = 0;
+
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTargetMetadata(
+        SlangInt targetIndex,
+        IMetadata** outMetadata,
+        IBlob** outDiagnostics = nullptr) = 0;
+
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointMetadata(
+        SlangInt entryPointIndex,
+        SlangInt targetIndex,
+        IMetadata** outMetadata,
+        IBlob** outDiagnostics = nullptr) = 0;
+};
+    #define SLANG_UUID_IComponentType IComponentType::getTypeGuid()
+
+struct IEntryPoint : public IComponentType
+{
+    SLANG_COM_INTERFACE(0x8f241361, 0xf5bd, 0x4ca0, {0xa3, 0xac, 0x2, 0xf7, 0xfa, 0x24, 0x2, 0xb8})
+
+    virtual SLANG_NO_THROW FunctionReflection* SLANG_MCALL getFunctionReflection() = 0;
+};
+
+    #define SLANG_UUID_IEntryPoint IEntryPoint::getTypeGuid()
+
+struct ITypeConformance : public IComponentType
+{
+    SLANG_COM_INTERFACE(0x73eb3147, 0xe544, 0x41b5, {0xb8, 0xf0, 0xa2, 0x44, 0xdf, 0x21, 0x94, 0xb})
+};
+    #define SLANG_UUID_ITypeConformance ITypeConformance::getTypeGuid()
+
+/** A module is the granularity of shader code compilation and loading.
+
+In most cases a module corresponds to a single compile "translation unit."
+This will often be a single `.slang` or `.hlsl` file and everything it
+`#include`s.
+
+Notably, a module `M` does *not* include the things it `import`s, as these
+are distinct modules that `M` depends on. There is a directed graph of
+module dependencies, and all modules in the graph must belong to the
+same session (`ISession`).
+
+A module establishes a namespace for looking up types, functions, etc.
+*/
+struct IModule : public IComponentType
+{
+    SLANG_COM_INTERFACE(0xc720e64, 0x8722, 0x4d31, {0x89, 0x90, 0x63, 0x8a, 0x98, 0xb1, 0xc2, 0x79})
+
+    /// Find an entry point by name.
+    /// Note that this does not work in case the function is not explicitly designated as an entry
+    /// point, e.g. using a `[shader("...")]` attribute. In such cases, consider using
+    /// `IModule::findAndCheckEntryPoint` instead.
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    findEntryPointByName(char const* name, IEntryPoint** outEntryPoint) = 0;
+
+    /// Get number of entry points defined in the module. An entry point defined in a module
+    /// is by default not included in the linkage, so calls to `IComponentType::getEntryPointCount`
+    /// on an `IModule` instance will always return 0. However `IModule::getDefinedEntryPointCount`
+    /// will return the number of defined entry points.
+    virtual SLANG_NO_THROW SlangInt32 SLANG_MCALL getDefinedEntryPointCount() = 0;
+    /// Get the entry point defined in the module at the given index.
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    getDefinedEntryPoint(SlangInt32 index, IEntryPoint** outEntryPoint) = 0;
+
+    /// Get a serialized representation of the checked module.
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL serialize(ISlangBlob** outSerializedBlob) = 0;
+
+    /// Write the serialized representation of this module to a file.
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL writeToFile(char const* fileName) = 0;
+
+    /// Get the name of the module.
+    virtual SLANG_NO_THROW const char* SLANG_MCALL getName() = 0;
+
+    /// Get the path of the module.
+    virtual SLANG_NO_THROW const char* SLANG_MCALL getFilePath() = 0;
+
+    /// Get the unique identity of the module.
+    virtual SLANG_NO_THROW const char* SLANG_MCALL getUniqueIdentity() = 0;
+
+    /// Find and validate an entry point by name, even if the function is
+    /// not marked with the `[shader("...")]` attribute.
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL findAndCheckEntryPoint(
+        char const* name,
+        SlangStage stage,
+        IEntryPoint** outEntryPoint,
+        ISlangBlob** outDiagnostics) = 0;
+
+    /// Get the number of dependency files that this module depends on.
+    /// This includes both the explicit source files, as well as any
+    /// additional files that were transitively referenced (e.g., via
+    /// a `#include` directive).
+    virtual SLANG_NO_THROW SlangInt32 SLANG_MCALL getDependencyFileCount() = 0;
+
+    /// Get the path to a file this module depends on.
+    virtual SLANG_NO_THROW char const* SLANG_MCALL getDependencyFilePath(SlangInt32 index) = 0;
+
+    virtual SLANG_NO_THROW DeclReflection* SLANG_MCALL getModuleReflection() = 0;
+
+    /** Disassemble a module.
+     */
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    disassemble(slang::IBlob** outDisassembledBlob) = 0;
+};
+
+    #define SLANG_UUID_IModule IModule::getTypeGuid()
+
+/* Experimental interface for doing target precompilation of slang modules */
+struct IModulePrecompileService_Experimental : public ISlangUnknown
+{
+    // uuidgen output:     8e12e8e3 -  5fcd -  433e -    afcb -      13a088bc5ee5
+    SLANG_COM_INTERFACE(
+        0x8e12e8e3,
+        0x5fcd,
+        0x433e,
+        {0xaf, 0xcb, 0x13, 0xa0, 0x88, 0xbc, 0x5e, 0xe5})
+
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
+    precompileForTarget(SlangCompileTarget target, ISlangBlob** outDiagnostics) = 0;
+
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getPrecompiledTargetCode(
+        SlangCompileTarget target,
+        IBlob** outCode,
+        IBlob** outDiagnostics = nullptr) = 0;
+
+    virtual SLANG_NO_THROW SlangInt SLANG_MCALL getModuleDependencyCount() = 0;
+
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL getModuleDependency(
+        SlangInt dependencyIndex,
+        IModule** outModule,
+        IBlob** outDiagnostics = nullptr) = 0;
+};
+
+    #define SLANG_UUID_IModulePrecompileService_Experimental \
+        IModulePrecompileService_Experimental::getTypeGuid()
+
+/** Argument used for specialization to types/values.
+ */
+struct SpecializationArg
+{
+    enum class Kind : int32_t
+    {
+        Unknown, /**< An invalid specialization argument. */
+        Type,    /**< Specialize to a type. */
+    };
+
+    /** The kind of specialization argument. */
+    Kind kind;
+    union
+    {
+        /** A type specialization argument, used for `Kind::Type`. */
+        TypeReflection* type;
+    };
+
+    static SpecializationArg fromType(TypeReflection* inType)
+    {
+        SpecializationArg rs;
+        rs.kind = Kind::Type;
+        rs.type = inType;
+        return rs;
+    }
+};
+} // namespace slang
+
+    // Passed into functions to create globalSession to identify the API version client code is
+    // using.
+    #define SLANG_API_VERSION 0
+
+enum SlangLanguageVersion
+{
+    SLANG_LANGUAGE_VERSION_2025 = 2025
+};
+
+
+/* Description of a Slang global session.
+ */
+struct SlangGlobalSessionDesc
+{
+    /// Size of this struct.
+    uint32_t structureSize = sizeof(SlangGlobalSessionDesc);
+
+    /// Slang API version.
+    uint32_t apiVersion = SLANG_API_VERSION;
+
+    /// Slang language version.
+    uint32_t languageVersion = SLANG_LANGUAGE_VERSION_2025;
+
+    /// Whether to enable GLSL support.
+    bool enableGLSL = false;
+
+    /// Reserved for future use.
+    uint32_t reserved[16] = {};
+};
+
+/* Create a global session, with the built-in core module.
+
+@param apiVersion Pass in SLANG_API_VERSION
+@param outGlobalSession (out)The created global session.
+*/
+SLANG_EXTERN_C SLANG_API SlangResult
+slang_createGlobalSession(SlangInt apiVersion, slang::IGlobalSession** outGlobalSession);
+
+
+/* Create a global session, with the built-in core module.
+
+@param desc Description of the global session.
+@param outGlobalSession (out)The created global session.
+*/
+SLANG_EXTERN_C SLANG_API SlangResult slang_createGlobalSession2(
+    const SlangGlobalSessionDesc* desc,
+    slang::IGlobalSession** outGlobalSession);
+
+/* Create a global session, but do not set up the core module. The core module can
+then be loaded via loadCoreModule or compileCoreModule
+
+@param apiVersion Pass in SLANG_API_VERSION
+@param outGlobalSession (out)The created global session that doesn't have a core module setup.
+
+NOTE! API is experimental and not ready for production code
+*/
+SLANG_EXTERN_C SLANG_API SlangResult slang_createGlobalSessionWithoutCoreModule(
+    SlangInt apiVersion,
+    slang::IGlobalSession** outGlobalSession);
+
+/* Returns a blob that contains the serialized core module.
+Returns nullptr if there isn't an embedded core module.
+
+NOTE! API is experimental and not ready for production code
+*/
+SLANG_API ISlangBlob* slang_getEmbeddedCoreModule();
+
+
+/* Cleanup all global allocations used by Slang, to prevent memory leak detectors from
+ reporting them as leaks. This function should only be called after all Slang objects
+ have been released. No other Slang functions such as `createGlobalSession`
+ should be called after this function.
+ */
+SLANG_EXTERN_C SLANG_API void slang_shutdown();
+
+/* Return the last signaled internal error message.
+ */
+SLANG_EXTERN_C SLANG_API const char* slang_getLastInternalErrorMessage();
+
+namespace slang
+{
+inline SlangResult createGlobalSession(slang::IGlobalSession** outGlobalSession)
+{
+    SlangGlobalSessionDesc defaultDesc = {};
+    return slang_createGlobalSession2(&defaultDesc, outGlobalSession);
+}
+inline SlangResult createGlobalSession(
+    const SlangGlobalSessionDesc* desc,
+    slang::IGlobalSession** outGlobalSession)
+{
+    return slang_createGlobalSession2(desc, outGlobalSession);
+}
+inline void shutdown()
+{
+    slang_shutdown();
+}
+inline const char* getLastInternalErrorMessage()
+{
+    return slang_getLastInternalErrorMessage();
+}
+} // namespace slang
+
+#endif // C++ helpers
+
+#define SLANG_ERROR_INSUFFICIENT_BUFFER SLANG_E_BUFFER_TOO_SMALL
+#define SLANG_ERROR_INVALID_PARAMETER SLANG_E_INVALID_ARG
+
+#endif
diff --git a/external/slang/lib/gfx.lib b/external/slang/lib/gfx.lib
new file mode 100644
index 00000000..665e37d6
Binary files /dev/null and b/external/slang/lib/gfx.lib differ
diff --git a/external/slang/lib/slang-rt.lib b/external/slang/lib/slang-rt.lib
new file mode 100644
index 00000000..7e993182
Binary files /dev/null and b/external/slang/lib/slang-rt.lib differ
diff --git a/external/slang/lib/slang.lib b/external/slang/lib/slang.lib
new file mode 100644
index 00000000..45d46789
Binary files /dev/null and b/external/slang/lib/slang.lib differ
diff --git a/external/slang/prelude/slang-cpp-host-prelude.h b/external/slang/prelude/slang-cpp-host-prelude.h
deleted file mode 100644
index f69d03ee..00000000
--- a/external/slang/prelude/slang-cpp-host-prelude.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef SLANG_CPP_HOST_PRELUDE_H
-#define SLANG_CPP_HOST_PRELUDE_H
-
-#include <cstdio>
-#include <cmath>
-#include <cstring>
-
-#define SLANG_COM_PTR_ENABLE_REF_OPERATOR 1
-
-#include "../source/slang-rt/slang-rt.h"
-#include "../slang-com-ptr.h"
-#include "slang-cpp-types.h"
-
-#ifdef SLANG_LLVM
-#include "slang-llvm.h"
-#else // SLANG_LLVM
-#   if SLANG_GCC_FAMILY && __GNUC__ < 6
-#       include <cmath>
-#       define SLANG_PRELUDE_STD std::
-#   else
-#       include <math.h>
-#       define SLANG_PRELUDE_STD
-#   endif
-
-#   include <assert.h>
-#   include <stdlib.h>
-#   include <string.h>
-#   include <stdint.h>
-#endif // SLANG_LLVM
-
-#if defined(_MSC_VER)
-#   define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
-#else
-#   define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
-//#   define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport)) __attribute__((__visibility__("default")))
-#endif    
-
-#ifdef __cplusplus    
-#   define SLANG_PRELUDE_EXTERN_C extern "C"
-#   define SLANG_PRELUDE_EXTERN_C_START extern "C" {
-#   define SLANG_PRELUDE_EXTERN_C_END }
-#else
-#   define SLANG_PRELUDE_EXTERN_C 
-#   define SLANG_PRELUDE_EXTERN_C_START
-#   define SLANG_PRELUDE_EXTERN_C_END 
-#endif    
-
-#include "slang-cpp-scalar-intrinsics.h"
-
-using namespace Slang;
-
-template<typename TResult, typename... Args>
-using Slang_FuncType = TResult(SLANG_MCALL *)(Args...);
-
-#endif
diff --git a/external/slang/prelude/slang-cpp-prelude.h b/external/slang/prelude/slang-cpp-prelude.h
deleted file mode 100644
index 2b848dc3..00000000
--- a/external/slang/prelude/slang-cpp-prelude.h
+++ /dev/null
@@ -1,316 +0,0 @@
-#ifndef SLANG_CPP_PRELUDE_H
-#define SLANG_CPP_PRELUDE_H
-
-// Because the signiture of isnan, isfinite, and is isinf changed in C++, we use the macro
-// to use the version in the std namespace. 
-// https://stackoverflow.com/questions/39130040/cmath-hides-isnan-in-math-h-in-c14-c11
- 
-#ifdef SLANG_LLVM
-#include "slang-llvm.h"
-#else // SLANG_LLVM
-#   if SLANG_GCC_FAMILY && __GNUC__ < 6
-#       include <cmath>
-#       define SLANG_PRELUDE_STD std::
-#   else
-#       include <math.h>
-#       define SLANG_PRELUDE_STD
-#   endif
-
-#   include <assert.h>
-#   include <stdlib.h>
-#   include <string.h>
-#   include <stdint.h>
-#endif // SLANG_LLVM
-
-#if defined(_MSC_VER)
-#   define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
-#else
-#   define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
-//#   define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport)) __attribute__((__visibility__("default")))
-#endif    
-
-#ifdef __cplusplus    
-#   define SLANG_PRELUDE_EXTERN_C extern "C"
-#   define SLANG_PRELUDE_EXTERN_C_START extern "C" {
-#   define SLANG_PRELUDE_EXTERN_C_END }
-#else
-#   define SLANG_PRELUDE_EXTERN_C 
-#   define SLANG_PRELUDE_EXTERN_C_START
-#   define SLANG_PRELUDE_EXTERN_C_END 
-#endif    
-
-#define SLANG_PRELUDE_EXPORT SLANG_PRELUDE_EXTERN_C SLANG_PRELUDE_SHARED_LIB_EXPORT
-#define SLANG_PRELUDE_EXPORT_START SLANG_PRELUDE_EXTERN_C_START SLANG_PRELUDE_SHARED_LIB_EXPORT
-#define SLANG_PRELUDE_EXPORT_END SLANG_PRELUDE_EXTERN_C_END
-
-#ifndef INFINITY
-// Must overflow for double
-#   define INFINITY float(1e+300 * 1e+300)
-#endif
-
-#ifndef SLANG_INFINITY
-#   define SLANG_INFINITY   INFINITY
-#endif
-
-// Detect the compiler type
-
-#ifndef SLANG_COMPILER
-#    define SLANG_COMPILER
-
-/*
-Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/
-NOTE that SLANG_VC holds the compiler version - not just 1 or 0
-*/
-#    if defined(_MSC_VER)
-#        if _MSC_VER >= 1900
-#            define SLANG_VC 14
-#        elif _MSC_VER >= 1800
-#            define SLANG_VC 12
-#        elif _MSC_VER >= 1700
-#            define SLANG_VC 11
-#        elif _MSC_VER >= 1600
-#            define SLANG_VC 10
-#        elif _MSC_VER >= 1500
-#            define SLANG_VC 9
-#        else
-#            error "unknown version of Visual C++ compiler"
-#        endif
-#    elif defined(__clang__)
-#        define SLANG_CLANG 1
-#    elif defined(__SNC__)
-#        define SLANG_SNC 1
-#    elif defined(__ghs__)
-#        define SLANG_GHS 1
-#    elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */
-#        define SLANG_GCC 1
-#    else
-#        error "unknown compiler"
-#    endif
-/*
-Any compilers not detected by the above logic are now now explicitly zeroed out.
-*/
-#    ifndef SLANG_VC
-#        define SLANG_VC 0
-#    endif
-#    ifndef SLANG_CLANG
-#        define SLANG_CLANG 0
-#    endif
-#    ifndef SLANG_SNC
-#        define SLANG_SNC 0
-#    endif
-#    ifndef SLANG_GHS
-#        define SLANG_GHS 0
-#    endif
-#    ifndef SLANG_GCC
-#        define SLANG_GCC 0
-#    endif
-#endif /* SLANG_COMPILER */
-
-/*
-The following section attempts to detect the target platform being compiled for.
-
-If an application defines `SLANG_PLATFORM` before including this header,
-they take responsibility for setting any compiler-dependent macros
-used later in the file.
-
-Most applications should not need to touch this section.
-*/
-#ifndef SLANG_PLATFORM
-#    define SLANG_PLATFORM
-/**
-Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSystems/
-*/
-#    if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_PARTITION_APP
-#        define SLANG_WINRT 1 /* Windows Runtime, either on Windows RT or Windows 8 */
-#    elif defined(XBOXONE)
-#        define SLANG_XBOXONE 1
-#    elif defined(_WIN64) /* note: XBOXONE implies _WIN64 */
-#        define SLANG_WIN64 1
-#    elif defined(_M_PPC)
-#        define SLANG_X360 1
-#    elif defined(_WIN32) /* note: _M_PPC implies _WIN32 */
-#        define SLANG_WIN32 1
-#    elif defined(__ANDROID__)
-#        define SLANG_ANDROID 1
-#    elif defined(__linux__) || defined(__CYGWIN__) /* note: __ANDROID__ implies __linux__ */
-#        define SLANG_LINUX 1
-#    elif defined(__APPLE__) && !defined(SLANG_LLVM)
-#        include "TargetConditionals.h"
-#        if TARGET_OS_MAC
-#            define SLANG_OSX 1
-#        else
-#            define SLANG_IOS 1
-#        endif
-#    elif defined(__APPLE__)
-// On `slang-llvm` we can't inclue "TargetConditionals.h" in general, so for now assume its OSX.
-#       define SLANG_OSX 1
-#    elif defined(__CELLOS_LV2__)
-#        define SLANG_PS3 1
-#    elif defined(__ORBIS__)
-#        define SLANG_PS4 1
-#    elif defined(__SNC__) && defined(__arm__)
-#        define SLANG_PSP2 1
-#    elif defined(__ghs__)
-#        define SLANG_WIIU 1
-#    else
-#        error "unknown target platform"
-#    endif
-
-
-/*
-Any platforms not detected by the above logic are now now explicitly zeroed out.
-*/
-#    ifndef SLANG_WINRT
-#        define SLANG_WINRT 0
-#    endif
-#    ifndef SLANG_XBOXONE
-#        define SLANG_XBOXONE 0
-#    endif
-#    ifndef SLANG_WIN64
-#        define SLANG_WIN64 0
-#    endif
-#    ifndef SLANG_X360
-#        define SLANG_X360 0
-#    endif
-#    ifndef SLANG_WIN32
-#        define SLANG_WIN32 0
-#    endif
-#    ifndef SLANG_ANDROID
-#        define SLANG_ANDROID 0
-#    endif
-#    ifndef SLANG_LINUX
-#        define SLANG_LINUX 0
-#    endif
-#    ifndef SLANG_IOS
-#        define SLANG_IOS 0
-#    endif
-#    ifndef SLANG_OSX
-#        define SLANG_OSX 0
-#    endif
-#    ifndef SLANG_PS3
-#        define SLANG_PS3 0
-#    endif
-#    ifndef SLANG_PS4
-#        define SLANG_PS4 0
-#    endif
-#    ifndef SLANG_PSP2
-#        define SLANG_PSP2 0
-#    endif
-#    ifndef SLANG_WIIU
-#        define SLANG_WIIU 0
-#    endif
-#endif /* SLANG_PLATFORM */
-
-/* Shorthands for "families" of compilers/platforms */
-#define SLANG_GCC_FAMILY (SLANG_CLANG || SLANG_SNC || SLANG_GHS || SLANG_GCC)
-#define SLANG_WINDOWS_FAMILY (SLANG_WINRT || SLANG_WIN32 || SLANG_WIN64)
-#define SLANG_MICROSOFT_FAMILY (SLANG_XBOXONE || SLANG_X360 || SLANG_WINDOWS_FAMILY)
-#define SLANG_LINUX_FAMILY (SLANG_LINUX || SLANG_ANDROID)
-#define SLANG_APPLE_FAMILY (SLANG_IOS || SLANG_OSX)                  /* equivalent to #if __APPLE__ */
-#define SLANG_UNIX_FAMILY (SLANG_LINUX_FAMILY || SLANG_APPLE_FAMILY) /* shortcut for unix/posix platforms */
-
-// GCC Specific
-#if SLANG_GCC_FAMILY
-#	define SLANG_ALIGN_OF(T)	__alignof__(T)
-
-#   define SLANG_BREAKPOINT(id) __builtin_trap()
-
-// Use this macro instead of offsetof, because gcc produces warning if offsetof is used on a 
-// non POD type, even though it produces the correct result
-#   define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1)
-#endif // SLANG_GCC_FAMILY
-
-// Microsoft VC specific
-#if SLANG_VC
-#   define SLANG_ALIGN_OF(T) __alignof(T)
-
-#	define SLANG_BREAKPOINT(id) __debugbreak();
-
-#endif // SLANG_VC
-
-// Default impls
-
-#ifndef SLANG_OFFSET_OF
-#   define SLANG_OFFSET_OF(X, Y) offsetof(X, Y)
-#endif
-
-#ifndef SLANG_BREAKPOINT
-// Make it crash with a write to 0!
-#   define SLANG_BREAKPOINT(id) (*((int*)0) = int(id));
-#endif
-
-// If slang.h has been included we don't need any of these definitions
-#ifndef SLANG_H
-
-/* Macro for declaring if a method is no throw. Should be set before the return parameter. */
-#ifndef SLANG_NO_THROW
-#   if SLANG_WINDOWS_FAMILY && !defined(SLANG_DISABLE_EXCEPTIONS)
-#       define SLANG_NO_THROW __declspec(nothrow)
-#   endif
-#endif
-#ifndef SLANG_NO_THROW
-#   define SLANG_NO_THROW
-#endif
-
-/* The `SLANG_STDCALL` and `SLANG_MCALL` defines are used to set the calling
-convention for interface methods.
-*/
-#ifndef SLANG_STDCALL
-#   if SLANG_MICROSOFT_FAMILY
-#       define SLANG_STDCALL __stdcall
-#   else
-#       define SLANG_STDCALL
-#   endif
-#endif
-#ifndef SLANG_MCALL
-#   define SLANG_MCALL SLANG_STDCALL
-#endif
-
-#ifndef SLANG_FORCE_INLINE
-#    define SLANG_FORCE_INLINE inline
-#endif
-
-// TODO(JS): Should these be in slang-cpp-types.h? 
-// They are more likely to clash with slang.h
-
-struct SlangUUID
-{
-    uint32_t data1;
-    uint16_t data2;
-    uint16_t data3;
-    uint8_t  data4[8];
-};
-
-typedef int32_t SlangResult;
-
-struct ISlangUnknown
-{
-    virtual SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface(SlangUUID const& uuid, void** outObject) = 0;
-    virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() = 0;
-    virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() = 0;
-};
-
-#define SLANG_COM_INTERFACE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
-    public: \
-    SLANG_FORCE_INLINE static const SlangUUID& getTypeGuid() \
-    { \
-        static const SlangUUID guid = { a, b, c, d0, d1, d2, d3, d4, d5, d6, d7 }; \
-        return guid; \
-    }
-#endif // SLANG_H
-
-// Includes
-
-#include "slang-cpp-scalar-intrinsics.h"
-#include "slang-cpp-types.h"
-
-// TODO(JS): Hack! Output C++ code from slang can copy uninitialized variables. 
-#if defined(_MSC_VER)
-#   pragma warning(disable : 4700)
-#endif
-
-#ifndef SLANG_UNROLL
-#   define SLANG_UNROLL
-#endif
-
-#endif
diff --git a/external/slang/prelude/slang-cpp-scalar-intrinsics.h b/external/slang/prelude/slang-cpp-scalar-intrinsics.h
deleted file mode 100644
index 8fc90fac..00000000
--- a/external/slang/prelude/slang-cpp-scalar-intrinsics.h
+++ /dev/null
@@ -1,498 +0,0 @@
-#ifndef SLANG_PRELUDE_SCALAR_INTRINSICS_H
-#define SLANG_PRELUDE_SCALAR_INTRINSICS_H
-
-#if !defined(SLANG_LLVM) && SLANG_PROCESSOR_X86_64 && SLANG_VC
-//  If we have visual studio and 64 bit processor, we can assume we have popcnt, and can include x86 intrinsics
-#   include <intrin.h>
-#endif
-
-#ifndef SLANG_FORCE_INLINE
-#    define SLANG_FORCE_INLINE inline
-#endif
-
-#ifdef SLANG_PRELUDE_NAMESPACE
-namespace SLANG_PRELUDE_NAMESPACE {
-#endif
-
-#ifndef SLANG_PRELUDE_PI
-#   define SLANG_PRELUDE_PI           3.14159265358979323846
-#endif
-
-
-union Union32 
-{
-    uint32_t u;
-    int32_t i;
-    float f;
-};
-
-union Union64
-{
-    uint64_t u;
-    int64_t i;
-    double d;
-};
-
-// 32 bit cast conversions
-SLANG_FORCE_INLINE int32_t _bitCastFloatToInt(float f) { Union32 u; u.f = f; return u.i; }
-SLANG_FORCE_INLINE float _bitCastIntToFloat(int32_t i) { Union32 u; u.i = i; return u.f; }
-SLANG_FORCE_INLINE uint32_t _bitCastFloatToUInt(float f) { Union32 u; u.f = f; return u.u; }
-SLANG_FORCE_INLINE float _bitCastUIntToFloat(uint32_t ui) { Union32 u; u.u = ui; return u.f; }
-
-// ----------------------------- F16 -----------------------------------------
-
-
-// This impl is based on FloatToHalf that is in Slang codebase
-SLANG_FORCE_INLINE uint32_t f32tof16(const float value)
-{
-    const uint32_t inBits = _bitCastFloatToUInt(value);
-
-    // bits initially set to just the sign bit
-    uint32_t bits = (inBits >> 16) & 0x8000;
-    // Mantissa can't be used as is, as it holds last bit, for rounding.
-    uint32_t m = (inBits >> 12) & 0x07ff;
-    uint32_t e = (inBits >> 23) & 0xff;
-
-    if (e < 103)
-    {
-        // It's zero
-        return bits;
-    }
-    if (e == 0xff)
-    {
-        // Could be a NAN or INF. Is INF if *input* mantissa is 0.
-        
-        // Remove last bit for rounding to make output mantissa.
-        m >>= 1;
-       
-        // We *assume* float16/float32 signaling bit and remaining bits
-        // semantics are the same. (The signalling bit convention is target specific!).
-        // Non signal bit's usage within mantissa for a NAN are also target specific.
-      
-        // If the m is 0, it could be because the result is INF, but it could also be because all the 
-        // bits that made NAN were dropped as we have less mantissa bits in f16. 
-           
-        // To fix for this we make non zero if m is 0 and the input mantissa was not.
-        // This will (typically) produce a signalling NAN.
-        m += uint32_t(m == 0 && (inBits & 0x007fffffu));
-       
-        // Combine for output
-        return (bits | 0x7c00u | m);
-    }
-    if (e > 142)
-    {
-        // INF. 
-        return bits | 0x7c00u;
-    }
-    if (e < 113)
-    {
-        m |= 0x0800u;
-        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
-        return bits;
-    }
-    bits |= ((e - 112) << 10) | (m >> 1);
-    bits += m & 1;
-    return bits;
-}
-
-static const float g_f16tof32Magic = _bitCastIntToFloat((127 + (127 - 15)) << 23);
-
-SLANG_FORCE_INLINE float f16tof32(const uint32_t value)
-{
-    const uint32_t sign = (value & 0x8000) << 16;
-    uint32_t exponent = (value & 0x7c00) >> 10;
-    uint32_t mantissa = (value & 0x03ff);
-
-    if (exponent == 0)
-    {
-        // If mantissa is 0 we are done, as output is 0. 
-        // If it's not zero we must have a denormal.
-        if (mantissa)
-        {
-            // We have a denormal so use the magic to do exponent adjust
-            return _bitCastIntToFloat(sign | ((value & 0x7fff) << 13)) * g_f16tof32Magic;
-        }
-    }
-    else 
-    {
-        // If the exponent is NAN or INF exponent is 0x1f on input. 
-        // If that's the case, we just need to set the exponent to 0xff on output
-        // and the mantissa can just stay the same. If its 0 it's INF, else it is NAN and we just copy the bits
-        //
-        // Else we need to correct the exponent in the normalized case.
-        exponent = (exponent == 0x1F) ? 0xff : (exponent + (-15 + 127));
-    }
-    
-    return _bitCastUIntToFloat(sign | (exponent << 23) | (mantissa << 13));
-}
-
-// ----------------------------- F32 -----------------------------------------
-
-// Helpers
-SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians);
-
-#ifdef SLANG_LLVM
-
-SLANG_PRELUDE_EXTERN_C_START
-
-// Unary 
-float F32_ceil(float f);
-float F32_floor(float f);
-float F32_round(float f);
-float F32_sin(float f);
-float F32_cos(float f);
-float F32_tan(float f);
-float F32_asin(float f);
-float F32_acos(float f);
-float F32_atan(float f);
-float F32_sinh(float f);
-float F32_cosh(float f);
-float F32_tanh(float f);
-float F32_log2(float f);
-float F32_log(float f);
-float F32_log10(float f);
-float F32_exp2(float f);
-float F32_exp(float f);
-float F32_abs(float f);
-float F32_trunc(float f);
-float F32_sqrt(float f);
-
-bool F32_isnan(float f);
-bool F32_isfinite(float f); 
-bool F32_isinf(float f);
-
-// Binary
-SLANG_FORCE_INLINE float F32_min(float a, float b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE float F32_max(float a, float b) { return a > b ? a : b; }
-float F32_pow(float a, float b);
-float F32_fmod(float a, float b);
-float F32_remainder(float a, float b);
-float F32_atan2(float a, float b);
-
-float F32_frexp(float x, int* e);
-
-float F32_modf(float x, float* ip);
-
-// Ternary
-SLANG_FORCE_INLINE float F32_fma(float a, float b, float c) { return a * b + c; }
-
-SLANG_PRELUDE_EXTERN_C_END
-
-#else
-
-// Unary 
-SLANG_FORCE_INLINE float F32_ceil(float f) { return ::ceilf(f); }
-SLANG_FORCE_INLINE float F32_floor(float f) { return ::floorf(f); }
-SLANG_FORCE_INLINE float F32_round(float f) { return ::roundf(f); }
-SLANG_FORCE_INLINE float F32_sin(float f) { return ::sinf(f); }
-SLANG_FORCE_INLINE float F32_cos(float f) { return ::cosf(f); }
-SLANG_FORCE_INLINE float F32_tan(float f) { return ::tanf(f); }
-SLANG_FORCE_INLINE float F32_asin(float f) { return ::asinf(f); }
-SLANG_FORCE_INLINE float F32_acos(float f) { return ::acosf(f); }
-SLANG_FORCE_INLINE float F32_atan(float f) { return ::atanf(f); }
-SLANG_FORCE_INLINE float F32_sinh(float f) { return ::sinhf(f); }
-SLANG_FORCE_INLINE float F32_cosh(float f) { return ::coshf(f); }
-SLANG_FORCE_INLINE float F32_tanh(float f) { return ::tanhf(f); }
-SLANG_FORCE_INLINE float F32_log2(float f) { return ::log2f(f); }
-SLANG_FORCE_INLINE float F32_log(float f) { return ::logf(f); }
-SLANG_FORCE_INLINE float F32_log10(float f) { return ::log10f(f); }
-SLANG_FORCE_INLINE float F32_exp2(float f) { return ::exp2f(f); }
-SLANG_FORCE_INLINE float F32_exp(float f) { return ::expf(f); }
-SLANG_FORCE_INLINE float F32_abs(float f) { return ::fabsf(f); }
-SLANG_FORCE_INLINE float F32_trunc(float f) { return ::truncf(f); }
-SLANG_FORCE_INLINE float F32_sqrt(float f) { return ::sqrtf(f); }
-
-SLANG_FORCE_INLINE bool F32_isnan(float f) { return SLANG_PRELUDE_STD isnan(f); }
-SLANG_FORCE_INLINE bool F32_isfinite(float f) { return SLANG_PRELUDE_STD isfinite(f); }
-SLANG_FORCE_INLINE bool F32_isinf(float f) { return SLANG_PRELUDE_STD isinf(f); }
-
-// Binary
-SLANG_FORCE_INLINE float F32_min(float a, float b) { return ::fminf(a, b); }
-SLANG_FORCE_INLINE float F32_max(float a, float b) { return ::fmaxf(a, b); }
-SLANG_FORCE_INLINE float F32_pow(float a, float b) { return ::powf(a, b); }
-SLANG_FORCE_INLINE float F32_fmod(float a, float b) { return ::fmodf(a, b); }
-SLANG_FORCE_INLINE float F32_remainder(float a, float b) { return ::remainderf(a, b); }
-SLANG_FORCE_INLINE float F32_atan2(float a, float b) { return float(::atan2(a, b)); }
-
-SLANG_FORCE_INLINE float F32_frexp(float x, int* e) { return ::frexpf(x, e); }
-
-SLANG_FORCE_INLINE float F32_modf(float x, float* ip)
-{
-    return ::modff(x, ip);
-}
-
-// Ternary
-SLANG_FORCE_INLINE float F32_fma(float a, float b, float c) { return ::fmaf(a, b, c); }
-
-#endif
-
-SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians)
-{
-    // Put 0 to 2pi cycles to cycle around 0 to 1 
-	float a = radians * (1.0f /  float(SLANG_PRELUDE_PI * 2));
-    // Get truncated fraction, as value in  0 - 1 range
-    a = a - F32_floor(a);
-    // Convert back to 0 - 2pi range
-	return (a * float(SLANG_PRELUDE_PI * 2));
-}
-
-SLANG_FORCE_INLINE float F32_rsqrt(float f) { return 1.0f / F32_sqrt(f); }
-SLANG_FORCE_INLINE float F32_sign(float f) { return ( f == 0.0f) ? f : (( f < 0.0f) ? -1.0f : 1.0f); } 
-SLANG_FORCE_INLINE float F32_frac(float f) { return f - F32_floor(f); }
-
-SLANG_FORCE_INLINE uint32_t F32_asuint(float f) { Union32 u; u.f = f; return u.u; }
-SLANG_FORCE_INLINE int32_t F32_asint(float f) { Union32 u; u.f = f; return u.i; }
-
-// ----------------------------- F64 -----------------------------------------
-
-SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians);
-
-#ifdef SLANG_LLVM
-
-SLANG_PRELUDE_EXTERN_C_START
-
-// Unary 
-double F64_ceil(double f);
-double F64_floor(double f);
-double F64_round(double f);
-double F64_sin(double f);
-double F64_cos(double f);
-double F64_tan(double f);
-double F64_asin(double f);
-double F64_acos(double f);
-double F64_atan(double f);
-double F64_sinh(double f);
-double F64_cosh(double f);
-double F64_tanh(double f);
-double F64_log2(double f);
-double F64_log(double f);
-double F64_log10(float f);
-double F64_exp2(double f);
-double F64_exp(double f);
-double F64_abs(double f);
-double F64_trunc(double f);
-double F64_sqrt(double f);
-
-bool F64_isnan(double f);
-bool F64_isfinite(double f);
-bool F64_isinf(double f);
-
-// Binary
-SLANG_FORCE_INLINE double F64_min(double a, double b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE double F64_max(double a, double b) { return a > b ? a : b; }
-double F64_pow(double a, double b);
-double F64_fmod(double a, double b);
-double F64_remainder(double a, double b);
-double F64_atan2(double a, double b);
-
-double F64_frexp(double x, int* e);
-
-double F64_modf(double x, double* ip);
-
-// Ternary
-SLANG_FORCE_INLINE double F64_fma(double a, double b, double c) { return a * b + c; }
-
-SLANG_PRELUDE_EXTERN_C_END
-
-#else // SLANG_LLVM
-
-// Unary 
-SLANG_FORCE_INLINE double F64_ceil(double f) { return ::ceil(f); }
-SLANG_FORCE_INLINE double F64_floor(double f) { return ::floor(f); }
-SLANG_FORCE_INLINE double F64_round(double f) { return ::round(f); }
-SLANG_FORCE_INLINE double F64_sin(double f) { return ::sin(f); }
-SLANG_FORCE_INLINE double F64_cos(double f) { return ::cos(f); }
-SLANG_FORCE_INLINE double F64_tan(double f) { return ::tan(f); }
-SLANG_FORCE_INLINE double F64_asin(double f) { return ::asin(f); }
-SLANG_FORCE_INLINE double F64_acos(double f) { return ::acos(f); }
-SLANG_FORCE_INLINE double F64_atan(double f) { return ::atan(f); }
-SLANG_FORCE_INLINE double F64_sinh(double f) { return ::sinh(f); }
-SLANG_FORCE_INLINE double F64_cosh(double f) { return ::cosh(f); }
-SLANG_FORCE_INLINE double F64_tanh(double f) { return ::tanh(f); }
-SLANG_FORCE_INLINE double F64_log2(double f) { return ::log2(f); }
-SLANG_FORCE_INLINE double F64_log(double f) { return ::log(f); }
-SLANG_FORCE_INLINE double F64_log10(float f) { return ::log10(f); }
-SLANG_FORCE_INLINE double F64_exp2(double f) { return ::exp2(f); }
-SLANG_FORCE_INLINE double F64_exp(double f) { return ::exp(f); }
-SLANG_FORCE_INLINE double F64_abs(double f) { return ::fabs(f); }
-SLANG_FORCE_INLINE double F64_trunc(double f) { return ::trunc(f); }
-SLANG_FORCE_INLINE double F64_sqrt(double f) { return ::sqrt(f); }
-
-
-SLANG_FORCE_INLINE bool F64_isnan(double f) { return SLANG_PRELUDE_STD isnan(f); }
-SLANG_FORCE_INLINE bool F64_isfinite(double f) { return SLANG_PRELUDE_STD isfinite(f); }
-SLANG_FORCE_INLINE bool F64_isinf(double f) { return SLANG_PRELUDE_STD isinf(f); }
-
-// Binary
-SLANG_FORCE_INLINE double F64_min(double a, double b) { return ::fmin(a, b); }
-SLANG_FORCE_INLINE double F64_max(double a, double b) { return ::fmax(a, b); }
-SLANG_FORCE_INLINE double F64_pow(double a, double b) { return ::pow(a, b); }
-SLANG_FORCE_INLINE double F64_fmod(double a, double b) { return ::fmod(a, b); }
-SLANG_FORCE_INLINE double F64_remainder(double a, double b) { return ::remainder(a, b); }
-SLANG_FORCE_INLINE double F64_atan2(double a, double b) { return ::atan2(a, b); }
-
-SLANG_FORCE_INLINE double F64_frexp(double x, int* e) { return ::frexp(x, e); }
-
-SLANG_FORCE_INLINE double F64_modf(double x, double* ip)
-{
-    return ::modf(x, ip);
-}
-
-// Ternary
-SLANG_FORCE_INLINE double F64_fma(double a, double b, double c) { return ::fma(a, b, c); }
-
-#endif // SLANG_LLVM
-
-SLANG_FORCE_INLINE double F64_rsqrt(double f) { return 1.0 / F64_sqrt(f); }
-SLANG_FORCE_INLINE double F64_sign(double f) { return (f == 0.0) ? f : ((f < 0.0) ? -1.0 : 1.0); }
-SLANG_FORCE_INLINE double F64_frac(double f) { return f - F64_floor(f); }
-
-SLANG_FORCE_INLINE void F64_asuint(double d, uint32_t* low, uint32_t* hi)
-{
-    Union64 u;
-    u.d = d;
-    *low = uint32_t(u.u);
-    *hi = uint32_t(u.u >> 32);
-}
-
-SLANG_FORCE_INLINE void F64_asint(double d, int32_t* low, int32_t* hi)
-{
-    Union64 u;
-    u.d = d;
-    *low = int32_t(u.u);
-    *hi = int32_t(u.u >> 32);
-}
-
-SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians)
-{
-    // Put 0 to 2pi cycles to cycle around 0 to 1 
-	double a = radians * (1.0f /  (SLANG_PRELUDE_PI * 2));
-    // Get truncated fraction, as value in  0 - 1 range
-    a = a - F64_floor(a);
-    // Convert back to 0 - 2pi range
-	return (a * (SLANG_PRELUDE_PI * 2));
-}
-
-// ----------------------------- I32 -----------------------------------------
-
-SLANG_FORCE_INLINE int32_t I32_abs(int32_t f) { return (f < 0) ? -f : f; }
-
-SLANG_FORCE_INLINE int32_t I32_min(int32_t a, int32_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE int32_t I32_max(int32_t a, int32_t b) { return a > b ? a : b; }
-
-SLANG_FORCE_INLINE float I32_asfloat(int32_t x) { Union32 u; u.i = x; return u.f; }
-SLANG_FORCE_INLINE uint32_t I32_asuint(int32_t x) { return uint32_t(x); }
-SLANG_FORCE_INLINE double I32_asdouble(int32_t low, int32_t hi )
-{
-    Union64 u;
-    u.u = (uint64_t(hi) << 32) | uint32_t(low);
-    return u.d;
-}
-
-// ----------------------------- U32 -----------------------------------------
-
-SLANG_FORCE_INLINE uint32_t U32_abs(uint32_t f) { return f; }
-
-SLANG_FORCE_INLINE uint32_t U32_min(uint32_t a, uint32_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE uint32_t U32_max(uint32_t a, uint32_t b) { return a > b ? a : b; }
-
-SLANG_FORCE_INLINE float U32_asfloat(uint32_t x) { Union32 u; u.u = x; return u.f; }
-SLANG_FORCE_INLINE uint32_t U32_asint(int32_t x) { return uint32_t(x); } 
-
-SLANG_FORCE_INLINE double U32_asdouble(uint32_t low, uint32_t hi)
-{
-    Union64 u;
-    u.u = (uint64_t(hi) << 32) | low;
-    return u.d;
-}
-
-
-SLANG_FORCE_INLINE uint32_t U32_countbits(uint32_t v)
-{
-#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
-    return __builtin_popcount(v);
-#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
-    return __popcnt(v);
-#else     
-    uint32_t c = 0;
-    while (v)
-    {
-        c++;
-        v &= v - 1;
-    }
-    return c;
-#endif
-}
-
-// ----------------------------- U64 -----------------------------------------
-
-SLANG_FORCE_INLINE uint64_t U64_abs(uint64_t f) { return f; }
-
-SLANG_FORCE_INLINE uint64_t U64_min(uint64_t a, uint64_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE uint64_t U64_max(uint64_t a, uint64_t b) { return a > b ? a : b; }
-
-// TODO(JS): We don't define countbits for 64bit in stdlib currently.
-// It's not clear from documentation if it should return 32 or 64 bits, if it exists. 
-// 32 bits can always hold the result, and will be implicitly promoted. 
-SLANG_FORCE_INLINE uint32_t U64_countbits(uint64_t v)
-{
-#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)   
-    return uint32_t(__builtin_popcountl(v));
-#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
-    return uint32_t(__popcnt64(v));
-#else     
-    uint32_t c = 0;
-    while (v)
-    {
-        c++;
-        v &= v - 1;
-    }
-    return c;
-#endif
-}
-
-// ----------------------------- I64 -----------------------------------------
-
-SLANG_FORCE_INLINE int64_t I64_abs(int64_t f) { return (f < 0) ? -f : f; }
-
-SLANG_FORCE_INLINE int64_t I64_min(int64_t a, int64_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE int64_t I64_max(int64_t a, int64_t b) { return a > b ? a : b; }
-
-
-// ----------------------------- Interlocked ---------------------------------
-
-#if SLANG_LLVM
-
-#else // SLANG_LLVM
-
-#   ifdef _WIN32
-#       include <intrin.h>
-#   endif
-
-SLANG_FORCE_INLINE void InterlockedAdd(uint32_t* dest, uint32_t value, uint32_t* oldValue)
-{
-#   ifdef _WIN32
-    *oldValue = _InterlockedExchangeAdd((long*)dest, (long)value);
-#   else
-    *oldValue = __sync_fetch_and_add(dest, value);
-#   endif
-}
-
-#endif // SLANG_LLVM
-
-
-// ----------------------- fmod --------------------------
-SLANG_FORCE_INLINE float _slang_fmod(float x, float y)
-{
-    return F32_fmod(x, y);
-}
-SLANG_FORCE_INLINE double _slang_fmod(double x, double y)
-{
-    return F64_fmod(x, y);
-}
-
-#ifdef SLANG_PRELUDE_NAMESPACE
-} 
-#endif
-
-#endif
diff --git a/external/slang/prelude/slang-cpp-types-core.h b/external/slang/prelude/slang-cpp-types-core.h
deleted file mode 100644
index 25fe4720..00000000
--- a/external/slang/prelude/slang-cpp-types-core.h
+++ /dev/null
@@ -1,578 +0,0 @@
-#ifndef SLANG_PRELUDE_CPP_TYPES_CORE_H
-#define SLANG_PRELUDE_CPP_TYPES_CORE_H
-
-#ifndef SLANG_PRELUDE_ASSERT
-#   ifdef SLANG_PRELUDE_ENABLE_ASSERT
-#       define SLANG_PRELUDE_ASSERT(VALUE) assert(VALUE)
-#   else
-#       define SLANG_PRELUDE_ASSERT(VALUE) 
-#   endif
-#endif
-
-// Since we are using unsigned arithmatic care is need in this comparison.
-// It is *assumed* that sizeInBytes >= elemSize. Which means (sizeInBytes >= elemSize) >= 0
-// Which means only a single test is needed
-
-// Asserts for bounds checking.
-// It is assumed index/count are unsigned types.
-#define SLANG_BOUND_ASSERT(index, count)  SLANG_PRELUDE_ASSERT(index < count); 
-#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_PRELUDE_ASSERT(index <= (sizeInBytes - elemSize) && (index & 3) == 0);
-
-// Macros to zero index if an access is out of range
-#define SLANG_BOUND_ZERO_INDEX(index, count) index = (index < count) ? index : 0; 
-#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) index = (index <= (sizeInBytes - elemSize)) ? index : 0; 
-
-// The 'FIX' macro define how the index is fixed. The default is to do nothing. If SLANG_ENABLE_BOUND_ZERO_INDEX
-// the fix macro will zero the index, if out of range
-#ifdef  SLANG_ENABLE_BOUND_ZERO_INDEX
-#   define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
-#   define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
-#   define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
-#else
-#   define SLANG_BOUND_FIX(index, count) 
-#   define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) 
-#   define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) 
-#endif
-
-#ifndef SLANG_BOUND_CHECK
-#   define SLANG_BOUND_CHECK(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count)
-#endif
-
-#ifndef SLANG_BOUND_CHECK_BYTE_ADDRESS
-#   define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
-#endif
-
-#ifndef SLANG_BOUND_CHECK_FIXED_ARRAY
-#   define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
-#endif
-
-struct TypeInfo
-{
-    size_t typeSize;
-};
-
-template <typename T, size_t SIZE>
-struct FixedArray
-{
-    const T& operator[](size_t index) const { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; }
-    T& operator[](size_t index) { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; }
-
-    T m_data[SIZE];
-};
-
-// An array that has no specified size, becomes a 'Array'. This stores the size so it can potentially 
-// do bounds checking.  
-template <typename T>
-struct Array
-{
-    const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
-    T& operator[](size_t index) { SLANG_BOUND_CHECK(index, count); return data[index]; }
-
-    T* data;
-    size_t count;
-};
-
-/* Constant buffers become a pointer to the contained type, so ConstantBuffer<T> becomes T* in C++ code.
-*/
-
-template <typename T, int COUNT>
-struct Vector;
-
-template <typename T>
-struct Vector<T, 1>
-{
-    T x;
-    const T& operator[](size_t /*index*/) const { return x; }
-    T& operator[](size_t /*index*/) { return x; }
-    operator T() const { return x; }
-    Vector() = default;
-    Vector(T scalar)
-    {
-        x = scalar;
-    }
-    template <typename U>
-    Vector(Vector<U, 1> other)
-    {
-        x = (T)other.x;
-    }
-    template <typename U, int otherSize>
-    Vector(Vector<U, otherSize> other)
-    {
-        int minSize = 1;
-        if (otherSize < minSize) minSize = otherSize;
-        for (int i = 0; i < minSize; i++)
-            (*this)[i] = (T)other[i];
-    }
-};
-
-template <typename T>
-struct Vector<T, 2>
-{
-    T x, y;
-    const T& operator[](size_t index) const { return index == 0 ? x : y; }
-    T& operator[](size_t index) { return index == 0 ? x : y; }
-    Vector() = default;
-    Vector(T scalar)
-    {
-        x = y = scalar;
-    }
-    Vector(T _x, T _y)
-    {
-        x = _x;
-        y = _y;
-    }
-    template <typename U>
-    Vector(Vector<U, 2> other)
-    {
-        x = (T)other.x;
-        y = (T)other.y;
-    }
-    template <typename U, int otherSize>
-    Vector(Vector<U, otherSize> other)
-    {
-        int minSize = 2;
-        if (otherSize < minSize) minSize = otherSize;
-        for (int i = 0; i < minSize; i++)
-            (*this)[i] = (T)other[i];
-    }
-};
-
-template <typename T>
-struct Vector<T, 3>
-{
-    T x, y, z;
-    const T& operator[](size_t index) const { return *((T*)(this) + index); }
-    T& operator[](size_t index) { return *((T*)(this) + index); }
-
-    Vector() = default;
-    Vector(T scalar)
-    {
-        x = y = z = scalar;
-    }
-    Vector(T _x, T _y, T _z)
-    {
-        x = _x;
-        y = _y;
-        z = _z;
-    }
-    template <typename U>
-    Vector(Vector<U, 3> other)
-    {
-        x = (T)other.x;
-        y = (T)other.y;
-        z = (T)other.z;
-    }
-    template <typename U, int otherSize>
-    Vector(Vector<U, otherSize> other)
-    {
-        int minSize = 3;
-        if (otherSize < minSize) minSize = otherSize;
-        for (int i = 0; i < minSize; i++)
-            (*this)[i] = (T)other[i];
-    }
-};
-
-template <typename T>
-struct Vector<T, 4>
-{
-    T x, y, z, w;
-
-    const T& operator[](size_t index) const { return *((T*)(this) + index); }
-    T& operator[](size_t index) { return *((T*)(this) + index); }
-    Vector() = default;
-    Vector(T scalar)
-    {
-        x = y = z = w = scalar;
-    }
-    Vector(T _x, T _y, T _z, T _w)
-    {
-        x = _x;
-        y = _y;
-        z = _z;
-        w = _w;
-    }
-    template <typename U, int otherSize>
-    Vector(Vector<U, otherSize> other)
-    {
-        int minSize = 4;
-        if (otherSize < minSize) minSize = otherSize;
-        for (int i = 0; i < minSize; i++)
-            (*this)[i] = (T)other[i];
-    }
- 
-};
-
-template<typename T, int N>
-SLANG_FORCE_INLINE Vector<T, N> _slang_select(Vector<bool, N> condition, Vector<T, N> v0, Vector<T, N> v1)
-{
-    Vector<T, N> result;
-    for (int i = 0; i < N; i++)
-    {
-        result[i] = condition[i] ? v0[i] : v1[i];
-    }
-    return result;
-}
-
-template<typename T>
-SLANG_FORCE_INLINE T _slang_select(bool condition, T v0, T v1)
-{
-    return condition ? v0 : v1;
-}
-
-template<typename T, int N>
-SLANG_FORCE_INLINE T _slang_vector_get_element(Vector<T, N> x, int index)
-{
-    return x[index];
-}
-
-template<typename T, int N>
-SLANG_FORCE_INLINE const T* _slang_vector_get_element_ptr(const Vector<T, N>* x, int index)
-{
-    return &((*const_cast<Vector<T,N>*>(x))[index]);
-}
-
-template<typename T, int N>
-SLANG_FORCE_INLINE T* _slang_vector_get_element_ptr(Vector<T, N>* x, int index)
-{
-    return &((*x)[index]);
-}
-
-template<typename T, int n, typename OtherT, int m>
-SLANG_FORCE_INLINE Vector<T, n> _slang_vector_reshape(const Vector<OtherT, m> other)
-{
-    Vector<T, n> result;
-    for (int i = 0; i < n; i++)
-    {
-        OtherT otherElement = T(0);
-        if (i < m)
-            otherElement = _slang_vector_get_element(other, i);
-        *_slang_vector_get_element_ptr(&result, i) = (T)otherElement;
-    }
-    return result;
-}
-
-typedef uint32_t uint;
-
-#define SLANG_VECTOR_BINARY_OP(T, op) \
-    template<int n> \
-    SLANG_FORCE_INLINE Vector<T, n> operator op(const Vector<T, n>& thisVal, const Vector<T, n>& other) \
-    { \
-        Vector<T, n> result;\
-        for (int i = 0; i < n; i++) \
-            result[i] = thisVal[i] op other[i]; \
-        return result;\
-    }
-#define SLANG_VECTOR_BINARY_COMPARE_OP(T, op) \
-    template<int n> \
-    SLANG_FORCE_INLINE Vector<bool, n> operator op(const Vector<T, n>& thisVal, const Vector<T, n>& other) \
-    { \
-        Vector<bool, n> result;\
-        for (int i = 0; i < n; i++) \
-            result[i] = thisVal[i] op other[i]; \
-        return result;\
-    }
-
-#define SLANG_VECTOR_UNARY_OP(T, op) \
-    template<int n> \
-    SLANG_FORCE_INLINE Vector<T, n> operator op(const Vector<T, n>& thisVal) \
-    { \
-        Vector<T, n> result;\
-        for (int i = 0; i < n; i++) \
-            result[i] = op thisVal[i]; \
-        return result;\
-    }
-#define SLANG_INT_VECTOR_OPS(T) \
-    SLANG_VECTOR_BINARY_OP(T, +)\
-    SLANG_VECTOR_BINARY_OP(T, -)\
-    SLANG_VECTOR_BINARY_OP(T, *)\
-    SLANG_VECTOR_BINARY_OP(T, / )\
-    SLANG_VECTOR_BINARY_OP(T, &)\
-    SLANG_VECTOR_BINARY_OP(T, |)\
-    SLANG_VECTOR_BINARY_OP(T, &&)\
-    SLANG_VECTOR_BINARY_OP(T, ||)\
-    SLANG_VECTOR_BINARY_OP(T, ^)\
-    SLANG_VECTOR_BINARY_OP(T, %)\
-    SLANG_VECTOR_BINARY_OP(T, >>)\
-    SLANG_VECTOR_BINARY_OP(T, <<)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, >)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, <)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, >=)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, <=)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, ==)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, !=)\
-    SLANG_VECTOR_UNARY_OP(T, !)\
-    SLANG_VECTOR_UNARY_OP(T, ~)
-#define SLANG_FLOAT_VECTOR_OPS(T) \
-    SLANG_VECTOR_BINARY_OP(T, +)\
-    SLANG_VECTOR_BINARY_OP(T, -)\
-    SLANG_VECTOR_BINARY_OP(T, *)\
-    SLANG_VECTOR_BINARY_OP(T, /)\
-    SLANG_VECTOR_UNARY_OP(T, -)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, >)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, <)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, >=)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, <=)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, ==)\
-    SLANG_VECTOR_BINARY_COMPARE_OP(T, !=)
-
-SLANG_INT_VECTOR_OPS(bool)
-SLANG_INT_VECTOR_OPS(int)
-SLANG_INT_VECTOR_OPS(int8_t)
-SLANG_INT_VECTOR_OPS(int16_t)
-SLANG_INT_VECTOR_OPS(int64_t)
-SLANG_INT_VECTOR_OPS(uint)
-SLANG_INT_VECTOR_OPS(uint8_t)
-SLANG_INT_VECTOR_OPS(uint16_t)
-SLANG_INT_VECTOR_OPS(uint64_t)
-
-SLANG_FLOAT_VECTOR_OPS(float)
-SLANG_FLOAT_VECTOR_OPS(double)
-
-#define SLANG_VECTOR_INT_NEG_OP(T) \
-    template<int N>\
-    Vector<T, N> operator-(const Vector<T, N>& thisVal) \
-    { \
-        Vector<T, N> result;\
-        for (int i = 0; i < N; i++) \
-            result[i] = 0 - thisVal[i]; \
-        return result;\
-    }
-SLANG_VECTOR_INT_NEG_OP(int)
-SLANG_VECTOR_INT_NEG_OP(int8_t)
-SLANG_VECTOR_INT_NEG_OP(int16_t)
-SLANG_VECTOR_INT_NEG_OP(int64_t)
-SLANG_VECTOR_INT_NEG_OP(uint)
-SLANG_VECTOR_INT_NEG_OP(uint8_t)
-SLANG_VECTOR_INT_NEG_OP(uint16_t)
-SLANG_VECTOR_INT_NEG_OP(uint64_t)
-
-#define SLANG_FLOAT_VECTOR_MOD(T)\
-    template<int N> \
-    Vector<T, N> operator%(const Vector<T, N>& left, const Vector<T, N>& right) \
-    {\
-        Vector<T, N> result;\
-        for (int i = 0; i < N; i++) \
-            result[i] = _slang_fmod(left[i], right[i]); \
-        return result;\
-    }
-
-SLANG_FLOAT_VECTOR_MOD(float)
-SLANG_FLOAT_VECTOR_MOD(double)
-#undef SLANG_FLOAT_VECTOR_MOD
-#undef SLANG_VECTOR_BINARY_OP
-#undef SLANG_VECTOR_UNARY_OP
-#undef SLANG_INT_VECTOR_OPS
-#undef SLANG_FLOAT_VECTOR_OPS
-#undef SLANG_VECTOR_INT_NEG_OP
-#undef SLANG_FLOAT_VECTOR_MOD
-
-template <typename T, int ROWS, int COLS>
-struct Matrix
-{
-    Vector<T, COLS> rows[ROWS];
-    Vector<T, COLS>& operator[](size_t index) { return rows[index]; }
-    Matrix() = default;
-    Matrix(T scalar)
-    {
-        for (int i = 0; i < ROWS; i++)
-            rows[i] = Vector<T, COLS>(scalar);
-    }
-    Matrix(const Vector<T, COLS>& row0)
-    {
-        rows[0] = row0;
-    }
-    Matrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1)
-    {
-        rows[0] = row0;
-        rows[1] = row1;
-    }
-    Matrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1, const Vector<T, COLS>& row2)
-    {
-        rows[0] = row0;
-        rows[1] = row1;
-        rows[2] = row2;
-    }
-    Matrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1, const Vector<T, COLS>& row2, const Vector<T, COLS>& row3)
-    {
-        rows[0] = row0;
-        rows[1] = row1;
-        rows[2] = row2;
-        rows[3] = row3;
-    }
-    template<typename U, int otherRow, int otherCol>
-    Matrix(const Matrix<U, otherRow, otherCol>& other)
-    {
-        int minRow = ROWS;
-        int minCol = COLS;
-        if (minRow > otherRow) minRow = otherRow;
-        if (minCol > otherCol) minCol = otherCol;
-        for (int i = 0; i < minRow; i++)
-            for (int j = 0; j < minCol; j++)
-                rows[i][j] = (T)other.rows[i][j];
-    }
-    Matrix(T v0, T v1, T v2, T v3)
-    {
-        rows[0][0] = v0;  rows[0][1] = v1;
-        rows[1][0] = v2;  rows[1][1] = v3;
-    }
-    Matrix(T v0, T v1, T v2, T v3, T v4, T v5)
-    {
-        if (COLS == 3)
-        {
-            rows[0][0] = v0;  rows[0][1] = v1; rows[0][2] = v2;
-            rows[1][0] = v3;  rows[1][1] = v4; rows[1][2] = v5;
-        }
-        else
-        {
-            rows[0][0] = v0;  rows[0][1] = v1;
-            rows[1][0] = v2;  rows[1][1] = v3;
-            rows[2][0] = v4;  rows[2][1] = v5;
-        }
-    }
-    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7)
-    {
-        if (COLS == 4)
-        {
-            rows[0][0] = v0;  rows[0][1] = v1; rows[0][2] = v2; rows[0][3] = v3;
-            rows[1][0] = v4;  rows[1][1] = v5; rows[1][2] = v6; rows[1][3] = v7;
-        }
-        else
-        {
-            rows[0][0] = v0;  rows[0][1] = v1;
-            rows[1][0] = v2;  rows[1][1] = v3;
-            rows[2][0] = v4;  rows[2][1] = v5;
-            rows[3][0] = v6;  rows[3][1] = v7;
-        }
-    }
-    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8)
-    {
-        rows[0][0] = v0;  rows[0][1] = v1;  rows[0][2] = v2;
-        rows[1][0] = v3;  rows[1][1] = v4;  rows[1][2] = v5;
-        rows[2][0] = v6;  rows[2][1] = v7;  rows[2][2] = v8;
-    }
-    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11)
-    {
-        if (COLS == 4)
-        {
-            rows[0][0] = v0;  rows[0][1] = v1;  rows[0][2] = v2;  rows[0][3] = v3;
-            rows[1][0] = v4;  rows[1][1] = v5;  rows[1][2] = v6;  rows[1][3] = v7;
-            rows[2][0] = v8;  rows[2][1] = v9;  rows[2][2] = v10; rows[2][3] = v11;
-        }
-        else
-        {
-            rows[0][0] = v0;  rows[0][1] = v1;  rows[0][2] = v2;
-            rows[1][0] = v3;  rows[1][1] = v4;  rows[1][2] = v5;
-            rows[2][0] = v6;  rows[2][1] = v7;  rows[2][2] = v8;
-            rows[3][0] = v9;  rows[3][1] = v10; rows[3][2] = v11;
-        }
-    }
-    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15)
-    {
-        rows[0][0] = v0;  rows[0][1] = v1;  rows[0][2] = v2;  rows[0][3] = v3;
-        rows[1][0] = v4;  rows[1][1] = v5;  rows[1][2] = v6;  rows[1][3] = v7;
-        rows[2][0] = v8;  rows[2][1] = v9;  rows[2][2] = v10; rows[2][3] = v11;
-        rows[3][0] = v12; rows[3][1] = v13; rows[3][2] = v14; rows[3][3] = v15;
-    }
-};
-
-#define SLANG_MATRIX_BINARY_OP(T, op) \
-    template<int R, int C> \
-    Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal, const Matrix<T, R, C>& other) \
-    { \
-        Matrix<T, R, C> result;\
-        for (int i = 0; i < R; i++) \
-            for (int j = 0; j < C; j++) \
-                result.rows[i][j] = thisVal.rows[i][j] op other.rows[i][j]; \
-        return result;\
-    }
-
-#define SLANG_MATRIX_UNARY_OP(T, op) \
-    template<int R, int C> \
-    Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal) \
-    { \
-        Matrix<T, R, C> result;\
-        for (int i = 0; i < R; i++) \
-            for (int j = 0; j < C; j++) \
-                result[i].rows[i][j] = op thisVal.rows[i][j]; \
-        return result;\
-    }
-#define SLANG_INT_MATRIX_OPS(T) \
-    SLANG_MATRIX_BINARY_OP(T, +)\
-    SLANG_MATRIX_BINARY_OP(T, -)\
-    SLANG_MATRIX_BINARY_OP(T, *)\
-    SLANG_MATRIX_BINARY_OP(T, / )\
-    SLANG_MATRIX_BINARY_OP(T, &)\
-    SLANG_MATRIX_BINARY_OP(T, |)\
-    SLANG_MATRIX_BINARY_OP(T, &&)\
-    SLANG_MATRIX_BINARY_OP(T, ||)\
-    SLANG_MATRIX_BINARY_OP(T, ^)\
-    SLANG_MATRIX_BINARY_OP(T, %)\
-    SLANG_MATRIX_UNARY_OP(T, !)\
-    SLANG_MATRIX_UNARY_OP(T, ~)
-#define SLANG_FLOAT_MATRIX_OPS(T) \
-    SLANG_MATRIX_BINARY_OP(T, +)\
-    SLANG_MATRIX_BINARY_OP(T, -)\
-    SLANG_MATRIX_BINARY_OP(T, *)\
-    SLANG_MATRIX_BINARY_OP(T, /)\
-    SLANG_MATRIX_UNARY_OP(T, -)
-SLANG_INT_MATRIX_OPS(int)
-SLANG_INT_MATRIX_OPS(int8_t)
-SLANG_INT_MATRIX_OPS(int16_t)
-SLANG_INT_MATRIX_OPS(int64_t)
-SLANG_INT_MATRIX_OPS(uint)
-SLANG_INT_MATRIX_OPS(uint8_t)
-SLANG_INT_MATRIX_OPS(uint16_t)
-SLANG_INT_MATRIX_OPS(uint64_t)
-
-SLANG_FLOAT_MATRIX_OPS(float)
-SLANG_FLOAT_MATRIX_OPS(double)
-
-#define SLANG_MATRIX_INT_NEG_OP(T) \
-    template<int R, int C>\
-    SLANG_FORCE_INLINE Matrix<T, R, C> operator-(Matrix<T, R, C> thisVal) \
-    { \
-        Matrix<T, R, C> result;\
-        for (int i = 0; i < R; i++) \
-            for (int j = 0; j < C; j++) \
-            result.rows[i][j] = 0 - thisVal.rows[i][j]; \
-        return result;\
-    }
-    SLANG_MATRIX_INT_NEG_OP(int)
-    SLANG_MATRIX_INT_NEG_OP(int8_t)
-    SLANG_MATRIX_INT_NEG_OP(int16_t)
-    SLANG_MATRIX_INT_NEG_OP(int64_t)
-    SLANG_MATRIX_INT_NEG_OP(uint)
-    SLANG_MATRIX_INT_NEG_OP(uint8_t)
-    SLANG_MATRIX_INT_NEG_OP(uint16_t)
-    SLANG_MATRIX_INT_NEG_OP(uint64_t)
-
-#define SLANG_FLOAT_MATRIX_MOD(T)\
-    template<int R, int C> \
-    SLANG_FORCE_INLINE Matrix<T, R, C> operator%(Matrix<T, R, C> left, Matrix<T, R, C> right) \
-    {\
-        Matrix<T, R, C> result;\
-        for (int i = 0; i < R; i++) \
-            for (int j = 0; j < C; j++) \
-                result.rows[i][j] = _slang_fmod(left.rows[i][j], right.rows[i][j]); \
-        return result;\
-    }
-
-    SLANG_FLOAT_MATRIX_MOD(float)
-    SLANG_FLOAT_MATRIX_MOD(double)
-#undef SLANG_FLOAT_MATRIX_MOD
-#undef SLANG_MATRIX_BINARY_OP
-#undef SLANG_MATRIX_UNARY_OP
-#undef SLANG_INT_MATRIX_OPS
-#undef SLANG_FLOAT_MATRIX_OPS
-#undef SLANG_MATRIX_INT_NEG_OP
-#undef SLANG_FLOAT_MATRIX_MOD
-
-template<typename TResult, typename TInput>
-TResult slang_bit_cast(TInput val)
-{
-    return *(TResult*)(&val);
-}
-
-#endif
-
-
diff --git a/external/slang/prelude/slang-cpp-types.h b/external/slang/prelude/slang-cpp-types.h
deleted file mode 100644
index 3f805a8b..00000000
--- a/external/slang/prelude/slang-cpp-types.h
+++ /dev/null
@@ -1,952 +0,0 @@
-#ifndef SLANG_PRELUDE_CPP_TYPES_H
-#define SLANG_PRELUDE_CPP_TYPES_H
-
-#ifdef SLANG_PRELUDE_NAMESPACE
-namespace SLANG_PRELUDE_NAMESPACE {
-#endif
-
-#ifndef SLANG_FORCE_INLINE
-#    define SLANG_FORCE_INLINE inline
-#endif
-
-#include "slang-cpp-types-core.h"
-
-typedef Vector<float, 2> float2;
-typedef Vector<float, 3> float3;
-typedef Vector<float, 4> float4;
-
-typedef Vector<int32_t, 2> int2;
-typedef Vector<int32_t, 3> int3;
-typedef Vector<int32_t, 4> int4;
-
-typedef Vector<uint32_t, 2> uint2;
-typedef Vector<uint32_t, 3> uint3;
-typedef Vector<uint32_t, 4> uint4;
-
-// We can just map `NonUniformResourceIndex` type directly to the index type on CPU, as CPU does not require
-// any special handling around such accesses.
-typedef size_t NonUniformResourceIndex;
-
-// ----------------------------- ResourceType -----------------------------------------
-
-// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-structuredbuffer-getdimensions
-// Missing  Load(_In_  int  Location, _Out_ uint Status);
-
-template <typename T>
-struct RWStructuredBuffer
-{
-    SLANG_FORCE_INLINE T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
-    const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }  
-    void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); }
-  
-    T* data;
-    size_t count;
-};
-
-template <typename T>
-struct StructuredBuffer
-{
-    SLANG_FORCE_INLINE const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
-    const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
-    void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); }
-    
-    T* data;
-    size_t count;
-};
-
-
-template <typename T>
-struct RWBuffer
-{
-    SLANG_FORCE_INLINE T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
-    const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
-    void GetDimensions(uint32_t* outCount) { *outCount = uint32_t(count); }
-    
-    T* data;
-    size_t count;
-};
-
-template <typename T>
-struct Buffer
-{
-    SLANG_FORCE_INLINE const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
-    const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
-    void GetDimensions(uint32_t* outCount) { *outCount = uint32_t(count); }
-    
-    T* data;
-    size_t count;
-};
-
-// Missing  Load(_In_  int  Location, _Out_ uint Status);
-struct ByteAddressBuffer
-{
-    void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); }
-    uint32_t Load(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
-        return data[index >> 2]; 
-    }
-    uint2 Load2(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        return uint2{data[dataIdx], data[dataIdx + 1]}; 
-    }
-    uint3 Load3(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]}; 
-    }
-    uint4 Load4(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]}; 
-    }
-    template<typename T>
-    T Load(size_t index) const
-    {
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
-        return *(const T*)(((const char*)data) + index);
-    }
-    
-    const uint32_t* data;
-    size_t sizeInBytes;  //< Must be multiple of 4
-};
-
-// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-rwbyteaddressbuffer
-// Missing support for Atomic operations 
-// Missing support for Load with status
-struct RWByteAddressBuffer
-{
-    void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); }
-    
-    uint32_t Load(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
-        return data[index >> 2]; 
-    }
-    uint2 Load2(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        return uint2{data[dataIdx], data[dataIdx + 1]}; 
-    }
-    uint3 Load3(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]}; 
-    }
-    uint4 Load4(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]}; 
-    }
-    template<typename T>
-    T Load(size_t index) const
-    {
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
-        return *(const T*)(((const char*)data) + index);
-    }
-
-    void Store(size_t index, uint32_t v) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
-        data[index >> 2] = v; 
-    }
-    void Store2(size_t index, uint2 v) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        data[dataIdx + 0] = v.x;
-        data[dataIdx + 1] = v.y;
-    }
-    void Store3(size_t index, uint3 v) const 
-    {  
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        data[dataIdx + 0] = v.x;
-        data[dataIdx + 1] = v.y;
-        data[dataIdx + 2] = v.z;
-    }
-    void Store4(size_t index, uint4 v) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        data[dataIdx + 0] = v.x;
-        data[dataIdx + 1] = v.y;
-        data[dataIdx + 2] = v.z;
-        data[dataIdx + 3] = v.w;
-    }
-    template<typename T>
-    void Store(size_t index, T const& value) const
-    {
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
-        *(T*)(((char*)data) + index) = value;
-    }
-
-    uint32_t* data;
-    size_t sizeInBytes; //< Must be multiple of 4 
-};
-
-struct ISamplerState;
-struct ISamplerComparisonState;
-
-struct SamplerState
-{
-    ISamplerState* state;
-};
-
-struct SamplerComparisonState
-{
-    ISamplerComparisonState* state;
-};
-
-#ifndef SLANG_RESOURCE_SHAPE
-#    define SLANG_RESOURCE_SHAPE
-typedef unsigned int SlangResourceShape;
-enum
-{
-    SLANG_RESOURCE_BASE_SHAPE_MASK = 0x0F,
-
-    SLANG_RESOURCE_NONE = 0x00,
-
-    SLANG_TEXTURE_1D = 0x01,
-    SLANG_TEXTURE_2D = 0x02,
-    SLANG_TEXTURE_3D = 0x03,
-    SLANG_TEXTURE_CUBE = 0x04,
-    SLANG_TEXTURE_BUFFER = 0x05,
-
-    SLANG_STRUCTURED_BUFFER = 0x06,
-    SLANG_BYTE_ADDRESS_BUFFER = 0x07,
-    SLANG_RESOURCE_UNKNOWN = 0x08,
-    SLANG_ACCELERATION_STRUCTURE = 0x09,
-    SLANG_TEXTURE_SUBPASS = 0x0A,
-
-    SLANG_RESOURCE_EXT_SHAPE_MASK = 0xF0,
-
-    SLANG_TEXTURE_FEEDBACK_FLAG = 0x10,
-    SLANG_TEXTURE_ARRAY_FLAG = 0x40,
-    SLANG_TEXTURE_MULTISAMPLE_FLAG = 0x80,
-
-    SLANG_TEXTURE_1D_ARRAY = SLANG_TEXTURE_1D | SLANG_TEXTURE_ARRAY_FLAG,
-    SLANG_TEXTURE_2D_ARRAY = SLANG_TEXTURE_2D | SLANG_TEXTURE_ARRAY_FLAG,
-    SLANG_TEXTURE_CUBE_ARRAY = SLANG_TEXTURE_CUBE | SLANG_TEXTURE_ARRAY_FLAG,
-
-    SLANG_TEXTURE_2D_MULTISAMPLE = SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG,
-    SLANG_TEXTURE_2D_MULTISAMPLE_ARRAY =
-        SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG | SLANG_TEXTURE_ARRAY_FLAG,
-    SLANG_TEXTURE_SUBPASS_MULTISAMPLE = SLANG_TEXTURE_SUBPASS | SLANG_TEXTURE_MULTISAMPLE_FLAG,
-};
-#endif
-
-// 
-struct TextureDimensions
-{
-    void reset()
-    {
-        shape = 0;
-        width = height = depth = 0;
-        numberOfLevels = 0;
-        arrayElementCount = 0;
-    }
-    int getDimSizes(uint32_t outDims[4]) const
-    {
-        const auto baseShape = (shape & SLANG_RESOURCE_BASE_SHAPE_MASK);
-        int count = 0;
-        switch (baseShape)
-        {
-            case SLANG_TEXTURE_1D:
-            {
-                outDims[count++] = width;
-                break;
-            }
-            case SLANG_TEXTURE_2D:
-            {
-                outDims[count++] = width;
-                outDims[count++] = height;
-                break;
-            }
-            case SLANG_TEXTURE_3D:
-            {
-                outDims[count++] = width;
-                outDims[count++] = height;
-                outDims[count++] = depth;
-                break;
-            }
-            case SLANG_TEXTURE_CUBE:
-            {
-                outDims[count++] = width;
-                outDims[count++] = height;
-                outDims[count++] = 6;
-                break;
-            }
-        }
-
-        if (shape & SLANG_TEXTURE_ARRAY_FLAG)
-        {
-            outDims[count++] = arrayElementCount;
-        }
-        return count;
-    }
-    int getMIPDims(int outDims[3]) const
-    {
-        const auto baseShape = (shape & SLANG_RESOURCE_BASE_SHAPE_MASK);
-        int count = 0;
-        switch (baseShape)
-        {
-            case SLANG_TEXTURE_1D:
-            {
-                outDims[count++] = width;
-                break;
-            }
-            case SLANG_TEXTURE_CUBE:
-            case SLANG_TEXTURE_2D:
-            {
-                outDims[count++] = width;
-                outDims[count++] = height;
-                break;
-            }
-            case SLANG_TEXTURE_3D:
-            {
-                outDims[count++] = width;
-                outDims[count++] = height;
-                outDims[count++] = depth;
-                break;
-            }
-        }
-        return count;
-    }
-    int calcMaxMIPLevels() const
-    {
-        int dims[3];
-        const int dimCount = getMIPDims(dims);
-        for (int count = 1; true; count++)
-        {
-            bool allOne = true;
-            for (int i = 0; i < dimCount; ++i)
-            {
-                if (dims[i] > 1)
-                {
-                    allOne = false;
-                    dims[i] >>= 1;
-                }
-            }
-            if (allOne)
-            {
-                return count;
-            }
-        }
-    }
-
-    uint32_t shape;
-    uint32_t width, height, depth;
-    uint32_t numberOfLevels;
-    uint32_t arrayElementCount;                  ///< For array types, 0 otherwise
-};
-
-
-
-
-
-// Texture
-
-struct ITexture
-{
-    virtual TextureDimensions GetDimensions(int mipLevel = -1) = 0;
-    virtual void Load(const int32_t* v, void* outData, size_t dataSize) = 0;
-    virtual void Sample(SamplerState samplerState, const float* loc, void* outData, size_t dataSize) = 0;
-    virtual void SampleLevel(SamplerState samplerState, const float* loc, float level, void* outData, size_t dataSize) = 0;
-};
-
-template <typename T>
-struct Texture1D
-{
-    void GetDimensions(uint32_t* outWidth) { *outWidth = texture->GetDimensions().width; }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outNumberOfLevels) 
-    { 
-        auto dims = texture->GetDimensions(mipLevel); 
-        *outWidth = dims.width; 
-        *outNumberOfLevels = dims.numberOfLevels; 
-    }
-    
-    void GetDimensions(float* outWidth) { *outWidth = texture->GetDimensions().width; }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outNumberOfLevels) 
-    { 
-        auto dims = texture->GetDimensions(mipLevel); 
-        *outWidth = dims.width; 
-        *outNumberOfLevels = dims.numberOfLevels; 
-    }
-    
-    T Load(const int2& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
-    T Sample(SamplerState samplerState, float loc) const { T out; texture->Sample(samplerState, &loc, &out, sizeof(out)); return out; }
-    T SampleLevel(SamplerState samplerState, float loc, float level) { T out; texture->SampleLevel(samplerState, &loc, level, &out, sizeof(out)); return out; }
-    
-    ITexture* texture;              
-};
-
-template <typename T>
-struct Texture2D
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight) 
-    { 
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-    }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    void GetDimensions(float* outWidth, float* outHeight) 
-    { 
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-    }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
-    T Sample(SamplerState samplerState, const float2& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
-    T SampleLevel(SamplerState samplerState, const float2& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-    
-    ITexture* texture;              
-};
-
-template <typename T>
-struct Texture3D
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth)
-    {
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-        *outDepth = dims.depth;
-    }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth, uint32_t* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outDepth = dims.depth;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    void GetDimensions(float* outWidth, float* outHeight, float* outDepth)
-    {
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-        *outDepth = dims.depth;
-    }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outDepth, float* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outDepth = dims.depth;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    T Load(const int4& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
-    T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
-    T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-    
-    ITexture* texture;              
-};
-
-template <typename T>
-struct TextureCube
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight) 
-    { 
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-    }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    void GetDimensions(float* outWidth, float* outHeight) 
-    { 
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-    }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
-    T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-    
-    ITexture* texture;              
-};
-
-template <typename T>
-struct Texture1DArray
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outElements) { auto dims = texture->GetDimensions(); *outWidth = dims.width; *outElements = dims.arrayElementCount; }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outElements, uint32_t* outNumberOfLevels) 
-    {
-        auto dims = texture->GetDimensions(mipLevel); 
-        *outWidth = dims.width; 
-        *outNumberOfLevels = dims.numberOfLevels;
-        *outElements = dims.arrayElementCount; 
-    }        
-    void GetDimensions(float* outWidth, float* outElements) { auto dims = texture->GetDimensions(); *outWidth = dims.width; *outElements = dims.arrayElementCount; }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outElements, float* outNumberOfLevels) 
-    {
-        auto dims = texture->GetDimensions(mipLevel); 
-        *outWidth = dims.width; 
-        *outNumberOfLevels = dims.numberOfLevels;
-        *outElements = dims.arrayElementCount; 
-    }
-    
-    T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
-    T Sample(SamplerState samplerState, const float2& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
-    T SampleLevel(SamplerState samplerState, const float2& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-    
-    ITexture* texture;              
-};
-
-template <typename T>
-struct Texture2DArray
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
-    {
-        auto dims = texture->GetDimensions();
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-    }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels)
-    {
-        auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    void GetDimensions(uint32_t* outWidth, float* outHeight, float* outElements)
-    {
-        auto dims = texture->GetDimensions();
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-    }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels)
-    {
-        auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    T Load(const int4& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
-    T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
-    T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-    
-    ITexture* texture;              
-};
-
-template <typename T>
-struct TextureCubeArray
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
-    {
-        auto dims = texture->GetDimensions();
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-    }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels)
-    {
-        auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    void GetDimensions(uint32_t* outWidth, float* outHeight, float* outElements)
-    {
-        auto dims = texture->GetDimensions();
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-    }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels)
-    {
-        auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    T Sample(SamplerState samplerState, const float4& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; }
-    T SampleLevel(SamplerState samplerState, const float4& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; }
-    
-    ITexture* texture;              
-};
-
-/* !!!!!!!!!!!!!!!!!!!!!!!!!!! RWTexture !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
-
-struct IRWTexture : ITexture
-{
-        /// Get the reference to the element at loc. 
-    virtual void* refAt(const uint32_t* loc) = 0;
-};
-
-template <typename T>
-struct RWTexture1D
-{
-    void GetDimensions(uint32_t* outWidth) { *outWidth = texture->GetDimensions().width; }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outNumberOfLevels) { auto dims = texture->GetDimensions(mipLevel); *outWidth = dims.width; *outNumberOfLevels = dims.numberOfLevels; }
-    
-    void GetDimensions(float* outWidth) { *outWidth = texture->GetDimensions().width; }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outNumberOfLevels) { auto dims = texture->GetDimensions(mipLevel); *outWidth = dims.width; *outNumberOfLevels = dims.numberOfLevels; }
-    
-    T Load(int32_t loc) const { T out; texture->Load(&loc, &out, sizeof(out)); return out; }
-    T& operator[](uint32_t loc) { return *(T*)texture->refAt(&loc); }
-    IRWTexture* texture;              
-};
-
-template <typename T>
-struct RWTexture2D
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight) 
-    { 
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-    }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    void GetDimensions(float* outWidth, float* outHeight) 
-    { 
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-    }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    T Load(const int2& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
-    T& operator[](const uint2& loc) { return *(T*)texture->refAt(&loc.x); }
-    IRWTexture* texture;
-};
-
-template <typename T>
-struct RWTexture3D
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth)
-    {
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-        *outDepth = dims.depth;
-    }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth, uint32_t* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outDepth = dims.depth;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    void GetDimensions(float* outWidth, float* outHeight, float* outDepth)
-    {
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-        *outDepth = dims.depth;
-    }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outDepth, float* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outDepth = dims.depth;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
-    T& operator[](const uint3& loc) { return *(T*)texture->refAt(&loc.x); }
-    IRWTexture* texture;
-};
-
-
-template <typename T>
-struct RWTexture1DArray
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outElements) 
-    { 
-        auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outElements = dims.arrayElementCount; 
-    }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outElements, uint32_t* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outElements = dims.arrayElementCount;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    void GetDimensions(float* outWidth, float* outElements) 
-    { 
-        auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outElements = dims.arrayElementCount; 
-    }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outElements, float* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outElements = dims.arrayElementCount;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    T Load(int2 loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
-    T& operator[](uint2 loc) { return *(T*)texture->refAt(&loc.x); }
-
-    IRWTexture* texture;
-};
-
-template <typename T>
-struct RWTexture2DArray
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
-    {
-        auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount; 
-    }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    void GetDimensions(float* outWidth, float* outHeight, float* outElements)
-    {
-        auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount; 
-    }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; }
-    T& operator[](const uint3& loc) { return *(T*)texture->refAt(&loc.x); }
-
-    IRWTexture* texture;
-};
-
-// FeedbackTexture
-
-struct FeedbackType {};
-struct SAMPLER_FEEDBACK_MIN_MIP : FeedbackType {};
-struct SAMPLER_FEEDBACK_MIP_REGION_USED : FeedbackType {};
-
-struct IFeedbackTexture
-{
-    virtual TextureDimensions GetDimensions(int mipLevel = -1) = 0;
-
-    // Note here we pass the optional clamp parameter as a pointer. Passing nullptr means no clamp. 
-    // This was preferred over having two function definitions, and having to differentiate their names
-    virtual void WriteSamplerFeedback(ITexture* tex, SamplerState samp, const float* location, const float* clamp = nullptr) = 0;
-    virtual void WriteSamplerFeedbackBias(ITexture* tex, SamplerState samp, const float* location, float bias, const float* clamp = nullptr) = 0;
-    virtual void WriteSamplerFeedbackGrad(ITexture* tex, SamplerState samp, const float* location, const float* ddx, const float* ddy, const float* clamp = nullptr) = 0;
-    
-    virtual void WriteSamplerFeedbackLevel(ITexture* tex, SamplerState samp, const float* location, float lod) = 0;
-};
-
-template <typename T>
-struct FeedbackTexture2D
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight) 
-    { 
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-    }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    void GetDimensions(float* outWidth, float* outHeight) 
-    { 
-        const auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height; 
-    }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    template <typename S>
-    void WriteSamplerFeedback(Texture2D<S> tex, SamplerState samp, float2 location, float clamp) { texture->WriteSamplerFeedback(tex.texture, samp, &location.x, &clamp); } 
-
-    template <typename S>
-    void WriteSamplerFeedbackBias(Texture2D<S> tex, SamplerState samp, float2 location, float bias, float clamp) { texture->WriteSamplerFeedbackBias(tex.texture, samp, &location.x, bias, &clamp); }
-
-    template <typename S>
-    void WriteSamplerFeedbackGrad(Texture2D<S> tex, SamplerState samp, float2 location, float2 ddx, float2 ddy, float clamp) { texture->WriteSamplerFeedbackGrad(tex.texture, samp, &location.x, &ddx.x, &ddy.x, &clamp); }
-
-    // Level
-
-    template <typename S> 
-    void WriteSamplerFeedbackLevel(Texture2D<S> tex, SamplerState samp, float2 location, float lod) { texture->WriteSamplerFeedbackLevel(tex.texture, samp, &location.x, lod); }
-    
-    // Without Clamp
-    template <typename S> 
-    void WriteSamplerFeedback(Texture2D<S> tex, SamplerState samp, float2 location) { texture->WriteSamplerFeedback(tex.texture, samp, &location.x); }
-
-    template <typename S> 
-    void WriteSamplerFeedbackBias(Texture2D<S> tex, SamplerState samp, float2 location, float bias) { texture->WriteSamplerFeedbackBias(tex.texture, samp, &location.x, bias); }
-
-    template <typename S> 
-    void WriteSamplerFeedbackGrad(Texture2D<S> tex, SamplerState samp, float2 location, float2 ddx, float2 ddy) { texture->WriteSamplerFeedbackGrad(tex.texture, samp, &location.x, &ddx.x, &ddy.x); }
-    
-    IFeedbackTexture* texture;
-};
-
-template <typename T>
-struct FeedbackTexture2DArray
-{
-    void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements)
-    {
-        auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount; 
-    }
-    void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    void GetDimensions(float* outWidth, float* outHeight, float* outElements)
-    {
-        auto dims = texture->GetDimensions(); 
-        *outWidth = dims.width; 
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount; 
-    }
-    void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels)
-    {
-        const auto dims = texture->GetDimensions(mipLevel);
-        *outWidth = dims.width;
-        *outHeight = dims.height;
-        *outElements = dims.arrayElementCount;
-        *outNumberOfLevels = dims.numberOfLevels;
-    }
-    
-    template <typename S>
-    void WriteSamplerFeedback(Texture2DArray<S> texArray, SamplerState samp, float3 location, float clamp) { texture->WriteSamplerFeedback(texArray.texture, samp, &location.x, &clamp); }
-
-    template <typename S>
-    void WriteSamplerFeedbackBias(Texture2DArray<S> texArray, SamplerState samp, float3 location, float bias, float clamp) { texture->WriteSamplerFeedbackBias(texArray.texture, samp, &location.x, bias, &clamp); }
-
-    template <typename S>
-    void WriteSamplerFeedbackGrad(Texture2DArray<S> texArray, SamplerState samp, float3 location, float3 ddx, float3 ddy, float clamp) { texture->WriteSamplerFeedbackGrad(texArray.texture, samp, &location.x, &ddx.x, &ddy.x, &clamp); }
-
-    // Level
-    template <typename S>
-    void WriteSamplerFeedbackLevel(Texture2DArray<S> texArray, SamplerState samp, float3 location, float lod) { texture->WriteSamplerFeedbackLevel(texArray.texture, samp, &location.x, lod); }
-
-    // Without Clamp
-
-    template <typename S>
-    void WriteSamplerFeedback(Texture2DArray<S> texArray, SamplerState samp, float3 location) { texture->WriteSamplerFeedback(texArray.texture, samp, &location.x); }
-
-    template <typename S>
-    void WriteSamplerFeedbackBias(Texture2DArray<S> texArray, SamplerState samp, float3 location, float bias) { texture->WriteSamplerFeedbackBias(texArray.texture, samp, &location.x, bias); }
-
-    template <typename S>
-    void WriteSamplerFeedbackGrad(Texture2DArray<S> texArray, SamplerState samp, float3 location, float3 ddx, float3 ddy) { texture->WriteSamplerFeedbackGrad(texArray.texture, samp, &location.x, &ddx.x, &ddy.x); }
-    
-    IFeedbackTexture* texture;
-};
-
-/* Varying input for Compute */
-
-/* Used when running a single thread */
-struct ComputeThreadVaryingInput
-{
-    uint3 groupID;
-    uint3 groupThreadID;
-};
-
-struct ComputeVaryingInput
-{
-    uint3 startGroupID;     ///< start groupID
-    uint3 endGroupID;       ///< Non inclusive end groupID
-};
-
-// The uniformEntryPointParams and uniformState must be set to structures that match layout that the kernel expects.
-// This can be determined via reflection for example.
-
-typedef void(*ComputeThreadFunc)(ComputeThreadVaryingInput* varyingInput, void* uniformEntryPointParams, void* uniformState);
-typedef void(*ComputeFunc)(ComputeVaryingInput* varyingInput, void* uniformEntryPointParams, void* uniformState);
-
-#ifdef SLANG_PRELUDE_NAMESPACE
-}
-#endif
-
-#endif
-
-
diff --git a/external/slang/prelude/slang-cuda-prelude.h b/external/slang/prelude/slang-cuda-prelude.h
deleted file mode 100644
index 74a9d0ce..00000000
--- a/external/slang/prelude/slang-cuda-prelude.h
+++ /dev/null
@@ -1,2357 +0,0 @@
-#define SLANG_PRELUDE_EXPORT
-
-#ifdef __CUDACC_RTC__
-#define SLANG_CUDA_RTC 1
-#else
-#define SLANG_CUDA_RTC 0
-#endif
-
-#if SLANG_CUDA_RTC
-
-#else
-
-#include <cstdint>
-#include <stdio.h>
-
-#endif
-
-// Define SLANG_CUDA_ENABLE_HALF to use the cuda_fp16 include to add half support. 
-// For this to work NVRTC needs to have the path to the CUDA SDK.
-//
-// As it stands the includes paths defined for Slang are passed down to NVRTC. Similarly defines defined for the Slang compile
-// are passed down. 
-
-#ifdef SLANG_CUDA_ENABLE_HALF
-// We don't want half2 operators, because it will implement comparison operators that return a bool(!). We want to generate
-// those functions. Doing so means that we will have to define all the other half2 operators.
-#   define __CUDA_NO_HALF2_OPERATORS__
-#   include <cuda_fp16.h>
-#endif
-
-#ifdef SLANG_CUDA_ENABLE_OPTIX
-#include <optix.h>
-#endif
-
-// Define slang offsetof implementation 
-#ifndef SLANG_OFFSET_OF
-#   define SLANG_OFFSET_OF(type, member) (size_t)((char*)&(((type *)0)->member) - (char*)0)
-#endif
-
-#ifndef SLANG_ALIGN_OF
-#   define SLANG_ALIGN_OF(type) __alignof__(type)
-#endif
-
-// Must be large enough to cause overflow and therefore infinity
-#ifndef SLANG_INFINITY
-#   define SLANG_INFINITY   ((float)(1e+300 * 1e+300))
-#endif
-
-// For now we'll disable any asserts in this prelude
-#define SLANG_PRELUDE_ASSERT(x) 
-
-#ifndef SLANG_CUDA_WARP_SIZE 
-#   define SLANG_CUDA_WARP_SIZE 32
-#endif
-
-#define SLANG_CUDA_WARP_MASK (SLANG_CUDA_WARP_SIZE - 1) // Used for masking threadIdx.x to the warp lane index
-#define SLANG_CUDA_WARP_BITMASK (~int(0))
-
-//
-#define SLANG_FORCE_INLINE inline
-
-#define SLANG_CUDA_CALL __device__ 
-
-#define SLANG_FORCE_INLINE inline
-#define SLANG_INLINE inline
-
-
-// Since we are using unsigned arithmatic care is need in this comparison.
-// It is *assumed* that sizeInBytes >= elemSize. Which means (sizeInBytes >= elemSize) >= 0
-// Which means only a single test is needed
-
-// Asserts for bounds checking.
-// It is assumed index/count are unsigned types.
-#define SLANG_BOUND_ASSERT(index, count)  SLANG_PRELUDE_ASSERT(index < count); 
-#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_PRELUDE_ASSERT(index <= (sizeInBytes - elemSize) && (index & 3) == 0);
-
-// Macros to zero index if an access is out of range
-#define SLANG_BOUND_ZERO_INDEX(index, count) index = (index < count) ? index : 0; 
-#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) index = (index <= (sizeInBytes - elemSize)) ? index : 0; 
-
-// The 'FIX' macro define how the index is fixed. The default is to do nothing. If SLANG_ENABLE_BOUND_ZERO_INDEX
-// the fix macro will zero the index, if out of range
-#ifdef  SLANG_ENABLE_BOUND_ZERO_INDEX
-#   define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
-#   define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
-#   define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) SLANG_BOUND_ZERO_INDEX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
-#else
-#   define SLANG_BOUND_FIX(index, count) 
-#   define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) 
-#   define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) 
-#endif
-
-#ifndef SLANG_BOUND_CHECK
-#   define SLANG_BOUND_CHECK(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count)
-#endif
-
-#ifndef SLANG_BOUND_CHECK_BYTE_ADDRESS
-#   define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
-#endif
-
-#ifndef SLANG_BOUND_CHECK_FIXED_ARRAY
-#   define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
-#endif
-
- // This macro handles how out-of-range surface coordinates are handled; 
- // I can equal
- // cudaBoundaryModeClamp, in which case out-of-range coordinates are clamped to the valid range
- // cudaBoundaryModeZero, in which case out-of-range reads return zero and out-of-range writes are ignored
- // cudaBoundaryModeTrap, in which case out-of-range accesses cause the kernel execution to fail. 
- 
-#ifndef SLANG_CUDA_BOUNDARY_MODE
-#   define SLANG_CUDA_BOUNDARY_MODE cudaBoundaryModeZero
-
-// Can be one of SLANG_CUDA_PTX_BOUNDARY_MODE. Only applies *PTX* emitted CUDA operations
-// which currently is just RWTextureRW format writes
-// 
-// .trap         causes an execution trap on out-of-bounds addresses
-// .clamp        stores data at the nearest surface location (sized appropriately)
-// .zero         drops stores to out-of-bounds addresses 
-
-#   define SLANG_PTX_BOUNDARY_MODE "zero"
-#endif
-
-struct TypeInfo
-{
-    size_t typeSize;
-};
-
-template <typename T, size_t SIZE>
-struct FixedArray
-{
-    SLANG_CUDA_CALL const T& operator[](size_t index) const { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; }
-    SLANG_CUDA_CALL T& operator[](size_t index) { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; }
-    
-    T m_data[SIZE];
-};
-
-// An array that has no specified size, becomes a 'Array'. This stores the size so it can potentially 
-// do bounds checking.  
-template <typename T>
-struct Array
-{
-    SLANG_CUDA_CALL const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; }
-    SLANG_CUDA_CALL T& operator[](size_t index) { SLANG_BOUND_CHECK(index, count); return data[index]; }
-    
-    T* data;
-    size_t count;
-};
-
-// Typically defined in cuda.h, but we can't ship/rely on that, so just define here
-typedef unsigned long long CUtexObject;                   
-typedef unsigned long long CUsurfObject;                  
-
-// On CUDA sampler state is actually bound up with the texture object. We have a SamplerState type, 
-// backed as a pointer, to simplify code generation, with the downside that such a binding will take up 
-// uniform space, even though it will have no effect. 
-// TODO(JS): Consider ways to strip use of variables of this type so have no binding,
-struct SamplerStateUnused;
-typedef SamplerStateUnused* SamplerState;
-
-
-// TODO(JS): Not clear yet if this can be handled on CUDA, by just ignoring.
-// For now, just map to the index type. 
-typedef size_t NonUniformResourceIndex;
-
-// Code generator will generate the specific type
-template <typename T, int ROWS, int COLS>
-struct Matrix;
-
-typedef int1 bool1;
-typedef int2 bool2;
-typedef int3 bool3;
-typedef int4 bool4; 
-
-#if SLANG_CUDA_RTC
-
-typedef signed char int8_t;
-typedef short int16_t;
-typedef int int32_t;
-typedef long long int64_t;
-
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-typedef unsigned long long uint64_t;
-
-#endif
-
-typedef long long longlong;
-typedef unsigned long long ulonglong;
-
-typedef unsigned char uchar;
-typedef unsigned short ushort;
-typedef unsigned int uint;
-
-union Union32 
-{
-    uint32_t u;
-    int32_t i;
-    float f;
-};
-
-union Union64
-{
-    uint64_t u;
-    int64_t i;
-    double d;
-};
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float _slang_fmod(float x, float y)
-{
-    return ::fmodf(x, y);
-}
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double _slang_fmod(double x, double y)
-{
-    return ::fmod(x, y);
-}
-
-#if SLANG_CUDA_ENABLE_HALF
-
-// Add the other vector half types
-struct __half1 { __half x; };
-struct __align__(4) __half3 { __half x, y, z; };
-struct __align__(4) __half4 { __half x, y, z, w; };
-#endif
-
-#define SLANG_VECTOR_GET_ELEMENT(T) \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##1 x, int index) { return ((T*)(&x))[index]; }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##2 x, int index) { return ((T*)(&x))[index]; }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##3 x, int index) { return ((T*)(&x))[index]; }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##4 x, int index) { return ((T*)(&x))[index]; }
-SLANG_VECTOR_GET_ELEMENT(int)
-SLANG_VECTOR_GET_ELEMENT(uint)
-SLANG_VECTOR_GET_ELEMENT(short)
-SLANG_VECTOR_GET_ELEMENT(ushort)
-SLANG_VECTOR_GET_ELEMENT(char)
-SLANG_VECTOR_GET_ELEMENT(uchar)
-SLANG_VECTOR_GET_ELEMENT(longlong)
-SLANG_VECTOR_GET_ELEMENT(ulonglong)
-SLANG_VECTOR_GET_ELEMENT(float)
-SLANG_VECTOR_GET_ELEMENT(double)
-
-#define SLANG_VECTOR_GET_ELEMENT_PTR(T) \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##1* x, int index) { return ((T*)(x)) + index; }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##2* x, int index) { return ((T*)(x)) + index; }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##3* x, int index) { return ((T*)(x)) + index; }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##4* x, int index) { return ((T*)(x)) + index; }
-SLANG_VECTOR_GET_ELEMENT_PTR(int)
-SLANG_VECTOR_GET_ELEMENT_PTR(uint)
-SLANG_VECTOR_GET_ELEMENT_PTR(short)
-SLANG_VECTOR_GET_ELEMENT_PTR(ushort)
-SLANG_VECTOR_GET_ELEMENT_PTR(char)
-SLANG_VECTOR_GET_ELEMENT_PTR(uchar)
-SLANG_VECTOR_GET_ELEMENT_PTR(longlong)
-SLANG_VECTOR_GET_ELEMENT_PTR(ulonglong)
-SLANG_VECTOR_GET_ELEMENT_PTR(float)
-SLANG_VECTOR_GET_ELEMENT_PTR(double)
-
-#if SLANG_CUDA_ENABLE_HALF
-SLANG_VECTOR_GET_ELEMENT(__half)
-SLANG_VECTOR_GET_ELEMENT_PTR(__half)
-#endif
-
-#define SLANG_CUDA_VECTOR_BINARY_OP(T, n, op) \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator op(T##n thisVal, T##n other) \
-    { \
-        T##n result;\
-        for (int i = 0; i < n; i++) \
-            *_slang_vector_get_element_ptr(&result, i) = _slang_vector_get_element(thisVal,i) op _slang_vector_get_element(other,i); \
-        return result;\
-    }
-#define SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, op) \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL bool##n operator op(T##n thisVal, T##n other) \
-    { \
-        bool##n result;\
-        for (int i = 0; i < n; i++) \
-            *_slang_vector_get_element_ptr(&result, i) = (int)(_slang_vector_get_element(thisVal,i) op _slang_vector_get_element(other,i)); \
-        return result;\
-    }
-#define SLANG_CUDA_VECTOR_UNARY_OP(T, n, op) \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator op(T##n thisVal) \
-    { \
-        T##n result;\
-        for (int i = 0; i < n; i++) \
-            *_slang_vector_get_element_ptr(&result, i) = op _slang_vector_get_element(thisVal,i); \
-        return result;\
-    }
-
-#define SLANG_CUDA_VECTOR_INT_OP(T, n) \
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, +)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, -)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, *)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, /)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, %)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, ^)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, &)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, |)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, &&)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, ||)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, >>)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, <<)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >=)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <=)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, ==)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, !=)\
-    SLANG_CUDA_VECTOR_UNARY_OP(T, n, !)\
-    SLANG_CUDA_VECTOR_UNARY_OP(T, n, -)\
-    SLANG_CUDA_VECTOR_UNARY_OP(T, n, ~)
-
-#define SLANG_CUDA_VECTOR_INT_OPS(T) \
-    SLANG_CUDA_VECTOR_INT_OP(T, 2) \
-    SLANG_CUDA_VECTOR_INT_OP(T, 3) \
-    SLANG_CUDA_VECTOR_INT_OP(T, 4)
-
-SLANG_CUDA_VECTOR_INT_OPS(int)
-SLANG_CUDA_VECTOR_INT_OPS(uint)
-SLANG_CUDA_VECTOR_INT_OPS(ushort)
-SLANG_CUDA_VECTOR_INT_OPS(short)
-SLANG_CUDA_VECTOR_INT_OPS(char)
-SLANG_CUDA_VECTOR_INT_OPS(uchar)
-SLANG_CUDA_VECTOR_INT_OPS(longlong)
-SLANG_CUDA_VECTOR_INT_OPS(ulonglong)
-
-#define SLANG_CUDA_VECTOR_FLOAT_OP(T, n) \
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, +)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, -)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, *)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, /)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, &&)\
-    SLANG_CUDA_VECTOR_BINARY_OP(T, n, ||)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >=)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <=)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, ==)\
-    SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, !=)\
-    SLANG_CUDA_VECTOR_UNARY_OP(T, n, -)
-#define SLANG_CUDA_VECTOR_FLOAT_OPS(T) \
-    SLANG_CUDA_VECTOR_FLOAT_OP(T, 2) \
-    SLANG_CUDA_VECTOR_FLOAT_OP(T, 3) \
-    SLANG_CUDA_VECTOR_FLOAT_OP(T, 4)
-
-SLANG_CUDA_VECTOR_FLOAT_OPS(float)
-SLANG_CUDA_VECTOR_FLOAT_OPS(double)
-#if SLANG_CUDA_ENABLE_HALF
-SLANG_CUDA_VECTOR_FLOAT_OPS(__half)
-#endif
-#define SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, n)\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator%(const T##n& left, const T##n& right) \
-    {\
-        T##n result;\
-        for (int i = 0; i < n; i++) \
-            *_slang_vector_get_element_ptr(&result, i) = _slang_fmod(_slang_vector_get_element(left,i), _slang_vector_get_element(right,i)); \
-        return result;\
-    }
-#define SLANG_CUDA_FLOAT_VECTOR_MOD(T) \
-    SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 2)\
-    SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 3)\
-    SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 4)
-
-SLANG_CUDA_FLOAT_VECTOR_MOD(float)
-SLANG_CUDA_FLOAT_VECTOR_MOD(double)
-
-#if SLANG_CUDA_RTC || SLANG_CUDA_ENABLE_HALF
-#define SLANG_MAKE_VECTOR(T) \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x, T y) { return T##2{x, y}; }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x, T y, T z) { return T##3{ x, y, z }; }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x, T y, T z, T w) { return T##4{ x, y, z, w }; }
-#endif
-
-#if SLANG_CUDA_RTC
-SLANG_MAKE_VECTOR(int)
-SLANG_MAKE_VECTOR(uint)
-SLANG_MAKE_VECTOR(short)
-SLANG_MAKE_VECTOR(ushort)
-SLANG_MAKE_VECTOR(char)
-SLANG_MAKE_VECTOR(uchar)
-SLANG_MAKE_VECTOR(float)
-SLANG_MAKE_VECTOR(double)
-SLANG_MAKE_VECTOR(longlong)
-SLANG_MAKE_VECTOR(ulonglong)
-#endif
-
-#if SLANG_CUDA_ENABLE_HALF
-SLANG_MAKE_VECTOR(__half)
-#endif
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool1 make_bool1(bool x) { return bool1{ x }; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool2 make_bool2(bool x, bool y) { return bool2{ x, y }; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool3 make_bool3(bool x, bool y, bool z) { return bool3{ x, y, z }; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool4 make_bool4(bool x, bool y, bool z, bool w) { return bool4{ x, y, z, w }; }
-
-#if SLANG_CUDA_RTC
-#define SLANG_MAKE_VECTOR_FROM_SCALAR(T) \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##1 make_##T##1(T x) { return T##1{x}; }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x) { return make_##T##2(x, x); }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x) { return make_##T##3(x, x, x); }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x) { return make_##T##4(x, x, x, x); }
-#else
-#define SLANG_MAKE_VECTOR_FROM_SCALAR(T) \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x) { return make_##T##2(x, x); }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x) { return make_##T##3(x, x, x); }\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x) { return make_##T##4(x, x, x, x); }
-#endif
-SLANG_MAKE_VECTOR_FROM_SCALAR(int)
-SLANG_MAKE_VECTOR_FROM_SCALAR(uint)
-SLANG_MAKE_VECTOR_FROM_SCALAR(short)
-SLANG_MAKE_VECTOR_FROM_SCALAR(ushort)
-SLANG_MAKE_VECTOR_FROM_SCALAR(char)
-SLANG_MAKE_VECTOR_FROM_SCALAR(uchar)
-SLANG_MAKE_VECTOR_FROM_SCALAR(longlong)
-SLANG_MAKE_VECTOR_FROM_SCALAR(ulonglong)
-SLANG_MAKE_VECTOR_FROM_SCALAR(float)
-SLANG_MAKE_VECTOR_FROM_SCALAR(double)
-#if SLANG_CUDA_ENABLE_HALF
-SLANG_MAKE_VECTOR_FROM_SCALAR(__half)
-#if !SLANG_CUDA_RTC
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half1 make___half1(__half x) { return __half1{x}; }
-#endif
-#endif
-
-#define SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(Fn,T,N) \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL T##N Fn(T##N* address, T##N val) \
-    {\
-        T##N result; \
-        for (int i = 0; i < N; i++) \
-            *_slang_vector_get_element_ptr(&result, i) = Fn(_slang_vector_get_element_ptr(address, i), _slang_vector_get_element(val, i)); \
-        return result; \
-    }\
-
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 900
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, float, 2)
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, float, 4)
-#endif
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, float, 3)
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, int, 2)
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, int, 3)
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, int, 4)
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, uint, 2)
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, uint, 3)
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, uint, 4)
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, ulonglong, 2)
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, ulonglong, 3)
-SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, ulonglong, 4)
-
-template<typename T, int n>
-struct GetVectorTypeImpl {};
-
-#define GET_VECTOR_TYPE_IMPL(T, n)\
-template<>\
-struct GetVectorTypeImpl<T,n>\
-{\
-    typedef T##n type;\
-    static SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n fromScalar(T v) { return make_##T##n(v); } \
-};
-#define GET_VECTOR_TYPE_IMPL_N(T)\
-    GET_VECTOR_TYPE_IMPL(T, 1)\
-    GET_VECTOR_TYPE_IMPL(T, 2)\
-    GET_VECTOR_TYPE_IMPL(T, 3)\
-    GET_VECTOR_TYPE_IMPL(T, 4)
-
-GET_VECTOR_TYPE_IMPL_N(int)
-GET_VECTOR_TYPE_IMPL_N(uint)
-GET_VECTOR_TYPE_IMPL_N(short)
-GET_VECTOR_TYPE_IMPL_N(ushort)
-GET_VECTOR_TYPE_IMPL_N(char)
-GET_VECTOR_TYPE_IMPL_N(uchar)
-GET_VECTOR_TYPE_IMPL_N(longlong)
-GET_VECTOR_TYPE_IMPL_N(ulonglong)
-GET_VECTOR_TYPE_IMPL_N(float)
-GET_VECTOR_TYPE_IMPL_N(double)
-#if SLANG_CUDA_ENABLE_HALF
-GET_VECTOR_TYPE_IMPL_N(__half)
-#endif
-template<typename T, int n>
-using Vector = typename GetVectorTypeImpl<T, n>::type;
-
-template<typename T, int n, typename OtherT, int m>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector<T, n> _slang_vector_reshape(const Vector<OtherT, m> other)
-{
-    Vector<T, n> result;
-    for (int i = 0; i < n; i++)
-    {
-        OtherT otherElement = T(0);
-        if (i < m)
-            otherElement = _slang_vector_get_element(other, i);
-        *_slang_vector_get_element_ptr(&result, i) = (T)otherElement;
-    }
-    return result;
-}
-
-template <typename T, int ROWS, int COLS>
-struct Matrix
-{
-    Vector<T, COLS> rows[ROWS];
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector<T, COLS>& operator[](size_t index) { return rows[index]; }
-};
-
-
-template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T scalar)
-{
-    Matrix<T, ROWS, COLS> result;
-    for (int i = 0; i < ROWS; i++)
-        result.rows[i] = GetVectorTypeImpl<T, COLS>::fromScalar(scalar);
-    return result;
-
-}
-
-template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector<T, COLS>& row0)
-{
-    Matrix<T, ROWS, COLS> result;
-    result.rows[0] = row0;
-    return result;
-}
-
-template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1)
-{
-    Matrix<T, ROWS, COLS> result;
-    result.rows[0] = row0;
-    result.rows[1] = row1;
-    return result;
-}
-
-template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1, const Vector<T, COLS>& row2)
-{
-    Matrix<T, ROWS, COLS> result;
-    result.rows[0] = row0;
-    result.rows[1] = row1;
-    result.rows[2] = row2;
-    return result;
-}
-
-template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1, const Vector<T, COLS>& row2, const Vector<T, COLS>& row3)
-{
-    Matrix<T, ROWS, COLS> result;
-    result.rows[0] = row0;
-    result.rows[1] = row1;
-    result.rows[2] = row2;
-    result.rows[3] = row3;
-    return result;
-}
-
-template<typename T, int ROWS, int COLS, typename U, int otherRow, int otherCol>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(const Matrix<U, otherRow, otherCol>& other)
-{
-    Matrix<T, ROWS, COLS> result;
-    int minRow = ROWS;
-    int minCol = COLS;
-    if (minRow > otherRow) minRow = otherRow;
-    if (minCol > otherCol) minCol = otherCol;
-    for (int i = 0; i < minRow; i++)
-        for (int j = 0; j < minCol; j++)
-            *_slang_vector_get_element_ptr(result.rows + i, j) = (T)_slang_vector_get_element(other.rows[i], j);
-    return result;
-}
-
-template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3)
-{
-    Matrix<T, ROWS, COLS> rs;
-    rs.rows[0].x = v0;  rs.rows[0].y = v1;
-    rs.rows[1].x = v2;  rs.rows[1].y = v3;
-    return rs;
-}
-
-template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5)
-{
-    Matrix<T, ROWS, COLS> rs;
-    if (COLS == 3)
-    {
-        rs.rows[0].x = v0;  rs.rows[0].y = v1; rs.rows[0].z = v2;
-        rs.rows[1].x = v3;  rs.rows[1].y = v4; rs.rows[1].z = v5;
-    }
-    else
-    {
-        rs.rows[0].x = v0;  rs.rows[0].y = v1;
-        rs.rows[1].x = v2;  rs.rows[1].y = v3;
-        rs.rows[2].x = v4;  rs.rows[2].y = v5;
-    }
-    return rs;
-
-}
-
-template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7)
-{
-    Matrix<T, ROWS, COLS> rs;
-    if (COLS == 4)
-    {
-        rs.rows[0].x = v0;  rs.rows[0].y = v1; rs.rows[0].z = v2; rs.rows[0].w = v3;
-        rs.rows[1].x = v4;  rs.rows[1].y = v5; rs.rows[1].z = v6; rs.rows[1].w = v7;
-    }
-    else
-    {
-        rs.rows[0].x = v0;  rs.rows[0].y = v1;
-        rs.rows[1].x = v2;  rs.rows[1].y = v3;
-        rs.rows[2].x = v4;  rs.rows[2].y = v5;
-        rs.rows[3].x = v6;  rs.rows[3].y = v7;
-    }
-    return rs;
-}
-
-template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8)
-{
-    Matrix<T, ROWS, COLS> rs;
-    rs.rows[0].x = v0;  rs.rows[0].y = v1;  rs.rows[0].z = v2;
-    rs.rows[1].x = v3;  rs.rows[1].y = v4;  rs.rows[1].z = v5;
-    rs.rows[2].x = v6;  rs.rows[2].y = v7;  rs.rows[2].z = v8;
-    return rs;
-}
-
-template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11)
-{
-    Matrix<T, ROWS, COLS> rs;
-    if (COLS == 4)
-    {
-        rs.rows[0].x = v0;  rs.rows[0].y = v1;  rs.rows[0].z = v2;  rs.rows[0].w = v3;
-        rs.rows[1].x = v4;  rs.rows[1].y = v5;  rs.rows[1].z = v6;  rs.rows[1].w = v7;
-        rs.rows[2].x = v8;  rs.rows[2].y = v9;  rs.rows[2].z = v10; rs.rows[2].w = v11;
-    }
-    else
-    {
-        rs.rows[0].x = v0;  rs.rows[0].y = v1;  rs.rows[0].z = v2;
-        rs.rows[1].x = v3;  rs.rows[1].y = v4;  rs.rows[1].z = v5;
-        rs.rows[2].x = v6;  rs.rows[2].y = v7;  rs.rows[2].z = v8;
-        rs.rows[3].x = v9;  rs.rows[3].y = v10; rs.rows[3].z = v11;
-    }
-    return rs;
-}
-
-template<typename T, int ROWS, int COLS>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, ROWS, COLS> makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15)
-{
-    Matrix<T, ROWS, COLS> rs;
-    rs.rows[0].x = v0;  rs.rows[0].y = v1;  rs.rows[0].z = v2;  rs.rows[0].w = v3;
-    rs.rows[1].x = v4;  rs.rows[1].y = v5;  rs.rows[1].z = v6;  rs.rows[1].w = v7;
-    rs.rows[2].x = v8;  rs.rows[2].y = v9;  rs.rows[2].z = v10; rs.rows[2].w = v11;
-    rs.rows[3].x = v12; rs.rows[3].y = v13; rs.rows[3].z = v14; rs.rows[3].w = v15;
-    return rs;
-}
-
-#define SLANG_MATRIX_BINARY_OP(T, op) \
-    template<int R, int C> \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal, const Matrix<T, R, C>& other) \
-    { \
-        Matrix<T, R, C> result;\
-        for (int i = 0; i < R; i++) \
-            for (int j = 0; j < C; j++) \
-                *_slang_vector_get_element_ptr(result.rows+i,j) = _slang_vector_get_element(thisVal.rows[i], j) op _slang_vector_get_element(other.rows[i], j); \
-        return result;\
-    }
-
-#define SLANG_MATRIX_UNARY_OP(T, op) \
-    template<int R, int C> \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal) \
-    { \
-        Matrix<T, R, C> result;\
-        for (int i = 0; i < R; i++) \
-            for (int j = 0; j < C; j++) \
-                *_slang_vector_get_element_ptr(result.rows+i,j) = op _slang_vector_get_element(thisVal.rows[i], j); \
-        return result;\
-    }
-#define SLANG_INT_MATRIX_OPS(T) \
-    SLANG_MATRIX_BINARY_OP(T, +)\
-    SLANG_MATRIX_BINARY_OP(T, -)\
-    SLANG_MATRIX_BINARY_OP(T, *)\
-    SLANG_MATRIX_BINARY_OP(T, / )\
-    SLANG_MATRIX_BINARY_OP(T, &)\
-    SLANG_MATRIX_BINARY_OP(T, |)\
-    SLANG_MATRIX_BINARY_OP(T, &&)\
-    SLANG_MATRIX_BINARY_OP(T, ||)\
-    SLANG_MATRIX_BINARY_OP(T, ^)\
-    SLANG_MATRIX_BINARY_OP(T, %)\
-    SLANG_MATRIX_UNARY_OP(T, !)\
-    SLANG_MATRIX_UNARY_OP(T, ~)
-#define SLANG_FLOAT_MATRIX_OPS(T) \
-    SLANG_MATRIX_BINARY_OP(T, +)\
-    SLANG_MATRIX_BINARY_OP(T, -)\
-    SLANG_MATRIX_BINARY_OP(T, *)\
-    SLANG_MATRIX_BINARY_OP(T, /)\
-    SLANG_MATRIX_UNARY_OP(T, -)
-SLANG_INT_MATRIX_OPS(int)
-SLANG_INT_MATRIX_OPS(uint)
-SLANG_INT_MATRIX_OPS(short)
-SLANG_INT_MATRIX_OPS(ushort)
-SLANG_INT_MATRIX_OPS(char)
-SLANG_INT_MATRIX_OPS(uchar)
-SLANG_INT_MATRIX_OPS(longlong)
-SLANG_INT_MATRIX_OPS(ulonglong)
-SLANG_FLOAT_MATRIX_OPS(float)
-SLANG_FLOAT_MATRIX_OPS(double)
-#if SLANG_CUDA_ENABLE_HALF
-SLANG_FLOAT_MATRIX_OPS(__half)
-#endif
-#define SLANG_MATRIX_INT_NEG_OP(T) \
-    template<int R, int C>\
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator-(Matrix<T, R, C> thisVal) \
-    { \
-        Matrix<T, R, C> result;\
-        for (int i = 0; i < R; i++) \
-            for (int j = 0; j < C; j++) \
-                *_slang_vector_get_element_ptr(result.rows+i,j) = 0 - _slang_vector_get_element(thisVal.rows[i], j); \
-        return result;\
-    }
-    SLANG_MATRIX_INT_NEG_OP(int)
-    SLANG_MATRIX_INT_NEG_OP(uint)
-    SLANG_MATRIX_INT_NEG_OP(short)
-    SLANG_MATRIX_INT_NEG_OP(ushort)
-    SLANG_MATRIX_INT_NEG_OP(char)
-    SLANG_MATRIX_INT_NEG_OP(uchar)
-    SLANG_MATRIX_INT_NEG_OP(longlong)
-    SLANG_MATRIX_INT_NEG_OP(ulonglong)
-
-#define SLANG_FLOAT_MATRIX_MOD(T)\
-    template<int R, int C> \
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<T, R, C> operator%(Matrix<T, R, C> left, Matrix<T, R, C> right) \
-    {\
-        Matrix<T, R, C> result;\
-        for (int i = 0; i < R; i++) \
-            for (int j = 0; j < C; j++) \
-                *_slang_vector_get_element_ptr(result.rows+i,j) = _slang_fmod(_slang_vector_get_element(left.rows[i], j), _slang_vector_get_element(right.rows[i], j)); \
-        return result;\
-    }
-
-    SLANG_FLOAT_MATRIX_MOD(float)
-    SLANG_FLOAT_MATRIX_MOD(double)
-#if SLANG_CUDA_ENABLE_HALF
-    template<int R, int C> 
-    SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<__half, R, C> operator%(Matrix<__half, R, C> left, Matrix<__half, R, C> right)
-    {
-        Matrix<__half, R, C> result;
-        for (int i = 0; i < R; i++) 
-            for (int j = 0; j < C; j++) 
-                * _slang_vector_get_element_ptr(result.rows + i, j) = __float2half(_slang_fmod(__half2float(_slang_vector_get_element(left.rows[i], j)), __half2float(_slang_vector_get_element(right.rows[i], j))));
-        return result;
-    }
-#endif
-#undef SLANG_FLOAT_MATRIX_MOD
-#undef SLANG_MATRIX_BINARY_OP
-#undef SLANG_MATRIX_UNARY_OP
-#undef SLANG_INT_MATRIX_OPS
-#undef SLANG_FLOAT_MATRIX_OPS
-#undef SLANG_MATRIX_INT_NEG_OP
-#undef SLANG_FLOAT_MATRIX_MOD
-
-#define SLANG_SELECT_IMPL(T, N)\
-SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector<T, N> _slang_select(bool##N condition, Vector<T, N> v0, Vector<T, N> v1) \
-{ \
-    Vector<T, N> result; \
-    for (int i = 0; i < N; i++) \
-    { \
-        *_slang_vector_get_element_ptr(&result, i) = _slang_vector_get_element(condition, i) ? _slang_vector_get_element(v0, i) : _slang_vector_get_element(v1, i); \
-    } \
-    return result; \
-}
-#define SLANG_SELECT_T(T)\
-    SLANG_SELECT_IMPL(T, 2)\
-    SLANG_SELECT_IMPL(T, 3)\
-    SLANG_SELECT_IMPL(T, 4)
-
-SLANG_SELECT_T(int)
-SLANG_SELECT_T(uint)
-SLANG_SELECT_T(short)
-SLANG_SELECT_T(ushort)
-SLANG_SELECT_T(char)
-SLANG_SELECT_T(uchar)
-SLANG_SELECT_T(float)
-SLANG_SELECT_T(double)
-
-template<typename T>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_select(bool condition, T v0, T v1)
-{
-    return condition ? v0 : v1;
-}
-
-//
-// Half support
-// 
-
-#if SLANG_CUDA_ENABLE_HALF
-SLANG_SELECT_T(__half)
-
-// Convenience functions ushort -> half
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 __ushort_as_half(const ushort2& i) { return __halves2half2(__ushort_as_half(i.x), __ushort_as_half(i.y)); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 __ushort_as_half(const ushort3& i) { return __half3{__ushort_as_half(i.x), __ushort_as_half(i.y), __ushort_as_half(i.z)}; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 __ushort_as_half(const ushort4& i) { return __half4{ __ushort_as_half(i.x), __ushort_as_half(i.y), __ushort_as_half(i.z), __ushort_as_half(i.w) }; }
-
-// Convenience functions half -> ushort
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort2 __half_as_ushort(const __half2& i) { return make_ushort2(__half_as_ushort(i.x), __half_as_ushort(i.y)); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort3 __half_as_ushort(const __half3& i) { return make_ushort3(__half_as_ushort(i.x), __half_as_ushort(i.y), __half_as_ushort(i.z)); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort4 __half_as_ushort(const __half4& i) { return make_ushort4(__half_as_ushort(i.x), __half_as_ushort(i.y), __half_as_ushort(i.z), __half_as_ushort(i.w)); }
-
-// This is a little bit of a hack. Fortunately CUDA has the definitions of the templated types in 
-// include/surface_indirect_functions.h
-// Here we find the template definition requires a specialization of __nv_isurf_trait to allow 
-// a specialization of the surface write functions. 
-// This *isn't* a problem on the read functions as they don't have a return type that uses this mechanism 
-
-template<> struct __nv_isurf_trait<__half> { typedef void type; };
-template<> struct __nv_isurf_trait<__half2> { typedef void type; };
-template<> struct __nv_isurf_trait<__half4> { typedef void type; };
-
-#define SLANG_DROP_PARENS(...) __VA_ARGS__
-
-#define SLANG_SURFACE_READ(FUNC_NAME, TYPE_ARGS, ARGS) \
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half FUNC_NAME<__half>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
-    return __ushort_as_half(FUNC_NAME<ushort>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 FUNC_NAME<__half2>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
-    return __ushort_as_half(FUNC_NAME<ushort2>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 FUNC_NAME<__half4>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
-    return __ushort_as_half(FUNC_NAME<ushort4>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
-}
-
-SLANG_SURFACE_READ(surf1Dread, (int x), (x))
-SLANG_SURFACE_READ(surf2Dread, (int x, int y), (x, y))
-SLANG_SURFACE_READ(surf3Dread, (int x, int y, int z), (x, y, z))
-SLANG_SURFACE_READ(surf1DLayeredread, (int x, int layer), (x, layer))
-SLANG_SURFACE_READ(surf2DLayeredread, (int x, int y, int layer), (x, y, layer))
-SLANG_SURFACE_READ(surfCubemapread, (int x, int y, int face), (x, y, face))
-SLANG_SURFACE_READ(surfCubemapLayeredread, (int x, int y, int layerFace), (x, y, layerFace))
-
-#define SLANG_SURFACE_WRITE(FUNC_NAME, TYPE_ARGS, ARGS) \
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half>(__half data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
-    FUNC_NAME<ushort>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode);  \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half2>(__half2 data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
-    FUNC_NAME<ushort2>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode);  \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half4>(__half4 data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
-    FUNC_NAME<ushort4>(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \
-}
-
-SLANG_SURFACE_WRITE(surf1Dwrite, (int x), (x))
-SLANG_SURFACE_WRITE(surf2Dwrite, (int x, int y), (x, y))
-SLANG_SURFACE_WRITE(surf3Dwrite, (int x, int y, int z), (x, y, z))
-SLANG_SURFACE_WRITE(surf1DLayeredwrite, (int x, int layer), (x, layer))
-SLANG_SURFACE_WRITE(surf2DLayeredwrite, (int x, int y, int layer), (x, y, layer))
-SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face))
-SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace))
-
-// ! Hack to test out reading !!!
-// Only works converting *from* half 
- 
-//template <typename T> 
-//SLANG_FORCE_INLINE SLANG_CUDA_CALL T surf2Dread_convert(cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode);
-
-#define SLANG_SURFACE_READ_HALF_CONVERT(FUNC_NAME, TYPE_ARGS, ARGS) \
-\
-template <typename T>  \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL T FUNC_NAME##_convert(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode); \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float FUNC_NAME##_convert<float>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode)  \
-{ \
-    return __ushort_as_half(FUNC_NAME<uint16_t>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float2 FUNC_NAME##_convert<float2>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
-    const __half2 v = __ushort_as_half(FUNC_NAME<ushort2>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
-    return float2{v.x, v.y}; \
-} \
-\
-template <> \
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float4 FUNC_NAME##_convert<float4>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \
-{ \
-    const __half4 v = __ushort_as_half(FUNC_NAME<ushort4>(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \
-    return float4{v.x, v.y, v.z, v.w}; \
-}
-
-SLANG_SURFACE_READ_HALF_CONVERT(surf1Dread, (int x), (x)) 
-SLANG_SURFACE_READ_HALF_CONVERT(surf2Dread, (int x, int y), (x, y)) 
-SLANG_SURFACE_READ_HALF_CONVERT(surf3Dread, (int x, int y, int z), (x, y, z))
-
-#endif
-
-// Support for doing format conversion when writing to a surface/RWTexture
-
-// NOTE! For normal surface access x values are *byte* addressed.
-// For the _convert versions they are *not*. They don't need to be because sust.p does not require it.
-
-template <typename T>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode);
-template <typename T>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode);
-template <typename T>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode);
-
-// https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#surface-instructions-sust
-
-// Float
-
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode)
-{
-    asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2};}\n\t" :: "l"(surfObj),"r"(x),"f"(v));     
-}
- 
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode)
-{
-    asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(v));
-}
-
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode)
-{
-    asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3}], {%4};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"r"(z),"f"(v));
-}
-
-// Float2
-
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float2>(float2 v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode)
-{
-    const float vx = v.x, vy = v.y;
-    asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2,%3};}\n\t" :: "l"(surfObj),"r"(x),"f"(vx),"f"(vy));     
-}
- 
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float2>(float2 v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode)
-{
-    const float vx = v.x, vy = v.y;
-    asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(vx),"f"(vy));
-}
-
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float2>(float2 v, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode)
-{
-    const float vx = v.x, vy = v.y;
-    asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3}], {%4,%5};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"r"(z),"f"(vx),"f"(vy));
-}
-
-// Float4
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float4>(float4 v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode)
-{
-    const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
-    asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2,%3,%4,%5};}\n\t" :: "l"(surfObj),"r"(x),"f"(vx),"f"(vy),"f"(vz),"f"(vw));     
-}
- 
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float4>(float4 v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode)
-{
-    const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
-    asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(vx),"f"(vy),"f"(vz),"f"(vw));
-}
-
-template <>
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert<float4>(float4 v, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode)
-{
-    const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
-    asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3}], {%4,%5,%6,%7};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"r"(z),"f"(vx),"f"(vy),"f"(vz),"f"(vw));
-}
-
-// ----------------------------- F32 -----------------------------------------
-
-// Unary 
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_ceil(float f) { return ::ceilf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_floor(float f) { return ::floorf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_round(float f) { return ::roundf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sin(float f) { return ::sinf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_cos(float f) { return ::cosf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void F32_sincos(float f, float* s, float* c) { ::sincosf(f, s, c); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_tan(float f) { return ::tanf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_asin(float f) { return ::asinf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_acos(float f) { return ::acosf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_atan(float f) { return ::atanf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sinh(float f) { return ::sinhf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_cosh(float f) { return ::coshf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_tanh(float f) { return ::tanhf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log2(float f) { return ::log2f(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log(float f) { return ::logf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log10(float f) { return ::log10f(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_exp2(float f) { return ::exp2f(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_exp(float f) { return ::expf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_abs(float f) { return ::fabsf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_trunc(float f) { return ::truncf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sqrt(float f) { return ::sqrtf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_rsqrt(float f) { return ::rsqrtf(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sign(float f) { return ( f == 0.0f) ? f : (( f < 0.0f) ? -1.0f : 1.0f); } 
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_frac(float f) { return f - F32_floor(f); }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isnan(float f) { return isnan(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isfinite(float f) { return isfinite(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isinf(float f) { return isinf(f); }
-
-// Binary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_min(float a, float b) { return ::fminf(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_max(float a, float b) { return ::fmaxf(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_pow(float a, float b) { return ::powf(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_fmod(float a, float b) { return ::fmodf(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_remainder(float a, float b) { return ::remainderf(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_atan2(float a, float b) { return float(::atan2(a, b)); }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_frexp(float x, int* e) { return frexpf(x, e); }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_modf(float x, float* ip)
-{
-    return ::modff(x, ip);
-}
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t F32_asuint(float f) { Union32 u; u.f = f; return u.u; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t F32_asint(float f) { Union32 u; u.f = f; return u.i; }
-
-// Ternary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_fma(float a, float b, float c) { return ::fmaf(a, b, c); }
-
-
-// ----------------------------- F64 -----------------------------------------
-
-// Unary 
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_ceil(double f) { return ::ceil(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_floor(double f) { return ::floor(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_round(double f) { return ::round(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sin(double f) { return ::sin(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_cos(double f) { return ::cos(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_sincos(double f, double* s, double* c) { ::sincos(f, s, c); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_tan(double f) { return ::tan(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_asin(double f) { return ::asin(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_acos(double f) { return ::acos(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_atan(double f) { return ::atan(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sinh(double f) { return ::sinh(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_cosh(double f) { return ::cosh(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_tanh(double f) { return ::tanh(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log2(double f) { return ::log2(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log(double f) { return ::log(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log10(float f) { return ::log10(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_exp2(double f) { return ::exp2(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_exp(double f) { return ::exp(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_abs(double f) { return ::fabs(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_trunc(double f) { return ::trunc(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sqrt(double f) { return ::sqrt(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_rsqrt(double f) { return ::rsqrt(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sign(double f) { return (f == 0.0) ? f : ((f < 0.0) ? -1.0 : 1.0); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_frac(double f) { return f - F64_floor(f); }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isnan(double f) { return isnan(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isfinite(double f) { return isfinite(f); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isinf(double f) { return isinf(f); }
-
-// Binary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_min(double a, double b) { return ::fmin(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_max(double a, double b) { return ::fmax(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_pow(double a, double b) { return ::pow(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_fmod(double a, double b) { return ::fmod(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_remainder(double a, double b) { return ::remainder(a, b); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_atan2(double a, double b) { return ::atan2(a, b); }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_frexp(double x, int* e) { return ::frexp(x, e); }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_modf(double x, double* ip)
-{
-    return ::modf(x, ip);
-}
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_asuint(double d, uint32_t* low, uint32_t* hi)
-{
-    Union64 u;
-    u.d = d;
-    *low = uint32_t(u.u);
-    *hi = uint32_t(u.u >> 32);
-}
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_asint(double d, int32_t* low, int32_t* hi)
-{
-    Union64 u;
-    u.d = d;
-    *low = int32_t(u.u);
-    *hi = int32_t(u.u >> 32);
-}
-
-// Ternary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_fma(double a, double b, double c) { return ::fma(a, b, c); }
-
-// ----------------------------- I32 -----------------------------------------
-
-// Unary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_abs(int32_t f) { return (f < 0) ? -f : f; }
-
-// Binary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_min(int32_t a, int32_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_max(int32_t a, int32_t b) { return a > b ? a : b; }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float I32_asfloat(int32_t x) { Union32 u; u.i = x; return u.f; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_asuint(int32_t x) { return uint32_t(x); }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double I32_asdouble(int32_t low, int32_t hi )
-{
-    Union64 u;
-    u.u = (uint64_t(hi) << 32) | uint32_t(low);
-    return u.d;
-}
-
-// ----------------------------- U32 -----------------------------------------
-
-// Unary 
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_abs(uint32_t f) { return f; }
-
-// Binary
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_min(uint32_t a, uint32_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_max(uint32_t a, uint32_t b) { return a > b ? a : b; }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL float U32_asfloat(uint32_t x) { Union32 u; u.u = x; return u.f; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_asint(int32_t x) { return uint32_t(x); }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL double U32_asdouble(uint32_t low, uint32_t hi)
-{
-    Union64 u;
-    u.u = (uint64_t(hi) << 32) | low;
-    return u.d;
-}
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_countbits(uint32_t v)
-{
-    // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html#group__CUDA__MATH__INTRINSIC__INT_1g43c9c7d2b9ebf202ff1ef5769989be46
-    return __popc(v);
-}
-
-
-// ----------------------------- I64 -----------------------------------------
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_abs(int64_t f) { return (f < 0) ? -f : f; }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_min(int64_t a, int64_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_max(int64_t a, int64_t b) { return a > b ? a : b; }
-
-// ----------------------------- U64 -----------------------------------------
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t U64_abs(uint64_t f) { return f; }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t U64_min(uint64_t a, uint64_t b) { return a < b ? a : b; }
-SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t U64_max(uint64_t a, uint64_t b) { return a > b ? a : b; }
-
-SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v)
-{
-    // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html#group__CUDA__MATH__INTRINSIC__INT_1g43c9c7d2b9ebf202ff1ef5769989be46
-    return __popcll(v);
-}
-
-
-// ----------------------------- ResourceType -----------------------------------------
-
-
-// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-structuredbuffer-getdimensions
-// Missing  Load(_In_  int  Location, _Out_ uint Status);
-
-template <typename T>
-struct StructuredBuffer
-{
-    SLANG_CUDA_CALL const T& operator[](size_t index) const
-    {
-#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
-        SLANG_BOUND_CHECK(index, count);
-#endif
-        return data[index];
-    }
-
-    SLANG_CUDA_CALL const T& Load(size_t index) const
-    {
-#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
-        SLANG_BOUND_CHECK(index, count);
-#endif
-        return data[index];
-    }
-
-#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
-    SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); }
-#endif
-
-    T* data;
-#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
-    size_t count;
-#endif
-};
-
-template <typename T>
-struct RWStructuredBuffer : StructuredBuffer<T>
-{
-    SLANG_CUDA_CALL T& operator[](size_t index) const
-    {
-#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
-        SLANG_BOUND_CHECK(index, this->count);
-#endif
-        return this->data[index];
-    }
-};
-
-// Missing  Load(_In_  int  Location, _Out_ uint Status);
-struct ByteAddressBuffer
-{
-    SLANG_CUDA_CALL void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); }
-    SLANG_CUDA_CALL uint32_t Load(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
-        return data[index >> 2]; 
-    }
-    SLANG_CUDA_CALL uint2 Load2(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes); 
-        const size_t dataIdx = index >> 2; 
-        return uint2{data[dataIdx], data[dataIdx + 1]}; 
-    }
-    SLANG_CUDA_CALL uint3 Load3(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]}; 
-    }
-    SLANG_CUDA_CALL uint4 Load4(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]}; 
-    }
-    template<typename T>
-    SLANG_CUDA_CALL T Load(size_t index) const
-    {
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
-        T data;
-        memcpy(&data, ((const char*)this->data) + index, sizeof(T));
-        return data;
-    }
-    
-    const uint32_t* data;
-    size_t sizeInBytes;  //< Must be multiple of 4
-};
-
-// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-rwbyteaddressbuffer
-// Missing support for Atomic operations 
-// Missing support for Load with status
-struct RWByteAddressBuffer
-{
-    SLANG_CUDA_CALL void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); }
-    
-    SLANG_CUDA_CALL uint32_t Load(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
-        return data[index >> 2]; 
-    }
-    SLANG_CUDA_CALL uint2 Load2(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        return uint2{data[dataIdx], data[dataIdx + 1]}; 
-    }
-    SLANG_CUDA_CALL uint3 Load3(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]}; 
-    }
-    SLANG_CUDA_CALL uint4 Load4(size_t index) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]}; 
-    }
-    template<typename T>
-    SLANG_CUDA_CALL T Load(size_t index) const
-    {
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
-        T data;
-        memcpy(&data, ((const char*)this->data) + index, sizeof(T));
-        return data;
-    }
-    
-    SLANG_CUDA_CALL void Store(size_t index, uint32_t v) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
-        data[index >> 2] = v; 
-    }
-    SLANG_CUDA_CALL void Store2(size_t index, uint2 v) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        data[dataIdx + 0] = v.x;
-        data[dataIdx + 1] = v.y;
-    }
-    SLANG_CUDA_CALL void Store3(size_t index, uint3 v) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        data[dataIdx + 0] = v.x;
-        data[dataIdx + 1] = v.y;
-        data[dataIdx + 2] = v.z;
-    }
-    SLANG_CUDA_CALL void Store4(size_t index, uint4 v) const 
-    { 
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes);
-        const size_t dataIdx = index >> 2; 
-        data[dataIdx + 0] = v.x;
-        data[dataIdx + 1] = v.y;
-        data[dataIdx + 2] = v.z;
-        data[dataIdx + 3] = v.w;
-    }
-    template<typename T>
-    SLANG_CUDA_CALL void Store(size_t index, T const& value) const
-    {
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
-        memcpy((char*)data + index, &value, sizeof(T));
-    }
-    
-        /// Can be used in stdlib to gain access
-    template <typename T>
-    SLANG_CUDA_CALL T* _getPtrAt(size_t index)
-    {
-        SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
-        return (T*)(((char*)data) + index);
-    }
-    
-    uint32_t* data;
-    size_t sizeInBytes; //< Must be multiple of 4 
-};
-
-
-// ---------------------- Wave --------------------------------------
-
-// TODO(JS): It appears that cuda does not have a simple way to get a lane index. 
-// 
-// Another approach could be... 
-// laneId = ((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x) & SLANG_CUDA_WARP_MASK
-// If that is really true another way to do this, would be for code generator to add this function 
-// with the [numthreads] baked in. 
-// 
-// For now I'll just assume you have a launch that makes the following correct if the kernel uses WaveGetLaneIndex()
-#ifndef SLANG_USE_ASM_LANE_ID
- __forceinline__ __device__ uint32_t _getLaneId()
-{
-    // If the launch is (or I guess some multiple of the warp size) 
-    // we try this mechanism, which is apparently faster. 
-    return threadIdx.x & SLANG_CUDA_WARP_MASK;
-}
-#else
-__forceinline__ __device__ uint32_t _getLaneId()
-{
-    // https://stackoverflow.com/questions/44337309/whats-the-most-efficient-way-to-calculate-the-warp-id-lane-id-in-a-1-d-grid#
-    // This mechanism is not the fastest way to do it, and that is why the other mechanism 
-    // is the default. But the other mechanism relies on a launch that makes the assumption 
-    // true.
-    unsigned ret; 
-    asm volatile ("mov.u32 %0, %laneid;" : "=r"(ret));
-    return ret;
-}
-#endif
-
-typedef int WarpMask;
-
-// It appears that the __activemask() cannot always be used because 
-// threads need to be converged. 
-// 
-// For CUDA the article claims mask has to be used carefully
-// https://devblogs.nvidia.com/using-cuda-warp-level-primitives/
-// With the Warp intrinsics there is no mask, and it's just the 'active lanes'. 
-// __activemask() though does not require there is convergence, so that doesn't work.
-// 
-// '__ballot_sync' produces a convergance. 
-// 
-// From the CUDA docs:
-// ```For __all_sync, __any_sync, and __ballot_sync, a mask must be passed that specifies the threads 
-// participating in the call. A bit, representing the thread's lane ID, must be set for each participating thread 
-// to ensure they are properly converged before the intrinsic is executed by the hardware. All active threads named 
-// in mask must execute the same intrinsic with the same mask, or the result is undefined.```
-//
-// Currently there isn't a mechanism to correctly get the mask without it being passed through.
-// Doing so will most likely require some changes to slang code generation to track masks, for now then we use
-// _getActiveMask. 
-
-// Return mask of all the lanes less than the current lane
-__forceinline__ __device__ WarpMask _getLaneLtMask()
-{
-    return (int(1) << _getLaneId()) - 1;
-}    
-
-// TODO(JS): 
-// THIS IS NOT CORRECT! That determining the appropriate active mask requires appropriate
-// mask tracking.
-__forceinline__ __device__ WarpMask _getActiveMask()
-{
-    return __ballot_sync(__activemask(), true);
-}
-
-// Return a mask suitable for the 'MultiPrefix' style functions
-__forceinline__ __device__ WarpMask _getMultiPrefixMask(int mask)
-{
-    return mask;
-}
-
-// Note! Note will return true if mask is 0, but thats okay, because there must be one
-// lane active to execute anything
-__inline__ __device__ bool _waveIsSingleLane(WarpMask mask)
-{
-    return (mask & (mask - 1)) == 0;
-}
-
-// Returns the power of 2 size of run of set bits. Returns 0 if not a suitable run.
-// Examples:
-// 0b00000000'00000000'00000000'11111111 -> 8
-// 0b11111111'11111111'11111111'11111111 -> 32
-// 0b00000000'00000000'00000000'00011111 -> 0 (since 5 is not a power of 2)
-// 0b00000000'00000000'00000000'11110000 -> 0 (since the run of bits does not start at the LSB)
-// 0b00000000'00000000'00000000'00100111 -> 0 (since it is not a single contiguous run)
-__inline__ __device__ int _waveCalcPow2Offset(WarpMask mask)
-{
-    // This should be the most common case, so fast path it
-    if (mask == SLANG_CUDA_WARP_BITMASK)
-    {
-        return SLANG_CUDA_WARP_SIZE;
-    }
-    // Is it a contiguous run of bits?
-    if ((mask & (mask + 1)) == 0)
-    {
-        // const int offsetSize = __ffs(mask + 1) - 1;
-        const int offset = 32 - __clz(mask);
-        // Is it a power of 2 size
-        if ((offset & (offset - 1)) == 0)
-        {
-            return offset;
-        }
-    }
-    return 0;
-}
-
-__inline__ __device__ bool _waveIsFirstLane()
-{
-    const WarpMask mask = __activemask();
-    // We special case bit 0, as that most warps are expected to be fully active. 
-    
-    // mask & -mask, isolates the lowest set bit.
-    //return (mask & 1 ) || ((mask & -mask) == (1 << _getLaneId()));
-    
-    // This mechanism is most similar to what was in an nVidia post, so assume it is prefered. 
-    return (mask & 1 ) || ((__ffs(mask) - 1) == _getLaneId());
-}
-
-template <typename T>
-struct WaveOpOr
-{
-    __inline__ __device__ static T getInitial(T a) { return 0; }
-    __inline__ __device__ static T doOp(T a, T b) { return a | b; }
-};
-
-template <typename T>
-struct WaveOpAnd
-{
-    __inline__ __device__ static T getInitial(T a) { return ~T(0); }
-    __inline__ __device__ static T doOp(T a, T b) { return a & b; }
-};
-
-template <typename T>
-struct WaveOpXor
-{
-    __inline__ __device__ static T getInitial(T a) { return 0; }
-    __inline__ __device__ static T doOp(T a, T b) { return a ^ b; }
-    __inline__ __device__ static T doInverse(T a, T b) { return a ^ b; }
-};
-
-template <typename T>
-struct WaveOpAdd
-{
-    __inline__ __device__ static T getInitial(T a) { return 0; }
-    __inline__ __device__ static T doOp(T a, T b) { return a + b; }
-    __inline__ __device__ static T doInverse(T a, T b) { return a - b; }
-};
-
-template <typename T>
-struct WaveOpMul
-{
-    __inline__ __device__ static T getInitial(T a) { return T(1); }
-    __inline__ __device__ static T doOp(T a, T b) { return a * b; }
-    // Using this inverse for int is probably undesirable - because in general it requires T to have more precision
-    // There is also a performance aspect to it, where divides are generally significantly slower
-    __inline__ __device__ static T doInverse(T a, T b) { return a / b; }
-};
-
-template <typename T>
-struct WaveOpMax
-{
-    __inline__ __device__ static T getInitial(T a) { return a; }
-    __inline__ __device__ static T doOp(T a, T b) { return a > b ? a : b; }
-};
-
-template <typename T>
-struct WaveOpMin
-{
-    __inline__  __device__ static T getInitial(T a) { return a; }
-    __inline__ __device__ static T doOp(T a, T b) { return a < b ? a : b; }
-};
-
-template <typename T>
-struct ElementTypeTrait;
-
-// Scalar
-template <> struct ElementTypeTrait<int> { typedef int Type; };
-template <> struct ElementTypeTrait<uint> { typedef uint Type; };
-template <> struct ElementTypeTrait<float> { typedef float Type; };
-template <> struct ElementTypeTrait<double> { typedef double Type; };
-template <> struct ElementTypeTrait<uint64_t> { typedef uint64_t Type; };
-template <> struct ElementTypeTrait<int64_t> { typedef int64_t Type; };
-
-// Vector
-template <> struct ElementTypeTrait<int1> { typedef int Type; };
-template <> struct ElementTypeTrait<int2> { typedef int Type; };
-template <> struct ElementTypeTrait<int3> { typedef int Type; };
-template <> struct ElementTypeTrait<int4> { typedef int Type; };
-
-template <> struct ElementTypeTrait<uint1> { typedef uint Type; };
-template <> struct ElementTypeTrait<uint2> { typedef uint Type; };
-template <> struct ElementTypeTrait<uint3> { typedef uint Type; };
-template <> struct ElementTypeTrait<uint4> { typedef uint Type; };
-
-template <> struct ElementTypeTrait<float1> { typedef float Type; };
-template <> struct ElementTypeTrait<float2> { typedef float Type; };
-template <> struct ElementTypeTrait<float3> { typedef float Type; };
-template <> struct ElementTypeTrait<float4> { typedef float Type; };
-
-template <> struct ElementTypeTrait<double1> { typedef double Type; };
-template <> struct ElementTypeTrait<double2> { typedef double Type; };
-template <> struct ElementTypeTrait<double3> { typedef double Type; };
-template <> struct ElementTypeTrait<double4> { typedef double Type; };
-
-// Matrix
-template <typename T, int ROWS, int COLS> 
-struct ElementTypeTrait<Matrix<T, ROWS, COLS> >  
-{ 
-    typedef T Type; 
-};
-
-// Scalar 
-template <typename INTF, typename T>
-__device__ T _waveReduceScalar(WarpMask mask, T val)
-{
-    const int offsetSize = _waveCalcPow2Offset(mask);
-    if (offsetSize > 0)
-    {
-        // Fast path O(log2(activeLanes)) 
-        for (int offset = offsetSize >> 1; offset > 0; offset >>= 1)
-        {
-            val = INTF::doOp(val, __shfl_xor_sync(mask, val, offset));
-        }
-    }
-    else if (!_waveIsSingleLane(mask))
-    {
-        T result = INTF::getInitial(val);
-        int remaining = mask;
-        while (remaining)
-        {
-            const int laneBit = remaining & -remaining;
-            // Get the sourceLane 
-            const int srcLane = __ffs(laneBit) - 1;
-            // Broadcast (can also broadcast to self) 
-            result = INTF::doOp(result, __shfl_sync(mask, val, srcLane));
-            remaining &= ~laneBit;
-        }
-        return result;
-    }
-    return val;
-}
-
-
-// Multiple values
-template <typename INTF, typename T, size_t COUNT>
-__device__ void _waveReduceMultiple(WarpMask mask, T* val)
-{
-    const int offsetSize = _waveCalcPow2Offset(mask);
-    if (offsetSize > 0)
-    {
-        // Fast path O(log2(activeLanes)) 
-        for (int offset = offsetSize >> 1; offset > 0; offset >>= 1)
-        {
-            for (size_t i = 0; i < COUNT; ++i)
-            {
-                val[i] = INTF::doOp(val[i], __shfl_xor_sync(mask, val[i], offset));
-            }
-        }
-    }
-    else if (!_waveIsSingleLane(mask))
-    {
-        // Copy the original
-        T originalVal[COUNT];
-        for (size_t i = 0; i < COUNT; ++i)
-        {
-            const T v = val[i];
-            originalVal[i] = v;
-            val[i] = INTF::getInitial(v);
-        }
-        
-        int remaining = mask;
-        while (remaining)
-        {
-            const int laneBit = remaining & -remaining;
-            // Get the sourceLane 
-            const int srcLane = __ffs(laneBit) - 1;
-            // Broadcast (can also broadcast to self) 
-            for (size_t i = 0; i < COUNT; ++i)
-            {
-                val[i] = INTF::doOp(val[i], __shfl_sync(mask, originalVal[i], srcLane));
-            }
-            remaining &= ~laneBit;
-        }
-    }
-}
-
-template <typename INTF, typename T>
-__device__ void _waveReduceMultiple(WarpMask mask, T* val)
-{
-    typedef typename ElementTypeTrait<T>::Type ElemType;    
-    _waveReduceMultiple<INTF, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)val);
-}
-
-template <typename T>
-__inline__ __device__  T _waveOr(WarpMask mask, T val) { return _waveReduceScalar<WaveOpOr<T>, T>(mask, val); }
-
-template <typename T>
-__inline__ __device__ T _waveAnd(WarpMask mask, T val) { return _waveReduceScalar<WaveOpAnd<T>, T>(mask, val); }
-
-template <typename T>
-__inline__ __device__ T _waveXor(WarpMask mask, T val) { return _waveReduceScalar<WaveOpXor<T>, T>(mask, val); }
-
-template <typename T>
-__inline__ __device__ T _waveProduct(WarpMask mask, T val) { return _waveReduceScalar<WaveOpMul<T>, T>(mask, val); }
-
-template <typename T>
-__inline__ __device__ T _waveSum(WarpMask mask, T val) { return _waveReduceScalar<WaveOpAdd<T>, T>(mask, val); }
-
-template <typename T>
-__inline__ __device__ T _waveMin(WarpMask mask, T val) { return _waveReduceScalar<WaveOpMin<T>, T>(mask, val); }
-
-template <typename T>
-__inline__ __device__ T _waveMax(WarpMask mask, T val) { return _waveReduceScalar<WaveOpMax<T>, T>(mask, val); }
-
-// Fast-path specializations when CUDA warp reduce operators are available
-#if __CUDA_ARCH__ >= 800 // 8.x or higher
-template<>
-__inline__ __device__ unsigned _waveOr<unsigned>(WarpMask mask, unsigned val) { return __reduce_or_sync(mask, val); }
-
-template<>
-__inline__ __device__ unsigned _waveAnd<unsigned>(WarpMask mask, unsigned val) { return __reduce_and_sync(mask, val); }
-
-template<>
-__inline__ __device__ unsigned _waveXor<unsigned>(WarpMask mask, unsigned val) { return __reduce_xor_sync(mask, val); }
-
-template<>
-__inline__ __device__ unsigned _waveSum<unsigned>(WarpMask mask, unsigned val) { return __reduce_add_sync(mask, val); }
-
-template<>
-__inline__ __device__ int _waveSum<int>(WarpMask mask, int val) { return __reduce_add_sync(mask, val); }
-
-template<>
-__inline__ __device__ unsigned _waveMin<unsigned>(WarpMask mask, unsigned val) { return __reduce_min_sync(mask, val); }
-
-template<>
-__inline__ __device__ int _waveMin<int>(WarpMask mask, int val) { return __reduce_min_sync(mask, val); }
-
-template<>
-__inline__ __device__ unsigned _waveMax<unsigned>(WarpMask mask, unsigned val) { return __reduce_max_sync(mask, val); }
-
-template<>
-__inline__ __device__ int _waveMax<int>(WarpMask mask, int val) { return __reduce_max_sync(mask, val); }
-#endif
-
-
-// Multiple
-
-template <typename T>
-__inline__ __device__  T _waveOrMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpOr<ElemType> >(mask, &val); return val; }
-
-template <typename T>
-__inline__ __device__  T _waveAndMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpAnd<ElemType> >(mask, &val); return val; }
-
-template <typename T>
-__inline__ __device__  T _waveXorMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpXor<ElemType> >(mask, &val); return val; }
-
-template <typename T>
-__inline__ __device__  T _waveProductMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpMul<ElemType> >(mask, &val); return val; }
-
-template <typename T>
-__inline__ __device__  T _waveSumMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpAdd<ElemType> >(mask, &val); return val; }
-
-template <typename T>
-__inline__ __device__  T _waveMinMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpMin<ElemType> >(mask, &val); return val; }
-
-template <typename T>
-__inline__ __device__  T _waveMaxMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait<T>::Type ElemType; _waveReduceMultiple<WaveOpMax<ElemType> >(mask, &val); return val; }
-
-
-template <typename T>
-__inline__ __device__ bool _waveAllEqual(WarpMask mask, T val) 
-{
-    int pred;
-    __match_all_sync(mask, val, &pred);
-    return pred != 0;
-}
-
-template <typename T>
-__inline__ __device__ bool _waveAllEqualMultiple(WarpMask mask, T inVal) 
-{
-    typedef typename ElementTypeTrait<T>::Type ElemType;
-    const size_t count = sizeof(T) / sizeof(ElemType);
-    int pred;
-    const ElemType* src = (const ElemType*)&inVal;
-    for (size_t i = 0; i < count; ++i)
-    {
-        __match_all_sync(mask, src[i], &pred);
-        if (pred == 0)
-        {
-            return false;
-        }
-    }
-    return true;
-}
-
-template <typename T>
-__inline__ __device__ T _waveReadFirst(WarpMask mask, T val) 
-{
-    const int lowestLaneId = __ffs(mask) - 1;
-    return __shfl_sync(mask, val, lowestLaneId);   
-}
-
-template <typename T>
-__inline__ __device__ T _waveReadFirstMultiple(WarpMask mask, T inVal) 
-{
-    typedef typename ElementTypeTrait<T>::Type ElemType;
-    const size_t count = sizeof(T) / sizeof(ElemType);
-    T outVal;
-    const ElemType* src = (const ElemType*)&inVal;
-    ElemType* dst = (ElemType*)&outVal;
-    const int lowestLaneId = __ffs(mask) - 1;
-    for (size_t i = 0; i < count; ++i)
-    {
-        dst[i] = __shfl_sync(mask, src[i], lowestLaneId);   
-    }
-    return outVal;
-}
-
-template <typename T>
-__inline__ __device__ T _waveShuffleMultiple(WarpMask mask, T inVal, int lane)
-{
-    typedef typename ElementTypeTrait<T>::Type ElemType;
-    const size_t count = sizeof(T) / sizeof(ElemType);
-    T outVal;
-    const ElemType* src = (const ElemType*)&inVal;
-    ElemType* dst = (ElemType*)&outVal;
-    for (size_t i = 0; i < count; ++i)
-    {
-        dst[i] = __shfl_sync(mask, src[i], lane);   
-    }
-    return outVal;
-}
-
-// Scalar 
-
-// Invertable means that when we get to the end of the reduce, we can remove val (to make exclusive), using 
-// the inverse of the op.
-template <typename INTF, typename T>
-__device__ T _wavePrefixInvertableScalar(WarpMask mask, T val)
-{
-    const int offsetSize = _waveCalcPow2Offset(mask);
-    
-    const int laneId = _getLaneId();
-    T result;
-    if (offsetSize > 0)
-    {    
-        // Sum is calculated inclusive of this lanes value
-        result = val;
-        for (int i = 1; i < offsetSize; i += i) 
-        {
-            const T readVal = __shfl_up_sync(mask, result, i, offsetSize);
-            if (laneId >= i)
-            {
-                result = INTF::doOp(result, readVal);
-            }
-        }
-        // Remove val from the result, by applyin inverse
-        result = INTF::doInverse(result, val);
-    }
-    else 
-    {
-        result = INTF::getInitial(val);
-        if (!_waveIsSingleLane(mask))
-        {
-            int remaining = mask;
-            while (remaining)
-            {
-                const int laneBit = remaining & -remaining;
-                // Get the sourceLane 
-                const int srcLane = __ffs(laneBit) - 1;
-                // Broadcast (can also broadcast to self) 
-                const T readValue = __shfl_sync(mask, val, srcLane);
-                // Only accumulate if srcLane is less than this lane
-                if (srcLane < laneId)
-                {
-                    result = INTF::doOp(result, readValue);
-                }
-                remaining &= ~laneBit;
-            }
-        }   
-    }
-    return result;
-}
- 
-
-// This implementation separately tracks the value to be propogated, and the value
-// that is the final result 
-template <typename INTF, typename T>
-__device__ T _wavePrefixScalar(WarpMask mask, T val)
-{
-    const int offsetSize = _waveCalcPow2Offset(mask);
-    
-    const int laneId = _getLaneId();
-    T result = INTF::getInitial(val);           
-    if (offsetSize > 0)
-    {    
-        // For transmitted value we will do it inclusively with this lanes value
-        // For the result we do not include the lanes value. This means an extra multiply for each iteration
-        // but means we don't need to have a divide at the end and also removes overflow issues in that scenario.
-        for (int i = 1; i < offsetSize; i += i) 
-        {
-            const T readVal = __shfl_up_sync(mask, val, i, offsetSize);
-            if (laneId >= i)
-            {
-                result = INTF::doOp(result, readVal);
-                val = INTF::doOp(val, readVal);
-            }
-        }
-    }
-    else 
-    {
-        if (!_waveIsSingleLane(mask))
-        {
-            int remaining = mask;
-            while (remaining)
-            {
-                const int laneBit = remaining & -remaining;
-                // Get the sourceLane 
-                const int srcLane = __ffs(laneBit) - 1;
-                // Broadcast (can also broadcast to self) 
-                const T readValue = __shfl_sync(mask, val, srcLane);
-                // Only accumulate if srcLane is less than this lane
-                if (srcLane < laneId)
-                {
-                    result = INTF::doOp(result, readValue);
-                }
-                remaining &= ~laneBit;
-            }
-        }
-    }
-    return result;
-}
-
-
-template <typename INTF, typename T, size_t COUNT>
-__device__ T _waveOpCopy(T* dst, const T* src)
-{
-    for (size_t j = 0; j < COUNT; ++j)
-    {
-        dst[j] = src[j];
-    }
-}    
-
-
-template <typename INTF, typename T, size_t COUNT>
-__device__ T _waveOpDoInverse(T* inOut, const T* val)
-{
-    for (size_t j = 0; j < COUNT; ++j)
-    {
-        inOut[j] = INTF::doInverse(inOut[j], val[j]);
-    }
-}    
-
-template <typename INTF, typename T, size_t COUNT>
-__device__ T _waveOpSetInitial(T* out, const T* val)
-{
-    for (size_t j = 0; j < COUNT; ++j)
-    {
-        out[j] = INTF::getInitial(val[j]);
-    }
-} 
-
-template <typename INTF, typename T, size_t COUNT>
-__device__ T _wavePrefixInvertableMultiple(WarpMask mask, T* val)
-{
-    const int offsetSize = _waveCalcPow2Offset(mask);
-    
-    const int laneId = _getLaneId();
-    T originalVal[COUNT];
-    _waveOpCopy<INTF, T, COUNT>(originalVal, val);
-    
-    if (offsetSize > 0)
-    {    
-        // Sum is calculated inclusive of this lanes value
-        for (int i = 1; i < offsetSize; i += i) 
-        {
-            // TODO(JS): Note that here I don't split the laneId outside so it's only tested once.
-            // This may be better but it would also mean that there would be shfl between lanes 
-            // that are on different (albeit identical) instructions. So this seems more likely to 
-            // work as expected with everything in lock step.
-            for (size_t j = 0; j < COUNT; ++j)
-            {
-                const T readVal = __shfl_up_sync(mask, val[j], i, offsetSize);
-                if (laneId >= i)
-                {
-                    val[j] = INTF::doOp(val[j], readVal);
-                }
-            }
-        }
-        // Remove originalVal from the result, by applyin inverse
-        _waveOpDoInverse<INTF, T, COUNT>(val, originalVal);
-    }
-    else 
-    {
-        _waveOpSetInitial<INTF, T, COUNT>(val, val);
-        if (!_waveIsSingleLane(mask))
-        {
-            int remaining = mask;
-            while (remaining)
-            {
-                const int laneBit = remaining & -remaining;
-                // Get the sourceLane 
-                const int srcLane = __ffs(laneBit) - 1;
-                
-                for (size_t j = 0; j < COUNT; ++j)
-                {
-                    // Broadcast (can also broadcast to self) 
-                    const T readValue = __shfl_sync(mask, originalVal[j], srcLane);
-                    // Only accumulate if srcLane is less than this lane
-                    if (srcLane < laneId)
-                    {
-                        val[j] = INTF::doOp(val[j], readValue);
-                    }
-                    remaining &= ~laneBit;
-                }
-            }
-        }   
-    }
-}
- 
-template <typename INTF, typename T, size_t COUNT>
-__device__ T _wavePrefixMultiple(WarpMask mask, T* val)
-{
-    const int offsetSize = _waveCalcPow2Offset(mask);
-    
-    const int laneId = _getLaneId();
-    
-    T work[COUNT];
-    _waveOpCopy<INTF, T, COUNT>(work, val);
-    _waveOpSetInitial<INTF, T, COUNT>(val, val);
-    
-    if (offsetSize > 0)
-    {    
-        // For transmitted value we will do it inclusively with this lanes value
-        // For the result we do not include the lanes value. This means an extra op for each iteration
-        // but means we don't need to have a divide at the end and also removes overflow issues in that scenario.
-        for (int i = 1; i < offsetSize; i += i) 
-        {
-            for (size_t j = 0; j < COUNT; ++j)
-            {
-                const T readVal = __shfl_up_sync(mask, work[j], i, offsetSize);
-                if (laneId >= i)
-                {
-                    work[j] = INTF::doOp(work[j], readVal);
-                    val[j] = INTF::doOp(val[j], readVal);     
-                }
-            }
-        }
-    }
-    else 
-    {
-        if (!_waveIsSingleLane(mask))
-        {
-            int remaining = mask;
-            while (remaining)
-            {
-                const int laneBit = remaining & -remaining;
-                // Get the sourceLane 
-                const int srcLane = __ffs(laneBit) - 1;
-                
-                for (size_t j = 0; j < COUNT; ++j)
-                {
-                    // Broadcast (can also broadcast to self) 
-                    const T readValue = __shfl_sync(mask, work[j], srcLane);
-                    // Only accumulate if srcLane is less than this lane
-                    if (srcLane < laneId)
-                    {
-                        val[j] = INTF::doOp(val[j], readValue);
-                    }
-                }
-                remaining &= ~laneBit;
-            }
-        }
-    }
-}
-
-template <typename T>
-__inline__ __device__ T _wavePrefixProduct(WarpMask mask, T val) { return _wavePrefixScalar<WaveOpMul<T>, T>(mask, val); }
-
-template <typename T>
-__inline__ __device__ T _wavePrefixSum(WarpMask mask, T val) { return _wavePrefixInvertableScalar<WaveOpAdd<T>, T>(mask, val); }    
-
-template <typename T>
-__inline__ __device__ T _wavePrefixXor(WarpMask mask, T val) { return _wavePrefixInvertableScalar<WaveOpXor<T>, T>(mask, val); }    
-    
-template <typename T>
-__inline__ __device__ T _wavePrefixOr(WarpMask mask, T val) { return _wavePrefixScalar<WaveOpOr<T>, T>(mask, val); }      
-    
-template <typename T>
-__inline__ __device__ T _wavePrefixAnd(WarpMask mask, T val) { return _wavePrefixScalar<WaveOpAnd<T>, T>(mask, val); }      
-    
-    
-template <typename T>
-__inline__ __device__ T _wavePrefixProductMultiple(WarpMask mask, T val)  
-{ 
-    typedef typename ElementTypeTrait<T>::Type ElemType;    
-    _wavePrefixInvertableMultiple<WaveOpMul<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val);    
-    return val;
-}
-
-template <typename T>
-__inline__ __device__ T _wavePrefixSumMultiple(WarpMask mask, T val) 
-{ 
-    typedef typename ElementTypeTrait<T>::Type ElemType;    
-    _wavePrefixInvertableMultiple<WaveOpAdd<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val);    
-    return val;
-}
-
-template <typename T>
-__inline__ __device__ T _wavePrefixXorMultiple(WarpMask mask, T val)  
-{ 
-    typedef typename ElementTypeTrait<T>::Type ElemType;    
-    _wavePrefixInvertableMultiple<WaveOpXor<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val);    
-    return val;
-}
-
-template <typename T>
-__inline__ __device__ T _wavePrefixOrMultiple(WarpMask mask, T val) 
-{ 
-    typedef typename ElementTypeTrait<T>::Type ElemType;    
-    _wavePrefixMultiple<WaveOpOr<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val);    
-    return val;
-}
-
-template <typename T>
-__inline__ __device__ T _wavePrefixAndMultiple(WarpMask mask, T val)  
-{ 
-    typedef typename ElementTypeTrait<T>::Type ElemType;    
-    _wavePrefixMultiple<WaveOpAnd<ElemType>, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val);    
-    return val;
-}
-
-template <typename T>
-__inline__ __device__ uint4 _waveMatchScalar(WarpMask mask, T val) 
-{
-    int pred;
-    return make_uint4(__match_all_sync(mask, val, &pred), 0, 0, 0);
-}
-
-template <typename T>
-__inline__ __device__ uint4 _waveMatchMultiple(WarpMask mask, const T& inVal) 
-{
-    typedef typename ElementTypeTrait<T>::Type ElemType;
-    const size_t count = sizeof(T) / sizeof(ElemType);
-    int pred;
-    const ElemType* src = (const ElemType*)&inVal;
-    uint matchBits = 0xffffffff;
-    for (size_t i = 0; i < count && matchBits; ++i)
-    {
-        matchBits = matchBits & __match_all_sync(mask, src[i], &pred);
-    }
-    return make_uint4(matchBits, 0, 0, 0);
-}
-
-__device__ uint getAt(dim3 a,  int b)
-{
-    SLANG_PRELUDE_ASSERT(b >= 0 && b < 3);
-    return (&a.x)[b];
-}
-__device__ uint3 operator*(uint3 a, dim3 b)
-{
-    uint3 r;
-    r.x = a.x * b.x;
-    r.y = a.y * b.y;
-    r.z = a.z * b.z;
-    return r;
-}
-
-template<typename TResult, typename TInput>
-__inline__ __device__ TResult slang_bit_cast(TInput val)
-{
-    return *(TResult*)(&val);
-}
-
-/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
-
-
-/* Type that defines the uniform entry point params. The actual content of this type is dependent on the entry point parameters, and can be
-found via reflection or defined such that it matches the shader appropriately.
-*/
-struct UniformEntryPointParams;
-struct UniformState;
-
-// ---------------------- OptiX Ray Payload --------------------------------------
-#ifdef SLANG_CUDA_ENABLE_OPTIX
-struct RayDesc
-{
-    float3 Origin;
-    float  TMin;
-    float3 Direction;
-    float  TMax;
-};
-
-static __forceinline__ __device__
-void *unpackOptiXRayPayloadPointer(uint32_t i0, uint32_t i1)
-{
-    const uint64_t uptr = static_cast<uint64_t>(i0) << 32 | i1;
-    void*           ptr = reinterpret_cast<void*>(uptr);
-    return ptr;
-}
-
-static __forceinline__ __device__
-void  packOptiXRayPayloadPointer(void* ptr, uint32_t& i0, uint32_t& i1)
-{
-    const uint64_t uptr = reinterpret_cast<uint64_t>(ptr);
-    i0 = uptr >> 32;
-    i1 = uptr & 0x00000000ffffffff;
-}
-
-static __forceinline__ __device__ void *getOptiXRayPayloadPtr()
-{
-    const uint32_t u0 = optixGetPayload_0();
-    const uint32_t u1 = optixGetPayload_1();
-    return unpackOptiXRayPayloadPointer(u0, u1);
-}
-
-template<typename T>
-__forceinline__ __device__ void *traceOptiXRay(
-    OptixTraversableHandle AccelerationStructure,
-    uint32_t RayFlags,
-    uint32_t InstanceInclusionMask,
-    uint32_t RayContributionToHitGroupIndex,
-    uint32_t MultiplierForGeometryContributionToHitGroupIndex,
-    uint32_t MissShaderIndex,
-    RayDesc Ray,
-    T *Payload
-) {
-    uint32_t r0, r1;
-    packOptiXRayPayloadPointer((void*)Payload, r0, r1);
-    optixTrace(
-        AccelerationStructure,
-        Ray.Origin,
-        Ray.Direction,
-        Ray.TMin,
-        Ray.TMax,
-        0.f, /* Time for motion blur, currently unsupported in slang */
-        InstanceInclusionMask,
-        RayFlags,
-        RayContributionToHitGroupIndex,
-        MultiplierForGeometryContributionToHitGroupIndex,
-        MissShaderIndex,
-        r0, r1
-    );
-}
-
-#endif
-
-static const int kSlangTorchTensorMaxDim = 5;
-
-// TensorView
-struct TensorView
-{
-    uint8_t* data;
-    uint32_t strides[kSlangTorchTensorMaxDim];
-    uint32_t sizes[kSlangTorchTensorMaxDim];
-    uint32_t dimensionCount;
-
-    template<typename T>
-    __device__ T* data_ptr()
-    {
-        return reinterpret_cast<T*>(data);
-    }
-
-    template<typename T>
-    __device__ T* data_ptr_at(uint32_t index)
-    {
-        uint64_t offset = strides[0] * index;
-        return reinterpret_cast<T*>(data + offset);
-    }
-
-    template<typename T>
-    __device__ T* data_ptr_at(uint2 index)
-    {
-        uint64_t offset = strides[0] * index.x + strides[1] * index.y;
-        return reinterpret_cast<T*>(data + offset);
-    }
-
-    template<typename T>
-    __device__ T* data_ptr_at(uint3 index)
-    {
-        uint64_t offset = strides[0] * index.x + strides[1] * index.y + strides[2] * index.z;
-        return reinterpret_cast<T*>(data + offset);
-    }
-
-    template<typename T>
-    __device__ T* data_ptr_at(uint4 index)
-    {
-        uint64_t offset = strides[0] * index.x + strides[1] * index.y + strides[2] * index.z + strides[3] * index.w;
-        return reinterpret_cast<T*>(data + offset);
-    }
-
-    template<typename T, unsigned int N>
-    __device__ T* data_ptr_at(uint index[N])
-    {
-        uint64_t offset = 0;
-        for (unsigned int i = 0; i < N; ++i)
-        {
-            offset += strides[i] * index[i];
-        }
-        return reinterpret_cast<T*>(data + offset);
-    }
-
-    template<typename T>
-    __device__ T& load(uint32_t x)
-    {
-        return *reinterpret_cast<T*>(data + strides[0] * x);
-    }
-    template<typename T>
-    __device__ T& load(uint32_t x, uint32_t y)
-    {
-        return *reinterpret_cast<T*>(data + strides[0] * x + strides[1] * y);
-    }
-    template<typename T>
-    __device__ T& load(uint2 index)
-    {
-        return *reinterpret_cast<T*>(data + strides[0] * index.x + strides[1] * index.y);
-    }
-    template<typename T>
-    __device__ T& load(uint32_t x, uint32_t y, uint32_t z)
-    {
-        return *reinterpret_cast<T*>(data + strides[0] * x + strides[1] * y + strides[2] * z);
-    }
-    template<typename T>
-    __device__ T& load(uint3 index)
-    {
-        return *reinterpret_cast<T*>(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z);
-    }
-    template<typename T>
-    __device__ T& load(uint32_t x, uint32_t y, uint32_t z, uint32_t w)
-    {
-        return *reinterpret_cast<T*>(data + strides[0] * x + strides[1] * y + strides[2] * z + strides[3] * w);
-    }
-    template<typename T>
-    __device__ T& load(uint4 index)
-    {
-        return *reinterpret_cast<T*>(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z + strides[3] * index.w);
-    }
-    template<typename T>
-    __device__ T& load(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4)
-    {
-        return *reinterpret_cast<T*>(data + strides[0] * i0 + strides[1] * i1 + strides[2] * i2 + strides[3] * i3 + strides[4] * i4);
-    }
-
-    // Generic version of load
-    template<typename T, unsigned int N>
-    __device__ T& load(uint index[N])
-    {
-        uint64_t offset = 0;
-        for (unsigned int i = 0; i < N; ++i)
-        {
-            offset += strides[i] * index[i];
-        }
-        return *reinterpret_cast<T*>(data + offset);
-    }
-
-    template<typename T>
-    __device__ void store(uint32_t x, T val)
-    {
-        *reinterpret_cast<T*>(data + strides[0] * x) = val;
-    }
-    template<typename T>
-    __device__ void store(uint32_t x, uint32_t y, T val)
-    {
-        *reinterpret_cast<T*>(data + strides[0] * x + strides[1] * y) = val;
-    }
-    template<typename T>
-    __device__ void store(uint2 index, T val)
-    {
-        *reinterpret_cast<T*>(data + strides[0] * index.x + strides[1] * index.y) = val;
-    }
-    template<typename T>
-    __device__ void store(uint32_t x, uint32_t y, uint32_t z, T val)
-    {
-        *reinterpret_cast<T*>(data + strides[0] * x + strides[1] * y + strides[2] * z) = val;
-    }
-    template<typename T>
-    __device__ void store(uint3 index, T val)
-    {
-        *reinterpret_cast<T*>(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z) = val;
-    }
-    template<typename T>
-    __device__ void store(uint32_t x, uint32_t y, uint32_t z, uint32_t w, T val)
-    {
-        *reinterpret_cast<T*>(
-            data + strides[0] * x + strides[1] * y + strides[2] * z + strides[3] * w) = val;
-    }
-    template<typename T>
-    __device__ void store(uint4 index, T val)
-    {
-        *reinterpret_cast<T*>(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z + strides[3] * index.w) = val;
-    }
-    template<typename T>
-    __device__ void store(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, T val)
-    {
-        *reinterpret_cast<T*>(data + strides[0] * i0 + strides[1] * i1 + strides[2] * i2 + strides[3] * i3 + strides[4] * i4) = val;
-    }
-
-    // Generic version
-    template<typename T, unsigned int N>
-    __device__ void store(uint index[N], T val)
-    {
-        uint64_t offset = 0;
-        for (unsigned int i = 0; i < N; ++i)
-        {
-            offset += strides[i] * index[i];
-        }
-        *reinterpret_cast<T*>(data + offset) = val;
-    }
-};
diff --git a/external/slang/prelude/slang-hlsl-prelude.h b/external/slang/prelude/slang-hlsl-prelude.h
deleted file mode 100644
index d892f228..00000000
--- a/external/slang/prelude/slang-hlsl-prelude.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifdef SLANG_HLSL_ENABLE_NVAPI
-#include "nvHLSLExtns.h"
-#endif
-
-#ifndef __DXC_VERSION_MAJOR
-    // warning X3557: loop doesn't seem to do anything, forcing loop to unroll
-    #pragma warning(disable: 3557)
-#endif
diff --git a/external/slang/docs/64bit-type-support.md b/external/slang/share/doc/slang/64bit-type-support.md
similarity index 72%
rename from external/slang/docs/64bit-type-support.md
rename to external/slang/share/doc/slang/64bit-type-support.md
index 506e0549..15faccd8 100644
--- a/external/slang/docs/64bit-type-support.md
+++ b/external/slang/share/doc/slang/64bit-type-support.md
@@ -5,9 +5,10 @@ Slang 64-bit Type Support
 
 * Not all targets support 64 bit types, or all 64 bit types 
   * 64 bit integers generally require later APIs/shader models
-* When specifying 64 bit literals *always* use the type suffixes (ie `L`, `ULL`, `LL`) 
+* When specifying 64 bit floating-point literals *always* use the type suffix (i.e. `L`)
+* An integer literal will be interpreted as 64 bits if it cannot fit in a 32 bit value.
 * GPU target/s generally do not support all double intrinsics 
-  * Typically missing are trascendentals (sin, cos etc), logarithm and exponental functions
+  * Typically missing are transcendentals (sin, cos etc), logarithm and exponential functions
   * CUDA is the exception supporting nearly all double intrinsics
 * D3D 
   * D3D targets *appear* to support double intrinsics (like sin, cos, log etc), but behind the scenes they are actually being converted to float
@@ -20,15 +21,15 @@ Overview
 
 The Slang language supports 64 bit built in types. Such as
 
-* double
-* uint64_t
-* int64_t
+* `double`
+* `uint64_t`
+* `int64_t`
 
 This also applies to vector and matrix versions of these types. 
 
-Unfortunately if a specific target supports the type or the typical HLSL instrinsic functions (such as sin/cos/max/min etc) depends very much on the target. 
+Unfortunately, whether a specific target supports the type or the typical HLSL intrinsic functions (such as sin/cos/max/min etc) depends very much on the target.
 
-Special attention has to be made with respect to literal 64 bit types. By default float and integer literals if they do not have an explicit suffix are assumed to be 32 bit. There is a variety of reasons for this design choice - the main one being around by default behavior of getting good performance. The suffixes required for 64 bit types are as follows
+Special attention has to be paid to 64 bit literals. By default, float literals that do not have an explicit suffix are assumed to be 32 bit. There are a variety of reasons for this design choice - the main one being that the default behavior should favor good performance. The suffixes required for 64 bit types are as follows
 
 ```
 // double - 'l' or 'L'
@@ -40,20 +41,12 @@ double b = 1.34e-200;
 // int64_t - 'll' or 'LL' (or combination of upper/lower)
 
 int64_t c = -5436365345345234ll;
-// WRONG!: This is the same as d = int64_t(int32_t(-5436365345345234)) which means d ! = -5436365345345234LL. 
-// Will produce a warning.
-int64_t d = -5436365345345234;      
 
 int64_t e = ~0LL;       // Same as 0xffffffffffffffff
-// Does produce the same result as 'e' because equivalent int64_t(~int32_t(0))
-int64_t f = ~0;         
 
 // uint64_t - 'ull' or 'ULL' (or combination of upper/lower)
 
 uint64_t g = 0x8000000000000000ull; 
-// WRONG!: This is the same as h = uint64_t(uint32_t(0x8000000000000000)) which means h = 0
-// Will produce a warning.
-uint64_t h = 0x8000000000000000u;   
 
 uint64_t i = ~0ull;       // Same as 0xffffffffffffffff
 uint64_t j = ~0;          // Equivalent to 'i' because uint64_t(int64_t(~int32_t(0)));
@@ -61,6 +54,34 @@ uint64_t j = ~0;          // Equivalent to 'i' because uint64_t(int64_t(~int32_t
 
 These issues are discussed more on issue [#1185](https://github.com/shader-slang/slang/issues/1185)
 
+The type of a decimal non-suffixed integer literal is the first integer type from the list [`int`, `int64_t`]
+which can represent the specified literal value. If the value cannot fit, the literal is represented as a `uint64_t`
+and a warning is given.
+The type of a hexadecimal non-suffixed integer literal is the first type from the list [`int`, `uint`, `int64_t`, `uint64_t`]
+that can represent the specified literal value. A non-suffixed integer literal will be 64 bit if it cannot fit in 32 bits.
+```
+// Same as int64_t a = int(1), the value can fit into a 32 bit integer.
+int64_t a = 1;
+
+// Same as int64_t b = int64_t(2147483648), the value cannot fit into a 32 bit integer.
+int64_t b = 2147483648;
+
+// Same as uint64_t c = uint64_t(18446744073709551615), the value is larger than the maximum value of a signed 64 bit
+// integer, and is interpreted as an unsigned 64 bit integer. Warning is given.
+uint64_t c = 18446744073709551615;
+
+// Same as uint64_t d = int(0x7FFFFFFF), the value can fit into a 32 bit integer.
+uint64_t d = 0x7FFFFFFF;
+
+// Same as uint64_t e = int64_t(0x7FFFFFFFFFFFFFFF), the value cannot fit into an unsigned 32 bit integer but
+// can fit into a signed 64 bit integer.
+uint64_t e = 0x7FFFFFFFFFFFFFFF;
+
+// Same as uint64_t f = uint64_t(0xFFFFFFFFFFFFFFFF), the value cannot fit into a signed 64 bit integer, and
+// is interpreted as an unsigned 64 bit integer.
+uint64_t f = 0xFFFFFFFFFFFFFFFF;
+```
+
 Double support
 ==============
 
@@ -107,7 +128,7 @@ On dxc the following intrinsics are available with double::
 
 These are tested in the test `tests/hlsl-intrinsic/scalar-double-d3d-intrinsic.slang`.
 
-There is no suport for transcendentals (`sin`, `cos` etc) or `log`/`exp`. More surprising is that`sqrt`, `rsqrt`, `frac`, `ceil`, `floor`, `trunc`, `step`, `lerp`, `smoothstep` are also not supported.
+There is no support for transcendentals (`sin`, `cos` etc) or `log`/`exp`. More surprising is that `sqrt`, `rsqrt`, `frac`, `ceil`, `floor`, `trunc`, `step`, `lerp`, `smoothstep` are also not supported.
 
 uint64_t and int64_t Support
 ============================
@@ -125,8 +146,8 @@ D3D12    | FXC/DXBC         |      No        |          No        |   2
 
 2) uint64_t support requires https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12, so DXBC is not a target.
 
-The intrinsics available on uint64_t type are `abs`, `min`, `max`, `clamp` and `countbits`.
-The intrinsics available on uint64_t type are `abs`, `min`, `max` and `clamp`.
+The intrinsics available on `uint64_t` type are `abs`, `min`, `max`, `clamp` and `countbits`.
+The intrinsics available on `int64_t` type are `abs`, `min`, `max` and `clamp`.
 
 GLSL
 ====
diff --git a/external/slang/docs/README.md b/external/slang/share/doc/slang/README.md
similarity index 89%
rename from external/slang/docs/README.md
rename to external/slang/share/doc/slang/README.md
index b53060a2..ca6a3ddf 100644
--- a/external/slang/docs/README.md
+++ b/external/slang/share/doc/slang/README.md
@@ -9,7 +9,7 @@ Getting Started
 
 The Slang [User's Guide](https://shader-slang.github.io/slang/user-guide/) provides an introduction to the Slang language and its major features, as well as the compilation and reflection API.
 
-There is also documentation specific to using the [`slangc`](command-line-slangc.md) command-line tool.
+There is also documentation specific to using the [slangc](https://shader-slang.github.io/slang/user-guide/compiling.html#command-line-compilation-with-slangc) command-line tool.
 
 Advanced Users
 --------------
@@ -19,7 +19,7 @@ The [target compatibility guide](target-compatibility.md) gives an overview of f
 
 The [CPU target guide](cpu-target.md) gives information on compiling Slang or C++ source into shared libraries/executables or functions that can be directly executed. It also covers how to generate C++ code from Slang source.  
 
-The [CUDA target guide](cuda-target.md) provides information on compiling Slang/HLSL or CUDA source. Slang can compile to equivalent CUDA source, as well as to PTX via the nvrtc CUDA complier.
+The [CUDA target guide](cuda-target.md) provides information on compiling Slang/HLSL or CUDA source. Slang can compile to equivalent CUDA source, as well as to PTX via the nvrtc CUDA compiler.
 
 Contributors
 ------------
diff --git a/external/slang/share/doc/slang/_config.yml b/external/slang/share/doc/slang/_config.yml
new file mode 100644
index 00000000..72d781d7
--- /dev/null
+++ b/external/slang/share/doc/slang/_config.yml
@@ -0,0 +1 @@
+theme: jekyll-theme-tactile
diff --git a/external/slang/share/doc/slang/_includes/anchor_headings.html b/external/slang/share/doc/slang/_includes/anchor_headings.html
new file mode 100644
index 00000000..3df42b6a
--- /dev/null
+++ b/external/slang/share/doc/slang/_includes/anchor_headings.html
@@ -0,0 +1,137 @@
+{% capture headingsWorkspace %}
+{% comment %}
+Copyright (c) 2018 Vladimir "allejo" Jimenez
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+{% endcomment %}
+{% comment %}
+Version 1.0.9
+https://github.com/allejo/jekyll-anchor-headings
+
+"Be the pull request you wish to see in the world." ~Ben Balter
+
+Usage:
+{% include anchor_headings.html html=content anchorBody="#" %}
+
+Parameters:
+* html (string) - the HTML of compiled markdown generated by kramdown in Jekyll
+
+Optional Parameters:
+* beforeHeading (bool) : false - Set to true if the anchor should be placed _before_ the heading's content
+* headerAttrs (string) : '' - Any custom HTML attributes that will be added to the heading tag; you may NOT use `id`;
+the `%heading%` and `%html_id%` placeholders are available
+* anchorAttrs (string) : '' - Any custom HTML attributes that will be added to the `<a>` tag; you may NOT use `href`,
+    `class` or `title`;
+    the `%heading%` and `%html_id%` placeholders are available
+    * anchorBody (string) : '' - The content that will be placed inside the anchor; the `%heading%` placeholder is
+    available
+    * anchorClass (string) : '' - The class(es) that will be used for each anchor. Separate multiple classes with a
+    space
+    * anchorTitle (string) : '' - The `title` attribute that will be used for anchors
+    * h_min (int) : 1 - The minimum header level to build an anchor for; any header lower than this value will be
+    ignored
+    * h_max (int) : 2 - The maximum header level to build an anchor for; any header greater than this value will be
+    ignored
+    * bodyPrefix (string) : '' - Anything that should be inserted inside of the heading tag _before_ its anchor and
+    content
+    * bodySuffix (string) : '' - Anything that should be inserted inside of the heading tag _after_ its anchor and
+    content
+
+    Output:
+    The original HTML with the addition of anchors inside of all of the h1-h6 headings.
+    {% endcomment %}
+
+    {% assign minHeader = include.h_min | default: 1 %}
+    {% assign maxHeader = include.h_max | default: 2 %}
+    {% assign beforeHeading = include.beforeHeading %}
+    {% assign nodes = include.html | split: '<h' %} {% capture edited_headings %}{% endcapture %} {% for _node in nodes
+        %} {% capture node %}{{ _node | strip }}{% endcapture %} {% if node=="" %} {% continue %} {% endif %} {% assign
+        nextChar=node | replace: '"' , '' | strip | slice: 0, 1 %} {% assign headerLevel=nextChar | times: 1 %} <!-- If
+        the level is cast to 0, it means it's not a h1-h6 tag, so let's see if we need to fix it -->
+        {% if headerLevel == 0 %}
+        <!-- Split up the node based on closing angle brackets and get the first one. -->
+        {% assign firstChunk = node | split: '>' | first %}
+
+        <!-- If the first chunk does NOT contain a '<', that means we've broken another HTML tag that starts with 'h' -->
+        {% unless firstChunk contains '<' %} {% capture node %}<h{{ node }}{% endcapture %} {% endunless %} {% capture
+            edited_headings %}{{ edited_headings }}{{ node }}{% endcapture %} {% continue %} {% endif %} {% capture
+            _closingTag %}</h{{ headerLevel }}>{% endcapture %}
+            {% assign _workspace = node | split: _closingTag %}
+            {% assign _idWorkspace = _workspace[0] | split: 'id="' %}
+            {% assign _idWorkspace = _idWorkspace[1] | split: '"' %}
+            {% assign html_id = _idWorkspace[0] %}
+
+            {% capture _hAttrToStrip %}{{ _workspace[0] | split: '>' | first }}>{% endcapture %}
+            {% assign header = _workspace[0] | replace: _hAttrToStrip, '' %}
+
+            <!-- Build the anchor to inject for our heading -->
+            {% capture anchor %}{% endcapture %}
+
+            {% if html_id and headerLevel >= minHeader and headerLevel <= maxHeader %} {% assign escaped_header=header |
+                strip_html %} {% if include.headerAttrs %} {% capture _hAttrToStrip %}{{ _hAttrToStrip | split: '>' |
+                first }} {{ include.headerAttrs | replace: '%heading%' , escaped_header | replace: '%html_id%' , html_id
+                }}>{% endcapture %}
+                {% endif %}
+
+                {% capture anchor %}href="#{{ html_id }}"{% endcapture %}
+
+                {% if include.anchorClass %}
+                {% capture anchor %}{{ anchor }} class="{{ include.anchorClass }}"{% endcapture %}
+                {% endif %}
+
+                {% if include.anchorTitle %}
+                {% capture anchor %}{{ anchor }} title="{{ include.anchorTitle | replace: '%heading%', escaped_header
+                }}"{% endcapture %}
+                {% endif %}
+
+                {% if include.anchorAttrs %}
+                {% capture anchor %}{{ anchor }} {{ include.anchorAttrs | replace: '%heading%', escaped_header |
+                replace: '%html_id%', html_id }}{% endcapture %}
+                {% endif %}
+
+                {% capture anchor %}<a {{ anchor }}>{{ include.anchorBody | replace: '%heading%', escaped_header |
+                    default: '' }}</a>{% endcapture %}
+
+                <!-- In order to prevent adding extra space after a heading, we'll let the 'anchor' value contain it -->
+                {% if beforeHeading %}
+                {% capture anchor %}{{ anchor }} {% endcapture %}
+                {% else %}
+                {% capture anchor %} {{ anchor }}{% endcapture %}
+                {% endif %}
+                {% endif %}
+
+                {% capture new_heading %}
+                <h{{ _hAttrToStrip }} {{ include.bodyPrefix }} {% if beforeHeading %} {{ anchor }}{{ header }} {% else
+                    %} {{ header }}{{ anchor }} {% endif %} {{ include.bodySuffix }} </h{{ headerLevel }}>
+                    {% endcapture %}
+
+                    <!--
+    If we have content after the `</hX>` tag, then we'll want to append that here so we don't lost any content.
+    -->
+                    {% assign chunkCount = _workspace | size %}
+                    {% if chunkCount > 1 %}
+                    {% capture new_heading %}{{ new_heading }}{{ _workspace | last }}{% endcapture %}
+                    {% endif %}
+
+                    {% capture edited_headings %}{{ edited_headings }}{{ new_heading }}{% endcapture %}
+                    {% endfor %}
+                    {% endcapture %}{% assign headingsWorkspace = '' %}{{ edited_headings | strip }}
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/_layouts/deprecated.html b/external/slang/share/doc/slang/_layouts/deprecated.html
new file mode 100644
index 00000000..f9d8b0b5
--- /dev/null
+++ b/external/slang/share/doc/slang/_layouts/deprecated.html
@@ -0,0 +1,225 @@
+<!DOCTYPE html>
+<html lang="{{ site.lang | default: 'en-US' }}">
+
+<head>
+  <meta charset='utf-8'>
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <link rel="stylesheet" href="{{ '/assets/css/style.css?v=' | append: site.github.build_revision | relative_url }}">
+  <link rel="stylesheet" type="text/css" href="{{ '/assets/css/print.css' | relative_url }}" media="print">
+  <script async src="https://www.googletagmanager.com/gtag/js?id=G-TMTZVLLMBP"></script>
+  <script>
+    window.dataLayer = window.dataLayer || [];
+    function gtag(){dataLayer.push(arguments);}
+    gtag('js', new Date());
+    gtag('config', 'G-TMTZVLLMBP');
+  </script>
+  <!--[if lt IE 9]>
+    <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
+    <![endif]-->
+  <style>
+    #centeringDiv {
+      margin: auto;
+      max-width: 1200px;
+    }
+    #navDiv
+    {
+      display: block;
+      box-sizing: border-box;
+      padding-top: 5px;
+      padding-bottom: 5px;
+      border-bottom-width: 3px;
+      border-bottom-style: solid;
+      border-bottom-color: #F0F0F0;
+    }
+    #navDiv nav
+    {
+      float:left;
+    }
+    #navDiv::after {
+      content: "";
+      clear: both;
+      display: table;
+    }
+    #navDiv nav li::after
+    {
+      content: "/";
+      padding-left: 10px;
+      padding-right: 0px;
+      color: #808080;
+    }
+    #navDiv nav li
+    {
+      display:inline;
+      padding-left: 10px;
+      padding-right: 0px;
+    }
+    #tocColumn {
+      width: 350px;
+      position: fixed;
+      overflow-y: auto;
+      box-sizing: border-box;
+      display: block;
+    }
+
+    #tocInner {
+      padding: 20px;
+    }
+
+    #rightColumn {
+      padding-left: 390px;
+      padding-right: 40px;
+      padding-top: 20px;
+    }
+
+    .toc_root_list {
+      list-style-type: none;
+      list-style-position: outside;
+      background-color: initial;
+      padding-left: 0px;
+    }
+    .toc_list {
+        padding-left: 16px;
+        background-color: initial;
+        list-style-type: none;
+        margin-bottom: 0px;
+    }
+    .toc_item {
+        cursor: pointer;
+        user-select: none;
+        list-style-type: none;
+        padding-left: 0px;
+        padding-top: 5px;
+    }
+    .toc_item_expanded::before {
+        content: "\25be";
+        cursor: pointer;
+    }
+    .toc_item_collapsed::before {
+        content: "\25b8";
+        cursor: pointer;
+    }
+    .toc_item_leaf {
+        padding-left: 14px;
+        cursor: pointer;
+        list-style-type: none;
+    }
+    .toc_span:hover
+    {
+      color: #d5000d;
+    }
+    .tocIcon
+    {
+      vertical-align: -2.5px;
+    }
+    .editButton
+    {
+      float: right;
+      margin-right: 10px;
+      color:#808080;
+    }
+    .editIcon
+    {
+      fill: currentColor;
+      vertical-align: text-top;
+    }
+    #btnToggleTOC {
+      display: none;
+      width: fit-content;
+      margin-left: 10px;
+      margin-top: 10px;
+      padding: 10px;
+      border-style: solid;
+      border-color: #808080;
+      border-width: 1px;
+      background-color: #E8E8E8;
+    }
+    #btnToggleTOC:hover {
+      background-color: #F0F0E8;
+    }
+    #btnToggleTOC:active {
+      background-color: #D4D4D4;
+    }
+    @media screen and (max-width: 900px) {
+      #tocColumn {
+        width: 300px;
+        display: block;
+        box-sizing: border-box;
+      }
+      #rightColumn {
+        padding-left: 320px;
+        padding-right: 20px;
+      }
+    }
+
+    @media screen and (max-width: 700px) {
+      #tocColumn {
+        width: 100%;
+        position: static;
+        display: none;
+        border-right-style: none;
+        box-sizing: content-box;
+      }
+      #tocInner {
+        padding: 10px;
+      }
+      #rightColumn {
+        padding-left: 10px;
+        padding-right: 10px;
+      }
+      #centeringDiv {
+         padding-left: 0px;
+      }
+      #btnToggleTOC {
+        display: block;
+      }
+    }
+  </style>
+  {% seo %}
+</head>
+
+<body>
+  <div id="centeringDiv">
+    <div id="navDiv">
+    <a class="editButton" title="Edit this page" href="https://github.com/{{ site.github.repository_nwo }}/edit/master/docs/{{ page.path }}">
+      <svg class="editIcon" height="16" viewBox="0 0 16 16" version="1.1" width="16" aria-hidden="true">
+        <path fill-rule="evenodd"
+          d="M11.013 1.427a1.75 1.75 0 012.474 0l1.086 1.086a1.75 1.75 0 010 2.474l-8.61 8.61c-.21.21-.47.364-.756.445l-3.251.93a.75.75 0 01-.927-.928l.929-3.25a1.75 1.75 0 01.445-.758l8.61-8.61zm1.414 1.06a.25.25 0 00-.354 0L10.811 3.75l1.439 1.44 1.263-1.263a.25.25 0 000-.354l-1.086-1.086zM11.189 6.25L9.75 4.81l-6.286 6.287a.25.25 0 00-.064.108l-.558 1.953 1.953-.558a.249.249 0 00.108-.064l6.286-6.286z">
+        </path>
+      </svg>
+    </a>
+    </div>
+    <div id="rightColumn">
+        <section id="main_content">
+          {% include anchor_headings.html html=content anchorBody="" %}
+        </section>
+        <a href="javascript:;" id="_content_end_"></a>
+        <footer>
+          {% if site.github.is_project_page %}
+          {{ site.title | default: site.github.repository_name }} is maintained by <a
+            href="{{ site.github.owner_url }}">{{ site.github.owner_name }}</a><br>
+          {% endif %}
+          This page was generated by <a href="https://pages.github.com">GitHub Pages</a>.
+        </footer>
+      </div>
+    </div>
+  <script type="text/x-mathjax-config">
+    MathJax.Hub.Config({
+      tex2jax: {
+        inlineMath: [ ['$$','$$'], ["\\(","\\)"] ],
+        displayMath: [ ['$$','$$'], ["\\(","\\)"] ],
+      },
+      TeX: {
+        Macros: {
+          bra: ["\\langle{#1}|", 1],
+          ket: ["|{#1}\\rangle", 1],
+          braket: ["\\langle{#1}\\rangle", 1],
+          bk: ["\\langle{#1}|{#2}|{#3}\\rangle", 3]
+       }
+     }
+    });
+  </script>
+  <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+</body>
+
+</html>
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/_layouts/user-guide.html b/external/slang/share/doc/slang/_layouts/user-guide.html
new file mode 100644
index 00000000..347eb283
--- /dev/null
+++ b/external/slang/share/doc/slang/_layouts/user-guide.html
@@ -0,0 +1,417 @@
+<!DOCTYPE html>
+<html lang="{{ site.lang | default: 'en-US' }}">
+
+<head>
+  <meta charset='utf-8'>
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <link rel="stylesheet" href="{{ '/assets/css/style.css?v=' | append: site.github.build_revision | relative_url }}">
+  <link rel="stylesheet" type="text/css" href="{{ '/assets/css/print.css' | relative_url }}" media="print">
+  <script async src="https://www.googletagmanager.com/gtag/js?id=G-TMTZVLLMBP"></script>
+  <script>
+    window.dataLayer = window.dataLayer || [];
+    function gtag(){dataLayer.push(arguments);}
+    gtag('js', new Date());
+    gtag('config', 'G-TMTZVLLMBP');
+  </script>
+  <!--[if lt IE 9]>
+    <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
+    <![endif]-->
+  <style>
+    #centeringDiv {
+      margin: auto;
+      max-width: 1200px;
+    }
+    #navDiv
+    {
+      display: block;
+      box-sizing: border-box;
+      padding-top: 5px;
+      padding-bottom: 5px;
+      border-bottom-width: 3px;
+      border-bottom-style: solid;
+      border-bottom-color: #F0F0F0;
+    }
+    #navDiv nav
+    {
+      float:left;
+    }
+    #navDiv::after {
+      content: "";
+      clear: both;
+      display: table;
+    }
+    #navDiv nav li::after
+    {
+      content: "/";
+      padding-left: 10px;
+      padding-right: 0px;
+      color: #808080;
+    }
+    #navDiv nav li
+    {
+      display:inline;
+      padding-left: 10px;
+      padding-right: 0px;
+    }
+    #tocColumn {
+      width: 350px;
+      position: fixed;
+      overflow-y: auto;
+      box-sizing: border-box;
+      display: block;
+    }
+
+    #tocInner {
+      padding: 20px;
+    }
+
+    #rightColumn {
+      padding-left: 390px;
+      padding-right: 40px;
+      padding-top: 20px;
+    }
+
+    .toc_root_list {
+      list-style-type: none;
+      list-style-position: outside;
+      background-color: initial;
+      padding-left: 0px;
+    }
+    .toc_list {
+        padding-left: 16px;
+        background-color: initial;
+        list-style-type: none;
+        margin-bottom: 0px;
+    }
+    .toc_item {
+        cursor: pointer;
+        user-select: none;
+        list-style-type: none;
+        padding-left: 0px;
+        padding-top: 5px;
+    }
+    .toc_item_expanded::before {
+        content: "\25be";
+        cursor: pointer;
+    }
+    .toc_item_collapsed::before {
+        content: "\25b8";
+        cursor: pointer;
+    }
+    .toc_item_leaf {
+        padding-left: 14px;
+        cursor: pointer;
+        list-style-type: none;
+    }
+    .toc_span:hover
+    {
+      color: #d5000d;
+    }
+    .tocIcon
+    {
+      vertical-align: -2.5px;
+    }
+    .editButton
+    {
+      float: right;
+      margin-right: 10px;
+      color:#808080;
+    }
+    .editIcon
+    {
+      fill: currentColor;
+      vertical-align: text-top;
+    }
+    #btnToggleTOC {
+      display: none;
+      width: fit-content;
+      margin-left: 10px;
+      margin-top: 10px;
+      padding: 10px;
+      border-style: solid;
+      border-color: #808080;
+      border-width: 1px;
+      background-color: #E8E8E8;
+    }
+    #btnToggleTOC:hover {
+      background-color: #F0F0E8;
+    }
+    #btnToggleTOC:active {
+      background-color: #D4D4D4;
+    }
+    @media screen and (max-width: 900px) {
+      #tocColumn {
+        width: 300px;
+        display: block;
+        box-sizing: border-box;
+      }
+      #rightColumn {
+        padding-left: 320px;
+        padding-right: 20px;
+      }
+    }
+
+    @media screen and (max-width: 700px) {
+      #tocColumn {
+        width: 100%;
+        position: static;
+        display: none;
+        border-right-style: none;
+        box-sizing: content-box;
+      }
+      #tocInner {
+        padding: 10px;
+      }
+      #rightColumn {
+        padding-left: 10px;
+        padding-right: 10px;
+      }
+      #centeringDiv {
+         padding-left: 0px;
+      }
+      #btnToggleTOC {
+        display: block;
+      }
+    }
+  </style>
+  {% seo %}
+</head>
+
+<body>
+  <div id="centeringDiv">
+    <div id="navDiv">
+    {% include_relative nav.html %}
+    <a class="editButton" title="Edit this page" href="https://github.com/{{ site.github.repository_nwo }}/edit/master/docs/{{ page.path }}">
+      <svg class="editIcon" height="16" viewBox="0 0 16 16" version="1.1" width="16" aria-hidden="true">
+        <path fill-rule="evenodd"
+          d="M11.013 1.427a1.75 1.75 0 012.474 0l1.086 1.086a1.75 1.75 0 010 2.474l-8.61 8.61c-.21.21-.47.364-.756.445l-3.251.93a.75.75 0 01-.927-.928l.929-3.25a1.75 1.75 0 01.445-.758l8.61-8.61zm1.414 1.06a.25.25 0 00-.354 0L10.811 3.75l1.439 1.44 1.263-1.263a.25.25 0 000-.354l-1.086-1.086zM11.189 6.25L9.75 4.81l-6.286 6.287a.25.25 0 00-.064.108l-.558 1.953 1.953-.558a.249.249 0 00.108-.064l6.286-6.286z">
+        </path>
+      </svg>
+    </a>
+    </div>
+    <button id="btnToggleTOC" onclick="toggleTOC()">
+      <svg height="16" class="tocIcon" viewBox="0 0 16 16" version="1.1" width="16" aria-hidden="true">
+        <path fill-rule="evenodd"
+          d="M2 4a1 1 0 100-2 1 1 0 000 2zm3.75-1.5a.75.75 0 000 1.5h8.5a.75.75 0 000-1.5h-8.5zm0 5a.75.75 0 000 1.5h8.5a.75.75 0 000-1.5h-8.5zm0 5a.75.75 0 000 1.5h8.5a.75.75 0 000-1.5h-8.5zM3 8a1 1 0 11-2 0 1 1 0 012 0zm-1 6a1 1 0 100-2 1 1 0 000 2z">
+        </path>
+      </svg>
+      Table of Contents</button>
+    <div id="tocColumn">
+      <div id="tocInner">
+        {% include_relative toc.html %}
+      </div>
+    </div>
+    <div id="rightColumn">
+        <section id="main_content">
+          {% include anchor_headings.html html=content anchorBody="" %}
+        </section>
+        <a href="javascript:;" id="_content_end_"></a>
+        <footer>
+          {% if site.github.is_project_page %}
+          {{ site.title | default: site.github.repository_name }} is maintained by <a
+            href="{{ site.github.owner_url }}">{{ site.github.owner_name }}</a><br>
+          {% endif %}
+          This page was generated by <a href="https://pages.github.com">GitHub Pages</a>.
+        </footer>
+      </div>
+    </div>
+  <script>
+    // Polyfill for IE: define String.prototype.startsWith only when the browser does not provide it.
+    if (!String.prototype.startsWith)
+    {
+      String.prototype.startsWith = function (searchString, position) {
+        position = position || 0; // an omitted (or otherwise falsy) position means "start of the string"
+        return this.indexOf(searchString, position) === position; // true only when the match begins exactly at `position`
+      };
+    }
+
+    var tocColumn = document.getElementById("tocColumn");
+    var rightColumn = document.getElementById("rightColumn");
+    function updateScroll()
+    {
+      if (window.innerWidth < 700)
+      {
+        tocColumn.style.height = "";
+        return;
+      }
+      var top = Math.max(0, rightColumn.getBoundingClientRect().top);
+      tocColumn.style.top = top + "px";
+      tocColumn.style.height = (window.innerHeight-top) + "px";
+    }
+    function updatePosition()
+    {
+      if (window.innerWidth > 700)
+        tocColumn.style.display = "";
+      tocColumn.style.left = rightColumn.getBoundingClientRect().left + "px";
+      updateScroll();
+    }
+    window.addEventListener("resize", updatePosition);
+    updatePosition();
+
+    var tocItemsArray = [];
+    var subSectionItems = [];
+    var selectedItem = null;
+    function toggleTOC() {
+      var tocColumn = document.getElementById("tocColumn");
+      if (tocColumn.style.display == "block")
+        tocColumn.style.display = "none";
+      else
+        tocColumn.style.display = "block";
+      event.stopPropagation();
+    }
+    function expandItem(e) {
+      if (e == selectedItem)
+        e.style["font-weight"] = "bold";
+      var childList = e.getElementsByClassName("toc_list");
+      if (childList.length == 0)
+        return;
+      childList[0].style.display = "block";
+      childList[0].style["font-weight"] = "normal";
+      e.setAttribute("class", "toc_item toc_item_expanded");
+    }
+    function collapseItem(e) {
+      var childList = e.getElementsByClassName("toc_list");
+      if (childList.length == 0)
+        return;
+      childList[0].style.display = "none";
+      e.setAttribute("class", "toc_item toc_item_collapsed");
+    }
+    function tocSpanOnClick(e)
+    {
+      if (event.srcElement != null && event.srcElement.parentElement != null)
+      {
+        var link = event.srcElement.parentElement.getAttribute("data-link");
+        if (link != null)
+        {
+          var poundIndex = link.indexOf("#");
+          if (poundIndex == -1)
+            window.location.href = link + ".html";
+          else
+            window.location.href = link.substr(0, poundIndex) + ".html#" + link.substr(poundIndex+1, link.length - poundIndex - 1);
+        }
+      }
+      event.stopPropagation();
+    }
+    function tocItemOnClick(e)
+    {
+      if (event.srcElement == null) return;
+      // Toggle expanded/collapsed state.
+      if (event.srcElement.getAttribute("class").endsWith("toc_item_collapsed"))
+        expandItem(event.srcElement);
+      else if (event.srcElement.getAttribute("class").endsWith("toc_item_expanded"))
+        collapseItem(event.srcElement);
+      event.stopPropagation();
+    }
+    var path = window.location.pathname;
+    var pageName = path.split("/").pop();
+    var currentPageID = pageName.substr(0, pageName.lastIndexOf("."));
+    if (currentPageID.length == 0)
+      currentPageID = "index";
+    var tocLists = document.getElementsByClassName("toc_root_list");
+    for (var i = 0; i < tocLists.length; i++) { // flatten every <li> under each root list into tocItemsArray
+      var tocList = tocLists[i];
+      var items = tocList.getElementsByTagName("li")
+      for (var j = 0; j < items.length; j++)
+        tocItemsArray.push(items[j]);
+    }
+    for (var i = 0; i < tocItemsArray.length; i++) { // classify each entry and attach its click handlers
+      var item = tocItemsArray[i];
+      if (item.getAttribute("data-link") == currentPageID) // entry whose data-link names the current page
+        selectedItem = item;
+      if (item.getElementsByTagName("li").length != 0) { // has nested entries: start collapsed
+        collapseItem(item);
+      }
+      else { // no nested entries: mark as a leaf
+        item.setAttribute("class", "toc_item toc_item_leaf");
+      }
+      item.addEventListener("click", tocItemOnClick);
+      var innerSpan = item.getElementsByTagName("span");
+      if (innerSpan.length != 0) // only the first <span> of an item gets the navigation handler
+      {
+        innerSpan[0].addEventListener("click", tocSpanOnClick);
+        innerSpan[0].setAttribute("class", "toc_span");
+      }
+    }
+    var curItem = selectedItem; // walk upwards from the current page's entry, expanding each item on the way
+    while (curItem != null) {
+      expandItem(curItem);
+      curItem = curItem.parentElement;
+      if (curItem != null && curItem.getAttribute("class") != null &&
+        curItem.getAttribute("class").startsWith("toc_list")) // parent is a nested list wrapper: step over it
+        curItem = curItem.parentElement;
+      if (curItem != null && curItem.getAttribute("class") != null &&
+        curItem.getAttribute("class").startsWith("toc_root_list")) // reached a root list: stop climbing
+        break;
+    }
+
+    var subItems = selectedItem.getElementsByTagName("li");
+    var subSectionTitles = [];
+    var subSectionTitleStrs = [];
+    for (var i = 0; i < subItems.length; i++)
+    {
+      subSectionItems.push(subItems[i]);
+      var title = subItems[i].getAttribute("data-link");
+      var pos = title.lastIndexOf("#");
+      title = title.substr(pos + 1);
+      var element = document.getElementById(title);
+      subSectionTitles.push(element);
+      subSectionTitleStrs.push(title);
+    }
+    subSectionTitles.push(document.getElementById("_content_end_"));
+    function isSectionFullyVisible(id)
+    {
+      var titleElement = subSectionTitles[id];
+      var nextTitleElement = subSectionTitles[id+1];
+      return (titleElement.getBoundingClientRect().top >= 0 && nextTitleElement.getBoundingClientRect().top <= window.innerHeight);
+    }
+    function findCurrentSubsection()
+    {
+      var currentSubsectionID = -1;
+      for (var i = 0; i < subSectionItems.length; i++) {
+        var titleElement = subSectionTitles[i];
+        if (titleElement == null)
+          continue;
+        if (titleElement.getBoundingClientRect().top < window.innerHeight * 0.12)
+          currentSubsectionID = i;
+      }
+      return currentSubsectionID;
+    }
+    function updateCurrentSubsection(currentSubsectionID)
+    {
+      for (var i = 0; i < subSectionItems.length; i++)
+      {
+        if (i == currentSubsectionID || isSectionFullyVisible(i))
+          subSectionItems[i].getElementsByTagName("span")[0].style["font-weight"] = 600;
+        else
+          subSectionItems[i].getElementsByTagName("span")[0].style["font-weight"] = 400;
+      }
+    }
+    function windowScroll(e)
+    {
+      updateCurrentSubsection(findCurrentSubsection());
+      updateScroll();
+    }
+    window.addEventListener("scroll", windowScroll);
+    updateCurrentSubsection(findCurrentSubsection());
+  </script>
+  <script type="text/x-mathjax-config">
+    MathJax.Hub.Config({
+      tex2jax: {
+        inlineMath: [ ['$$','$$'], ["\\(","\\)"] ],
+        displayMath: [ ['$$','$$'], ["\\(","\\)"] ],
+      },
+      TeX: {
+        Macros: {
+          bra: ["\\langle{#1}|", 1],
+          ket: ["|{#1}\\rangle", 1],
+          braket: ["\\langle{#1}\\rangle", 1],
+          bk: ["\\langle{#1}|{#2}|{#3}\\rangle", 3]
+       }
+     }
+    });
+  </script>
+  <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+</body>
+
+</html>
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/assets/css/style.scss b/external/slang/share/doc/slang/assets/css/style.scss
new file mode 100644
index 00000000..801ee557
--- /dev/null
+++ b/external/slang/share/doc/slang/assets/css/style.scss
@@ -0,0 +1,203 @@
+---
+---
+
+@import "{{ site.theme }}";
+a:hover {
+    text-decoration: underline;
+}
+h3 {
+    color: #363636;
+}
+h4 {
+    color: #363636;
+}
+blockquote {
+    background-color: #f2f2f2;
+    padding-top: 10px;
+    padding-bottom: 5px;
+}
+blockquote p {
+    font-size: 16px;
+    font-weight: 400;
+    margin-bottom: 5px;
+    color: #202020;
+}
+body {
+    color: initial;
+    text-shadow: none;
+    background: none;
+}
+#container 
+{
+    background:none;
+}
+
+
+
+.highlight .cm {
+  color: #148b04;
+}
+.highlight .cp {
+  color: #148b04;
+}
+.highlight .c1 {
+  color: #148b04;
+}
+.highlight .cs {
+  color: #148b04;
+}
+.highlight .c, .highlight .ch, .highlight .cd, .highlight .cpf {
+  color: #148b04;
+}
+.highlight .err {
+  color: #a61717;
+  background-color: #e3d2d2;
+}
+.highlight .gd {
+  color: #000000;
+  background-color: #ffdddd;
+}
+.highlight .ge {
+  color: #000000;
+  font-style: italic;
+}
+.highlight .gr {
+  color: #aa0000;
+}
+.highlight .gh {
+  color: #999999;
+}
+.highlight .gi {
+  color: #000000;
+  background-color: #ddffdd;
+}
+.highlight .go {
+  color: #888888;
+}
+.highlight .gp {
+  color: #555555;
+}
+.highlight .gu {
+  color: #aaaaaa;
+}
+.highlight .gt {
+  color: #aa0000;
+}
+.highlight .kc {
+  color: #1243d4;
+}
+.highlight .kd {
+  color: #1243d4;
+}
+.highlight .kn {
+  color: #1243d4;
+}
+.highlight .kp {
+  color: #1243d4;
+}
+.highlight .kr {
+  color: #1243d4;
+}
+.highlight .kt {
+  color: #1243d4;
+}
+.highlight .k, .highlight .kv {
+  color: #1243d4;
+}
+.highlight .m, .highlight .mb, .highlight .mx, .highlight .mi, .highlight .mf {
+  color: #7211c2;
+}
+.highlight .sa {
+  color: #000000;
+}
+.highlight .sb {
+  color: #d14;
+}
+.highlight .sc {
+  color: #d14;
+}
+.highlight .sd {
+  color: #d14;
+}
+.highlight .s2 {
+  color: #d14;
+}
+.highlight .se {
+  color: #d14;
+}
+.highlight .sh {
+  color: #d14;
+}
+.highlight .si {
+  color: #d14;
+}
+.highlight .sx {
+  color: #d14;
+}
+.highlight .sr {
+  color: #009926;
+}
+.highlight .s1 {
+  color: #d14;
+}
+.highlight .ss {
+  color: #990073;
+}
+.highlight .s, .highlight .dl {
+  color: #d14;
+}
+.highlight .na {
+  color: #008080;
+}
+.highlight .bp {
+  color: #999999;
+}
+.highlight .n{
+    color: black;
+}
+.highlight .nc {
+  color: #11abb9;
+}
+.highlight .nt {
+  color: #11abb9;
+}
+.highlight .vc {
+  color: #008080;
+}
+.highlight .vg {
+  color: #008080;
+}
+.highlight .vi {
+  color: #008080;
+}
+.highlight .nv, .highlight .vm {
+  color: #008080;
+}
+.highlight .ow {
+  color: #000000;
+}
+.highlight .o {
+  color: #000000;
+}
+.highlight .w {
+  color: #000000;
+}
+.highlight .p {color:#000000;}
+
+code
+{
+    background-color: initial;
+    border:none;
+}
+pre{
+  color: #000000;
+  background: #F8F8F8;
+}
+pre code {
+  color: #000000;
+  background-color: #F8F8F8;
+}
+.highlight
+{
+    background: #F8F8F8;
+}
diff --git a/external/slang/share/doc/slang/assets/moduletree.png b/external/slang/share/doc/slang/assets/moduletree.png
new file mode 100644
index 00000000..ef8c099a
Binary files /dev/null and b/external/slang/share/doc/slang/assets/moduletree.png differ
diff --git a/external/slang/share/doc/slang/build_reference.ps1 b/external/slang/share/doc/slang/build_reference.ps1
new file mode 100644
index 00000000..a2967681
--- /dev/null
+++ b/external/slang/share/doc/slang/build_reference.ps1
@@ -0,0 +1,62 @@
+# This script uses `slangc` to generate the core module reference documentation and push the updated
+# documents to shader-slang/stdlib-reference repository.
+# The stdlib-reference repository has github-pages setup so that the markdown files we generate
+# in this step will be rendered as html pages by Jekyll upon a commit to the repository.
+# So all we need to do here is to pull the stdlib-reference repository, regenerate the markdown files
+# and push the changes back to the repository.
+
+# The generated markdown files will be located in three folders:
+# - ./global-decls
+# - ./interfaces
+# - ./types
+# In addition, slangc will generate a table of contents file `toc.html` which will be copied to
+# ./_includes/stdlib-reference-toc.html for Jekyll to consume it correctly.
+
+# If stdlib-reference folder does not exist, clone from github repo
+if (-not (Test-Path ".\stdlib-reference")) {
+    git clone https://github.com/shader-slang/stdlib-reference/
+}
+else {
+# If it already exists, just pull the latest changes.
+    cd stdlib-reference
+    git pull
+    cd ../
+}
+# Remove the old generated files.
+Remove-Item -Path ".\stdlib-reference\global-decls" -Recurse -Force
+Remove-Item -Path ".\stdlib-reference\interfaces" -Recurse -Force
+Remove-Item -Path ".\stdlib-reference\types" -Recurse -Force
+Remove-Item -Path ".\stdlib-reference\attributes" -Recurse -Force
+
+# Use git describe to produce a version string and write it to _includes/version.inc.
+# This file will be included by the stdlib-reference Jekyll template.
+git describe --tags | Out-File -FilePath ".\stdlib-reference\_includes\version.inc" -Encoding ASCII
+
+cd stdlib-reference
+$slangPaths = @( # candidate slangc builds, probed in this order; the first existing one is used
+    "../../build/RelWithDebInfo/bin/slangc.exe",
+    "../../build/Release/bin/slangc.exe",
+    "../../build/Debug/bin/slangc.exe"
+)
+$slangExe = $slangPaths | Where-Object { Test-Path $_ } | Select-Object -First 1
+if ($slangExe) {
+    & $slangExe -compile-core-module -doc # regenerate the reference documents in the current directory
+    Move-Item -Path ".\toc.html" -Destination ".\_includes\stdlib-reference-toc.html" -Force
+    git config user.email "bot@shader-slang.com"
+    git config user.name "Stdlib Reference Bot"
+    git add .
+    git commit -m "Update the core module reference"
+    git push
+} else {
+    Write-Error "Could not find slangc executable in RelWithDebInfo, Release or Debug directories" # names every directory probed above
+}
+cd ../
+
+# For local debugging only.
+# Remove-Item -Path "D:\git_repo\stdlib-reference\global-decls" -Recurse -Force
+# Remove-Item -Path "D:\git_repo\stdlib-reference\interfaces" -Recurse -Force
+# Remove-Item -Path "D:\git_repo\stdlib-reference\types" -Recurse -Force
+# Copy-Item -Path .\stdlib-reference\global-decls -Destination D:\git_repo\stdlib-reference\global-decls -Recurse -Force
+# Copy-Item -Path .\stdlib-reference\interfaces -Destination D:\git_repo\stdlib-reference\interfaces -Recurse -Force
+# Copy-Item -Path .\stdlib-reference\types -Destination D:\git_repo\stdlib-reference\types -Recurse -Force
+# Copy-Item -Path .\stdlib-reference\_includes\stdlib-reference-toc.html -Destination D:\git_repo\stdlib-reference\_includes\stdlib-reference-toc.html -Force
diff --git a/external/slang/share/doc/slang/build_toc.ps1 b/external/slang/share/doc/slang/build_toc.ps1
new file mode 100644
index 00000000..ad727713
--- /dev/null
+++ b/external/slang/share/doc/slang/build_toc.ps1
@@ -0,0 +1,12 @@
+$job = Start-Job -ArgumentList $PSScriptRoot -ScriptBlock { # run as a background job; $args[0] is this script's directory
+    Set-Location $args[0]
+    $code = (Get-Content -Raw -Path "scripts/Program.cs").ToString() # C# source, read relative to $args[0]
+    $assemblies = ("System.Core", "System.IO", "System.Collections")
+    Add-Type -ReferencedAssemblies $assemblies -TypeDefinition $code -Language CSharp # compile it inside the job so [toc.Builder] is callable
+    $path = Join-Path -Path $args[0] -ChildPath "user-guide"
+    [toc.Builder]::Run($path);
+    $path = Join-Path -Path $args[0] -ChildPath "gfx-user-guide"
+    [toc.Builder]::Run($path);
+}
+Wait-Job $job # block until the job has finished
+Receive-Job -Job $job # relay the job's output to this session
diff --git a/external/slang/share/doc/slang/build_toc.sh b/external/slang/share/doc/slang/build_toc.sh
new file mode 100644
index 00000000..9c197cad
--- /dev/null
+++ b/external/slang/share/doc/slang/build_toc.sh
@@ -0,0 +1,127 @@
+#!/usr/bin/env bash
+set -e
+
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+project_root="$(dirname "$script_dir")"
+check_only=0
+
+show_help() { # Print the usage text on stdout (the unrecognized-argument path redirects it to stderr).
+  me=$(basename "$0") # script name shown in the usage text
+  cat <<EOF
+$me: Build table of contents for documentation directories
+
+Usage: $me [--help] [--source <path>] [--check-only]
+
+Options:
+  --help           Show this help message
+  --source         Path to project root directory (defaults to parent of the script directory)
+  --check-only     Check if TOC needs updating, exit 1 if changes needed
+EOF
+}
+
+while [[ "$#" -gt 0 ]]; do # each iteration consumes one option (plus its value for --source)
+  case $1 in
+  -h | --help)
+    show_help
+    exit 0
+    ;;
+  --source)
+    [[ "$#" -ge 2 ]] || { echo "--source requires a path argument" >&2; exit 1; } # else the final shift fails and set -e exits silently
+    project_root="$2"; shift # drop the option name here; the shift after esac drops its value
+    ;;
+  --check-only)
+    check_only=1
+    ;;
+  *)
+    echo "unrecognized argument: $1" >&2
+    show_help >&2
+    exit 1
+    ;;
+  esac
+  shift
+done
+
+missing_bin=0
+
+require_bin() {
+  local name="$1"
+  if ! command -v "$name" &>/dev/null; then
+    echo "This script needs $name, but it isn't in \$PATH" >&2
+    missing_bin=1
+    return
+  fi
+}
+
+require_bin "mcs"
+require_bin "mono"
+
+if [ "$missing_bin" -eq 1 ]; then
+  exit 1
+fi
+
+temp_dir=$(mktemp -d)
+trap 'rm -rf "$temp_dir"' EXIT
+
+docs_dir="$project_root/docs"
+
+cat >"$temp_dir/temp_program.cs" <<EOL
+$(cat "$script_dir/scripts/Program.cs")
+
+namespace toc
+{
+    class Program
+    {
+        static int Main(string[] args)
+        {
+            if (args.Length < 1)
+            {
+                Console.WriteLine("Please provide a directory path");
+                return 1;
+            }
+
+            try
+            {
+                Builder.Run(args[0]);
+                return 0;
+            }
+            catch (Exception ex)
+            {
+                Console.WriteLine(\$"Error: {ex.Message}");
+                return 1;
+            }
+        }
+    }
+}
+EOL
+
+if ! mcs -r:System.Core "$temp_dir/temp_program.cs" -out:"$temp_dir/toc-builder.exe"; then
+  echo "Compilation of $script_dir/scripts/Program.cs failed" >&2
+  exit 1
+fi
+
+for dir in "user-guide" "gfx-user-guide"; do
+  if [ -d "$docs_dir/$dir" ]; then
+    if [ "$check_only" -eq 1 ]; then
+      # Ensure toc.html has no uncommitted changes, so a diff seen after regeneration comes from the generator
+      if ! git -C "$project_root" diff --quiet "docs/$dir/toc.html" 2>/dev/null; then
+        echo "Working directory not clean, cannot check TOC" >&2
+        exit 1
+      fi
+    fi
+
+    if ! mono "$temp_dir/toc-builder.exe" "$docs_dir/$dir"; then # run the compiled builder on this directory
+      echo "TOC generation failed for $dir" >&2
+      exit 1
+    fi
+
+    if [ "$check_only" -eq 1 ]; then # second check: did the run above change toc.html?
+      if ! git -C "$project_root" diff --quiet "docs/$dir/toc.html" 2>/dev/null; then
+        git -C "$project_root" diff --color "docs/$dir/toc.html" # show what would change
+        git -C "$project_root" checkout -- "docs/$dir/toc.html" 2>/dev/null # restore the committed toc.html
+        exit 1
+      fi
+    fi
+  else
+    echo "Directory $dir not found" >&2 # reported but not fatal: the loop continues with the next directory
+  fi
+done
diff --git a/external/slang/share/doc/slang/building.md b/external/slang/share/doc/slang/building.md
new file mode 100644
index 00000000..6597c519
--- /dev/null
+++ b/external/slang/share/doc/slang/building.md
@@ -0,0 +1,358 @@
+# Building Slang From Source
+
+### TLDR
+
+`cmake --workflow --preset release` to configure, build, and package a release
+version of Slang.
+
+## Prerequisites:
+
+Please install:
+
+- CMake (3.25 preferred, but 3.22 works[^1])
+- A C++ compiler with support for C++17. GCC, Clang and MSVC are supported
+- A CMake compatible backend, for example Visual Studio or Ninja
+- Python3 (a dependency for building spirv-tools)
+
+Optional dependencies for tests include
+
+- CUDA
+- OptiX
+- NVAPI
+- Aftermath
+- X11
+
+Other dependencies are sourced from submodules in the [./external](./external)
+directory.
+
+## Get the Source Code
+
+Clone [this](https://github.com/shader-slang/slang) repository. Make sure to
+fetch the submodules also.
+
+```bash
+git clone https://github.com/shader-slang/slang --recursive
+```
+
+## Configure and build
+
+> This section assumes cmake 3.25 or greater, if you're on a lower version
+> please see [building with an older cmake](#building-with-an-older-cmake)
+
+For a Ninja based build system (all platforms) run:
+```bash
+cmake --preset default
+cmake --build --preset releaseWithDebugInfo # or --preset debug, or --preset release
+```
+
+For Visual Studio run:
+```bash
+cmake --preset vs2022 # or 'vs2019' or 'vs2022-dev'
+start devenv ./build/slang.sln # to optionally open the project in Visual Studio
+cmake --build --preset releaseWithDebugInfo # to build from the CLI, could also use --preset release or --preset debug
+```
+
+There also exists a `vs2022-dev` preset which turns on features to aid
+debugging.
+
+### WebAssembly build
+
+To build the WebAssembly version of Slang, Slang needs to be compiled with the
+[Emscripten SDK](https://github.com/emscripten-core/emsdk). You can find more
+information on the [Emscripten](https://emscripten.org/) website.
+
+Clone the EMSDK repository, then install and activate the latest SDK version.
+
+
+```bash
+git clone https://github.com/emscripten-core/emsdk.git
+cd emsdk
+```
+
+For non-Windows platforms
+```bash
+./emsdk install latest
+./emsdk activate latest
+```
+
+For Windows
+```cmd
+emsdk.bat install latest
+emsdk.bat activate latest
+```
+
+After EMSDK is activated, Slang needs to be built in a cross compiling setup: 
+
+- build the `generators` target for the build platform
+- configure the build with `emcmake` for the host platform
+- build for the host platform.
+
+> Note: For more details on cross compiling please refer to the 
+> [cross-compiling](#cross-compiling) section.
+
+```bash
+# Build generators.
+cmake --workflow --preset generators --fresh
+mkdir generators
+cmake --install build --prefix generators --component generators
+
+# Configure the build with emcmake.
+# emcmake is available only after emsdk_env has set up the environment correctly.
+pushd ../emsdk
+source ./emsdk_env.sh # For Windows, emsdk_env.bat
+popd
+emcmake cmake -DSLANG_GENERATORS_PATH=generators/bin --preset emscripten -G "Ninja"
+
+# Build slang-wasm.js and slang-wasm.wasm in build.em/Release/bin
+cmake --build --preset emscripten --target slang-wasm
+```
+
+> Note: If the last build step fails, try running the command that `emcmake`
+> outputs, directly.
+
+## Installing
+
+Build targets may be installed using cmake:
+
+```bash
+cmake --build . --target install
+```
+
+This should install `SlangConfig.cmake`, which allows `find_package` to work.
+`SlangConfig.cmake` defines the `SLANG_EXECUTABLE` variable, which points to the
+`slangc` executable, and also defines the `slang::slang` target to link against.
+
+For now, `slang::slang` is the only exported target defined in the config which can
+be linked to.
+
+Example usage
+
+```cmake
+find_package(slang REQUIRED PATHS ${your_cmake_install_prefix_path} NO_DEFAULT_PATH)
+# slang_FOUND should be automatically set
+target_link_libraries(yourLib PUBLIC
+  slang::slang
+)
+```
+
+## Testing
+
+```bash
+build/Debug/bin/slang-test
+```
+
+See the [documentation on testing](../tools/slang-test/README.md) for more information.
+
+## More niche topics
+
+### CMake options
+
+| Option                            | Default                    | Description                                                                                  |
+|-----------------------------------|----------------------------|----------------------------------------------------------------------------------------------|
+| `SLANG_VERSION`                   | Latest `v*` tag            | The project version, detected using git if available                                         |
+| `SLANG_EMBED_CORE_MODULE`         | `TRUE`                     | Build slang with an embedded version of the core module                                      |
+| `SLANG_EMBED_CORE_MODULE_SOURCE`  | `TRUE`                     | Embed the core module source in the binary                                                   |
+| `SLANG_ENABLE_DXIL`               | `TRUE`                     | Enable generating DXIL using DXC                                                             |
+| `SLANG_ENABLE_ASAN`               | `FALSE`                    | Enable ASAN (address sanitizer)                                                              |
+| `SLANG_ENABLE_FULL_IR_VALIDATION` | `FALSE`                    | Enable full IR validation (SLOW!)                                                            |
+| `SLANG_ENABLE_IR_BREAK_ALLOC`     | `FALSE`                    | Enable IR BreakAlloc functionality for debugging.                                            |
+| `SLANG_ENABLE_GFX`                | `TRUE`                     | Enable gfx targets                                                                           |
+| `SLANG_ENABLE_SLANGD`             | `TRUE`                     | Enable language server target                                                                |
+| `SLANG_ENABLE_SLANGC`             | `TRUE`                     | Enable standalone compiler target                                                            |
+| `SLANG_ENABLE_SLANGRT`            | `TRUE`                     | Enable runtime target                                                                        |
+| `SLANG_ENABLE_SLANG_GLSLANG`      | `TRUE`                     | Enable glslang dependency and slang-glslang wrapper target                                   |
+| `SLANG_ENABLE_TESTS`              | `TRUE`                     | Enable test targets, requires SLANG_ENABLE_GFX, SLANG_ENABLE_SLANGD and SLANG_ENABLE_SLANGRT |
+| `SLANG_ENABLE_EXAMPLES`           | `TRUE`                     | Enable example targets, requires SLANG_ENABLE_GFX                                            |
+| `SLANG_LIB_TYPE`                  | `SHARED`                   | How to build the slang library                                                               |
+| `SLANG_ENABLE_RELEASE_DEBUG_INFO` | `TRUE`                     | Enable generating debug info for Release configs                                             |
+| `SLANG_ENABLE_RELEASE_LTO`        | `TRUE`                     | Enable LTO for Release builds                                                                |
+| `SLANG_ENABLE_SPLIT_DEBUG_INFO`   | `TRUE`                     | Enable generating split debug info for Debug and RelWithDebInfo configs                      |
+| `SLANG_SLANG_LLVM_FLAVOR`         | `FETCH_BINARY_IF_POSSIBLE` | How to set up llvm support                                                                   |
+| `SLANG_SLANG_LLVM_BINARY_URL`     | System dependent           | URL specifying the location of the slang-llvm prebuilt library                               |
+| `SLANG_GENERATORS_PATH`           | ``                         | Path to an installed `all-generators` target for cross compilation                           |
+
+The following options relate to optional dependencies for additional backends
+and running additional tests. Left unchanged they are auto detected, however
+they can be set to `OFF` to prevent their usage, or set to `ON` to make it an
+error if they can't be found.
+
+| Option                   | CMake hints                    | Notes                                                                                        |
+|--------------------------|--------------------------------|----------------------------------------------------------------------------------------------|
+| `SLANG_ENABLE_CUDA`      | `CUDAToolkit_ROOT` `CUDA_PATH` | Enable running tests with the CUDA backend, doesn't affect the targets Slang itself supports |
+| `SLANG_ENABLE_OPTIX`     | `Optix_ROOT_DIR`               | Requires CUDA                                                                                |
+| `SLANG_ENABLE_NVAPI`     | `NVAPI_ROOT_DIR`               | Only available for builds targeting Windows                                                  |
+| `SLANG_ENABLE_AFTERMATH` | `Aftermath_ROOT_DIR`           | Enable Aftermath in GFX, and add aftermath crash example to project                          |
+| `SLANG_ENABLE_XLIB`      |                                |                                                                                              |
+
+### Advanced options
+
+| Option                             | Default | Description                                                                                                                    |
+|------------------------------------|---------|--------------------------------------------------------------------------------------------------------------------------------|
+| `SLANG_ENABLE_DX_ON_VK`            | `FALSE` | Enable running the DX11 and DX12 tests on non-Windows platforms via vkd3d-proton, requires system-provided d3d headers         |
+| `SLANG_ENABLE_SLANG_RHI`           | `TRUE`  | Enable building and using [slang-rhi](https://github.com/shader-slang/slang-rhi) for tests                                     |
+| `SLANG_USE_SYSTEM_MINIZ`           | `FALSE` | Build using system Miniz library instead of the bundled version in [./external](./external)                                    |
+| `SLANG_USE_SYSTEM_LZ4`             | `FALSE` | Build using system LZ4 library instead of the bundled version in [./external](./external)                                      |
+| `SLANG_USE_SYSTEM_VULKAN_HEADERS`  | `FALSE` | Build using system Vulkan headers instead of the bundled version in [./external](./external)                                   |
+| `SLANG_USE_SYSTEM_SPIRV_HEADERS`   | `FALSE` | Build using system SPIR-V headers instead of the bundled version in [./external](./external)                                   |
+| `SLANG_USE_SYSTEM_UNORDERED_DENSE` | `FALSE` | Build using system unordered dense instead of the bundled version in [./external](./external)                                  |
+| `SLANG_SPIRV_HEADERS_INCLUDE_DIR`  | ``      | Use this specific path to SPIR-V headers instead of the bundled version in [./external](./external)                            |
+
+### LLVM Support
+
+There are several options for getting llvm-support:
+
+- Use a prebuilt binary slang-llvm library:
+  `-DSLANG_SLANG_LLVM_FLAVOR=FETCH_BINARY` or `-DSLANG_SLANG_LLVM_FLAVOR=FETCH_BINARY_IF_POSSIBLE` (this is the default)
+    - You can set `SLANG_SLANG_LLVM_BINARY_URL` to point to a local
+      `libslang-llvm.so/slang-llvm.dll` or set it to a URL of a zip/archive
+      containing such a file
+    - If this isn't set then the build system tries to download it from the
+      release on github matching the current tag. If such a tag doesn't exist
+      or doesn't have the correct os*arch combination then the latest release
+      will be tried.
+    - If `SLANG_SLANG_LLVM_FLAVOR` is `FETCH_BINARY_IF_POSSIBLE` then in
+      the case that a prebuilt binary can't be found then the build will proceed
+      as though `DISABLE` was chosen
+- Use a system supplied LLVM: `-DSLANG_SLANG_LLVM_FLAVOR=USE_SYSTEM_LLVM`, you
+  must have llvm-13.0 and a matching libclang installed. It's important that
+  either:
+    - You don't end up linking to a dynamic libllvm.so, this will almost
+      certainly cause multiple versions of LLVM to be loaded at runtime,
+      leading to errors like `opt: CommandLine Error: Option
+      'asm-macro-max-nesting-depth' registered more than once!`. Avoid this by
+      compiling LLVM without the dynamic library.
+    - Anything else which may be linked in (for example Mesa, also dynamically
+      loads the same llvm object)
+- Do not enable LLVM support: `-DSLANG_SLANG_LLVM_FLAVOR=DISABLE`
+
+To build only a standalone slang-llvm, you can run:
+
+```bash
+cmake --workflow --preset slang-llvm
+```
+
+This will generate `build/dist-release/slang-slang-llvm.zip` containing the
+library. This, of course, uses the system LLVM to build slang-llvm, otherwise
+it would just be a convoluted way to download a prebuilt binary.
+
+### Cross compiling
+
+Slang generates some code at build time, using generators built from this
+codebase. Due to this, for cross compilation one must already have built these
+generators for the build platform. Build them with the `generators` preset, and
+pass the install path to the cross building CMake invocation using
+`SLANG_GENERATORS_PATH`.
+
+Non-Windows platforms:
+
+```bash
+# build the generators
+cmake --workflow --preset generators --fresh
+mkdir build-platform-generators
+cmake --install build --config Release --prefix build-platform-generators --component generators
+# reconfigure, pointing to these generators
+# Here is also where you should set up any cross compiling environment,
+# for example by selecting the cross compilers as shown below
+cmake \
+  --preset default \
+  --fresh \
+  -DSLANG_GENERATORS_PATH=build-platform-generators/bin \
+  -Dwhatever-other-necessary-options-for-your-cross-build \
+  -DCMAKE_C_COMPILER=my-arch-gcc \
+  -DCMAKE_CXX_COMPILER=my-arch-g++
+# perform the final build
+cmake --workflow --preset release
+```
+
+Windows
+
+```bash
+# build the generators
+cmake --workflow --preset generators --fresh
+mkdir build-platform-generators
+cmake --install build --config Release --prefix build-platform-generators --component generators
+# reconfigure, pointing to these generators
+# Here is also where you should set up any cross compiling environment
+# For example
+./vcvarsamd64_arm64.bat
+cmake \
+  --preset default \
+  --fresh \
+  -DSLANG_GENERATORS_PATH=build-platform-generators/bin \
+  -Dwhatever-other-necessary-options-for-your-cross-build
+# perform the final build
+cmake --workflow --preset release
+```
+
+### Example cross compiling with MSVC to windows-aarch64
+
+One option is to build using the ninja generator, which requires providing the
+native and cross environments via `vcvarsall.bat`
+
+```bash
+vcvarsall.bat
+cmake --workflow --preset generators --fresh
+mkdir generators
+cmake --install build --prefix generators --component generators
+vcvarsall.bat x64_arm64
+cmake --preset default --fresh -DSLANG_GENERATORS_PATH=generators/bin
+cmake --workflow --preset release
+```
+
+Another option is to build using the Visual Studio generator which can find
+this automatically
+
+```
+cmake --preset vs2022 # or --preset vs2019
+cmake --build --preset generators # to build from the CLI
+cmake --install build --prefix generators --component generators
+rm -rf build # The Visual Studio generator will complain if this is left over from a previous build
+cmake --preset vs2022 --fresh -A arm64 -DSLANG_GENERATORS_PATH=generators/bin
+cmake --build --preset release
+```
+
+### Nix
+
+This repository contains a [Nix](https://nixos.org/)
+[flake](https://wiki.nixos.org/wiki/Flakes) (not officially supported or
+tested), which provides the necessary prerequisites for local development. Also,
+if you use [direnv](https://direnv.net/), you can run the following commands to
+have the Nix environment automatically activate when you enter your clone of
+this repository:
+
+```bash
+echo 'use flake' >> .envrc
+direnv allow
+```
+
+## Building with an older CMake
+
+Because older CMake versions don't support all the features we want to use in
+CMakePresets, you'll have to do without the presets. Something like the following should work:
+
+```bash
+cmake -B build -G Ninja
+cmake --build build -j
+```
+
+## Static linking against libslang
+
+If linking against a static `libslang.a` you will need to link against some
+dependencies also if you're not already incorporating them into your project.
+
+You will need to link against:
+
+```
+${SLANG_DIR}/build/Release/lib/libslang.a
+${SLANG_DIR}/build/Release/lib/libcompiler-core.a
+${SLANG_DIR}/build/Release/lib/libcore.a
+${SLANG_DIR}/build/external/miniz/libminiz.a
+${SLANG_DIR}/build/external/lz4/build/cmake/liblz4.a
+```
+
+## Notes
+
+[^1]: Below 3.25, CMake lacks the ability to mark directories as being
+system directories (https://cmake.org/cmake/help/latest/prop_tgt/SYSTEM.html#prop_tgt:SYSTEM),
+this leads to an inability to suppress warnings originating in the
+dependencies in `./external`, so be prepared for some additional warnings.
diff --git a/external/slang/share/doc/slang/ci.md b/external/slang/share/doc/slang/ci.md
new file mode 100644
index 00000000..fb4c7e68
--- /dev/null
+++ b/external/slang/share/doc/slang/ci.md
@@ -0,0 +1,36 @@
+# Our CI
+
+There are GitHub Actions workflows for building and testing Slang.
+
+## Tests
+
+Most configurations run a restricted set of tests, however on some self hosted
+runners we run the full test suite, as well as running Falcor's test suite with
+the new slang build.
+
+## Building LLVM
+
+We require a static build of LLVM for building slang-llvm; we build and cache
+this in all workflow runs. Since this changes infrequently, the cache is almost
+always hit. A cold build takes about an hour on the slowest platform. The
+cached output is a few hundred MB, so conceivably if we add many more platforms
+we might be caching more than the 10GB GitHub allowance, which would
+necessitate a more elaborate scheme for building and tracking outputs here.
+
+For slang-llvm, this is handled the same as any other dependency, except on
+Windows Debug builds, where the differences between the Debug and Release
+standard libraries require us to always make a release build; this is noted in
+the CI action YAML file.
+
+Note that we don't use sccache while building LLVM, as it changes very
+infrequently. The caching of LLVM is done by caching the final build product
+only.
+
+## sccache
+
+> Due to reliability issues, we are not currently using sccache, this is
+> historical/aspirational.
+
+The CI actions use sccache, keyed on compiler and platform. This runs on all
+configurations and significantly speeds up small source change builds. This
+cache can be safely missed without a large impact on build times.
diff --git a/external/slang/docs/command-line-slangc-reference.md b/external/slang/share/doc/slang/command-line-slangc-reference.md
similarity index 99%
rename from external/slang/docs/command-line-slangc-reference.md
rename to external/slang/share/doc/slang/command-line-slangc-reference.md
index 3e25cdce..36493b2f 100644
Binary files a/external/slang/docs/command-line-slangc-reference.md and b/external/slang/share/doc/slang/command-line-slangc-reference.md differ
diff --git a/external/slang/docs/cpu-target.md b/external/slang/share/doc/slang/cpu-target.md
similarity index 96%
rename from external/slang/docs/cpu-target.md
rename to external/slang/share/doc/slang/cpu-target.md
index 76226a2b..89a43e09 100644
--- a/external/slang/docs/cpu-target.md
+++ b/external/slang/share/doc/slang/cpu-target.md
@@ -52,9 +52,9 @@ SLANG_HOST_CPP_SOURCE,     ///< C++ code for `host` style
    
 Using the `-target` command line option
 
-* C_SOURCE: c
-* CPP_SOURCE: cpp,c++,cxx
-* HOST_CPP_SOURCE: host-cpp,host-c++,host-cxx
+* `C_SOURCE`: c
+* `CPP_SOURCE`: cpp,c++,cxx
+* `HOST_CPP_SOURCE`: host-cpp,host-c++,host-cxx
 
 Note! Output of C source is not currently supported.
 
@@ -70,11 +70,11 @@ SLANG_OBJECT_CODE,              ///< Object code that can be used for later link
 
 Using the `-target` command line option
 
-* EXECUTABLE: exe, executable
-* SHADER_SHARED_LIBRARY: sharedlib, sharedlibrary, dll
-* SHADER_HOST_CALLABLE: callable, host-callable
-* OBJECT_CODE: object-conde
-* HOST_HOST_CALLABLE: host-host-callable
+* `EXECUTABLE`: exe, executable
+* `SHADER_SHARED_LIBRARY`: sharedlib, sharedlibrary, dll
+* `SHADER_HOST_CALLABLE`: callable, host-callable
+* `OBJECT_CODE`: object-code
+* `HOST_HOST_CALLABLE`: host-host-callable
     
 Using `host-callable` types from the the command line, other than to test such code compile and can be loaded for host execution.
 
@@ -90,7 +90,7 @@ The `shader` style implies
 
 * The code *can* be executed in a GPU-kernel like execution model, launched across multiple threads (as described in the [ABI](#abi)) 
 * Currently no reference counting
-* Only functionality from the Slang stdlib, built in HLSL or anything supplied by a [COM interfaces](#com-interface) is available
+* Only functionality from the Slang core module, built in HLSL or anything supplied by a [COM interface](#com-interface) is available
 * Currently [slang-llvm](#slang-llvm) only supports the `shader` style
 
 The `host` style implies 
@@ -293,7 +293,7 @@ The global can now be set from host code via
     }
 ```
 
-In terms of reflection `__global` variables are not visibile. 
+In terms of reflection `__global` variables are not visible.
 
 ## NativeString
 
@@ -309,7 +309,7 @@ TODO(JS): What happens with String with shader compile style on CPU? Shouldn't i
 
 It is currently not possible to step into LLVM-JIT code when using [slang-llvm](#slang-llvm). Fortunately it is possible to step into code compiled via a [regular C/C++ compiler](#regular-cpp). 
 
-Below is a code snippet showing how to swich to a [regular C/C++ compiler](#regular-cpp) at runtime. 
+Below is a code snippet showing how to switch to a [regular C/C++ compiler](#regular-cpp) at runtime.
 
 ```C++
     SlangPassThrough findRegularCppCompiler(slang::IGlobalSession* slangSession)
@@ -401,7 +401,7 @@ struct ComputeVaryingInput
 
 `ComputeVaryingInput` allows specifying a range of groupIDs to execute - all the ids in a grid from startGroup to endGroup, but not including the endGroupIDs. Most compute APIs allow specifying an x,y,z extent on 'dispatch'. This would be equivalent as having startGroupID = { 0, 0, 0} and endGroupID = { x, y, z }. The exported function allows setting a range of groupIDs such that client code could dispatch different parts of the work to different cores. This group range mechanism was chosen as the 'default' mechanism as it is most likely to achieve the best performance.
 
-There are two other functions that consist of the entry point name postfixed with `_Thread` and `_Group`. For the entry point 'computeMain' these functions would be accessable from the shared library interface as `computeMain_Group` and `computeMain_Thread`. `_Group` has the same signature as the listed for computeMain, but it doesn't execute a range, only the single group specified by startGroupID (endGroupID is ignored). That is all of the threads within the group (as specified by `[numthreads]`) will be executed in a single call. 
+There are two other functions that consist of the entry point name postfixed with `_Thread` and `_Group`. For the entry point 'computeMain' these functions would be accessible from the shared library interface as `computeMain_Group` and `computeMain_Thread`. `_Group` has the same signature as the one listed for computeMain, but it doesn't execute a range, only the single group specified by startGroupID (endGroupID is ignored). That is, all of the threads within the group (as specified by `[numthreads]`) will be executed in a single call.
 
 It may be desirable to have even finer control of how execution takes place down to the level of individual 'thread's and this can be achieved with the `_Thread` style. The signature looks as follows
 
@@ -566,7 +566,7 @@ It may be useful to be able to include `slang-cpp-types.h` in C++ code to access
 
 Would wrap all the Slang prelude types in the namespace `CPPPrelude`, such that say a `StructuredBuffer<int32_t>` could be specified in C++ source code as `CPPPrelude::StructuredBuffer<int32_t>`.
 
-The code that sets up the prelude for the test infrastucture and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines what the absolute path is to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`.
+The code that sets up the prelude for the test infrastructure and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines what the absolute path to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`.
 
 The *default* prelude is set to the contents of the files for C++ held in the prelude directory and is held within the Slang shared library. It is therefore typically not necessary to distribute Slang with prelude files.
 
@@ -616,7 +616,7 @@ Vector<float, 3> defValue = {};         // Zero initialize such that read access
 values.at(3).x = 10;
 ```
 
-Note that '[] 'would be turned into the `at` function, which takes the default value as a paramter provided by the caller. If this is then written to then only the defValue is corrupted.  Even this mechanism not be quite right, because if we write and then read again from the out of bounds reference in HLSL we may expect that 0 is returned, whereas here we get the value that was last written.
+Note that '[]' would be turned into the `at` function, which takes the default value as a parameter provided by the caller. If this is then written to then only the defValue is corrupted.  Even this mechanism may not be quite right, because if we write and then read again from the out of bounds reference in HLSL we may expect that 0 is returned, whereas here we get the value that was last written.
 
 ## <a id="zero-index"/>Zero index bound checking
 
diff --git a/external/slang/docs/cuda-target.md b/external/slang/share/doc/slang/cuda-target.md
similarity index 86%
rename from external/slang/docs/cuda-target.md
rename to external/slang/share/doc/slang/cuda-target.md
index 17d1c6a0..241f253f 100644
--- a/external/slang/docs/cuda-target.md
+++ b/external/slang/share/doc/slang/cuda-target.md
@@ -30,7 +30,7 @@ The following are a work in progress or not implemented but are planned to be so
 
 For producing PTX binaries Slang uses [NVRTC](https://docs.nvidia.com/cuda/nvrtc/index.html). NVRTC dll/shared library has to be available to Slang (for example in the appropriate PATH for example) for it to be able to produce PTX.
 
-The NVRTC compiler can be accessed directly via the pass through mechanism and is identifed by the enum value `SLANG_PASS_THROUGH_NVRTC`.
+The NVRTC compiler can be accessed directly via the pass through mechanism and is identified by the enum value `SLANG_PASS_THROUGH_NVRTC`.
 
 Much like other targets that use downstream compilers Slang can be used to compile CUDA source directly to PTX via the pass through mechansism. The Slang command line options will broadly be mapped down to the appropriate options for the NVRTC compilation. In the API the `SlangCompileTarget` for CUDA is `SLANG_CUDA_SOURCE` and for PTX is `SLANG_PTX`. These can also be specified on the Slang command line as `-target cuda` and `-target ptx`.
 
@@ -126,11 +126,11 @@ The UniformState and UniformEntryPointParams struct typically vary by shader. Un
 
 Read only textures will be bound as the opaque CUDA type CUtexObject. This type is the combination of both a texture AND a sampler. This is somewhat different from HLSL, where there can be separate `SamplerState` variables. This allows access of a single texture binding with different types of sampling.
 
-If code relys on this behavior it will be necessary to bind multiple CtexObjects with different sampler settings, accessing the same texture data.
+If code relies on this behavior it will be necessary to bind multiple CUtexObjects with different sampler settings, accessing the same texture data.
 
 Slang has some preliminary support for TextureSampler type - a combined Texture and SamplerState. To write Slang code that can target CUDA and other platforms using this mechanism will expose the semantics appropriately within the source.
 
-Load is only supported for Texture1D, and the mip map selection argument is ignored. This is because there is tex1Dfetch and no higher dimensional equivalents. CUDA also only allows such access if the backing array is linear memory - meaning the bound texture cannot have mip maps - thus making the mip map parameter superflous anyway. RWTexture does allow Load on other texture types.
+Load is only supported for Texture1D, and the mip map selection argument is ignored. This is because there is tex1Dfetch and no higher dimensional equivalents. CUDA also only allows such access if the backing array is linear memory - meaning the bound texture cannot have mip maps - thus making the mip map parameter superfluous anyway. RWTexture does allow Load on other texture types.
 
 ## RWTexture
 
@@ -145,7 +145,7 @@ RWTexture2D<float2> rwt2D_2;
 
 The format names used are the same as for [GLSL layout format types](https://www.khronos.org/opengl/wiki/Layout_Qualifier_(GLSL)). If no format is specified Slang will *assume* that the format is the same as the type specified.
 
-Note that the format attribution is on variables/paramters/fields and not part of the type system. This means that if you have a scenario like...
+Note that the format attribution is on variables/parameters/fields and not part of the type system. This means that if you have a scenario like...
 
 ```
 [format(rg16f)]
@@ -239,7 +239,7 @@ That for pass-through usage, prelude is not pre-pended, preludes are for code ge
 void setDownstreamCompilerPrelude(SlangPassThrough passThrough, const char* preludeText);
 ```
 
-The code that sets up the prelude for the test infrastucture and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines what the absolute path is to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`.
+The code that sets up the prelude for the test infrastructure and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines what the absolute path to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`.
 
 Half Support
 ============
@@ -256,7 +256,7 @@ If this fails - the prelude include of `cuda_fp16.h` will most likely fail on NV
 
 CUDA has the `__half` and `__half2` types defined in `cuda_fp16.h`. The `__half2` can produce results just as quickly as doing the same operation on `__half` - in essence for some operations `__half2` is [SIMD](https://en.wikipedia.org/wiki/SIMD) like. The half implementation in Slang tries to take advantage of this optimization.
 
-Since Slang supports up to 4 wide vectors Slang has to build on CUDAs half support. The types _`_half3` and `__half4` are implemented in `slang-cuda-prelude.h` for this reason. It is worth noting that `__half3` is made up of a `__half2` and a `__half`. As `__half2` is 4 byte aligned, this means `__half3` is actually 8 bytes, rather than 6 bytes that might be expected.
+Since Slang supports up to 4 wide vectors Slang has to build on CUDA's half support. The types `__half3` and `__half4` are implemented in `slang-cuda-prelude.h` for this reason. It is worth noting that `__half3` is made up of a `__half2` and a `__half`. As `__half2` is 4 byte aligned, this means `__half3` is actually 8 bytes, rather than 6 bytes that might be expected.
 
 One area where this optimization isn't fully used is in comparisons - as in effect Slang treats all the vector/matrix half comparisons as if they are scalar. This could be perhaps be improved on in the future. Doing so would require using features that are not directly available in the CUDA headers.
 
@@ -265,9 +265,9 @@ Wave Intrinsics
 
 There is broad support for [HLSL Wave intrinsics](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12), including support for [SM 6.5 intrinsics](https://microsoft.github.io/DirectX-Specs/d3d/HLSL_ShaderModel6_5.html).
 
-Most Wave intrinsics will work with vector, matrix or scalar types of typical built in types - uint, int, float, double, uint64_t, int64_t.
+Most Wave intrinsics will work with vector, matrix or scalar types of typical built in types - `uint`, `int`, `float`, `double`, `uint64_t`, `int64_t`.
 
-The support is provided via both the Slang stdlib as well as the Slang CUDA prelude found in 'prelude/slang-cuda-prelude.h'. Many Wave intrinsics are not directly applicable within CUDA which supplies a more low level mechanisms. The implementation of most Wave functions work most optimally if a 'Wave' where all lanes are used. If all lanes from index 0 to pow2(n) -1  are used (which is also true if all lanes are used) a binary reduction is typically applied. If this is not the case the implementation fallsback on a slow path which is linear in the number of active lanes, and so is typically significantly less performant.
+The support is provided via both the Slang core module as well as the Slang CUDA prelude found in 'prelude/slang-cuda-prelude.h'. Many Wave intrinsics are not directly applicable within CUDA, which supplies more low-level mechanisms. The implementation of most Wave functions works most optimally on a 'Wave' where all lanes are used. If all lanes from index 0 to pow2(n) - 1 are used (which is also true if all lanes are used) a binary reduction is typically applied. If this is not the case the implementation falls back on a slow path which is linear in the number of active lanes, and so is typically significantly less performant.
 
 For more a more concrete example take
 
@@ -292,7 +292,7 @@ Will require 3 times as many steps as the earlier scalar example just using a si
 
 ## WaveGetLaneIndex
 
-'WaveGetLaneIndex' defaults to `(threadIdx.x & SLANG_CUDA_WARP_MASK)`. Depending on how the kernel is launched this could be incorrect. There other ways to get lane index, for example using inline assembly. This mechanism though is apparently slower than the simple method used here. There is support for using the asm mechnism in the CUDA prelude using the `SLANG_USE_ASM_LANE_ID` preprocessor define to enable the feature.
+'WaveGetLaneIndex' defaults to `(threadIdx.x & SLANG_CUDA_WARP_MASK)`. Depending on how the kernel is launched this could be incorrect. There are other ways to get lane index, for example using inline assembly. This mechanism though is apparently slower than the simple method used here. There is support for using the asm mechanism in the CUDA prelude using the `SLANG_USE_ASM_LANE_ID` preprocessor define to enable the feature.
 
 There is potential to calculate the lane id using the [numthreads] markup in Slang/HLSL, but that also requires some assumptions of how that maps to a lane index.
 
@@ -301,6 +301,17 @@ There is potential to calculate the lane id using the [numthreads] markup in Sla
 * Intrinsics which only work in pixel shaders
   + QuadXXXX intrinsics
 
+OptiX Support
+=============
+
+Slang supports OptiX for raytracing. To compile raytracing programs, NVRTC must have access to the `optix.h` and dependent files that are typically distributed as part of the OptiX SDK. When Slang detects the use of raytracing in source, it will define `SLANG_CUDA_ENABLE_OPTIX` when `slang-cuda-prelude.h` is included. This will in turn try to include `optix.h`.
+
+Slang tries several mechanisms to locate `optix.h` when NVRTC is initiated. The first mechanism is to look in the include paths that are passed to Slang. If `optix.h` can be found in one of these paths, no more searching will be performed.
+
+If this fails, the default OptiX SDK install locations are searched. On Windows this is `%{PROGRAMDATA}\NVIDIA Corporation\OptiX SDK X.X.X\include`. On Linux this is `${HOME}/NVIDIA-OptiX-SDK-X.X.X-suffix`. 
+
+If OptiX headers cannot be found, compilation will fail.
+
 Limitations
 ===========
 
diff --git a/external/slang/share/doc/slang/deprecated/a1-02-slangpy.md b/external/slang/share/doc/slang/deprecated/a1-02-slangpy.md
new file mode 100644
index 00000000..a2d169de
--- /dev/null
+++ b/external/slang/share/doc/slang/deprecated/a1-02-slangpy.md
@@ -0,0 +1,813 @@
+---
+layout: deprecated
+permalink: "docs/user-guide/a1-02-slangpy"
+---
+
+Using Slang to Write PyTorch Kernels
+=========================================================
+
+> #### Deprecated Feature
+> Note: This documentation is about `slang-torch`, an old way to use Slang with Python and PyTorch.
+> Developers who are building new projects should use <a href="https://slangpy.shader-slang.org">SlangPy</a> instead.
+
+If you are a PyTorch user seeking to write complex, high-performance, and automatically differentiated kernel functions using a per-thread programming model, we invite you to try Slang. Slang is a cutting-edge shading language that provides a straightforward way to define kernel functions that run incredibly fast in graphics applications. With the latest addition of automatic differentiation and PyTorch interop features, Slang offers an efficient solution for developing auto-differentiated kernels that run at lightning speed with a strongly typed, per-thread programming model.
+
+One of the primary advantages of a per-thread programming model in kernel programming is the elimination of concerns regarding maintaining masks for branches. When developing a kernel in Slang, you can use all control flow statements, composite data types (structs, arrays, etc.), and function calls without additional effort. Code created with these language constructs can be automatically differentiated by the compiler without any restrictions. Additionally, Slang is a strongly typed language, which ensures that you will never encounter type errors at runtime. Most code errors can be identified as you type thanks to the [compiler's coding assistance service](https://marketplace.visualstudio.com/items?itemName=shader-slang.slang-language-extension), further streamlining the development process.
+
+In addition, using a per-thread programming model also results in more optimized memory usage. When writing a kernel in Slang, most intermediate results do not need to be written out to global memory and then read back, reducing global memory bandwidth consumption and the delay caused by these memory operations. As a result, a Slang kernel can typically run at higher efficiency compared to the traditional bulk-synchronous programming model.
+
+## Getting Started with SlangTorch
+
+In this tutorial, we will use a simple example to walk through the steps to use Slang in your PyTorch project.
+
+### Installation
+`slangtorch` is available via PyPI, so you can install it simply through
+```sh
+pip install slangtorch
+```
+
+Note that `slangtorch` requires `torch` with CUDA support. See the [pytorch](https://pytorch.org/) installation page to find the right version for your platform.
+
+You can check that you have the right installation by running: 
+```sh
+python -c "import torch; print(f'cuda: {torch.cuda.is_available()}')"
+```
+
+### Writing Slang kernels for `slangtorch` >= **v1.1.5**
+
+From **v2023.4.0**, Slang supports auto-binding features that make it easier than ever to invoke Slang kernels from python, and interoperate seamlessly with `pytorch` tensors.
+
+Here's a barebones example of a simple squaring kernel written in Slang (`square.slang`):
+
+```csharp
+[AutoPyBindCUDA]
+[CUDAKernel]
+void square(TensorView<float> input, TensorView<float> output)
+{
+    // Get the 'global' index of this thread.
+    uint3 dispatchIdx = cudaThreadIdx() + cudaBlockIdx() * cudaBlockDim();
+
+    // If the thread index is beyond the input size, exit early.
+    if (dispatchIdx.x >= input.size(0))
+        return;
+
+    output[dispatchIdx.x] = input[dispatchIdx.x] * input[dispatchIdx.x];
+}
+
+```
+
+This code follows the standard pattern of a typical CUDA kernel function. It takes as input
+two tensors, `input` and `output`. 
+It first obtains the global dispatch index of the current thread and performs a range check to make sure we don't read or write out
+of the bounds of the input and output tensors, and then computes the square of the input element and
+stores it at the corresponding location in the `output` tensor.
+
+
+`slangtorch` works by compiling kernels to CUDA and it identifies the functions to compile by checking for the `[CUDAKernel]` attribute.
+The second attribute `[AutoPyBindCUDA]` allows us to call `square` directly from python without having to write any host code. If you would like to write the host code yourself for finer control, see the other version of this example [here](#manually-binding-kernels).
+
+You can now simply invoke this kernel from python:
+
+```python
+import torch
+import slangtorch
+
+m = slangtorch.loadModule('square.slang')
+
+A = torch.randn((1024,), dtype=torch.float).cuda()
+
+output = torch.zeros_like(A).cuda()
+
+# Number of threads launched = blockSize * gridSize
+m.square(input=A, output=output).launchRaw(blockSize=(32, 1, 1), gridSize=(64, 1, 1))
+
+print(output)
+```
+
+The call `slangtorch.loadModule("square.slang")` returns a scope that contains a handle to the `square` kernel.
+
+The kernel can be invoked by 
+1. calling `square` and binding `torch` tensors as arguments for the kernel, and then
+2. launching it using `launchRaw()` by specifying CUDA launch arguments to `blockSize` & `gridSize`. (Refer to the [CUDA documentation](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications) for restrictions around `blockSize`)
+
+Note that for semantic clarity reasons, calling a kernel requires the use of keyword arguments with names that are lifted from the `.slang` implementation.
+
+### Invoking derivatives of kernels using slangtorch
+
+The `[AutoPyBindCUDA]` attribute can also be used on differentiable functions defined in Slang, and will automatically bind the derivatives. To do this, simply add the `[Differentiable]` attribute.
+
+One key point is that the basic `TensorView<T>` objects are not differentiable. They can be used as buffers for data that does not require derivatives, or even as buffers for the manual accumulation of derivatives.
+
+Instead, use the `DiffTensorView` type for when you need differentiable tensors. Currently, `DiffTensorView` only supports the `float` dtype variety.
+
+Here's a barebones example of a differentiable version of `square`:
+
+```csharp
+[AutoPyBindCUDA]
+[CUDAKernel]
+[Differentiable]
+void square(DiffTensorView input, DiffTensorView output)
+{
+    uint3 dispatchIdx = cudaThreadIdx() + cudaBlockIdx() * cudaBlockDim();
+
+    if (dispatchIdx.x >= input.size(0))
+        return;
+    
+    output[dispatchIdx.x] = input[dispatchIdx.x] * input[dispatchIdx.x];
+}
+```
+
+Now, `slangtorch.loadModule("square.slang")` returns a scope with three callable handles `square`, `square.fwd` for the forward-mode derivative & `square.bwd` for the reverse-mode derivative.
+
+You can invoke `square()` normally to get the same effect as the previous example, or invoke `square.fwd()` / `square.bwd()` by binding pairs of tensors to compute the derivatives.
+
+
+```python
+import torch
+import slangtorch
+
+m = slangtorch.loadModule('square.slang')
+
+input = torch.tensor((0, 1, 2, 3, 4, 5), dtype=torch.float).cuda()
+output = torch.zeros_like(input).cuda()
+
+# Invoke normally
+m.square(input=input, output=output).launchRaw(blockSize=(6, 1, 1), gridSize=(1, 1, 1))
+
+print(output)
+
+# Invoke reverse-mode autodiff by first allocating tensors to hold the gradients
+input = torch.tensor((0, 1, 2, 3, 4, 5), dtype=torch.float).cuda()
+input_grad = torch.zeros_like(input).cuda()
+
+output = torch.zeros_like(input)
+# Pass in all 1s as the output derivative for our example
+output_grad = torch.ones_like(output) 
+
+m.square.bwd(
+    input=(input, input_grad), output=(output, output_grad)
+).launchRaw(
+    blockSize=(6, 1, 1), gridSize=(1, 1, 1))
+
+# Derivatives get propagated to input_grad
+print(input_grad)
+
+# Note that the derivatives in output_grad are 'consumed'.
+# i.e. all zeros after the call.
+print(output_grad)
+```
+
+`slangtorch` also binds the forward-mode version of your kernel (propagate derivatives of inputs to the output) which can be invoked the same way using `module.square.fwd()`
+
+You can refer to [this documentation](autodiff) for a detailed reference of Slang's automatic differentiation feature.
+
+### Wrapping your kernels as pytorch functions
+
+`pytorch` offers an easy way to define a custom operation using `torch.autograd.Function`, and defining the `.forward()` and `.backward()` members.
+
+This can be a very helpful way to wrap your Slang kernels as pytorch-compatible operations. Here's an example of the `square` kernel as a differentiable pytorch function.
+
+```python
+import torch
+import slangtorch
+
+m = slangtorch.loadModule("square.slang")
+
+class MySquareFunc(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, input):
+        output = torch.zeros_like(input)
+
+        kernel_with_args = m.square(input=input, output=output)
+        kernel_with_args.launchRaw(
+            blockSize=(32, 32, 1),
+            gridSize=((input.shape[0] + 31) // 32, (input.shape[1] + 31) // 32, 1))
+
+        ctx.save_for_backward(input, output)
+
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        (input, output) = ctx.saved_tensors
+
+        input_grad = torch.zeros_like(input)
+        
+        # Note: When using DiffTensorView, grad_output gets 'consumed' during the reverse-mode.
+        # If grad_output may be reused, consider calling grad_output = grad_output.clone()
+        #
+        kernel_with_args = m.square.bwd(input=(input, input_grad), output=(output, grad_output))
+        kernel_with_args.launchRaw(
+            blockSize=(32, 32, 1),
+            gridSize=((input.shape[0] + 31) // 32, (input.shape[1] + 31) // 32, 1))
+        
+        return input_grad
+```
+
+Now we can use the autograd function `MySquareFunc` in our python script:
+
+```python
+x = torch.tensor((3.0, 4.0), requires_grad=True, device='cuda')
+print(f"X = {x}")
+y_pred = MySquareFunc.apply(x)
+loss = y_pred.sum()
+loss.backward()
+print(f"dX = {x.grad.cpu()}")
+```
+
+Output:
+```
+X = tensor([3., 4.],
+           device='cuda:0', requires_grad=True)
+dX = tensor([6., 8.])
+```
+
+And that's it! `slangtorch.loadModule` uses JIT compilation to compile your Slang source into a CUDA binary.
+It may take a little longer the first time you execute the script, but the compiled binaries will be cached and as long as the kernel code is not changed, future runs will not rebuild the CUDA kernel.
+
+Because the PyTorch JIT system requires `ninja`, you need to make sure `ninja` is installed on your system
+and is discoverable from the current environment. You also need to have a C++ compiler available on the system.
+On Windows, this means that Visual Studio needs to be installed.
+
+## Specializing shaders using slangtorch
+
+`slangtorch.loadModule` allows specialization parameters to be specified since it might be easier to write shaders with placeholder definitions that can be substituted at load-time.
+For instance, here's a sphere tracer that uses a _compile-time_ specialization parameter for its maximum number of steps (`N`):
+
+```csharp
+float3 sphereTrace<let N:int>(Ray ray, SDF sdf)
+{
+    var pt = ray.o;
+    for (int i = 0; i < N; i++)
+    {
+        pt += sdf.eval(pt) * ray.d;
+    }
+
+    return pt;
+}
+
+float render(Ray ray)
+{
+    // Use N=20 for sphere tracing.
+    float3 pt = sphereTrace<20>(ray, sdf);
+    return shade(pt, sdf.normal());
+}
+```
+
+However, instead of using a fixed `20` steps, the renderer can be configured to use an arbitrary compile-time constant.
+
+```csharp
+// Compile-time constant. Expect "MAX_STEPS" to be set by the loadModule call.
+static const uint kMaxSteps = MAX_STEPS;
+
+float render(Ray ray)
+{
+    float3 pt = sphereTrace<kMaxSteps>(ray, sdf);
+    return shade(pt, sdf.normal());
+}
+```
+
+Then multiple versions of this shader can be compiled from Python using the `defines` argument:
+```python
+import slangtorch
+
+sdfRenderer20Steps = slangtorch.loadModule('sdf.slang', defines={"MAX_STEPS": 20})
+sdfRenderer50Steps = slangtorch.loadModule('sdf.slang', defines={"MAX_STEPS": 50})
+...
+```
+
+This is often helpful for code re-use, parameter sweeping, comparison/ablation studies, and more, from the convenience of Python.
+
+## Back-propagating Derivatives through Complex Access Patterns
+
+In most common scenarios, a kernel function will access input tensors in a complex pattern instead of mapping
+1:1 from an input element to an output element, like the `square` example shown above. When you have a kernel
+function that accesses many different elements from the input tensors and uses them to compute an output element,
+the derivatives of each input element can't be represented directly as a function parameter, like the `x` in `square(x)`.
+
+Consider a 3x3 box filtering kernel that computes for each pixel in a 2D image, the average value of its 
+surrounding 3x3 pixel block. We can write a Slang function that computes the value of an output pixel:
+```csharp
+float computeOutputPixel(TensorView<float> input, uint2 pixelLoc)
+{
+    int width = input.size(0);
+    int height = input.size(1);
+
+    // Track the sum of neighboring pixels and the number
+    // of pixels currently accumulated.
+    int count = 0;
+    float sumValue = 0.0;
+
+    // Iterate through the surrounding area.
+    for (int offsetX = -1; offsetX <= 1; offsetX++)
+    {
+        // Skip out of bounds pixels.
+        int x = pixelLoc.x + offsetX;
+        if (x < 0 || x >= width) continue;
+
+        for (int offsetY = -1; offsetY <= 1; offsetY++)
+        {
+            int y = pixelLoc.y + offsetY;
+            if (y < 0 || y >= height) continue;
+            sumValue += input[x, y];
+            count++;
+        }
+    }
+
+    // Compute the average value.
+    sumValue /= count;
+
+    return sumValue;
+}
+```
+
+We can define our kernel function to compute the entire output image by calling `computeOutputPixel`:
+
+```csharp
+[CudaKernel]
+void boxFilter_fwd(TensorView<float> input, TensorView<float> output)
+{
+    uint2 pixelLoc = (cudaBlockIdx() * cudaBlockDim() + cudaThreadIdx()).xy;
+    int width = input.dim(0);
+    int height = input.dim(1);
+    if (pixelLoc.x >= width) return;
+    if (pixelLoc.y >= height) return;
+
+    float outputValueAtPixel = computeOutputPixel(input, pixelLoc);
+
+    // Write to output tensor.
+    output[pixelLoc] = outputValueAtPixel;
+}
+```
+
+How do we define the backward derivative propagation kernel? Note that in this example, there
+isn't a function like `square` that we can just mark as `[Differentiable]` and
+call `bwd_diff(square)` to get back the derivative of an input parameter.
+
+In this example, the input comes from multiple elements in a tensor. How do we propagate the
+derivatives to those input elements?
+
+The solution is to wrap tensor access with a custom function:
+```csharp
+float getInputElement(
+    TensorView<float> input,
+    TensorView<float> inputGradToPropagateTo,
+    uint2 loc)
+{
+    return input[loc];
+}
+```
+
+Note that the `getInputElement` function simply returns `input[loc]` and is not using the
+`inputGradToPropagateTo` parameter. That is intended. The `inputGradToPropagateTo` parameter
+is used to hold the backward propagated derivatives of each input element, and is reserved for later use.
+
+Now we can replace all direct accesses to `input` with a call to `getInputElement`. The
+`computeOutputPixel` can be implemented as follows:
+
+```csharp
+[Differentiable]
+float computeOutputPixel(
+    TensorView<float> input,
+    TensorView<float> inputGradToPropagateTo,
+    uint2 pixelLoc)
+{
+    int width = input.dim(0);
+    int height = input.dim(1);
+
+    // Track the sum of neighboring pixels and the number
+    // of pixels currently accumulated.
+    int count = 0;
+    float sumValue = 0.0;
+
+    // Iterate through the surrounding area.
+    for (int offsetX = -1; offsetX <= 1; offsetX++)
+    {
+        // Skip out of bounds pixels.
+        int x = pixelLoc.x + offsetX;
+        if (x < 0 || x >= width) continue;
+
+        for (int offsetY = -1; offsetY <= 1; offsetY++)
+        {
+            int y = pixelLoc.y + offsetY;
+            if (y < 0 || y >= height) continue;
+            sumValue += getInputElement(input, inputGradToPropagateTo, uint2(x, y));
+            count++;
+        }
+    }
+
+    // Compute the average value.
+    sumValue /= count;
+
+    return sumValue;
+}
+```
+
+The main changes compared to our original version of `computeOutputPixel` are:
+- Added an `inputGradToPropagateTo` parameter.
+- Replaced `input[x,y]` with a call to `getInputElement`.
+- Added a `[Differentiable]` attribute to the function.
+
+With that, we can define our backward kernel function:
+
+```csharp
+[CudaKernel]
+void boxFilter_bwd(
+    TensorView<float> input,
+    TensorView<float> resultGradToPropagateFrom,
+    TensorView<float> inputGradToPropagateTo)
+{
+    uint2 pixelLoc = (cudaBlockIdx() * cudaBlockDim() + cudaThreadIdx()).xy;
+    int width = input.dim(0);
+    int height = input.dim(1);
+    if (pixelLoc.x >= width) return;
+    if (pixelLoc.y >= height) return;
+
+    bwd_diff(computeOutputPixel)(input, inputGradToPropagateTo, pixelLoc);
+}
+```
+
+The kernel function simply calls `bwd_diff(computeOutputPixel)` without taking any return values from the call
+and without writing to any elements in the final `inputGradToPropagateTo` tensor. But when exactly does the propagated
+output get written to the output gradient tensor (`inputGradToPropagateTo`)?
+
+And that logic is defined in our final piece of code:
+```csharp
+[BackwardDerivativeOf(getInputElement)]
+void getInputElement_bwd(
+    TensorView<float> input,
+    TensorView<float> inputGradToPropagateTo,
+    uint2 loc,
+    float derivative)
+{
+    float oldVal;
+    inputGradToPropagateTo.InterlockedAdd(loc, derivative, oldVal);
+}
+```
+
+Here, we are providing a custom defined backward propagation function for `getInputElement`.
+In this function, we simply add `derivative` to the element in `inputGradToPropagateTo` tensor.
+
+When we call `bwd_diff(computeOutputPixel)` in `boxFilter_bwd`, the Slang compiler will automatically
+differentiate all operations and function calls in `computeOutputPixel`. By wrapping the tensor element access
+with `getInputElement` and by providing a custom backward propagation function of `getInputElement`, we are effectively
+telling the compiler what to do when a derivative propagates to an input tensor element. Inside the body
+of `getInputElement_bwd`, we define what to do then: atomically add the derivative propagated to the input element
+in the `inputGradToPropagateTo` tensor. Therefore, after running `boxFilter_bwd`, the `inputGradToPropagateTo` tensor will contain all the
+back propagated derivative values.
+
+Again, to understand all the details of the automatic differentiation system, please refer to the 
+[Automatic Differentiation](autodiff) chapter for a detailed explanation.
+
+## Manually binding kernels
+`[AutoPyBindCUDA]` works for most use cases, but in certain situations, it may be necessary to write the *host* function by hand. The host function can also be written in Slang, and `slangtorch` handles its compilation to C++.
+
+Here's the same `square` example from before:
+
+```csharp
+// square.slang
+float compute_square(float x)
+{
+    return x * x;
+}
+
+[CudaKernel]
+void square_kernel(TensorView<float> input, TensorView<float> output)
+{
+    uint3 globalIdx = cudaBlockIdx() * cudaBlockDim() + cudaThreadIdx();
+
+    if (globalIdx.x >= input.size(0))
+        return;
+
+    float result = compute_square(input[globalIdx.x]);
+
+    output[globalIdx.x] = result;
+}
+```
+
+To manually invoke this kernel, we then need to write a CPU (host) function that defines how this kernel is dispatched. This can be defined in the same Slang file:
+
+```csharp
+[TorchEntryPoint]
+TorchTensor<float> square(TorchTensor<float> input)
+{
+    var result = TorchTensor<float>.zerosLike(input);
+    let blockCount = uint3(1);
+    let groupSize = uint3(result.size(0), result.size(1), 1);
+    __dispatch_kernel(square_kernel, blockCount, groupSize)(input, result);
+    return result;
+}
+```
+
+Here, we mark the function with the `[TorchEntryPoint]` attribute, so it will be compiled to C++ and exported as a python callable. 
+Since this is a host function, we can perform tensor allocations. For instance, `square()` calls `TorchTensor<float>.zerosLike` to allocate a 2D-tensor that has the same size as the input.
+`zerosLike` returns a `TorchTensor<float>` object that represents a CPU handle of a PyTorch tensor.
+
+Then we launch `square_kernel` with the `__dispatch_kernel` syntax. Note that we can directly pass
+`TorchTensor<float>` arguments to a `TensorView<float>` parameter and the compiler will automatically convert the type and obtain a view into the tensor that can be accessed by the GPU kernel function.
+
+### Calling a `[TorchEntryPoint]` function from Python
+
+You can use the following code to call `square` from Python:
+
+```python
+import torch
+import slangtorch
+
+m = slangtorch.loadModule("square.slang")
+
+x = torch.randn(2,2)
+print(f"X = {x}")
+y = m.square(x)
+print(f"Y = {y.cpu()}")
+```
+
+Result output:
+```
+X = tensor([[ 0.1407,  0.6594],
+        [-0.8978, -1.7230]])
+Y = tensor([[0.0198, 0.4349],
+        [0.8060, 2.9688]])
+```
+
+### Manual binding for kernel derivatives
+
+The above example demonstrates how to write a simple kernel function in Slang and call it from Python.
+Another major benefit of using Slang is that the Slang compiler supports generating backward derivative
+propagation functions automatically.
+
+In the following section, we walk through how to use Slang to generate a backward propagation function
+for `square`, and expose it to PyTorch as an autograd function.
+
+First we need to tell the Slang compiler that we need the `square` function to be considered a differentiable function, so that the Slang compiler can generate a backward derivative propagation function for it:
+```csharp
+[Differentiable]
+float square(float x)
+{
+    return x * x;
+}
+```
+This is done by simply adding a `[Differentiable]` attribute to our `square` function.
+
+With that, we can now define `square_bwd_kernel` that performs backward propagation as:
+
+```csharp
+[CudaKernel]
+void square_bwd_kernel(TensorView<float> input, TensorView<float> grad_out, TensorView<float> grad_propagated)
+{
+    uint3 globalIdx = cudaBlockIdx() * cudaBlockDim() + cudaThreadIdx();
+
+    if (globalIdx.x >= input.size(0) || globalIdx.y >= input.size(1))
+        return;
+
+    DifferentialPair<float> dpInput = diffPair(input[globalIdx.xy]);
+    var gradInElem = grad_out[globalIdx.xy];
+    bwd_diff(square)(dpInput, gradInElem);
+    grad_propagated[globalIdx.xy] = dpInput.d;
+}
+```
+
+Note that the function follows the same structure as `square_kernel`, with the only difference being that
+instead of calling into `square` to compute the forward value for each tensor element, we are calling `bwd_diff(square)`
+that represents the automatically generated backward propagation function of `square`.
+`bwd_diff(square)` will have the following signature:
+```csharp
+void bwd_diff_square(inout DifferentialPair<float> dpInput, float dOut);
+```
+
+Where the first parameter, `dpInput` represents a pair of original and derivative value for `input`, and the second parameter,
+`dOut`, represents the initial derivative with regard to some latent variable that we wish to back-prop through. The resulting
+derivative will be stored in `dpInput.d`. For example:
+
+```csharp
+// construct a pair where the primal value is 3, and derivative value is 0.
+var dp = diffPair(3.0);
+bwd_diff(square)(dp, 1.0);
+// dp.d is now 6.0
+```
+
+Similar to the host-side `square` function shown earlier, we can define the host side function `square_bwd` as:
+
+```csharp
+[TorchEntryPoint]
+TorchTensor<float> square_bwd(TorchTensor<float> input, TorchTensor<float> grad_out)
+{
+    var grad_propagated = TorchTensor<float>.zerosLike(input);
+    let blockCount = uint3(1);
+    let groupSize = uint3(input.size(0), input.size(1), 1);
+    __dispatch_kernel(square_bwd_kernel, blockCount, groupSize)(input, grad_out, grad_propagated);
+    return grad_propagated;
+}
+```
+
+## Builtin Library Support for PyTorch Interop
+
+As shown in the previous sections, Slang defines the `TorchTensor<T>` and `TensorView<T>` types for interop with PyTorch
+tensors. The `TorchTensor<T>` represents the CPU view of a tensor and provides methods to allocate a new tensor object.
+The `TensorView<T>` represents the GPU view of a tensor and provides accessors to read and write tensor data.
+
+The following is a list of built-in methods and attributes for PyTorch interop.
+
+### `TorchTensor` methods
+
+#### `static TorchTensor<T> TorchTensor<T>.alloc(uint x, uint y, ...)`
+Allocates a new PyTorch tensor with the given dimensions. If `T` is a vector type, the length of the vector is implicitly included as the last dimension.
+For example, `TorchTensor<float3>.alloc(4, 4)` allocates a 3D tensor of size `(4,4,3)`.
+
+#### `static TorchTensor<T> TorchTensor<T>.emptyLike(TorchTensor<T> other)`
+Allocates a new PyTorch tensor that has the same dimensions as `other` without initializing it.
+
+#### `static TorchTensor<T> TorchTensor<T>.zerosLike(TorchTensor<T> other)`
+Allocates a new PyTorch tensor that has the same dimensions as `other` and initializes it to zero.
+
+#### `uint TorchTensor<T>.dims()`
+Returns the tensor's dimension count.
+
+#### `uint TorchTensor<T>.size(int dim)`
+Returns the tensor's size (in number of elements) at `dim`.
+
+#### `uint TorchTensor<T>.stride(int dim)`
+Returns the tensor's stride (in bytes) at `dim`.
+
+### `TensorView` methods
+
+#### `TensorView<T>.operator[uint x, uint y, ...]`
+Provide an accessor to data content in a tensor.
+
+#### `TensorView<T>.operator[vector<uint, N> index]`
+Provide an accessor to data content in a tensor, indexed by a uint vector.
+`tensor[uint3(1,2,3)]` is equivalent to `tensor[1,2,3]`.
+
+#### `uint TensorView<T>.dims()`
+Returns the tensor's dimension count.
+
+#### `uint TensorView<T>.size(int dim)`
+Returns the tensor's size (in number of elements) at `dim`.
+
+#### `uint TensorView<T>.stride(int dim)`
+Returns the tensor's stride (in bytes) at `dim`.
+
+#### `void TensorView<T>.fillZero()`
+Fills the tensor with zeros. Modifies the tensor in-place.
+
+#### `void TensorView<T>.fillValue(T value)`
+Fills the tensor with the specified value. Modifies the tensor in-place.
+
+#### `T* TensorView<T>.data_ptr_at(vector<uint, N> index)`
+Returns a pointer to the element at `index`.
+
+#### `void TensorView<T>.InterlockedAdd(vector<uint, N> index, T val, out T oldVal)`
+Atomically add `val` to element at `index`. 
+
+#### `void TensorView<T>.InterlockedMin(vector<uint, N> index, T val, out T oldVal)`
+Atomically computes the min of `val` and the element at `index`. Available for 32 and 64 bit integer types only.
+
+#### `void TensorView<T>.InterlockedMax(vector<uint, N> index, T val, out T oldVal)`
+Atomically computes the max of `val` and the element at `index`. Available for 32 and 64 bit integer types only.
+
+#### `void TensorView<T>.InterlockedAnd(vector<uint, N> index, T val, out T oldVal)`
+Atomically computes the bitwise and of `val` and the element at `index`. Available for 32 and 64 bit integer types only.
+
+#### `void TensorView<T>.InterlockedOr(vector<uint, N> index, T val, out T oldVal)`
+Atomically computes the bitwise or  of `val` and the element at `index`. Available for 32 and 64 bit integer types only.
+
+#### `void TensorView<T>.InterlockedXor(vector<uint, N> index, T val, out T oldVal)`
+Atomically computes the bitwise xor  of `val` and the element at `index`. Available for 32 and 64 bit integer types only.
+
+#### `void TensorView<T>.InterlockedExchange(vector<uint, N> index, T val, out T oldVal)`
+Atomically swaps `val` into the element at `index`. Available for `float` and 32/64 bit integer types only.
+
+#### `void TensorView<T>.InterlockedCompareExchange(vector<uint, N> index, T compare, T val)`
+Atomically swaps `val` into the element at `index` if the element equals `compare`. Available for `float` and 32/64 bit integer types only.
+
+### `DiffTensorView` methods
+
+#### `DiffTensorView.operator[uint x, uint y, ...]`
+Provide an accessor to data content in a tensor. This method is **differentiable**, and has the same semantics as using a `.load()` to get data, and `.store()` to set data.
+
+#### `DiffTensorView.operator[vector<uint, N> index]`
+Provide an accessor to data content in a tensor, indexed by a uint vector. `tensor[uint3(1,2,3)]` is equivalent to `tensor[1,2,3]`. This method is **differentiable**, and has the same semantics as using a `.load()` to get data, and `.store()` to set data.
+
+#### `float DiffTensorView.load(vector<uint, N> index)`
+Loads the 32-bit floating point data at the specified multi-dimensional `index`. This method is **differentiable**, and in reverse-mode will perform an atomic-add.
+
+#### `void DiffTensorView.store(vector<uint, N> index, float val)`
+Stores the 32-bit floating point value `val` at the specified multi-dimensional `index`. This method is **differentiable**, and in reverse-mode will perform an *atomic exchange* to retrieve the derivative and replace with 0.
+
+#### `float DiffTensorView.loadOnce(vector<uint, N> index)`
+Loads the 32-bit floating point data at the specified multi-dimensional `index`. This method is **differentiable**, and uses a simple `store` for the reverse-mode for faster gradient aggregation, but `loadOnce` **must** be used at most once per index. `loadOnce` is ideal for situations where each thread loads data from a unique index, but will cause incorrect gradients when an index may be accessed multiple times.
+
+#### `void DiffTensorView.storeOnce(vector<uint, N> index, float val)`
+Stores the 32-bit floating point value `val` at the specified multi-dimensional `index`. This method is **differentiable**, and uses a simple `load` for the reverse-mode for faster gradient loading, but `storeOnce` **must** be used at most once per index. `storeOnce` is ideal for situations where each thread stores data to a unique index, but will cause incorrect gradient propagation when an index may be accessed multiple times.
+
+#### `uint DiffTensorView.size(int dim)`
+Returns the underlying primal tensor's size (in number of elements) at `dim`.
+
+#### `uint DiffTensorView.dims()`
+Returns the underlying primal tensor's dimension count.
+
+#### `uint DiffTensorView.stride(uint dim)`
+Returns the stride of the underlying primal tensor's `dim` dimension
+
+### CUDA Support Functions
+
+#### `cudaThreadIdx()`
+Returns the `threadIdx` variable in CUDA.
+
+#### `cudaBlockIdx()`
+Returns the `blockIdx` variable in CUDA.
+
+#### `cudaBlockDim()`
+Returns the `blockDim` variable in CUDA.
+
+#### `syncTorchCudaStream()`
+Waits for all pending CUDA kernel executions to complete on host.
+
+### Attributes for PyTorch Interop
+
+#### `[CudaKernel]` attribute
+Marks a function as a CUDA kernel (maps to a `__global__` function)
+
+#### `[TorchEntryPoint]` attribute
+Marks a function for export to Python. Functions marked with `[TorchEntryPoint]` will be accessible from a loaded module returned by `slangtorch.loadModule`.
+
+#### `[CudaDeviceExport]` attribute
+Marks a function as a CUDA device function, and ensures that the compiler includes it in the generated CUDA source.
+
+#### `[AutoPyBindCUDA]` attribute
+Marks a cuda kernel for automatic binding generation so that it may be invoked from python without having to hand-code the torch entry point. The marked function **must** also be marked with `[CudaKernel]`. If the marked function is also marked with `[Differentiable]`, this will also generate bindings for the derivative methods.
+
+Restriction: methods marked with `[AutoPyBindCUDA]` will not operate 
+
+## Type Marshalling Between Slang and Python
+
+
+### Python-CUDA type marshalling for functions using `[AutoPyBindCUDA]` 
+
+When using auto-binding, aggregate types like structs are converted to Python `namedtuples` and are made available when using `slangtorch.loadModule`. 
+
+```csharp
+// mesh.slang
+struct Mesh
+{
+    TensorView<float> vertices;
+    TensorView<int> indices;
+};
+
+[AutoPyBindCUDA]
+[CUDAKernel]
+void processMesh(Mesh mesh)
+{
+    /* ... */ 
+}
+```
+
+Here, since `Mesh` is being used by `processMesh`, the loaded module will provide `Mesh` as a python `namedtuple` with named fields.
+While using the `namedtuple` is the best way to use structured arguments, they can also be passed as a python `dict` or `tuple`.
+
+```python
+m = slangtorch.loadModule('mesh.slang')
+
+vertices = torch.tensor()
+indices = torch.tensor()
+
+# use namedtuple to provide structured input.
+mesh = m.Mesh(vertices=vertices, indices=indices)
+m.processMesh(mesh=mesh).launchRaw(blockSize=(32, 32, 1), gridSize=(1, 1, 1))
+
+# use dict to provide input.
+mesh = {'vertices': vertices, 'indices':indices}
+m.processMesh(mesh=mesh).launchRaw(blockSize=(32, 32, 1), gridSize=(1, 1, 1))
+
+# use tuple to provide input (warning: user responsible for right order)
+mesh = (vertices, indices)
+m.processMesh(mesh=mesh).launchRaw(blockSize=(32, 32, 1), gridSize=(1, 1, 1))
+```
+
+
+### Python-CUDA type marshalling for functions using `[TorchEntryPoint]`
+
+The return types and parameter types of an exported `[TorchEntryPoint]` function can be a basic type (e.g. `float`, `int` etc.), a vector type (e.g. `float3`), a `TorchTensor<T>` type, an array type, or a struct type.
+
+When you use struct or array types in the function signature, they will be exposed as Python tuples.
+For example,
+```csharp
+struct MyReturnType
+{
+    TorchTensor<T> tensors[3];
+    float v;
+}
+
+[TorchEntryPoint]
+MyReturnType myFunc()
+{
+    ...
+}
+```
+
+Calling `myFunc` from python will result in a python tuple in the form of
+```
+[[tensor, tensor, tensor], float]
+```
+
+The same transform rules apply to parameter types.
diff --git a/external/slang/share/doc/slang/design/README.md b/external/slang/share/doc/slang/design/README.md
new file mode 100644
index 00000000..58e1e39a
--- /dev/null
+++ b/external/slang/share/doc/slang/design/README.md
@@ -0,0 +1,25 @@
+Slang Design and Implementation Notes
+=====================================
+
+This directory contains documents that are primarily intended for developers working on the Slang implementation.
+They are not intended to be helpful to Slang users.
+
+These documents can only be trusted to reflect the state of the codebase or the plans of their authors at the time they were written. Changes to the implementation are not expected to always come with matching changes to these documents, so some amount of drift is to be expected.
+
+Developers interested in contributing to Slang might want to start with the [Overview](overview.md) document, which describes the overall compilation pipeline that Slang uses and the purpose of the various steps (both implemented and planned).
+
+The [Coding Conventions](coding-conventions.md) document describes the conventions that should be followed in all code added to the Slang project.
+
+The [Interfaces](interfaces.md) document describes the high-level design plan for Slang's interfaces and generics features.
+
+The [Declaration References](decl-refs.md) document is intended to help out developers who are mystified by the heavily used `DeclRef` type in the compiler implementation.
+
+The [Intermediate Representation (IR)](ir.md) document describes the design of Slang's internal IR.
+
+The [Existential Types](existential-types.md) document goes into some detail about what "existential types" are in the context of the Slang language, and explains how we may go about supporting them.
+
+The [Capabilities](capabilities.md) document explains the proposed model for how Slang will support general notions of profile- or capability-based overloading/dispatch.
+
+The [Casting](casting.md) document explains how casting works in the slang C++ compiler code base.
+
+The [Experimental API Interfaces](experimental.md) document explains how experimental Slang API changes are to be deployed.
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/design/autodiff.md b/external/slang/share/doc/slang/design/autodiff.md
new file mode 100644
index 00000000..8bf26baa
--- /dev/null
+++ b/external/slang/share/doc/slang/design/autodiff.md
@@ -0,0 +1,333 @@
+Reverse Mode Autodiff (Out of Date)
+==================================
+
+
+This document serves as a design reference for reverse-mode auto-diff in the Slang compiler.
+
+## Reverse-Mode Passes
+
+Rather than implementing reverse-mode as a separate pass, Slang implements this as a series of independent passes:
+
+If a function needs a reverse-mode version generated:
+ - *Linearize* the function, and all dependencies.
+ - *Propagate* differential types through the linearized code.
+ - *Unzip* by moving primal insts to before differential insts.
+ - *Transpose* the differential insts.
+
+
+## Linearization (Forward-mode)
+
+### Overview
+(This is an incomplete section. More details coming soon)
+
+Consider an arbitrary function `float f(float a, float b, float c, ..., z)` which takes in N inputs and generates one output `y`. Linearization aims to generate the first-order Taylor expansion of f about _all_ of its inputs.
+
+Mathematically, the forward derivative `fwd_f` represents `df/da * (a_0 - a)  + df/db * (b_0 - b) + ...`, where `a_0` is the value at which the Taylor expansion was produced. The quantity `a_0 - a` is known as the 'differential' (for brevity we'll denote them da, db, dc, etc..), and there is at-most one differential per input.
+
+Thus, the new function's signature should be `fwd_f(float a, float da, float b, float db, float c, float dc, ...)`. For simplicity, we'll use *pairs* instead of interleaving the original and differential parameters. We use the intrinsic `DifferentialPair<T>` (or for short: `DP<T>`) to denote this.
+
+The signature we use is then `fwd_f(DP<float> a, DP<float> b, DP<float> c)`
+
+An example of linearization:
+```C
+
+float f(float a, float b)
+{
+    if (a > 0)
+    {
+        return a + b + 2.0 * a * b;
+    }
+    else
+    {
+        return sqrt(a);
+    }
+}
+```
+
+We'll write out the SSA form of this function.
+
+```C
+float f_SSA(float a, float b)
+{
+    bool _b1 = a > 0;
+    if (_b1)
+    {
+        float _t1 = a + b;
+        float _t2 = 2.0 * a;
+        float _t3 = _t2 * b;
+        float _t4 = _t1 + _t3;
+
+        return _t4;
+    }
+    else
+    {
+        float _t1 = sqrt(a);
+        return _t1;
+    }
+}
+
+DP<float> f_SSA(DP<float> dpa, DP<float> dpb)
+{
+
+    bool _b1 = dpa.p > 0;
+    if (_b1)
+    {
+        float _t1 = dpa.p + dpb.p;
+        float _t1_d = dpa.d + dpb.d;
+
+        float _t2 = 2.0 * dpa.p;
+        float _t2_d = 0.0 * dpa.p + 2.0 * dpa.d;
+
+        float _t3 = _t2 * dpb.p;
+        float _t3_d = _t2_d * dpb.p + _t2 * dpb.d;
+
+        float _t4 = _t1 + _t3;
+        float _t4_d = _t1_d + _t3_d;
+
+        return DP<float>(_t4, _t4_d);
+    }
+    else
+    {
+        DP<float> _t1_dp = sqrt_fwd(dpa);
+        return DP<float>(_t1_dp.p, _t1_dp.d);
+    }
+}
+
+```
+
+In the result, the primal part of the pair holds the original computation, while the differential part computes the dot product of the differentials with the derivatives of the function's output w.r.t each input. 
+
+
+## Propagation
+
+This step takes a linearized function and propagates information about which instructions are computing a differential and which ones are part of the primal (original) computation.
+
+Assuming first-order differentiation only:
+The approach will be to mark any instructions that extract the differential from the differential pair as a differential. Then any instruction that uses the differential is itself marked as a differential and so on. The only exception is the call instruction which is either non-differentiable (do nothing) or differentiable and returns a pair (follow the same process)
+
+
+Here's the above example with propagated type information (we use float.D to denote intermediaries that have been marked as differential, and also expand everything so that each line has a single operation)
+
+```C
+
+DP<float> f_SSA_Proped(DP<float> dpa, DP<float> dpb)
+{
+    bool _b1 = dpa.p > 0;
+    if (_b1)
+    {
+        float _t1 = dpa.p + dpb.p;
+        
+        float.D _q1_d = dpa.d;
+        float.D _q2_d = dpb.d;
+
+        float.D _t1_d = _q1_d + _q2_d;
+
+        float _t2 = 2.0 * dpa.p;
+        
+        float.D _q2_d = dpa.d;
+        float.D _q3_d = 2.0 * dpa.d;
+
+        float _q4 = dpa.p;
+        float.D _q4_d = 0.0 * dpa.p;
+
+        float.D _t2_d = _q4_d + _q3_d;
+
+        float _t3 = _t2 * dpb.p;
+
+        float _q5 = dpb.p;
+        float.D _q6_d = _q5 * _t2_d;
+
+        float.D _q7_d = dpb.d;
+        float.D _q8_d = _t2 * _q7_d
+
+        float _t3_d = _q6_d + _q8_d;
+
+        float _t4 = _t1 + _t3;
+
+        float.D _t4_d = _t1_d + _t3_d;
+
+        return DP<float>(_t4, _t4_d);
+    }
+    else
+    {
+        DP<float> _t1_dp = sqrt_fwd(dpa);
+
+        float _q1 = _t1_dp.p;
+        float.D _q1_d = _t1_dp.d;
+
+        return DP<float>(_q1, _q1_d);
+    }
+}
+
+```
+
+## Unzipping
+
+
+This is a fairly simple process when there is no control flow. We simply move all non-differential instructions to before the first differential instruction.
+
+When there is control flow, we need to be a bit more careful: the key is to *replicate* the control flow graph once for primal and once for the differential.
+
+Here's the previous example unzipped:
+
+
+```C
+
+DP<float> f_SSA_Proped(DP<float> dpa, DP<float> dpb)
+{
+    bool _b1 = dpa.p > 0;
+
+    float _t1, _t2, _q4, _t3, _q5, _t3_d, _t4, _q1;
+
+    if (_b1)
+    {
+        _t1 = dpa.p + dpb.p;
+        
+        _t2 = 2.0 * dpa.p;
+        
+        _q4 = dpa.p;
+        
+        _t3 = _t2 * dpb.p;
+
+        _q5 = dpb.p;
+
+        _t4 = _t1 + _t3;
+
+    }
+    else
+    {
+
+        _q1 = sqrt_fwd(DP<float>(dpa.p, 0.0));
+    }
+
+    // Note here that we have to 'store' all the intermediaries 
+    // _t1, _t2, _q4, _t3, _q5, _t3_d, _t4 and _q1. This is fundamentally
+    // the tradeoff between fwd_mode and rev_mode
+
+    if (_b1)
+    {
+        float.D _q1_d = dpa.d;
+        float.D _q2_d = dpb.d;
+
+        float.D _t1_d = _q1_d + _q2_d;
+
+        float.D _q2_d = dpa.d;
+        float.D _q3_d = 2.0 * dpa.d;
+
+        float.D _q4_d = 0.0 * dpa.p;
+
+        float.D _t2_d = _q4_d + _q3_d;
+
+        float.D _q6_d = _q5 * _t2_d;
+
+        float.D _q7_d = dpb.d;
+        float.D _q8_d = _t2 * _q7_d
+
+        float.D _t3_d = _q6_d + _q8_d;
+
+        float.D _t4_d = _t1_d + _t3_d;
+
+        return DP<float>(_t4, _t4_d);
+    }
+    else
+    {
+        DP<float> _t1_dp = sqrt_fwd(dpa);
+
+        float.D _q1_d = _t1_dp.d;
+
+        return DP<float>(_q1, _q1_d);
+    }
+}
+
+```
+
+## Transposition
+
+### Overview
+
+This transposition pass _assumes_ that the provided function is linear in its differentials.
+It is out of scope of this project to attempt to enforce that constraint for user-defined differential code.
+
+For transposition we walk all differential instructions in reverse starting from the return statement, and apply the following rules:
+
+We'll have an accumulator dictionary `Dictionary<IRInst, IRInst> accMap` holding assignments for
+intermediaries which don't have concrete variables. When we add a pair (A, C) and (A, B) already exists, this will form the pair (A, ADD(C, B)) in the dictionary. (ADD will be replaced with a call to `T.dadd` for a generic type T)
+
+ - If `inst` is a `RETURN(A)`, add pair `(A, d_out)` to `accMap`
+ - If an instruction is `MUL(P, D)` where D is the differential, add pair `(D, MUL(P, accMap[this_inst]))` to `accMap`
+ - If an instruction is `ADD(D1, D2)`, where both D1 and D2 are differentials (this is the only config that should occur), then add pair `(D1, accMap[this_inst])` to `accMap`
+ - If an instruction is `CALL(f_fwd, (P1, D1), (P2, D2), ...)`, create variables D1v, D2v, ... for D1, D2, ..., then replace with `CALL(f_rev, (P1, D1v), (P2, D2v), ..., accMap[this_inst])`, and finally add pairs `(D1, LOAD[D1v]), (D2, LOAD[D2v]), ...` to `accMap`
+
+ ```C
+
+void f_SSA_Rev(inout DP<float> dpa, inout DP<float> dpb, float d_out)
+{
+    bool _b1 = dpa.p > 0;
+
+    float _t1, _t2, _q4, _t3, _q5, _t3_d, _t4, _q1;
+
+    if (_b1)
+    {
+        _t1 = dpa.p + dpb.p;
+        
+        _t2 = 2.0 * dpa.p;
+        
+        _q4 = dpa.p;
+        
+        _t3 = _t2 * dpb.p;
+
+        _q5 = dpb.p;
+
+        _t4 = _t1 + _t3;
+
+    }
+    else
+    {
+
+        _q1 = sqrt_fwd(DP<float>(dpa.p, 0.0));
+    }
+
+    // Note here that we have to 'store' all the intermediaries 
+    // _t1, _t2, _q4, _t3, _q5, _t3_d, _t4 and _q1. This is fundamentally
+    // the tradeoff between fwd_mode and rev_mode
+
+    if (_b1)
+    {
+
+        float.D _t4_rev = d_out;
+
+        float.D _t1_rev = _t4_rev;
+        float.D _t3_rev = _t4_rev;
+
+        float.D _q8_rev = _t3_rev;
+        float.D _q6_rev = _t3_rev;
+
+        float.D _q7_rev = _t2 * _q8_rev;
+
+        dpb.d += _q7_rev;
+
+        float.D _t2_rev = _q5 * _q6_rev;
+
+        float.D _q4_rev = _t2_rev;
+        float.D _q3_rev = _t2_rev;
+
+        dpa.d += 2.0 * _q3_rev;
+
+        float.D _q1_rev = _t1_rev;
+        float.D _q2_rev = _t1_rev;
+
+        dpb.d += _q2_rev;
+        dpa.d += _q1_rev;
+    }
+    else
+    {
+        _q1_rev = d_out;
+
+        DP<float> dpa_copy;
+        sqrt_rev(dpa_copy, _q1_rev);
+
+        dpa.d += dpa_copy.d;
+    }
+}
+
+```
diff --git a/external/slang/share/doc/slang/design/autodiff/basics.md b/external/slang/share/doc/slang/design/autodiff/basics.md
new file mode 100644
index 00000000..43ed164a
--- /dev/null
+++ b/external/slang/share/doc/slang/design/autodiff/basics.md
@@ -0,0 +1,396 @@
+<!--The goal of this set of documents is to describe the design of Slang's automatic differentiation passes, along with the mechanisms & passes used to support various features. -->
+
+This documentation is intended for Slang contributors and is written from a compiler engineering point of view. For Slang users, see the user-guide at this link: [https://shader-slang.com/slang/user-guide/autodiff.html](https://shader-slang.com/slang/user-guide/autodiff.html)
+
+## What is Automatic Differentiation?
+
+Before diving into the design of the automatic differentiation (for brevity, we will call it 'auto-diff') passes, it is important to understand the end goal of what auto-diff tries to achieve.
+
+The over-arching goal of Slang's auto-diff is to enable the user to compute derivatives of a given shader program or function's output w.r.t its input parameters. This critical compiler feature enables users to quickly use their shaders with gradient-based parameter optimization algorithms, which forms the backbone of modern machine learning systems. It enables users to train and deploy graphics systems that contain ML primitives (like multi-layer perceptrons, or MLPs) or use their shader programs as differentiable primitives within larger ML pipelines.
+
+### More Resources
+Here are some links to resources that talk more about differentiable programming from a more mathematical perspective:
+1. UCSD CSE 291 (Spring 2024): https://cseweb.ucsd.edu/~tzli/cse291/sp2024/
+2. UW CSE 599O (Winter 2024): https://sites.google.com/cs.washington.edu/cse-599o-dppl
+
+## Definition of Derivatives
+
+This section is based off of these slides: https://cseweb.ucsd.edu/~tzli/cse291/sp2024/lectures/03_forward_mode.pdf.
+
+Here, we establish the mathematical definition of derivatives, starting with a simple 1D case (function with a single input and output), and extending to the general case of functions mapping multiple inputs to multiple outputs.
+
+To avoid confusion, we will denote mathematical functions using LaTeX italic script ($f$, $g$, etc..) and programs that compute these functions with markdown code (`f`, `g`, etc..)
+
+### Derivatives of scalar (1D) functions
+
+Consider the simplest case: a smooth scalar mathematical function that maps a real number to another real number:
+
+$$f : \mathbb{R} \to \mathbb{R}$$
+
+There are several definitions for a derivative, but we will use the definition that a derivative is the *closest linear approximation* of the output function at a given input location. 
+Concretely, given a specific input $x$, we can create a linear approximation of the function $f$ around $x$ as follows:
+
+$$ f(x + dx) \approx f(x) + Df(x) \cdot dx $$
+<!--// TODO: Add image here.-->
+
+This can also be understood as a geometric 'tangent' to the function at $x$. $Df(x)$ is the slope of $f$ at $x$, i.e. $\frac{\partial f}{\partial x}$, and $dx$ is the perturbation away from $x$. Our approximation is linear as a function of the perturbation $dx$. Note that no matter how non-linear or complex the underlying function $f(x)$ is, the approximation is always linear (this property becomes very important later).
+
+### Forward-mode derivative functions
+
+Now consider a concrete program `f` that computes some function.
+
+```C
+// Computes square of x
+float f(float x)
+{
+    return x * x;
+}
+```
+
+What should its derivative program look like? We need the output $f(x)$ and the product of the derivative at $x$, $Df(x)$, with the differential $dx$.
+
+In Slang, we put both of these together into a single function, called the *forward-mode derivative* function, which takes in a pair $(x, dx)$ and returns a pair $(f(x), Df(x)\cdot dx)$. Note that in auto-diff literature, this is also often referred to as the *total derivative* function.
+
+```C
+DifferentialPair<float> fwd_f(DifferentialPair<float> dpx)
+{
+    float x = dpx.getPrimal(); // Can also be accessed via property dpx.p
+    float dx = dpx.getDifferential(); // Can also be accessed via property dpx.d
+    return makePair(x * x, (2 * x) * dx);
+}
+```
+
+Note that `(2 * x)` is the multiplier corresponding to $Df(x)$. We refer to $x$ and $f(x)$ as "*primal*" values and the perturbations $dx$ and $Df(x)\cdot dx$ as "*differential*" values. The reason for this separation is that the "*differential*" output values are always linear w.r.t their "*differential*" inputs.
+
+As the name implies, `DifferentialPair<T>` is a special pair type used by Slang to hold values and their corresponding differentials.
+
+
+### Forward-mode derivatives for higher-dimensional functions
+In practice, most functions tend to have multiple inputs and multiple outputs, i.e. $f: \mathbb{R}^N \to \mathbb{R}^M$
+
+The definition above can be extended to higher dimensions, using the closest-linear-approximation idea. The main difference is that the derivative function represents a hyperplane rather than a line.
+
+Effectively, we want our forward-mode derivative to compute the following:
+
+$$ f(\mathbf{x} + \mathbf{dx}) \approx f(\mathbf{x}) + \langle Df(\mathbf{x}),\mathbf{dx}\rangle $$
+
+Here, the input and its differential can be represented as vector quantities $\mathbf{x}, \mathbf{dx} \in \mathbb{R}^N$, the multiplier $Df(\mathbf{x})$ (also known as the *Jacobian* matrix) is an $M \times N$ matrix, and $\langle \cdot,\cdot \rangle$ denotes the inner product (i.e. matrix-vector multiplication).
+
+Here's an example of a Slang function taking in two inputs (N=2) and generating one output (M=1)
+
+```C
+// Compute length of hypotenuse.
+float f(float x, float y)
+{
+    return sqrt(x * x + y * y);
+}
+```
+
+and its forward-mode derivative:
+
+```C
+// Closest linear approximation at x, y
+DifferentialPair<float> fwd_f(DifferentialPair<float> dpx, DifferentialPair<float> dpy)
+{
+    float x = dpx.p;
+    float y = dpy.p;
+    float dx = dpx.d;
+    float dy = dpy.d;
+
+    return DifferentialPair<float>(
+        sqrt(x * x + y * y),                       // f(x, y)
+        (x * dx + y * dy) / sqrt(x * x + y * y));   // <Df(x,y), dx>
+}
+```
+
+Important note: the forward-mode function only needs to compute the inner product $\langle Df(\mathbf{x}),\mathbf{dx} \rangle$. The Jacobian matrix itself never needs to be fully materialized. This is a key design element of automatic differentiation, one which allows it to scale to huge input/output counts.
+
+### Building Blocks: Forward-mode derivatives compose in forward order of execution.
+
+In practice, we compute forward-mode derivatives of a complex function by decomposing them into constituent functions (or in compiler-speak: instructions) and composing the forward-mode derivative of each piece in the **same** order. 
+This is because each forward-mode derivative is a 'right-side' product (or product of Jacobian matrix with a vector).
+
+Here's an example of this in action (consider a complex function $h$ composed of $f$ and $g$):
+
+$$ h(\mathbf{x}) = f(g(\mathbf{x})) $$
+
+Its forward-mode derivative is then:
+
+$$ \langle Dh(\mathbf{x}), \mathbf{dx}\rangle = \big\langle Df(g(\mathbf{x})), \langle Dg(\mathbf{x}), \mathbf{dx}\rangle\big\rangle $$
+
+which is the forward-mode derivative of the outer function $f$ evaluated on the result of the forward-mode derivative of the inner function $g$. 
+
+An example of this in Slang code:
+```C
+// Compute square.
+float sqr(float x)
+{
+    return x * x;
+}
+
+// Compute length of hypotenuse.
+float f(float x, float y)
+{
+    float x_sqr = sqr(x);
+    float y_sqr = sqr(y);
+    return sqrt(x_sqr + y_sqr);
+}
+```
+
+The resulting derivative of `f` can be computed by composition:
+```C
+// Forward-mode derivative of sqr()
+DifferentialPair<float> fwd_sqr(DifferentialPair<float> dpx)
+{
+    float x = dpx.getPrimal();
+    float dx = dpx.getDifferential();
+
+    return DifferentialPair<float>(x * x, 2 * x * dx);
+}
+
+// Forward-mode derivative of f()
+DifferentialPair<float> fwd_f(DifferentialPair<float> dpx, DifferentialPair<float> dpy)
+{
+    DifferentialPair<float> dp_x_sqr = fwd_sqr(dpx);
+    DifferentialPair<float> dp_y_sqr = fwd_sqr(dpy);
+
+    float x_sqr = dp_x_sqr.getPrimal();
+    float y_sqr = dp_y_sqr.getPrimal();
+    float x_sqr_d = dp_x_sqr.getDifferential();
+    float y_sqr_d = dp_y_sqr.getDifferential();
+
+    return DifferentialPair<float>(
+        sqrt(x_sqr + y_sqr),
+        (x_sqr_d + y_sqr_d) / (2 * sqrt(x_sqr + y_sqr)));
+}
+```
+
+### Tip: Extracting partial derivatives from a forward-mode derivative (i.e. a 'total' derivative)
+
+As we discussed above, forward-mode derivatives compute $\langle Df(\mathbf{x}),\mathbf{dx}\rangle$ rather than what you may be used to seeing in a calculus course (e.g. partial derivatives like $\frac{\partial f}{\partial x}$).
+
+In fact, the forward-mode derivative is simply the sum of the partial derivatives w.r.t each input parameter, each multiplied by its differential perturbation: $\frac{\partial f}{\partial x} * dx + \frac{\partial f}{\partial y} * dy$. This is the reason for the alternative name: *total derivative*.
+
+Thus, partial derivatives can be obtained by successively setting each input's differential to 1 (and 0 for everything else).
+Example:
+```C
+// Compute partial derivative w.r.t x (pass dx=1.0)
+float df_dx = fwd_f(DifferentialPair<float>(x, 1.0), DifferentialPair<float>(y, 0.0)).d;
+
+// Compute partial derivative w.r.t y (pass dy=1.0)
+float df_dy = fwd_f(DifferentialPair<float>(x, 0.0), DifferentialPair<float>(y, 1.0)).d;
+```
+
+### Tip: Testing forward-mode derivatives using the first principles of calculus (i.e. the *finite difference* method)
+
+In Calculus, partial derivatives of a function are often defined in a 'black box' manner using limits, by perturbing a single parameter by an infinitesimal amount:
+
+$$ \frac{\partial f}{\partial x} = \lim_{dx\to 0} \frac{f(x + dx) - f(x - dx)}{2 * dx} $$
+
+At the moment, we cannot leverage programming languages to compute true infinitesimal limits, but we can replace $dx \to 0$ with a sufficiently small $\epsilon$ leading to the following 'test' to check if derivatives produced by automatic differentiation match with their true mathematical expected values.
+
+Here's an example of using this idea to test functions (many autodiff tests were written this way)
+
+```C
+// Compute partial derivative w.r.t x analytically
+float df_dx_ad = fwd_f(DifferentialPair<float>(x, 1.0), DifferentialPair<float>(y, 0.0)).d;
+
+// Compute partial derivative w.r.t x through the finite difference (FD) method.
+float eps = 1e-4;
+float df_dx_fd = (f(x + eps, y) - f(x - eps, y)) / (2 * eps);
+
+// If computed correctly, df_dx_ad and df_dx_fd are very close.
+```
+
+**Caveats:**
+Since the finite difference method only produces a biased estimate of the derivative, the result is only numerically *close* to the auto-diff-based result. Poorly behaved functions (those that rapidly change, or are discontinuous or otherwise non-differentiable) will result in a (expected) mismatch between FD and AD results.
+
+## Reverse-mode derivative functions
+
+This section is based off of these slides: https://cseweb.ucsd.edu/~tzli/cse291/sp2024/lectures/05_reverse_mode.pdf.
+
+### Motivation: Challenges with scaling forward-mode derivatives
+
+A big problem with forward-mode derivatives is their inability to scale to great parameter counts.
+
+Machine learning pipelines often compute derivatives of a large complex pipeline with millions or even billions of input parameters, but a single output value, i.e. the *loss* or *objective* function, frequently denoted by $\mathcal{L}$.
+Computing $\frac{\partial \mathcal{L}}{\partial x_i}$ for $N$ inputs $x_i$ using the one-hot vector approach will involve invoking the forward-mode derivative function $N$ times.
+
+The reason for this limitation is that forward-mode derivatives pass derivatives from the inputs through to the outputs by computing the dot-product $\langle Df(\mathbf{x}),\mathbf{dx}\rangle$.
+Instead, we employ a different approach called the reverse-mode derivative, which propagates differentials *backwards* from outputs to inputs.
+
+### Key Idea: Generate code to compute $\langle \frac{\partial \mathcal{L}}{\partial f}, Df(\mathbf{x})\rangle$ rather than $\langle Df(\mathbf{x}),\mathbf{dx}\rangle$
+
+The fundamental building block of reverse-mode derivatives is the **left-side inner product**. That is, the product of a vector of derivatives w.r.t the outputs $\frac{\partial \mathcal{L}}{\partial f}$ with the Jacobian matrix $Df(\mathbf{x})$.
+
+An important thing to keep in mind is that it does not necessarily matter what the scalar quantity $\mathcal{L}$ is. The goal of this product is to propagate the derivatives of any scalar value $\mathcal{L}$ w.r.t output vector $f(\mathbf{x})$ (i.e., $\frac{\partial \mathcal{L}}{\partial f}$) into derivatives of that same scalar value $\mathcal{L}$ w.r.t the input vector $\mathbf{x}$ (i.e., $\frac{\partial \mathcal{L}}{\partial \mathbf{x}}$).
+
+Here's an example of a Slang function computing the `reverse-mode derivative`.
+
+```C
+// Compute length of hypotenuse
+float f(float x, float y)
+{
+    return sqrt(x * x + y * y);
+}
+
+// Reverse-mode derivative of f. dOutput represents dL/dOutput, the derivative of some scalar value L w.r.t the output.
+void rev_f(inout DifferentialPair<float> dpx, inout DifferentialPair<float> dpy, float dOutput)
+{
+    float x = dpx.getPrimal();
+    float y = dpy.getPrimal();
+
+    float t = 1.0 / (sqrt(x * x + y * y));
+
+    dpx = DifferentialPair<float>(
+        x,                 // The primal part of the return value is *always* copied in from the input as-is.
+        dOutput * x * t);  // The differential part for x is the derivative dL/dx computed as 
+                           // (dL/dOutput) * (dOutput/dx), where dOutput/dx = x / sqrt(x*x+y*y).
+
+    dpy = DifferentialPair<float>(
+        y,                
+        dOutput * y * t);  // The differential part for y is the derivative dL/dy computed as 
+                           // (dL/dOutput) * (dOutput/dy), where dOutput/dy = y / sqrt(x*x+y*y).
+}
+```
+
+Note that `rev_f` accepts derivatives w.r.t the output value as the input, and returns derivatives w.r.t inputs as its output (through `inout` parameters). `rev_f` still needs the primal values `x` and `y` to compute the derivatives, so those are still passed in as an input through the primal part of the differential pair. 
+
+Also note that the reverse-mode derivative function does not have to compute the primal result value (its return is void). The reason for this is a matter of convenience: reverse-mode derivatives are often invoked after all the primal functions, and there is typically no need for these values. We go into more detail on this topic in the checkpointing chapter.
+
+The reverse mode function can be used to compute both `dOutput/dx` and `dOutput/dy` with a single invocation (unlike the forward-mode case where we had to invoke `fwd_f` once for each input)
+
+```C
+DifferentialPair<float> dpx = makePair<float>(x, 0.f); // Initialize diff-value to 0 (not necessary)
+DifferentialPair<float> dpy = makePair<float>(y, 0.f); // Initialize diff-value to 0 (not necessary)
+
+rev_f(dpx, dpy, 1.0); // Pass 1.0 for dL/dOutput so that the results are (1.0 * dOutput/dx) and (1.0 * dOutput/dy)
+
+float doutput_dx = dpx.getDifferential(); 
+float doutput_dy = dpy.getDifferential();
+```
+
+### Extension to multiple outputs
+The extension to multiple outputs is fairly natural. Each output gets a separate input for its derivative.
+Here is an example:
+```C
+// Computation involving multiple inputs and outputs.
+float2 f_multi_output(float x, float y)
+{
+    return float2(
+        x * x,
+        x + y);
+}
+
+// Reverse-mode derivative of 'f_multi_output'. The derivative of the outputs is also a vector quantity 
+// (type follows from return type of f_multi_output)
+void rev_f_multi_output(inout DifferentialPair<float> dpx, inout DifferentialPair<float> dpy, float2 dOut)
+{
+    float x = dpx.getPrimal();
+    float y = dpy.getPrimal();
+
+    dpx = DifferentialPair<float>(x, dOut[0] * 2 * x + dOut[1]);
+    dpy = DifferentialPair<float>(y, dOut[1]);
+}
+```
+
+### Jacobian method: Generate forward- and reverse-mode derivatives from first principles.
+A simple way to figure out what the generated reverse (or forward) derivative function is supposed to compute is to write down the entire Jacobian matrix. That is, write down the partial derivative of each output w.r.t each input:
+
+$$
+D\mathbf{f}(\mathbf{x}) = \begin{bmatrix} 
+\partial f_0 / \partial x & \partial f_0 / \partial y \\  
+\partial f_1 / \partial x & \partial f_1 / \partial y \\
+\end{bmatrix} = 
+\begin{bmatrix} 
+2x    & 0.0 \\  
+1.0   & 1.0 \\
+\end{bmatrix}
+$$
+
+The **reverse-mode derivative**'s outputs should match the left-product of this matrix with the vector of derivatives w.r.t outputs:
+
+$$ \left\langle \frac{\partial \mathcal{L}}{\partial \mathbf{f}}, D\mathbf{f}(\mathbf{x})\right\rangle  = 
+\begin{bmatrix}
+\frac{\partial \mathcal{L}}{\partial f_0} & \frac{\partial \mathcal{L}}{\partial f_1}
+\end{bmatrix}
+\begin{bmatrix} 
+2x    & 0.0 \\  
+1.0   & 1.0 \\
+\end{bmatrix} = 
+\begin{bmatrix} \left(\frac{\partial \mathcal{L}}{\partial f_0} \cdot 2x + \frac{\partial \mathcal{L}}{\partial f_1}\right) & \frac{\partial \mathcal{L}}{\partial f_1} \end{bmatrix}
+$$
+
+and the **forward-mode derivative**'s outputs should match the right-product of this matrix with the vector of differentials of the inputs:
+
+$$ \langle D\mathbf{f}(\mathbf{x}), d\mathbf{x}\rangle  = 
+\begin{bmatrix} 
+2x    & 0.0 \\  
+1.0   & 1.0 \\
+\end{bmatrix}
+\begin{bmatrix}
+dx \\ dy
+\end{bmatrix} = 
+\begin{bmatrix} 2x \cdot dx \\ dx + dy \end{bmatrix}
+$$
+
+Note that when we generate derivative code in practice, we do not materialize the full Jacobian matrix, and instead use the composition property to chain together derivatives at the instruction level. 
+However, the resulting code is equivalent to the Jacobian method (mathematically), and it is a good, analytical way to confirm that the generated code is indeed correct (or when thinking about what the derivative of a particular instruction/set of instructions should be)
+
+
+### Building Blocks: Reverse-mode derivatives compose in reverse order of execution.
+A consequence of using the 'left-side inner product' is that derivatives of a composite function must be computed in the reverse of the order of primal computation.
+
+Here's an example of a composite function $h$ (similar to the example used in forward-mode building blocks):
+
+$$ h(\mathbf{x}) = f(g(\mathbf{x})) $$
+
+where (for brevity):
+
+$$ \mathbf{y} = g(\mathbf{x}) $$
+
+The reverse-mode derivative function for $h$ can be written as the composition of the reverse-mode derivatives of $f$ and $g$
+
+$$ \left\langle \frac{\partial \mathcal{L}}{\partial h}, Dh(\mathbf{x})\right\rangle  = \left\langle \left\langle \frac{\partial \mathcal{L}}{\partial h}, Df(\mathbf{y})\right\rangle , Dg(\mathbf{x})\right\rangle $$
+
+Note the 'backward' order here. We must first pass the derivatives through the outer function $f$, and then pass the result through the inner function $g$ to compute derivatives w.r.t inner-most inputs $\mathbf{x}$. This process of passing derivatives backwards is often referred to as *backpropagation*.
+
+A more concrete Slang example of the same:
+
+```C
+// Compute square
+float sqr(float x)
+{
+    return x * x;
+}
+
+// Compute length of hypotenuse
+float f(float x, float y)
+{
+    return sqrt(sqr(x) + sqr(y));
+}
+```
+
+The derivative functions are then:
+```C
+void rev_sqr(inout DifferentialPair<float> dpx, float dOutput)
+{
+    float x = dpx.getPrimal();
+
+    dpx = DifferentialPair<float>(x, dOutput * 2 * x);
+}
+
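+void rev_f(inout DifferentialPair<float> dpx, inout DifferentialPair<float> dpy, float dOut)
+{
+    float x = dpx.getPrimal(), y = dpy.getPrimal(); // Primal inputs are read from the pairs
+    float t = 0.5f / sqrt(x * x + y * y);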
+    float d_xsqr = t * dOut; // Calculate derivatives w.r.t output of sqr(x)
+    float d_ysqr = t * dOut; // Calculate derivatives w.r.t output of sqr(y)
+
+    rev_sqr(dpx, d_xsqr); // Propagate to x
+    rev_sqr(dpy, d_ysqr); // Propagate to y
+}
+```
+
+When comparing `rev_f`'s implementation to `fwd_f`, note the order of computing derivative w.r.t `sqr` (in `rev_f`, `rev_sqr` is called at the end, while in `fwd_f` it is called at the beginning)
+
diff --git a/external/slang/share/doc/slang/design/autodiff/decorators.md b/external/slang/share/doc/slang/design/autodiff/decorators.md
new file mode 100644
index 00000000..27bf0e3d
--- /dev/null
+++ b/external/slang/share/doc/slang/design/autodiff/decorators.md
@@ -0,0 +1,92 @@
+This document details auto-diff-related decorations that are lowered into the IR to help annotate methods with relevant information.
+
+## `[Differentiable]`
+The `[Differentiable]` attribute is used to mark functions as being differentiable. The auto-diff process will only touch functions that are marked explicitly as `[Differentiable]`. All other functions are considered non-differentiable and calls to such functions from a differentiable function are simply copied as-is with no transformation.
+
+Further, only `[Differentiable]` methods are checked during the derivative data-flow pass. This decorator is translated into `BackwardDifferentiableAttribute` (which implies both forward and backward differentiability), and then lowered into the IR `OpBackwardDifferentiableDecoration`
+
+**Note:** `[Differentiable]` was previously implemented as two separate decorators `[ForwardDifferentiable]` and `[BackwardDifferentiable]` to denote differentiability with each type of auto-diff transformation. However, these are now **deprecated**. The preferred approach is to use only `[Differentiable]`
+
+`fwd_diff` and `bwd_diff` cannot be directly called on methods that don't have the `[Differentiable]` tag (will result in an error). If non-`[Differentiable]` methods are called from within a `[Differentiable]` method, they must be wrapped in `no_diff()` operation (enforced by the [derivative data-flow analysis pass](./types.md#derivative-data-flow-analysis) )
+
+### `[Differentiable]` for `interface` Requirements
+The `[Differentiable]` attribute can also be used to decorate interface requirements. In this case, the attribute is handled in a slightly different manner, since we do not have access to the concrete implementations.
+
+The process is roughly as follows:
+1. During the semantic checking step, when checking a method that is an interface requirement (in `checkCallableDeclCommon` in `slang-check-decl.cpp`), we check if the method has a `[Differentiable]` attribute
+2. If yes, we create a set of new method declarations, one for the forward-mode derivative (`ForwardDerivativeRequirementDecl`) and one for the reverse-mode derivative (`BackwardDerivativeRequirementDecl`), with the appropriate translated function types and insert them into the same interface.
+3. Insert a new member into the original method to reference the new declarations (`DerivativeRequirementReferenceDecl`)
+4. When lowering to IR, the `DerivativeRequirementReferenceDecl` member is converted into a custom derivative reference by adding the `OpBackwardDerivativeDecoration(deriv-fn-req-key)` and `OpForwardDerivativeDecoration(deriv-fn-req-key)` decorations on the primal method's requirement key.
+
+Here is an example of what this would look like:
+
+```C
+interface IFoo
+{
+    [Differentiable]
+    float bar(float);
+};
+
+// After checking & lowering
+interface IFoo_after_checking_and_lowering
+{
+    [BackwardDerivative(bar_bwd)]
+    [ForwardDerivative(bar_fwd)]
+    float bar(float);
+
+    void bar_bwd(inout DifferentialPair<float>, float);
+
+    DifferentialPair<float> bar_fwd(DifferentialPair<float>);
+};
+```
+
+**Note:** All conforming types must _also_ declare their corresponding implementations as differentiable so that their derivative implementations are synthesized to match the interface signature. In this sense, the `[Differentiable]` attribute is part of the function's signature, so a `[Differentiable]` interface requirement can only be satisfied by a `[Differentiable]` function implementation.
+
+### `[TreatAsDifferentiable]`
+In large codebases where some interfaces may have several possible implementations, it may not be reasonable to have to mark all possible implementations with `[Differentiable]`, especially if certain implementations use hacks or workarounds that need additional consideration before they can be marked `[Differentiable]`
+
+In such cases, we provide the `[TreatAsDifferentiable]` decoration (AST node: `TreatAsDifferentiableAttribute`, IR: `OpTreatAsDifferentiableDecoration`), which instructs the auto-diff passes to construct an 'empty' function that returns a 0 (or 0-equivalent) for the derivative values. This allows the signature of a `[TreatAsDifferentiable]` function to match a `[Differentiable]` requirement without actually having to produce a derivative.
+
+## Custom derivative decorators
+In many cases, it is desirable to manually specify the derivative code for a method rather than let the auto-diff pass synthesize it from the method body. This is usually desirable if:
+1. The body of the method is too complex, and there is a simpler, mathematically equivalent way to compute the same value (often the case for intrinsics like `sin(x)`, `arccos(x)`, etc..)
+2. The method involves global/shared memory accesses, and synthesized derivative code may cause race conditions or be very slow due to overuse of synchronization. For this reason Slang assumes global memory accesses are non-differentiable by default, and requires that the user (or the core module) define separate accessors with different derivative semantics.
+
+The Slang front-end provides two sets of decorators to facilitate this:
+1. To reference a custom derivative function from a primal function: `[ForwardDerivative(fn)]` and `[BackwardDerivative(fn)]` (AST Nodes: `ForwardDerivativeAttribute`/`BackwardDerivativeAttribute`, IR: `OpForwardDerivativeDecoration`/`OpBackwardDerivativeDecoration`), and
+2. To reference a primal function from its custom derivative function: `[ForwardDerivativeOf(fn)]` and `[BackwardDerivativeOf(fn)]` (AST Nodes: `ForwardDerivativeOfAttribute`/`BackwardDerivativeOfAttribute`). These attributes are useful to provide custom derivatives for existing methods in a different file without having to edit/change that module. For instance, we use `diff.meta.slang` to provide derivatives for the core module functions in `hlsl.meta.slang`. When lowering to IR, these references are placed on the target (primal function). That way both sets of decorations are lowered on the primal function.
+
+These decorators also work on generically defined methods, as well as struct methods. Similar to how function calls work, these decorators also work on overloaded methods (and reuse the `ResolveInvoke` infrastructure to perform resolution).
+
+### Checking custom derivative signatures
+To ensure that the user-provided derivatives agree with the expected signature, as well as resolve the appropriate method when multiple overloads are available, we check the signature of the custom derivative function against the translated version of the primal function. This currently occurs in `checkDerivativeAttribute()`/`checkDerivativeOfAttribute()`. 
+
+The checking process re-uses existing infrastructure from `ResolveInvoke`, by constructing a temporary invoke expr to call the user-provided derivative using a set of 'imaginary' arguments according to the translated type of the primal method. If `ResolveInvoke` is successful, the provided derivative signature is considered to be a match. This approach also automatically allows us to resolve overloaded methods, account for generic types and type coercion.
+
+## `[PrimalSubstitute(fn)]` and `[PrimalSubstituteOf(fn)]`
+In some cases, we face the opposite problem that inspired custom derivatives. That is, we want the compiler to auto-synthesize the derivative from the function body, but there _is_ no function body to translate.
+This frequently occurs with hardware intrinsic operations that are lowered into special op-codes that map to hardware units, such as texture sampling & interpolation operations. 
+However, these operations do have reference 'software' implementations which can be used to produce the derivative.
+
+To allow user code to use the fast hardware intrinsics for the primal pass, but use synthesized derivatives for the derivative pass, we provide decorators `[PrimalSubstitute(ref-fn)]` and `[PrimalSubstituteOf(orig-fn)]` (AST Node: `PrimalSubstituteAttribute`/`PrimalSubstituteOfAttribute`, IR: `OpPrimalSubstituteDecoration`), that can be used to provide a reference implementation for the auto-diff pass.
+
+Example:
+```C
+[PrimalSubstitute(sampleTexture_ref)]
+float sampleTexture(TexHandle2D tex, float2 uv)
+{
+    // Hardware intrinsics
+}
+
+float sampleTexture_ref(TexHandle2D tex, float2 uv)
+{
+    // Reference SW implementation.
+}
+
+void sampleTexture_bwd(TexHandle2D tex, inout DifferentialPair<float2> dp_uv, float dOut)
+{
+    // Backward derivative code synthesized using the reference implementation.
+}
+```
+
+The implementation of `[PrimalSubstitute(fn)]` is relatively straightforward. When the transcribers are asked to synthesize a derivative of a function, they check for a `OpPrimalSubstituteDecoration`, and swap the current function out for the substitute function before proceeding with derivative synthesis.
diff --git a/external/slang/share/doc/slang/design/autodiff/ir-overview.md b/external/slang/share/doc/slang/design/autodiff/ir-overview.md
new file mode 100644
index 00000000..83391e27
--- /dev/null
+++ b/external/slang/share/doc/slang/design/autodiff/ir-overview.md
@@ -0,0 +1,1462 @@
+This documentation is intended for Slang contributors and is written from a compiler engineering point of view. For Slang users, see the user-guide at this link: [https://shader-slang.com/slang/user-guide/autodiff.html](https://shader-slang.com/slang/user-guide/autodiff.html)
+
+# Overview of Automatic Differentiation's IR Passes
+In this document we will detail how Slang's auto-diff passes generate valid forward-mode and reverse-mode derivative functions. Refer to [Basics](./basics.md) for a review of the two derivative propagation methods and their mathematical connotations & [Types](./types.md) for a review of how types are handled under differentiation.
+
+## Auto-Diff Pass Invocation
+Note that without an explicit auto-diff instruction (`fwd_diff(fn)` or `bwd_diff(fn)`) from the user present anywhere in the code, none of the auto-diff passes will do anything. 
+
+Auto-diff processing operates on a function-by-function basis. Most of the logic is contained in `AutoDiffPass::processReferencedFunctions`. Here is a high-level workflow:
+1. Scanning reachable insts in the module looking for `IRForwardDifferentiate` or `IRBackwardDifferentiate` operations. These instructions are added onto a work-list. The subject of a differentiate inst may be a plain function (`IRFunc`), a specialize inst (`IRSpecialize(a : IRGeneric, ...)`) in case of a generic method, or a lookup inst (`IRLookupWitness(a : IRWitnessTableType)`) when differentiating a method of an interface.
+
+2. Dispatch each differentiation request through the appropriate 'transcriber' class. A transcriber (implements `AutodiffTranscriberBase`) is responsible for accepting a differentiation request and resolving it by replacing it with a generated function or a call to an already existing function that computes its derivative. 
+
+3. Once all currently available derivative insts have been dispatched, the follow-up work-list is checked for more transcription requests. This is a global list that all transcribers can add more follow-up work to. As an example, differentiating a function that calls another function will generate a follow-up task for this inner function, even though the latter never appears directly in a `IRForwardDifferentiate` or `IRBackwardDifferentiate` inst. 
+At this step, there are 2 other variants that can appear: `IRBackwardDifferentiatePrimal` and `IRBackwardDifferentiatePropagate` (though these can't be invoked by the user directly).
+
+4. This process from (1.) is run in a loop. This is because we can have nested differentiation requests such as `IRForwardDifferentiate(IRBackwardDifferentiate(a : IRFuncType))`. The inner request is processed in the first pass, and the outer request gets processed in the next pass.
+
+## Auto-Diff Passes for `IRForwardDifferentiate`
+For forward-mode derivatives, we only require a single pass implemented wholly in `ForwardDiffTranscriber`. This implements the linearization algorithm, which roughly follows this logic:
+
+1. Create a clone of the original function
+2. Perform pre-autodiff transformations, the most important of which are:
+    a. **Temp-Var-For-Mutable-Params**: Use a temporary `IRVar` to load from (and store back to) mutable parameters
+    b. **Linkage-Removal**: This is simply so the cloned function can be eliminated by DCE after auto-diff is complete
+    c. **Force-Inline**: Inline all `__unsafeForceEarlyInline` & `[ForceInline]` functions _prior_ to auto-diff, so their contents can be included in the differentiation pass (even if they aren't actually marked as `[Differentiable]`)
+
+3. Create a new blank function for the fwd-mode func (usually named `s_fwd_<ORIGINAL-FUNC-NAME>`) with the function type derived by transforming the original function type (See [Types](./types.md) for more information).
+4. Create new blocks into the new fwd-func for each block in the orig-func.
+5. Go through instructions in each block and dispatch to the appropriate generator function to emit the derivative logic into the corresponding block in the fwd-func. Each generator method is responsible for cloning in the original instruction as well as emitting a corresponding derivative instruction. `mapPrimalInst` and `mapDifferentialInst` are used to keep track of prior results so that operands for new instructions can be looked up.
+
+The generator for each instruction computes the forward-mode derivative of each *instruction* using the Jacobian method that is detailed in [Basics#Jacobian-Method](./basics.md#jacobian-method-generate-forward--and-reverse-mode-derivatives-from-first-principles). Since forward-mode derivatives can be composed in the same order as the original instructions, our generation process goes through instructions in each block in the order that they appear, creating differential insts which act as operands for future insts.
+
+Here's an example of this in IR-form
+
+```Rust
+OpModule
+{
+    %ftype = OpFuncType (%float) (%float) (%float)
+    %f = OpFunc : %ftype
+    {
+        %b = OpBlock
+        {
+            %a = OpParam : %float
+            %b = OpParam : %float
+
+            %1 = OpAdd %a %b : %float
+            %2 = OpAdd %1 %1 : %float
+
+            OpReturn %2
+        }
+    }
+
+    // Generated function type
+    %dpfloat = OpDifferentialPairType (%float) (%witness_that_float_is_idifferentiable)
+    %ftype_fwd = OpFuncType (%dpfloat) (%dpfloat) (%dpfloat)
+
+    // Generated function
+    %f_fwd = OpFunc : %ftype_fwd
+    {
+        %b_fwd = OpBlock
+        {
+            %dpa = OpParam : %dpfloat // Convert params to differential pair types
+            %dpb = OpParam : %dpfloat // Convert params to differential pair types
+
+            // Split block inputs into primals and differentials
+            %a = OpDifferentialPairGetPrimal %dpa : %float
+            %da = OpDifferentialPairGetDifferential %dpa : %float
+
+            %b = OpDifferentialPairGetPrimal %dpb : %float
+            %db = OpDifferentialPairGetDifferential %dpb : %float
+
+            // Clone the primal inst for %1
+            %1_primal = OpAdd %a %b : %float
+
+            // Generate the diff inst for %1
+            // Here, we consider the 'mini-function' Add(a,b) = a + b, and use the Jacobian method
+            // to get the result that the fwd-mode derivative should be:
+            // DAdd((a, da), (b, db)) = da + db = Add(da, db)
+            // 
+            %1_diff = OpAdd %da %db : %float
+            
+            // Do the same for the next inst (%2): clone in the primal
+            // by looking up primal versions of the operands.
+            //
+            %2_primal = OpAdd %1_primal %1_primal : %float
+
+            // Then, generate the derivative inst by looking up the differential
+            // versions of the operands.
+            // 
+            %2_diff = OpAdd %1_diff %1_diff : %float
+
+            // Return both the primal and differential
+            %2_pair = OpDifferentialPairMakePair %2_primal %2_diff : %dpfloat
+            OpReturn %2_pair
+        }
+    }
+}
+```
+
+**Multiple Differential Insts:**
+In the above example, the derivative of each inst was a single inst. This is not always the case. 
+For instance, `OpMul %a %b` translates to **three** insts:
+```Rust
+%1 = OpMul %a_diff %b_primal : %float
+%2 = OpMul %a_primal %b_diff : %float
+%3 = OpAdd %1 %2 : %float
+```
+
+**Combined Primal & Differential Insts:**
+In some cases, there is no need to clone in the primal inst since both the primal and differential can be computed in a single inst. An example is `IRCall`, whose derivative only needs a single call (though it needs plenty of insts to pair and unpair arguments):
+```Rust
+// Original inst
+%1 = OpCall %func %a %b : %float
+
+//
+// Upon differentiation:
+
+// Pack args into pairs
+%a_pair = OpDifferentialPairMakePair %a_primal %a_diff : %dpfloat
+%b_pair = OpDifferentialPairMakePair %b_primal %b_diff : %dpfloat
+
+// Call into fwd-mode deriv which computes *both* primal and differential
+// values.
+//
+%func_fwd = OpForwardDifferentiate %func : %functype_fwd
+%1_pair = OpCall %func_fwd %a_pair %b_pair : %dpfloat
+
+// Split into primal and differential so they can be used for future insts.
+%1_primal = OpDifferentialPairGetPrimal %1_pair : %float
+%1_diff = OpDifferentialPairGetDifferential %1_pair : %float
+
+```
+
+
+### Phi Arguments
+Block arguments are handled the same way as function arguments (which in the Slang IR, are also simply block arguments of the first block), and are converted into pair type arguments, with `OpDifferentialPairGetPrimal` and `OpDifferentialPairGetDifferential` insts automatically added to extract the primal and differential parts of each argument.
+
+
+## Auto-Diff Passes for `IRBackwardDifferentiate`
+
+For reverse-mode derivatives, we need several passes that also include differentiating the forward-mode derivative. Most of this logic is contained in `BackwardDiffTranscriberBase::transcribeFuncImpl`. These passes are inspired by the paper ["You Only Linearize Once: Tangents Transpose to Gradients"](https://arxiv.org/abs/2204.10923), which describes this approach in a functional language setting. These passes extend these ideas to work for a general-purpose imperative language structure.
+
+### 1. Preparation
+The reverse-mode derivative generation involves a lot of large scale control-flow manipulation, including a CFG reversal step that aims to construct a method that flows from the end of the function to the beginning in order to compose reverse-mode derivatives.
+To avoid having to deal with too many corner cases (and the maintainability issues that come with it), we bring the function to a 'normal form' before running our differentiation steps. This greatly simplifies the logic of the future passes.
+
+Another high-level goal of these transformations is to bring the control-flow graph to a **reversible** form. That is, we can represent the reverse of control-flow graph using existing Slang constructs (`IRIfElse`, `IRUnconditionalBranch`, `IRLoop` and `IRSwitch`). This is not necessarily true of any valid Slang IR, so we perform additional transformations.
+
+Note: These transformations are always applied onto a temporary clone of the original function. The original function is never touched so as to not affect its use in non-autodiff contexts.
+
+Specifically we:
+1. Bring the function into **single-return form**: If there are multiple blocks with return statements (i.e. multiple exit points) in a function, we eliminate this by wrapping the complete function body in a trivial loop (i.e. a single-iteration loop) and replacing existing return statements with breaks (or multi-level breaks) into its break block, which serves as the unique exit point for the function. This pass is currently contained in `convertFuncToSingleReturnForm()`
+
+2. Eliminate **continue** statements: Loop continue statements introduce a reversibility problem. Since the forward loop can have multiple exit points, the reverse loop needs to have multiple entry points. Slang's loops do not support this. So, we eliminate these statements by wrapping the body of the loop in another trivial loop (i.e. single-iteration loop) and turning the **continue** statements into **break** statements. This also involves rewriting **break** statements in the original loop into **multi-level** breaks.
+    
+    Here is an example:
+    ```C
+    // Original loop
+    for (uint i = 0; i < N; i++)
+    {
+        if (i > 5)
+            continue;
+
+        if (i > 9)
+            break;
+        
+        x = x + i;
+    }
+
+    // After continue-elimination
+    outer_for:
+    for (uint i = 0; i < N; i++)
+    {
+        inner_for:
+        for (;;)
+        {
+            if (i > 5)
+                break;
+
+            if (i > 9)
+                break outer_for; // multi-level break
+            
+            x = x + i;
+
+            break;
+        }
+    }
+    ```
+
+3. Eliminate **multi-level breaks**: Slang supports breaking out to an outer loop. Unfortunately, this operation is hard to reverse since Slang (and shading languages in general) do not support arbitrary `goto` statements. We eliminate multi-level breaks by assigning each nested loop a nesting index (a constant `uint` denoting the nesting level). All break statements are rewritten to break out to the immediate next level (i.e. a standard break) with an index parameter denoting the intended break level. This parameter is checked at each level and if the break index does not match the level index, we break again to the immediate upper level. This pass is currently contained in `eliminateMultiLevelBreakForFunc`.
+
+    Continuing the above example, here is the code after multi-level break elimination.
+    ```C
+    // After multi-level-break elimination
+    uint level = -1;
+    for (uint i = 0; i < N; i++)
+    {
+        for (;;)
+        {
+            if (i > 5)
+            {
+                level = 1;
+                break;
+            }
+
+            if (i > 9)
+            {
+                level = 0;
+                break;
+            }
+            
+            x = x + i;
+
+            level = 1;
+            break;
+        }
+
+        if (level != 1) // Level check immediately after breaking out of each loop.
+            break;
+    }
+    ```
+
+4. Eliminate **break** statements (enclosed in `normalizeCFG()`): Break statements also pose the same problem as continue statements (i.e. multiple exit points require the reverse loop to have multiple entry points, and Slang does not have a primitive for this). We eliminate break statements by introducing a boolean break flag which is set to `false` to indicate a break instead of using the break statement. Each *region* is enclosed in an if-else statement that checks the break flag and skips to the end if necessary.
+
+    Break elimination proceeds with the following steps;
+
+    Here is the above example code after break elimination.
+    ```C
+    // After break elimination
+    
+    uint level = -1;
+    bool bflag_0 = true; // for outer loop (true => keep-going, false => break)
+
+    for (uint i = 0; (i < N) && bflag_0; i++) // Insert flag into the loop condition (&& with the current condition)
+    {
+        bool bflag_1 = true; // for inner loop (true => keep-going, false => break)
+
+        for (;bflag_1;) // Insert flag into the loop condition
+        {
+            if (i > 5)
+            {
+                level = 1;
+                bflag_1 = false; // break
+            }
+
+            // Region after any break statement is enclosed in a 
+            // if-else check.
+            // 
+            if (bflag_1)
+            {
+                if (i > 9)
+                {
+                    level = 0;
+                    bflag_1 = false; // break
+                }
+
+                // Another if-else enclosure, this time for the second
+                // break.
+                // 
+                if (bflag_1)
+                {
+                    x = x + i;
+                    level = 1;
+                }
+
+                bflag_1 = false;
+            }
+        }
+
+        if (level != 1)
+        {
+            bflag_0 = false;
+        }
+    }
+    ```
+
+    **Extra evaluation of the condition block:** The CFG normalization passes always attempt to preserve the equivalence of the original function while manipulating the control-flow constructs (i.e. ensure that the transformed code always computes the same thing). However, there is one corner-case exception: after break-elimination, the loop condition code can be evaluated 1 additional time, since we don't directly break out of the loop, but go through an extra loop condition check. This becomes important during the checkpointing step, when arrays are allocated to hold loop variables. The array bounds must account for an additional loop iteration to avoid correctness problems.
+
+
+### 2. Linearization with Inst-Tagging
+This is the same as generating the forward-derivative function, and is in fact handled in the same way, by invoking `ForwardDiffTranscriber`. The **inst-tagging** part of this pass is not necessary for forward-mode auto-diff (the tags are simply discarded after the auto-diff pass), but is essential for reverse-mode.
+
+**Inst-Tagging:** This pass also **tags** every instruction and block with either `IRPrimalInstDecoration`, `IRDifferentialInstDecoration` or `IRMixedDifferentialInstDecoration`, depending on whether an instruction contains/computes/reads/writes a primal value, a differential value or both.
+
+This assignment is according to the following rules:
+1. The result of `.getDifferential()` from an inst of `IRDifferentialPairType` is a *differential* inst and the result of `.getPrimal()` is a *primal* inst. **NOTE:** This does not apply to `IRDifferentialPairUserCodeType`, all of whose operations yield a *primal* inst.
+2. Further, any inst which contains a differential inst as an operand **AND** whose output value may be affected by this operand is a differential inst (e.g. if `isDifferentialInst(a) = true` then `isDifferentialInst( IRMul(a, b) ) = true`)
+3. If an inst produces multiple outputs, *some* of which are differential and the others primal, then it is a *mixed-differential* inst. For example, a value of `IRDifferentialPairType` contains both a primal and a differential value; similarly, a call of the form `IRCall(IRForwardDifferentiate(inner_fn))(...)` results in a mixed-differential inst, since the primal part is not affected by differential inputs.
+4. All other insts are *primal* by default.
+5. Blocks are marked differential or primal if they contain **ONLY** differential or primal insts (respectively). Otherwise they are marked mixed-differential. The vast majority of blocks are mixed-differential.
+
+Correct tag information is critical for the next steps to correctly transform the forward-mode derivative into the reverse-mode derivative function.
+
+Here's the same forward-mode example, but with insts tagged accordingly
+```Rust
+OpModule
+{
+    // Generated function type
+    ...
+
+    // Generated function
+    ...
+        [OpMixedDifferentialInstDecoration]
+        %b_fwd = OpBlock
+        {
+            // Block params are mixed differentials since they carry both
+            // primal and differential values
+            // 
+            [OpMixedDifferentialInstDecoration]
+            %dpa = OpParam : %dpfloat 
+            [OpMixedDifferentialInstDecoration]
+            %dpb = OpParam : %dpfloat 
+
+            [OpPrimalInstDecoration]
+            %a = OpDifferentialPairGetPrimal %dpa : %float
+
+            [OpDifferentialInstDecoration]
+            %da = OpDifferentialPairGetDifferential %dpa : %float
+
+            [OpPrimalInstDecoration]
+            %b = OpDifferentialPairGetPrimal %dpb : %float
+
+            [OpDifferentialInstDecoration]
+            %db = OpDifferentialPairGetDifferential %dpb : %float
+
+            [OpPrimalInstDecoration]
+            %1_primal = OpAdd %a %b : %float
+
+            [OpDifferentialInstDecoration]
+            %1_diff = OpAdd %da %db : %float
+
+            [OpPrimalInstDecoration]
+            %2_primal = OpAdd %1_primal %1_primal : %float
+
+            [OpDifferentialInstDecoration]
+            %2_diff = OpAdd %1_diff %1_diff : %float
+
+            // Return both the primal and differential
+            [OpMixedDifferentialInstDecoration]
+            %2_pair = OpDifferentialPairMakePair %2_primal %2_diff : %dpfloat
+
+            [OpDifferentialInstDecoration]
+            OpReturn %2_pair
+        }
+    ...
+}
+```
+
+### 3. Unzipping
+Implemented by `DiffUnzipPass`, this pass is responsible for **separating** primal instructions from differential instructions (as denoted by their decorations), by creating a full set of duplicate blocks that start **after** the last block, i.e. return block (the return statement is removed).
+
+This separation is possible because the computation of a differential inst may include primal operands but a primal inst can never use a differential operand. 
+
+The unzipping pass uses the decorations from the linearization step to figure out which instructions need to be moved.
+
+The separation process uses the following high-level logic:
+1. Create two clones of all the blocks in the provided function (one for primal insts, one for differential insts), and hold a mapping between each original (mixed) block to each primal and differential block. The return statement of the current final block is **removed**. 
+2. Process each instruction of each block: instructions marked as **primal** are moved to the corresponding **primal block**, instructions marked **differential** are moved to the corresponding **differential block**.
+3. Instructions marked **mixed** need op-specific handling, and so are dispatched to the appropriate splitting function. For instance, block parameters that are holding differential-pair values are split into parameters for holding primal and differential values (the exception is function parameters, which are not affected). Similarly, `IRVar`s, `IRTerminatorInst`s (control-flow) and `IRCall`s are all split into multiple insts.
+4. Except for `IRReturn`, all other control-flow insts are effectively duplicated so that the control-flow between the primal blocks and differential blocks both follow the original blocks' control-flow. The main difference is that PHI arguments are split (primal blocks carry primal values in their PHI arguments, and differential blocks carry diff values) between the two. Note that condition values (i.e. booleans) are used by both the primal and differential control-flow insts. However, since booleans are always primal values, they are always defined in the primal blocks.
+
+
+**Block-Tagging:** Blocks are now tagged primal or differential depending on whether they are holding primal or differential insts. This is important for the next step (transposition) to figure out which blocks need to be transposed.
+
+**Out-of-Scope Accesses:** After unzipping, the resulting IR is often **not valid**. If the control-flow is straight line (i.e. no branching or loops), the resulting IR is valid. However, if there is control-flow, then instructions can use operands whose definition does not dominate the use. This invalid IR is currently allowed to persist until the end of the auto-diff passes, when the checkpointing step occurs (i.e. running IR validation will fail in between these steps).
+
+
+Here is an example of unzipped code:
+
+```Rust
+OpModule
+{
+    // Generated function type
+    ...
+
+    // Unzipped code
+    ...
+        // The first block of a function is still mixed differential, and exclusively holds 
+        // function parameter definitions (no other instructions)
+        // 
+        [OpMixedDifferentialInstDecoration]
+        {
+            [OpMixedDifferentialInstDecoration]
+            %dpa = OpParam : %dpfloat
+            [OpMixedDifferentialInstDecoration]
+            %dpb = OpParam : %dpfloat
+        }
+
+        // Primal version of b containing only primal instructions
+        [OpPrimalInstDecoration]
+        %b_primal = OpBlock
+        {
+            [OpPrimalInstDecoration]
+            %a_primal = OpDifferentialPairGetPrimal %dpa : %float
+            [OpPrimalInstDecoration]
+            %b_primal = OpDifferentialPairGetPrimal %dpb : %float
+
+            [OpPrimalInstDecoration]
+            %1_primal = OpAdd %a_primal %b_primal : %float
+
+            [OpPrimalInstDecoration]
+            %2_primal = OpAdd %1_primal %1_primal : %float
+
+            [OpBackwardDerivativePrimalReturnDecoration %2_primal]
+            OpUnconditionalBranch %b_diff
+        }
+
+        // Differential version of b containing only differential instructions
+        // with some exceptions. 
+        // 
+        [OpDifferentialInstDecoration]
+        %b_diff = OpBlock
+        {
+            [OpDifferentialInstDecoration]
+            %a_diff = OpDifferentialPairGetDifferential %dpa : %float
+            [OpDifferentialInstDecoration]
+            %b_diff = OpDifferentialPairGetDifferential %dpb : %float
+
+            [OpDifferentialInstDecoration]
+            %1_diff = OpAdd %a_diff %b_diff : %float
+
+            [OpDifferentialInstDecoration]
+            %2_diff = OpAdd %1_diff %1_diff : %float
+
+            // Return both the primal and differential
+            [OpMixedDifferentialInstDecoration]
+            %2_pair = OpDifferentialPairMakePair %2_primal %2_diff : %dpfloat
+
+            [OpDifferentialInstDecoration]
+            OpReturn %2_pair
+        }
+
+    ...
+}
+```
+
+### 4. Transposition
+
+The next step involves converting each differential instruction into its transpose. Effectively, we are re-writing each forward-mode derivative into its reverse-mode equivalent.
+
+Recall from auto-diff [basics](./basics.md), that both the forward and reverse mode derivatives can be derived from the Jacobian matrix of any operation. The main difference is whether we multiply the derivatives of the inputs with the Jacobian or multiply the Jacobian with the derivatives w.r.t the outputs. These two operations are the transpose of each other, in that the reverse-mode derivative can be thought of as multiplying with the transpose of the Jacobian.
+
+We perform this transposition on a per-instruction level.
+
+Here is an example of a transposition of a multiplication operation:
+```Rust
+[OpPrimalInstDecoration]
+%b = OpLoad %var_b // %b is a primal value
+
+[OpDifferentialInstDecoration]
+%da = OpLoad %var_da // %da is a differential value
+
+// The operation we want to transpose
+[OpDifferentialInstDecoration]
+%1d = OpMul %da %b : %float
+
+[OpDifferentialInstDecoration]
+OpStore %1d %var_result
+```
+
+This multiplication can be represented as a tiny matrix multiplication between a singleton vector `[%da]` and singleton matrix `[%b]`. 
+Its transpose will be the multiplication of the transpose of that matrix (which is the value itself `[%b]`) with a derivative w.r.t. its output `%1d`, i.e. it becomes `%da = OpMul %1d %b`. Note that we now have to provide `%1d` as an **input**, and receive `%da` as an **output**.
+
+The resulting code is then:
+```Rust
+[OpPrimalInstDecoration]
+%b = OpLoad %var_b : %float // primal values are unaffected (at this stage, they are in primal blocks)
+
+// Reverse-mode code: (_rev) appended to all variables & insts to keep them distinct from the fwd-mode code.
+[OpDifferentialInstDecoration]
+%1d_rev = OpLoad %var_result_rev : %float
+
+// The operation we want to transpose
+[OpDifferentialInstDecoration]
+%da_rev = OpMul %1d_rev %b : %float
+
+[OpDifferentialInstDecoration]
+OpStore %da_rev %var_da_rev
+```
+
+Notice that the three differential instructions are effectively run backwards **and** transposed. Loads become stores, 
+the `OpMul` is transposed into another `OpMul`, and stores become loads. This backwards transposition is because the differential outputs become differential inputs, and thus, we need to process the future instructions first so that the new operands are defined before being used for the new instruction.
+
+This reverse order of operations also applies to control-flow. The rule of thumb is: if the forward-mode pass takes a particular path through the code, for a given set of primal values, the reverse-mode must "re-trace" the same path through the code, but in reverse by starting at the end.
+
+We synthesize a CFG that satisfies this property through the following steps:
+1. Clone the provided unzipped forward-mode function (and all blocks + instructions) to serve as the reverse-mode function.
+2. Remove all **differential** blocks and create a set of corresponding reverse-mode blocks for each **differential** block removed (**primal** blocks are simply left alone), while holding a map between corresponding blocks. Initially, they are empty. 
+3. Using the provided unzipped forward-mode function as a reference, process each differential block by walking each instruction from the _last_ (terminator) inst, and dispatching to the appropriate op-specific `transposeXYZ()` method to emit the appropriate transposed instructions into the corresponding reverse-mode block. 
+
+    There are several concerns that must be taken care of:
+    1. **Multiple Derivative Outputs:** Unlike forward-mode auto-diff, where an inst producing a single value would only need a single derivative (corresponding to that value), reverse-mode auto-diff can produce multiple derivatives from an inst. For instance `%dc = IRAdd(%da, %db)` produces two derivatives: `%da_rev = %dc_rev` and `%db_rev = %dc_rev`. Thus, the `transposeXYZ()` implementation for any instruction can return a set of derivative insts for each relevant input differential value.
+
+    2. **Insts Used in Multiple Places (Derivative Accumulation):** If an inst is used in multiple places, and receives a reverse-mode derivative from several of those places, these results need to be **added up** to get the correct derivative. 
+    
+        Consider this forward-mode example
+
+        ```Rust
+        [OpDifferentialInstDecoration]
+        %db = OpAdd %da, %da : %float
+
+        [OpDifferentialInstDecoration]
+        %dc = OpAdd %db, %da : %float
+        ```
+
+        Its reverse-mode derivative will look like this:
+
+        ```Rust
+        %db_rev = %dc_rev // %db only has one differential since it is only consumed in one place.
+
+        // reverse-mode differential for %da from transposing the first instruction
+        [OpDifferentialInstDecoration]
+        %da_rev_1 = OpAdd %db_rev, %db_rev : %float
+
+        // reverse-mode differential for %da from transposing the second instruction
+        [OpDifferentialInstDecoration]
+        %da_rev_2 = %dc_rev
+
+        // add them together to get the final derivative for %da
+        [OpDifferentialInstDecoration]
+        %da_rev = OpAdd %da_rev_1 %da_rev_2 : %float
+        ```
+
+        Derivative accumulation is achieved through two ways:
+        
+        **Within** a block, we keep a list of all the reverse derivative insts for each inst and only **materialize** the total derivative when it is required as an operand. This is the most efficient way to do this, because we can apply certain optimizations for composite types (derivative of an array element, vector element, struct field, etc.).
+        
+        **Across** blocks, we use an accumulator variable that is inserted into a top-level block in the function, and add to this variable whenever a transposition operation generates a new inst. This can sometimes produce sub-optimal code for aggregate/large data types, but at the moment, the accumulator method is necessary because insts can receive derivatives from conditionally executed blocks.
+
+        While this example uses `OpAdd` to demonstrate accumulation, in practice, we use the derivative type system (see [Types](./types.md) for more) to look up the derivative addition function (`dadd`) to add two values of an arbitrary differential type. That is, the `OpAdd` is replaced by `OpCall %float_dadd %da_rev_1 %da_rev_2`. Similarly, for accumulator variables, we must initialize them to zero for the accumulation to work correctly, and we look up the `dzero` interface method to initialize them in a type-specific way.
+
+    3. **Deferred Materialization for Derivatives of Composite Types:**
+        Non-primitive types, such as vectors, arrays, structs, etc. whose elements are used in several places in the forward-mode code, can result in sub-optimal reverse-mode code. Here is an example (in Slang source-style):
+        ```C
+        float f_fwd(DifferentialPair<float3> input)
+        {
+            float3 dinput = input.getDifferential();
+            float a = dinput.x + dinput.y;
+            float b = a + dinput.z;
+
+            return b;
+        }
+
+        // Transposed code (naively, without deferred materialization)
+        void f_rev(inout DifferentialPair<float3> input, float d_output)
+        {
+            // transpose of (return b;)
+            float db_rev = d_output;
+            
+            // transpose of (float b = a + dinput.z)
+            float da_rev = db_rev;
+            float3 dinput_rev_1 = float3(0.f, 0.f, da_rev);
+
+            // transpose of (float a = dinput.x + dinput.y)
+            float3 dinput_rev_2 = float3(0.f, da_rev, 0.f);
+            float3 dinput_rev_3 = float3(da_rev, 0.f, 0.f);
+
+            // Accumulate [dinput_rev_1, dinput_rev_2, dinput_rev_3]
+            float3 dinput = dinput_rev_1 + dinput_rev_2 + dinput_rev_3;
+
+            input = DifferentialPair<float3>(
+                input.getPrimal(),
+                dinput);
+        }
+        ```
+
+        Note that this approach to inst-by-inst transposition can use a lot more stack space than is necessary (`dinput_rev_1`, `dinput_rev_2` and `dinput_rev_3` all only have a single non-0 entry). This is a known complexity issue with naive inst-by-inst transposition: hypothetically, a size-$N$ vector/array would end up allocating $O(N^2)$ memory even if only $N$ elements are non-0.
+        In our Slang implementation, we circumvent this (to an extent) by deferring materialization. Rather than create each component `dinput_rev_i` as soon as we see an inst use, we hold the derivative with a special flavor value for lookups (say `Swizzle` or `GetElement`). When the total value `dinput_rev` is necessary, we process components of each flavor type at once and create a single derivative from all the components. 
+
+        Here is the same example, with deferred materialization:
+        ```C
+        // Transposed code (with deferred materialization)
+        void f_rev(inout DifferentialPair<float3> input, float d_output)
+        {
+            // transpose of (return b;)
+            float db_rev = d_output;
+            
+            // transpose of (float b = a + dinput.z), hold {flavor=Swizzle, component=.z, derivInst=db_rev} in list.
+            float da_rev = db_rev;
+
+            // transpose of (float a = dinput.x + dinput.y), 
+            // hold {flavor=Swizzle, component=.x, derivInst=da_rev} and {flavor=Swizzle, component=.y, derivInst=da_rev} in list.
+
+            // Materialize when required (for constructing return pair)
+            float3 dinput = float3(da_rev, da_rev, db_rev);
+
+            input = DifferentialPair<float3>(
+                input.getPrimal(),
+                dinput);
+        }
+        ```
+
+        Note that this only really works for accumulation *within* a single block/control-flow region. For across regions, we still have to materialize when we exit a region, so this memory problem can still manifest for control-flow heavy functions, where each region must allocate enough space for its contribution to the full derivative, even if only a small subset is non-0.
+
+
+
+```C
+float a[10] = /*...*/;
+for (int i = 0; i < 10; i++)
+{
+    a[i] = f(a[i]);
+}
+```
+
+```C
+
+// Entry block
+%t = OpBlock
+{
+    IRLoop %c %br %c 0
+}
+
+// Condition
+%c = OpBlock
+{
+    %i = OpParam : %float
+    %a = OpParam : %Array(%float, 10)
+    
+    %2 = OpLesser(%i, 10) : %bool
+
+    OpIfElse(%2, %b, %br, %br)
+}
+
+// Loop body.
+%b = OpBlock 
+{
+    %a_i = OpGetElement(%a, %i) : %float
+    %f_a_i = OpCall(f, %a_i) : %float
+
+    %a_next = OpUpdateElement(%a, %i, %f_a_i) : %Array(%float, 10)
+
+    %i_next = OpAdd(%i, 1)
+
+    OpUnconditionalBranch(%c, %i_next, %a_next)
+}
+
+// Break block
+%br = OpBlock
+{
+    //...
+}
+```
+
+After AD passes, this results in the following code:
+```C
+
+//// Primal context pass.
+
+// Entry block
+%t_rev = OpBlock
+{
+    // Context storage for all loop phi variables (n_iters + 1)
+    %ctx_a = IRVar : %array(%array(%float, 10), 11) // Catastrophically large amount of storage.
+    %ctx_i = IRVar : %array(%float, 11)
+
+    OpLoop %c %br %c 0
+}
+
+// Condition
+%c_rev = OpBlock
+{
+    %i = OpParam : %float
+    %a = OpParam : %array(%float, 10)
+
+    // Context store operations.
+    %ctx_i_ptr = OpGetElementPtr(%ctx_i, %i) : %ptr(%int)
+    OpStore(%ctx_i_ptr, %i)
+    %ctx_a_ptr = OpGetElementPtr(%ctx_a, %i) : %ptr(%array(%float, 10))
+    OpStore(%ctx_a_ptr, %a)
+    
+    %2 = OpLesser(%i, 10) : %bool
+
+    OpIfElse(%2, %b, %br, %br)
+}
+
+// Loop body.
+%b = OpBlock 
+{ /*...*/ }
+
+// Break block
+%br = OpBlock
+{ /*...*/ }
+
+//// Backprop pass
+
+// Entry block
+%t_rev = OpBlock
+{
+    // Count down from the end
+    OpLoop %c_rev %br_rev %c_rev 9 
+
+    // Variable to hold the derivative of %a
+    %var_da_rev = OpVar : %ptr(%array(%float, 10))
+}
+
+// Condition
+%c_rev = OpBlock
+{
+    // rev-mode loop counter (runs backwards from limit to 0)
+    %dc = OpParam : %int
+    
+    %2 = OpLesser(%i, 10) : %bool
+
+    OpIfElse %2 %b %br %br
+}
+
+// Loop body.
+%b_rev = OpBlock 
+{
+    // Context load operations.
+    %ctx_i_ptr = OpGetElementPtr(%ctx_i, %dc) : %ptr(%int)
+    %i_saved = OpLoad(%ctx_i_ptr) : %int
+
+    %ctx_a_ptr = OpGetElementPtr(%ctx_a, %dc) : %ptr(%array(%float, 10))
+    %a_saved = OpLoad(%ctx_a_ptr) : %array(%float, 10)
+
+    %a_i = OpGetElement(%a_saved, %i_saved) : %float
+    %a_pair_i = OpMakeDifferentialPair(%a_i, 0) : %diff_pair(%float)
+
+    %da_rev_ptr = OpGetElementPtr(%var_da_rev, %i_saved) : %ptr(%float)
+    %df_output = OpLoad(%da_rev_ptr) : %float
+
+    // Call rev-mode of f to propagate derivative of output of f to input of f. (Assume f has no context requirement)
+    %var_a_pair_i = OpVar : %ptr(%diff_pair(%float))
+    OpStore(%var_a_pair_i, %a_pair_i)
+    OpCall(f_rev, %a_pair_i, %df_output) : %float 
+
+    // Load derivative for a_i
+    %a_pair_i_loaded = OpLoad(%var_a_pair_i, %a_pair_i)
+    %da_rev_i = OpDifferentialPairGetDifferential(%a_pair_i_loaded) : %float
+
+    // Create derivative array for backpropagation (this happens during gradient materialization)
+    %da_rev_local_var = OpVar : %ptr(%array(%float, 10))
+    %da_rev_init_zero = OpMakeArray(0, 0, 0, 0, 0, 0, 0, 0, 0, 0) : %array(%float, 10)
+    OpStore(%da_rev_local_var, %da_rev_init_zero)
+
+    %da_rev_var_i = OpGetElementPtr(%da_rev_local_var, %dc) : %ptr(%float)
+    %curr_dval = OpLoad(%da_rev_var_i) : %float
+    %acc_dval = OpAdd(%curr_dval, %da_rev_i) : %float
+    OpStore(%da_rev_var_i, %acc_dval)
+
+    // Add derivative array to the global var.
+    %curr_dval_a = OpLoad(%var_da_rev) : %array(%float, 10)
+    %new_dval_a = OpLoad(%da_rev_local_var) : %array(%float, 10)
+    %acc_dval_a = OpCall('array_dadd', %curr_dval_a, %new_dval_a) : %array(%float, 10)
+    OpStore(%var_da_rev, %acc_dval_a)
+
+    %dc_next = OpAdd(%dc, -1)
+
+    OpUnconditionalBranch(%c_rev, %dc_next)
+}
+
+// Break block
+%br_rev = OpBlock
+{ /*...*/ }
+```
+
+4. Construct the reverse control-flow (`reverseCFGRegion()`) by going through the reference forward-mode blocks, and cloning the control-flow onto the reverse-mode blocks, but in reverse. This is achieved by running `reverseCFGRegion()` recursively on each sub-region, where a *region* is defined as a set of blocks with a single entry block and a single exit block. This definition of a region only works because we normalized the CFG into this form.
+
+    The reversal logic follows these general rules:
+    1. **Unconditional Branch**: For an unconditional branch from `A->B` we simply have to map the reverse version of B with that of A. i.e. `rev[B] -> rev[A]`
+    2. **If-Else**: For an if-else of the form `A->[true = T->...->T_last->M, false = F->...->F_last->M]`, we construct `rev[M]->[true = rev[T_last]->...->rev[T]->rev[A], false = rev[F_last]->...->rev[F]->rev[A]]`. That is, we reverse each sub-region, and start from the merge block and end at the split block.
+    Note that we need to identify `T_last` and `F_last` i.e. the last two blocks in the true and false regions. We make the last block in the region an additional return value of `reverseCFGRegion()`, so that when reversing the true and false sub-regions, we also get the relevant last block as an additional output. Also note that additional empty blocks may be inserted to carry derivatives of the phi arguments, but this does not alter the control-flow.
+    3. **Switch-case**: Proceeds in exactly the same way as `if-else` reversal, but with multiple cases instead of just 2.
+    4. **Loop**: After normalization, all (non-trivial) loops are of the form: `A->C->[true = T->...->T_last->C, false=B->...->M]`. We reverse this loop into `rev[M]->...->rev[B]->rev[C]->[true=rev[T_last]->...->rev[T]->rev[C], false=rev[A]]`. The actual reversal logic also handles some corner cases by inserting additional blank blocks to avoid situations where regions may share the same merge block.
+
+    Finally, we process the first and last blocks (entry and return blocks) by inserting a void return (reverse-mode derivative functions are always of void result type).
+
+At this stage, the reverse-mode generation is almost complete. The control-flow and the derivative logic is present, but we still have to resolve out-of-scope accesses from the new differential blocks into the primal block.
+
+### 5. Checkpointing/Recomputation (also called 'primal-hoisting')
+This step legalizes the out-of-scope accesses of primal insts from within differential blocks. This is to prepare us for the next step (i.e. [extraction](#6-extraction)) that splits the function into two by moving the primal blocks into a separate primal-context-generator function, and the differential blocks into the backward-propagation function. 
+
+Before we can perform this extraction, we must find any primal values being used in differential blocks and handle them in one of two ways:
+**Store** (put the values in a static struct) or **Recompute** (clone the necessary instructions to recompute when necessary). We first _classify_ all necessary instructions into one of the two buckets before processing each use accordingly.
+
+1. **Classify uses into each set:** Note that rather than proceeding on an inst-by-inst basis, we classify **uses** of insts. The same inst can be used in several places, and we may decide to store one use and recompute another (in some cases, this could be the optimal result). 
+The classification process uses a work-list approach that roughly looks like the following:
+    1. Add all uses of **primal** insts in an inst within a **differential** block to the work list. This is our initial set of uses that require classification. 
+    2. Query the active policy object (which for now is hardcoded) to obtain the classification based on heuristics & user decorations (Specifically `[PreferRecompute]` and `[PreferCheckpoint]` decorations influence the classification policy)
+    3. For uses that should be **recomputed**, we have to now make the same decision on their **operands**, in order to make them available for the recomputation insts. Thus, their operands are added to the work list.
+    4. For uses that should be **stored**, there is no need to consider their operands, since the computed value will be explicitly stored and loaded later.
+    5. Once the worklist is empty, go over all the **uses** and their classifications, and convert them into a list of **insts** that should be stored or recomputed. Note that if an inst has uses with both classifications, then it can appear in both lists.
+
+2. **Process 'Store' (i.e. checkpoint) insts:** Insts to be stored are written into a single variable (of a struct type that is synthesized as necessary), and then loaded from that variable in the differential blocks. This allows us to simply turn this variable into an output parameter from the context function and an input parameter for the backprop function.
+When storing values this way, we must consider that instructions within loops can have different values each iteration. Thus, we must use an array to store each value, and this array's size must be statically known since we wish to synthesize a static struct type to hold all the stored values. Thus, we enforce the requirement of a `[MaxIters(N)]` decoration and attempt to infer a loop iteration limit if one is not provided.
+
+    Here's an example of a case where we decide to checkpoint _all_ relevant uses:
+
+    ```C
+    // Example function without loops post-transposition step (BEFORE hoisting)
+    void f_rev(DifferentialPair<float> dpx, float d_out)
+    {
+        //
+        // Primal blocks (will be extracted into a separate function in Step 6: Extraction)
+        //
+
+        float x = dpx.getPrimal();
+        float p = 0;
+
+        if (x < 0.5)
+        {
+            float t1 = x * x;
+            p = t1 * t1 + x;
+        }
+
+        if (x > 10.f)
+        {
+            float t2 = x * x * x;
+            p = t2 * t2 + x;
+        }
+
+        //
+        // Reversed differential blocks start here (will be extracted into a separate function in Step 6: Extraction)
+        //
+
+        float dp_rev = d_out;
+        float dx_rev = 0.f; // accumulator var for 'x.d'
+        if (x > 10.f)
+        {
+            float dt2_rev = t2 * dp_rev; // access of a primal value 't2' from a differential block.
+            dx_rev += dp_rev;
+            dp_rev = 0.f; // dp_rev's value gets reset to 0 after use.
+
+            dx_rev += x * x * dt2_rev;
+            dx_rev += x * dt2_rev * x;
+            dx_rev += dt2_rev * x * x;
+        }
+
+        if (x < 0.5)
+        {
+            float dt1_rev = t1 * dp_rev; // access of a primal value 't1' from a differential block.
+            dx_rev += dp_rev;
+
+            dx_rev += x * dt1_rev;
+            dx_rev += dt1_rev * x;
+        }
+
+        dpx = DifferentialPair<float>(x, dx_rev);
+    }
+
+    // The same function after the primal hoisting's checkpointing step. In this example, we
+    // assume all relevant uses are being checkpointed.
+    // 
+    void f_rev_hoisted(DifferentialPair<float> dpx, float d_out)
+    {
+        // Insert vars for checkpointed insts at the top-level
+        float t1_storage;
+        float t2_storage;
+
+        //
+        // Primal blocks
+        //
+
+        float x = dpx.getPrimal();
+        float p = 0;
+
+        if (x < 0.5)
+        {
+            float t1 = x * x;
+            t1_storage = t1; // Cache values immediately after they are created.
+            p = t1 * t1 + x;
+        }
+
+        if (x > 10.f)
+        {
+            float t2 = x * x * x;
+            t2_storage = t2; // Cache values immediately after they are created.
+            p = t2 * t2 + x;
+        }
+
+        //
+        // Reversed differential blocks
+        //
+
+        float x = dpx.getPrimal();
+
+        float dp_rev = d_out;
+        float dx_rev = 0.f; // accumulator var for 'x.d'
+        if (x > 10.f)
+        {
+            float dt2_rev = t2_storage * dp_rev; // Use stored value.
+            dx_rev += dp_rev;
+
+            dx_rev += x * x * dt2_rev;
+            dx_rev += x * dt2_rev * x;
+            dx_rev += dt2_rev * x * x;
+        }
+
+        if (x < 0.5)
+        {
+            float dt1_rev = t1_storage * dp_rev; // Use stored value.
+            dx_rev += dp_rev;
+
+            dx_rev += x * dt1_rev;
+            dx_rev += dt1_rev * x;
+        }
+
+        dpx = DifferentialPair<float>(x, dx_rev);
+    }
+    ```
+    Another example with a function `g` that does contain loops:
+
+    ```C
+    // Example function with a loop, post-transposition step (BEFORE hoisting)
+    void g_rev(DifferentialPair<float> dpx, float d_out)
+    {
+        //
+        // Primal blocks (will be extracted into a separate function in Step 6: Extraction)
+        //
+
+        float x = dpx.getPrimal();
+        float p = 0;
+
+        for (uint i = 0; i < 10; i++)
+        {
+            p = x * p;
+        }
+
+        //
+        // Reversed differential blocks
+        //
+
+        float dx_rev = 0.f;
+        float dp_rev = d_out;
+        for (uint i = 9; i >= 0; i--)
+        {
+            dx_rev += p * dp_rev; // primal value 'p' accessed from differential blocks
+            dp_rev = x * dp_rev;
+        }
+
+        return DifferentialPair<float>(x, dx_rev);
+    }
+
+    // After hoisting, note that we checkpoint 'p' in this case by using an array.
+    void g_rev_hoisted(DifferentialPair<float> dpx, float d_out)
+    {
+        // Insert array to hold states of 'p'
+        float p_storage[11];
+
+        //
+        // Primal blocks (will be extracted into a separate function in Step 6: Extraction)
+        //
+
+        float x = dpx.getPrimal();
+        float p = 0;
+
+        // Insert storage for all states of p, including the initial value upon loop entry
+        p_storage[0] = p;
+        for (uint i = 0; i < 10; i++)
+        {
+            p = x * p;
+            // Use the loop induction variable 'i' to figure out which index to store p in.
+            p_storage[i+1] = p;
+        }
+
+        //
+        // Reversed differential blocks
+        //
+
+        float dx_rev = 0.f;
+        float dp_rev = d_out;
+        for (uint i = 9; i >= 0; i--)
+        {
+            // Load appropriate value of p from storage
+            float p = p_storage[i];
+            dx_rev += p * dp_rev; 
+            dp_rev = x * dp_rev;
+        }
+
+        return DifferentialPair<float>(x, dx_rev);
+    }
+    ```
+
+    **Indexed Region Processing:** In order to be able to allocate the right array and use the right indices, we need information about which blocks are part of which loop (and loops can be nested, so blocks can be part of multiple loops). To do this, we run a pre-processing step that maps all blocks to all relevant loop regions, the corresponding index variables and the inferred iteration limits (maximum times a loop can run). Note that if an instruction appears in a nested block, we create a multi-dimensional array and use multiple indices.
+
+    **Loop State Variables:** Certain variables cannot be classified as recompute. Major examples are loop state variables, which are defined as variables that are read from and written to within the loop. In practice, they appear as phi-variables on the first loop block after SSA simplification. Their uses _must_ be classified as 'store', because recomputing them requires duplicating the primal loop within the differential loop. This is because the differential loop runs backwards ($N+1 \to N \to N-1$), so the state of a primal variable at loop index $N$ cannot be derived from the preceding differential iteration; recomputing it involves running the primal loop up to $N$ times within the current iteration of the differential loop. In terms of complexity, this turns an $O(N)$ loop into an $O(N^2)$ loop, and so we disallow this.
+    It is possible that the resulting $O(N^2)$ loop may end up being faster in practice due to reduced memory requirements, but we currently lack the infrastructure to robustly allow such loop duplication while keeping the user informed of the potentially drastic complexity issues.
+
+3. **Process 'Recompute' insts:** Insert a copy of the primal instruction into a corresponding 'recomputation' block that is inserted into the differential control-flow so that it dominates the use-site. 
+
+    **Insertion of Recompute Blocks:** In order to accommodate recomputation, we first preprocess the function by going through each **breakable (i.e. loop) region** in the differential blocks, looking up the corresponding **primal region** and cloning all the primal blocks into the beginning of the differential region. Note that this cloning process does not actually clone the instructions within each block, only the control-flow (i.e. terminator) insts. This way, there is a 1:1 mapping between the primal blocks and the newly created **recompute blocks**. Consequently, if we decide to 'recompute' an instruction, we can simply clone it into the corresponding recompute block, and we have a guarantee that the definition and use-site are within the same loop scope, and that the definition comes before the use.
+    
+    **Legalizing Accesses from Branches:** Our per-loop-region recompute blocks ensure that the recomputed inst is always within the same region as its uses, but it can still be out-of-scope if it is defined within a branch (i.e. if-else). We therefore still run a light-weight hoisting pass that detects these uses, inserts an `IRVar` at the immediate dominator of the def and use, and inserts loads and stores accordingly. Since they occur within the same loop region, there is no need to worry about arrays/indices (unlike the 'store' case).
+    
+    **Marking Recompute Blocks:** These blocks are marked with `OpRecomputeBlockDecoration` to identify them as containing primal instructions, even though they are within differential regions. This helps us remove any unused blocks if none of the instructions end up being recomputed.
+    
+    Here is an example of recomputation demonstrated in Slang source-style (although this takes place in IR-form):
+    ```C
+    // Example function without loops post-transposition step. 
+    void f_rev(DifferentialPair<float> dpx, float d_out)
+    {
+        //
+        // Primal blocks (will be extracted into a separate function in Step 6: Extraction)
+        //
+
+        float x = dpx.getPrimal();
+        float p = 0;
+
+        if (x < 0.5)
+        {
+            float t1 = x * x;
+            p = t1 * t1 + x;
+        }
+
+        if (x > 10.f)
+        {
+            float t2 = x * x * x;
+            p = t2 * t2 + x;
+        }
+
+        //
+        // Reversed differential blocks start here (will be extracted into a separate function in Step 6: Extraction)
+        //
+
+        float dp_rev = d_out;
+        float dx_rev = 0.f; // accumulator var for 'x.d'
+        if (x > 10.f)
+        {
+            float dt2_rev = t2 * dp_rev; // access of a primal value 't2' from a differential block.
+            dx_rev += dp_rev;
+            dp_rev = 0.f; // dp_rev's value gets reset to 0 after use.
+
+            dx_rev += x * x * dt2_rev;
+            dx_rev += x * dt2_rev * x;
+            dx_rev += dt2_rev * x * x;
+        }
+
+        if (x < 0.5)
+        {
+            float dt1_rev = t1 * dp_rev; // access of a primal value 't1' from a differential block.
+            dx_rev += dp_rev;
+
+            dx_rev += x * dt1_rev;
+            dx_rev += dt1_rev * x;
+        }
+
+        dpx = DifferentialPair<float>(x, dx_rev);
+    }
+
+    // The same function after the primal hoisting step. Note that the primal control flow has been cloned into the start of
+    // the top-level differential region.
+    // 
+    void f_rev_hoisted(DifferentialPair<float> dpx, float d_out)
+    {
+        //
+        // Primal blocks (will be extracted into a separate function in Step 6: Extraction)
+        //
+
+        float x = dpx.getPrimal();
+        float p = 0;
+
+        if (x < 0.5)
+        {
+            float t1 = x * x;
+            p = t1 * t1 + x;
+        }
+
+        if (x > 10.f)
+        {
+            float t2 = x * x * x;
+            p = t2 * t2 + x;
+        }
+
+        //
+        // Reversed differential blocks start here (will be extracted into a separate function in Step 6: Extraction)
+        //
+
+        // Recompute blocks are inserted at the beginning of each differential region.
+        float x_recompute = dpx.getPrimal();
+        if (x_recompute < 0.5)
+        {
+            // Only the t1 instruction is cloned in since it is used by the differential blocks.
+            float t1_recompute = x_recompute * x_recompute;
+        }
+
+        if (x_recompute > 10.f)
+        {
+            // Only the t2 instruction is cloned in since it is used by the differential blocks.
+            float t2_recompute = x_recompute * x_recompute * x_recompute;
+        }
+
+        float dp_rev = d_out;
+        float dx_rev = 0.f; // accumulator var for 'x.d'
+        if (x_recompute > 10.f)
+        {
+            float dt2_rev = t2_recompute * dp_rev; // invalid access of 't2_recompute' (it's inside a branch)
+            dx_rev += dp_rev;
+
+            dx_rev += x_recompute * x_recompute * dt2_rev;
+            dx_rev += x_recompute * dt2_rev * x_recompute;
+            dx_rev += dt2_rev * x_recompute * x_recompute;
+        }
+
+        if (x_recompute < 0.5)
+        {
+            float dt1_rev = t1_recompute * dp_rev; // invalid access of 't1_recompute' (it's inside a branch)
+            dx_rev += dp_rev;
+
+            dx_rev += x_recompute * dt1_rev;
+            dx_rev += dt1_rev * x_recompute;
+        }
+
+        dpx = DifferentialPair<float>(x, dx_rev);
+    }
+
+    // Same function after branch-access-legalization (run after the primal-hoisting step):
+    void f_rev_hoisted_and_legalized(DifferentialPair<float> dpx, float d_out)
+    {
+        //
+        // Primal blocks:
+        //
+
+        float x = dpx.getPrimal();
+        float p = 0;
+
+        float t1; // Var inserted/moved to immediate dominator block (branch-access-legalization)
+        if (x < 0.5)
+        {
+            t1 = x * x;
+            p = t1 * t1 + x;
+        }
+
+        float t2; // Var inserted/moved to immediate dominator block (branch-access-legalization)
+        if (x > 10.f)
+        {
+            t2 = x * x * x;
+            p = t2 * t2 + x;
+        }
+
+        //
+        // Reversed differential blocks:
+        //
+
+        float dp_rev = d_out;
+        float dx_rev = 0.f; // accumulator var for 'x.d'
+        if (x > 10.f)
+        {
+            float dt2_rev = t2 * dp_rev;
+            dx_rev += dp_rev;
+
+            dx_rev += x * x * dt2_rev;
+            dx_rev += x * dt2_rev * x;
+            dx_rev += dt2_rev * x * x;
+        }
+
+        if (x < 0.5)
+        {
+            float dt1_rev = t1 * dp_rev;
+            dx_rev += dp_rev;
+
+            dx_rev += x * dt1_rev;
+            dx_rev += dt1_rev * x;
+        }
+    }
+    ```
+
+    For completeness, here is another example of a function `g` which contains a loop to demonstrate how recomputation works when there are
+    multiple loop regions.
+
+    ```C
+    // Example function with a loop, post-transposition step (BEFORE hoisting)
+    void g_rev(DifferentialPair<float> dpx, float d_out)
+    {
+        //
+        // Primal blocks (will be extracted into a separate function in Step 6: Extraction)
+        //
+
+        float x = dpx.getPrimal();
+        float p = x;
+
+        if (x < 0.5)
+        {
+            float k = 2.f * x;
+            p = p * k;
+        }
+
+        for (uint i = 0; i < 10; i++)
+        {
+            if (x > 0.5)
+            {
+                float t = 2.f * i;
+                p = p + x * t;
+            }
+        }
+
+        //
+        // Reversed differential blocks
+        //
+
+        float dt_rev = 0.f;
+        float dp_rev = 0.f;
+        for (uint i = 9; i >= 0; i--)
+        {
+            if (x > 0.5)
+            {
+                dx_rev += t * dp_rev; // Use of primal value 't' in differential blocks.
+            }
+        }
+
+        if (x < 0.5)
+        {
+            dp_rev = dp_rev * k;       // Use of primal value 'k' in differential blocks.
+            float dk_rev = p * dp_rev; // Use of primal value 'p' in differential blocks.
+            dx_rev += dk_rev * 2.f;
+        }
+
+        dx_rev += dp_rev;
+
+        return DifferentialPair<float>(x, dx_rev);
+    }
+
+    // The same function after hoisting and branch-access-legalization. 
+    // Notice that recompute blocks are inserted into the top-level
+    // as well as each loop region in the differential blocks.
+    // 
+    void g_rev_hoisted_and_legalized(DifferentialPair<float> dpx, float d_out)
+    {
+        //
+        // Primal blocks (will be extracted into a separate function in Step 6: Extraction)
+        //
+
+        float x = dpx.getPrimal();
+        float p = x;
+
+        if (x < 0.5)
+        {
+            float k = 2.f * x;
+            p = p * k;
+        }
+
+        for (uint i = 0; i < 10; i++)
+        {
+            if (x > 0.5)
+            {
+                float t = 2.f * i;
+                p = p + x * t;
+            }
+        }
+
+        //
+        // Reversed differential blocks
+        //
+
+        // ----- Recompute blocks inserted for top-level
+        float p_recompute = x; // Inst recomputed.
+        float k_recompute; 
+        if (x < 0.5)
+        {
+            k_recompute = 2.f * x; // Inst recomputed.
+        }
+        // -----
+
+        float dt_rev = 0.f;
+        float dp_rev = 0.f;
+        for (uint i = 9; i >= 0; i--)
+        {
+            // ---- Recompute blocks inserted for loop region.
+            float t_recompute;
+            if (x > 0.5)
+            {
+                t_recompute = 2.f * i; // Inst recomputed.
+            }
+            // ----
+
+            if (x > 0.5)
+            {
+                dx_rev += t_recompute * dp_rev; 
+            }
+        }
+
+        if (x < 0.5)
+        {
+            dp_rev = dp_rev * k_recompute;       
+            float dk_rev = p_recompute * dp_rev; 
+            dx_rev += dk_rev * 2.f;
+        }
+
+        dx_rev += dp_rev;
+
+        return DifferentialPair<float>(x, dx_rev);
+    }
+    ```
+
+### 6. Extraction 
+The final step involves _splitting_ the function immediately after the primal block to create two functions: a **primal context function** that computes the primal value normally, but also outputs a context object with relevant intermediate values, and a **backward propagation function** that computes the backward derivative and consumes this context object for the required intermediate values.
+
+The first 5 steps have set us up for this final step, so it is not particularly complex. We follow this high-level logic:
+
+1. Create an empty function for the primal context function. The type of this function is the same as the primal function, but with an additional `out` parameter for the intermediate context, whose type is undecided at this stage. We use a temporary function-specific type called `OpBackwardDerivativeIntermediateContextType(func)` as a placeholder. 
+2. Move primal blocks to the primal context function. Re-create the return inst (the return value is temporarily remembered using a decoration during the rest of the AD process). The first block (reserved for function parameters) is also duplicated and processed to have primal parameters in the primal function and pair parameters in the differential function.
+3. Lower all `OpBackwardDerivativeIntermediateContextType` types into concrete struct types by creating a field for each 'stored' inst from Step 5. This lowering process happens **at the end of the current AD pass after all relevant methods have completed Step 5**. We need Step 5 (hoisting) to be complete for all relevant methods because the context struct for a given function can include context structs of other functions that are called from it. Our context-type lowering therefore proceeds recursively by lowering the context for inner functions as necessary. The lowering process also removes the temporary vars that were created to hold the store insts, and replaces them with stores and loads from the context struct.
+   
+   **Recursive Functions are Disallowed:** Since we lower all intermediate types into a static struct type, recursive calls cannot currently be supported from differentiable functions. The context struct for a method may include itself, creating an impossible scenario.
+
+Here is one of the examples above (`f`), shown after checkpointing and then after extraction:
+
+```C
+// Example function before the extraction step.
+void f_rev_hoisted(DifferentialPair<float> dpx, float d_out)
+{
+    // Insert vars for checkpointed insts at the top-level
+    float t1_storage;
+    float t2_storage;
+
+    //
+    // Primal blocks
+    //
+
+    float x = dpx.getPrimal();
+    float p = 0;
+
+    if (x < 0.5)
+    {
+        float t1 = x * x;
+        t1_storage = t1; // Cache values immediately after they are created.
+        p = t1 * t1 + x;
+    }
+
+    if (x > 10.f)
+    {
+        float t2 = x * x * x;
+        t2_storage = t2; // Cache values immediately after they are created.
+        p = t2 * t2 + x;
+    }
+
+    //
+    // Reversed differential blocks
+    //
+
+    float x = dpx.getPrimal();
+
+    float dp_rev = d_out;
+    float dx_rev = 0.f; // accumulator var for 'x.d'
+    if (x > 10.f)
+    {
+        float dt2_rev = t2_storage * dp_rev; // Use stored value.
+        dx_rev += dp_rev;
+
+        dx_rev += x * x * dt2_rev;
+        dx_rev += x * dt2_rev * x;
+        dx_rev += dt2_rev * x * x;
+    }
+
+    if (x < 0.5)
+    {
+        float dt1_rev = t1_storage * dp_rev; // Use stored value.
+        dx_rev += dp_rev;
+
+        dx_rev += x * dt1_rev;
+        dx_rev += dt1_rev * x;
+    }
+
+    dpx = DifferentialPair<float>(x, dx_rev);
+}
+
+// After extraction: lowered intermediate context for f
+struct f_Intermediates
+{
+    float t1;
+    float t2;
+};
+
+
+// After extraction: primal context function
+float s_primal_ctx_f(float x, out f_Intermediates ctx)
+{
+    //
+    // Primal blocks
+    //
+
+    float x = dpx.getPrimal();
+    float p = 0;
+
+    if (x < 0.5)
+    {
+        float t1 = x * x;
+        ctx.t1 = t1; // Cache values immediately after they are created.
+        p = t1 * t1 + x;
+    }
+
+    if (x > 10.f)
+    {
+        float t2 = x * x * x;
+        ctx.t2 = t2; // Cache values immediately after they are created.
+        p = t2 * t2 + x;
+    }
+
+    return p;
+}
+
+// After extraction: backward propagation function.
+void s_bwd_f(DifferentialPair<float> dpx, float d_out, f_Intermediates ctx)
+{
+    float x = dpx.getPrimal();
+
+    float dp_rev = d_out;
+    float dx_rev = 0.f; // accumulator var for 'x.d'
+    if (x > 10.f)
+    {
+        float dt2_rev = ctx.t2 * dp_rev; // Use stored value.
+        dx_rev += dp_rev;
+
+        dx_rev += x * x * dt2_rev;
+        dx_rev += x * dt2_rev * x;
+        dx_rev += dt2_rev * x * x;
+    }
+
+    if (x < 0.5)
+    {
+        float dt1_rev = ctx.t1 * dp_rev; // Use stored value.
+        dx_rev += dp_rev;
+
+        dx_rev += x * dt1_rev;
+        dx_rev += dt1_rev * x;
+    }
+
+    dpx = DifferentialPair<float>(x, dx_rev);
+}
+```
+
+Having separate methods for the primal and backward passes is necessary when reverse-mode differentiating a method that calls out to other differentiable functions.
+Here is an example of differentiating a method that calls out to multiple methods, to illustrate why we need the primal context method to be separate:
+
+```C
+float outer(float x)
+{
+    float y = f(x);
+    float z = g(y);
+    float w = h(z);
+
+    return w;
+}
+
+// Its complete reverse-mode derivative looks like the following:
+void outer_rev(DifferentialPair<float> dpx, float d_output)
+{
+    // Compute the primal values in the forward direction, while producing relevant context. 
+    f_Intermediates f_ctx;
+    g_Intermediates g_ctx;
+    h_Intermediates h_ctx;
+
+    float y = s_primal_ctx_f(x, f_ctx);
+    float z = s_primal_ctx_g(y, g_ctx);
+    float w = s_primal_ctx_h(z, h_ctx);
+
+    // Note that at this point, we are holding intermediate context variables for f, g and h.
+
+    // Consume the context while propagating the derivatives backwards.
+    DifferentialPair<float> dpz = {z, 0.f};
+    s_bwd_h(dpz, d_output, h_ctx);
+
+    DifferentialPair<float> dpy = {y, 0.f};
+    s_bwd_g(dpy, dpz.getDifferential(), g_ctx);
+
+    DifferentialPair<float> _dpx = {x, 0.f};
+    s_bwd_f(_dpx, dpy.getDifferential(), f_ctx);
+
+    dpx = _dpx;
+}
+```
diff --git a/external/slang/share/doc/slang/design/autodiff/types.md b/external/slang/share/doc/slang/design/autodiff/types.md
new file mode 100644
index 00000000..3860f0df
--- /dev/null
+++ b/external/slang/share/doc/slang/design/autodiff/types.md
@@ -0,0 +1,290 @@
+
+This documentation is intended for Slang contributors and is written from a compiler engineering point of view. For Slang users, see the user-guide at this link: [https://shader-slang.com/slang/user-guide/autodiff.html](https://shader-slang.com/slang/user-guide/autodiff.html)
+
+Before diving into this document, please review the document on [Basics](./basics.md) for the fundamentals of automatic differentiation. 
+
+# Components of the Type System
+Here we detail the main components of the type system: the `IDifferentiable` interface to define differentiable types, the `DifferentialPair<T>` type to carry a primal and corresponding differential in a single type. 
+We also detail how auto-diff operators are type-checked (the higher-order function checking system), how the `no_diff` decoration can be used to avoid differentiation through attributed types, and the derivative data flow analysis that warns the user of unintentionally stopping derivatives.
+
+## `interface IDifferentiable`
+Defined in core.meta.slang, `IDifferentiable` forms the basis for denoting differentiable types, both within the core module, and otherwise. 
+The definition of `IDifferentiable` is designed to encapsulate the following 4 items:
+1. `Differential`: The type of the differential value of the conforming type. This allows custom data-structures to be defined to carry the differential values, which may be optimized for space instead of relying solely on compiler synthesis.
+
+Since the computation of derivatives is inherently linear, we only need access to a few operations. These are:
+
+2. `dadd(Differential, Differential) -> Differential`: Addition of two values of the differential type. Its implementation must be associative and commutative, or the resulting derivative code may be incorrect.
+3. `dzero() -> Differential`: Additive identity (i.e. the zero or empty value) that can be used to initialize variables during gradient aggregation
+4. `dmul<S:__BuiltinRealType>(S, Differential)`: Scalar multiplication of a real number with the differential type. Its implementation must be distributive over differential addition (`dadd`).
+
+Points 2, 3 & 4 are derived from the concept of vector spaces. The derivative values of any Slang function always form a vector space (https://en.wikipedia.org/wiki/Vector_space).
+
+### Derivative member associations
+In certain scenarios, the compiler needs information on how the fields in the original type map to the differential type. Particularly, this is a problem when differentiating the implicit construction of a struct through braces (i.e. `{}`), represented by `kIROp_MakeStruct`. We provide the decorator `[DerivativeMember(DifferentialTypeName.fieldName)]` (ASTNode: DerivativeMemberAttribute, IR: kIROp_DerivativeMemberDecoration) to explicitly mark these associations.
+Example
+```C
+struct MyType : IDifferentiable
+{
+    typealias Differential = MyDiffType;
+    float a;
+
+    [DerivativeMember(MyDiffType.db)]
+    float b;
+
+    /* ... */
+};
+
+struct MyDiffType
+{
+    float db;
+};
+```
+
+### Automatic Synthesis of `IDifferentiable` Conformances for Aggregate Types
+It can be tedious to expect users to hand-write the associated `Differential` type, the corresponding mappings and interface methods for every user-defined `struct` type. For aggregate types, these are trivial to construct by analysing which of their components conform to `IDifferentiable`. 
+The synthesis proceeds in roughly the following fashion:
+1. `IDifferentiable`'s components are tagged with special decorator `__builtin_requirement(unique_integer_id)` which carries an enum value from `BuiltinRequirementKind`.
+2. When checking that types conform to their interfaces, if a user-provided definition does not satisfy a requirement with a built-in tag, we perform synthesis by dispatching to `trySynthesizeRequirementWitness`. 
+3. For _user-defined types_, Differential **types** are synthesized during conformance-checking through `trySynthesizeDifferentialAssociatedTypeRequirementWitness` by checking if each constituent type conforms to `IDifferentiable`, looking up the corresponding `Differential` type, and constructing a new aggregate type from these differential types. Note that since it is possible that a `Differential` type of a constituent member has not yet been synthesized, we have additional logic in the lookup system (`trySynthesizeRequirementWitness`) that synthesizes a temporary empty type with a `ToBeSynthesizedModifier`, so that the fields can be filled in later, when the member type undergoes conformance checking.
+4. For _user-defined types_, Differential methods (`dadd`, `dzero` and `dmul`) are synthesized in `trySynthesizeDifferentialMethodRequirementWitness` by utilizing the `Differential` member and its `[DerivativeMember]` decorations to determine which fields need to be considered and the base type to use for each field. There are two synthesis patterns. The fully-inductive pattern is used for `dadd` and `dzero` which works by calling `dadd` and `dzero` respectively on the individual fields of the `Differential` type under consideration. 
+Example:
+```C
+// Synthesized from "struct T {FT1 field1; FT2 field2;}"
+T.Differential dadd(T.Differential a, T.Differential b)
+{
+    return Differential(
+        FT1.dadd(a.field1, b.field1),
+        FT2.dadd(a.field2, b.field2),
+    )
+}
+```
+On the other hand, `dmul` uses the fixed-first arg pattern since the first argument is a common scalar, and proceeds inductively on all the other args.
+Example:
+```C
+// Synthesized from "struct T {FT1 field1; FT2 field2;}"
+T.Differential dmul<S:__BuiltinRealType>(S s, T.Differential a)
+{
+    return Differential(
+        FT1<S>.dmul(s, a.field1),
+        FT2<S>.dmul(s, a.field2),
+    )
+}
+```
+5. During auto-diff, the compiler can sometimes synthesize new aggregate types. The most common case is the intermediate context type (`kIROp_BackwardDerivativeIntermediateContextType`), which is lowered into a standard struct once the auto-diff pass is complete. It is important to synthesize the `IDifferentiable` conformance for such types since they may be further differentiated (through higher-order differentiation). This implementation is contained in `fillDifferentialTypeImplementationForStruct(...)` and is roughly analogous to the AST-side synthesis.
+
+### Differentiable Type Dictionaries
+During auto-diff, the IR passes frequently need to perform lookups to check if an `IRType` is differentiable, and retrieve references to the corresponding `IDifferentiable` methods. These lookups also need to work on generic parameters (that are defined inside generic containers), and existential types that are interface-typed parameters.
+
+To accommodate this range of different type systems, Slang uses a type dictionary system that associates a dictionary of relevant types with each function. This works in the following way:
+1. When `CheckTerm()` is called on an expression within a function that is marked differentiable (`[Differentiable]`), we check if the resolved type conforms to `IDifferentiable`. If so, we add this type to the dictionary along with the witness to its differentiability. The dictionary is currently located on `DifferentiableAttribute` that corresponds to the `[Differentiable]` modifier.
+
+2. When lowering to IR, we create a `DifferentiableTypeDictionaryDecoration` which holds the IR versions of all the types in the dictionary as well as a reference to their `IDifferentiable` witness tables.
+
+3. When synthesizing the derivative code, all the transcriber passes use `DifferentiableTypeConformanceContext::setFunc()` to load the type dictionary. `DifferentiableTypeConformanceContext` then provides convenience functions to lookup differentiable types, appropriate `IDifferentiable` methods, and construct appropriate `DifferentialPair<T>`s.
+
+### Looking up Differential Info on _Generic_ types
+Generically defined types are also lowered into the differentiable type dictionary, but rather than having a concrete witness table, the witness table is itself a parameter. When auto-diff passes need to find the differential type or place a call to the IDifferentiable methods, this is turned into a lookup on the witness table parameter (i.e. `Lookup(<InterfaceRequirementKey>, <WitnessTableParameter>)`). Note that these lookup instructions are inserted into the generic parent container rather than the innermost function. 
+Example:
+```C
+T myFunc<T:IDifferentiable>(T a)
+{
+    return a * a;
+}
+
+// Reverse-mode differentiated version
+void bwd_myFunc<T:IDifferentiable>(
+    inout DifferentialPair<T> dpa,
+    T.Differential dOut) // T.Differential is Lookup('Differential', T_Witness_Table)
+{
+    T.Differential da = T.dzero(); // T.dzero is Lookup('dzero', T_Witness_Table)
+
+    da = T.dadd(dpa.p * dOut, da); // T.dadd is Lookup('dadd', T_Witness_Table)
+    da = T.dadd(dpa.p * dOut, da);
+
+    dpa = diffPair(dpa.p, da);
+}
+```
+
+### Looking up Differential Info on _Existential_ types
+Existential types are interface-typed values, where there are multiple possible implementations at run-time. The existential type carries information about the concrete type at run-time and is effectively a 'tagged union' of all possible types.
+
+#### Differential type of an Existential
+The differential type of an existential type is tricky to define since our type system's only restriction on the `.Differential` type is that it also conforms to `IDifferentiable`. The differential type of any interface `IInterface : IDifferentiable` is therefore the interface type `IDifferentiable`. This is problematic since Slang generally requires a static `anyValueSize` that must be a strict upper bound on the sizes of all conforming types (since this size is used to allocate space for the union). Since `IDifferentiable` is defined in the core module `core.meta.slang` and can be used by the user, it is impossible to define a reliable bound. 
+We instead provide a new **any-value-size inference** pass (`slang-ir-any-value-inference.h`/`slang-ir-any-value-inference.cpp`) that assembles a list of types that conform to each interface in the final linked IR and determines a relevant upper bound. This allows us to ignore types that conform to `IDifferentiable` but aren't used in the final IR, and generate a tighter upper bound. 
+
+**Future work:**
+This approach, while functional, creates a locality problem since the size of `IDifferentiable` is the max of _all_ types that conform to `IDifferentiable` in visible modules, even though we only care about the subset of types that appear as `T.Differential` for `T : IInterface`. The reason for this problem is that upon performing an associated type lookup, the Slang IR drops all information about the base interface that the lookup starts from and only considers the constraint interface (in this case `Differential : IDifferentiable`). 
+There are several ways to resolve this issue, including (i) a static analysis pass that determines the possible set of types at each use location and propagates them to determine a narrower set of types, or (ii) generic (or 'parameterized') interfaces, such as `IDifferentiable<T>` where each version can have a different set of conforming types.
+
+<!--#### IDifferentiable Method lookups on an Existential
+All other method lookups are performed using existential-type lookups on the existential parameter. The idea is that existential-typed parameters come with a witness-table component that can be accessed by invoking `kIROp_ExtractExistentialWitnessTable` on them. This allows us to look up the `dadd`/`dzero` methods on this witness table in the same way as we did for generic types.-->
+
+Example:
+```C
+interface IInterface : IDifferentiable
+{
+    [Differentiable]
+    This foo(float val);
+
+    [Differentiable]
+    float bar();
+};
+
+float myFunc(IInterface obj, float a)
+{
+    IInterface k = obj.foo(a);
+    return k.bar();
+}
+
+// Reverse-mode differentiated version (in pseudo-code corresponding to IR, some of these will get lowered further)
+void bwd_myFunc(
+    inout DifferentialPair<IInterface> dpobj,
+    inout DifferentialPair<float> dpa,
+    float.Differential dOut) // T.Differential is Lookup('Differential', T_Witness_Table)
+{
+    // Primal pass..
+    IInterface obj = dpobj.p;
+    IInterface k = obj.foo(a);
+    // .....
+
+    // Backward pass
+    DifferentialPair<IInterface> dpk = diffPair(k);
+    bwd_bar(dpk, dOut);
+    IDifferentiable dk = dpk.d; // Differential of `IInterface` is `IDifferentiable`
+
+    DifferentialPair<IInterface> dp = diffPair(dpobj.p);
+    bwd_foo(dpobj, dpa, dk);
+}
+
+```
+
+#### Looking up `dadd()` and `dzero()` on Existential Types
+There are two distinct cases for lookup on an existential type. The more common case is the closed-box existential type represented simply by an interface. Every value of this type contains a type identifier & a witness table identifier along with the value itself.  The less common case is when the function calls are performed directly on the value after being cast to the concrete type.
+
+**`dzero()` for "closed" Existential type: The `NullDifferential` Type**
+For concrete and even generic types, we can initialize a derivative accumulator variable by calling the appropriate `Type.dzero()` method. This is unfortunately not possible when initializing an existential differential (which is currently of type `IDifferentiable`), since we must also initialize the type-id of this existential to one of the implementations, but we do not know which one yet since that is a run-time value that only becomes known after the first differential value is generated.
+
+To get around this issue, we declare a special type called `NullDifferential` that acts as a "none type" for any `IDifferentiable` existential object. 
+
+**`dadd()` for "closed" Existential types: `__existential_dadd`**
+We cannot directly use `dadd()` on two existential differentials of type `IDifferentiable` because we must handle the case where one of them is of type `NullDifferential` and `dadd()` is only defined for differentials of the same type. 
+We handle this currently by synthesizing a special method called `__existential_dadd` (`getOrCreateExistentialDAddMethod` in `slang-ir-autodiff.cpp`) that performs a run-time type-id check to see if one of the operands is of type `NullDifferential` and returns the other operand if so. If both are non-null, we dispatch to the appropriate `dadd` for the concrete type.
+
+**`dadd()` and `dzero()` for "open" Existential types**
+If we are dealing with values of the concrete type (i.e. the opened value obtained through `ExtractExistentialValue(ExistentialParam)`), then we can perform lookups in the same way we do for generic types. All existential parameters come with a witness table. We insert instructions to extract this witness table and perform lookups accordingly. That is, for `dadd()`, we use `Lookup('dadd', ExtractExistentialWitnessTable(ExistentialParam))` and place a call to the result.
+
+## `struct DifferentialPair<T:IDifferentiable>`
+The second major component is `DifferentialPair<T:IDifferentiable>` that represents a pair of a primal value and its corresponding differential value. 
+The differential pair is primarily used for passing & receiving derivatives from the synthesized derivative methods, as well as for block parameters on the IR-side.
+Both `fwd_diff(fn)` and `bwd_diff(fn)` act as function-to-function transformations, and so the Slang front-end translates the type of `fn` to its derivative version so the arguments can be type checked.
+
+### Pair type lowering.
+The differential pair type is a special type throughout the AST and IR passes (AST Node: `DifferentialPairType`, IR: `kIROp_DifferentialPairType`) because of its use in front-end semantic checking and when synthesizing the derivative code for the functions. Once the auto-diff passes are complete, the pair types are lowered into simple `struct`s so they can be easily emitted (`DiffPairLoweringPass` in `slang-ir-autodiff-pairs.cpp`). 
+We also define additional instructions for pair construction (`kIROp_MakeDifferentialPair`) and extraction (`kIROp_DifferentialPairGetDifferential` & `kIROp_DifferentialPairGetPrimal`) which are lowered into struct construction and field accessors, respectively.
+
+### "User-code" Differential Pairs
+Just as we use special IR codes for differential pairs because they have special handling in the IR passes, sometimes differential pairs should be _treated as_ regular struct types during the auto-diff passes.
+This happens primarily during higher-order differentiation when the user wishes to differentiate the same code multiple times. 
+Slang's auto-diff approaches this by rewriting all the relevant differential pairs into 'irrelevant' differential pairs (`kIROp_DifferentialPairUserCode`) and 'irrelevant' accessors (`kIROp_DifferentialPairGetDifferentialUserCode`, `kIROp_DifferentialPairGetPrimalUserCode`) at the end of **each auto-diff iteration** so that the next iteration treats these as regular differentiable types. 
+The user-code versions are also lowered into `struct`s in the same way.
+
+## Type Checking of Auto-Diff Calls (and other _higher-order_ functions)
+Since `fwd_diff` and `bwd_diff` are represented as higher order functions that take a function as an input and return the derivative function, the front-end semantic checking needs some notion of higher-order functions to be able to check and lower the calls into appropriate IR.
+
+### Higher-order Invocation Base: `HigherOrderInvokeExpr`
+All higher order transformations derive from `HigherOrderInvokeExpr`. For auto-diff there are two possible expression classes `ForwardDifferentiateExpr` and `BackwardDifferentiateExpr`, both of which derive from this parent expression.
+
+### Higher-order Function Call Checking: `HigherOrderInvokeExprCheckingActions`
+Resolving the concrete method is not a trivial issue in Slang, given its support for overloading, type coercion and more. This becomes more complex with the presence of a function transformation in the chain. 
+For example, if we have `fwd_diff(f)(DiffPair<float>(...), DiffPair<double>(...))`, we would need to find the correct match for `f` based on its post-transform argument types.
+
+To facilitate this we use the following workflow:
+1. The `HigherOrderInvokeExprCheckingActions` base class provides a mechanism for different higher-order expressions to implement their type translation (i.e. what is the type of the transformed function). 
+2. The checking mechanism passes all detected overloads for `f` through the type translation and assembles a new group out of the results (the new functions are 'temporary')
+3. This new group is used by `ResolveInvoke` when performing overload resolution and type coercion using the user-provided argument list.
+4. The resolved signature (if there is one) is then replaced with the corresponding function reference and wrapped in the appropriate higher-order invoke.
+
+**Example:**
+
+Let's say we have two functions with the same name `f`: (`int -> float`, `double, double -> float`)
+and we want to resolve `fwd_diff(f)(DiffPair<float>(1.0, 0.0), DiffPair<float>(0.0, 1.0))`.
+
+The higher-order checking actions will synthesize the 'temporary' group of translated signatures (`int -> DiffPair<float>`, `DiffPair<double>, DiffPair<double> -> DiffPair<float>`). 
+Invoke resolution will then narrow this down to a single match (`DiffPair<double>, DiffPair<double> -> DiffPair<float>`) by automatically casting the `float`s to `double`s. Once the resolution is complete, 
+we return `InvokeExpr(ForwardDifferentiateExpr(f : double, double -> float), casted_args)` by wrapping the corresponding function in the corresponding higher-order expr
+
+## Attributed Types (`no_diff` parameters)
+
+Often, it will be necessary to prevent gradients from propagating through certain parameters, for correctness reasons. For example, values representing random samples are often not differentiated since the result may be mathematically incorrect.
+
+Slang provides the `no_diff` operator to mark parameters as non-differentiable, even if they use a type that conforms to `IDifferentiable`
+
+```C
+float myFunc(float a, no_diff float b)
+{
+    return a * b;
+}
+
+// Resulting fwd-mode derivative:
+DiffPair<float> myFunc(DiffPair<float> dpa, float b)
+{
+    return diffPair(dpa.p * b, dpa.d * b);
+}
+```
+
+Slang uses _OpAttributedType_ to denote the IR type of such parameters. For example, the lowered type of `b` in the above example is `OpAttributedType(OpFloat, OpNoDiffAttr)`. In the front-end, this is represented through the `ModifiedType` AST node. 
+
+Sometimes, this additional layer can get in the way of things like type equality checks and other mechanisms where the `no_diff` is irrelevant. Thus, we provide the `unwrapAttributedType` helper to remove attributed type layers for such cases.
+
+## Derivative Data-Flow Analysis
+Slang has a derivative data-flow analysis pass that is performed on a per-function basis immediately after lowering to IR and before the linking step (`slang-ir-check-differentiability.h`/`slang-ir-check-differentiability.cpp`). 
+
+The job of this pass is to enforce that instructions that are of a differentiable type will propagate derivatives, unless explicitly dropped by the user through `detach()` or `no_diff`. The reason for this is that Slang requires functions to be decorated with `[Differentiable]` to allow it to propagate derivatives. Otherwise, the function is considered non-differentiable, and effectively produces a 0 derivative. This can lead to frustrating situations where a function drops derivatives without the user intending it (for instance, because the `[Differentiable]` annotation was forgotten), rather than on purpose. Example:
+```C
+float nonDiffFunc(float x)
+{
+    /* ... */
+}
+
+float differentiableFunc(float x) // Forgot to annotate with [Differentiable]
+{
+    /* ... */
+}
+
+float main(float x)
+{
+    // User doesn't realise that the function that is supposed to be differentiable is not 
+    // getting differentiated, because the types here are all 'float'.
+    // 
+    return nonDiffFunc(x) * differentiableFunc(x);
+}
+```
+
+The data-flow analysis step enforces that non-differentiable functions used in a differentiable context should get their derivative dropped explicitly. That way, it is clear to the user whether a call is getting differentiated or dropped.
+
+Same example with `no_diff` enforcement:
+```C
+float nonDiffFunc(float x)
+{
+    /* ... */
+}
+
+[Differentiable]
+float differentiableFunc(float x)
+{
+    /* ... */
+}
+
+float main(float x)
+{
+    return no_diff(nonDiffFunc(x)) * differentiableFunc(x);
+}
+```
+
+A `no_diff` can only be used directly on a function call, and turns into a `TreatAsDifferentiableDecoration` that indicates that the function will not produce a derivative.
+
+The derivative data-flow analysis pass works similar to a standard data-flow pass:
+1. We start by assembling a set of instructions that 'produce' derivatives by starting with the parameters of differentiable types (and without an explicit `no_diff`), and propagating them through each instruction in the block. An inst carries a derivative if one of its operands carries a derivative, and the result type is differentiable.
+2. We then assemble a set of instructions that expect a derivative. These are differentiable operands of differentiable functions (unless they have been marked by `no_diff`). We then reverse-propagate this set by adding in all differentiable operands (and repeating this process).
+3. During this reverse-propagation, if there is any `OpCall` in the 'expect' set that is not also in the 'produce' set, then we have a situation where the gradient hasn't been explicitly dropped, and we create a user diagnostic.
diff --git a/external/slang/share/doc/slang/design/capabilities.md b/external/slang/share/doc/slang/design/capabilities.md
new file mode 100644
index 00000000..b4bd4c09
--- /dev/null
+++ b/external/slang/share/doc/slang/design/capabilities.md
@@ -0,0 +1,271 @@
+Capabilities (Out of Date)
+============
+
+Slang aims to be a portable language for shader programming, which introduces two complementary problems:
+
+1. We need a way to indicate that certain constructs (types, functions, etc.) are only allowed on certain targets, so that a user gets a meaningful error if they try to do something that won't work on one or more of the APIs or platforms they want to target. Similarly, the user expects to get an error if they call a fragment-shader-specific function inside of, say, compute shader code, or vice versa.
+
+2. If the same feature can be implemented across multiple platforms, but the best (or only) implementation path differs across platforms, then we need a way to express the platform specific code and pick the right implementation per-target.
+
+Item (2) is traditionally handled with preprocessor techniques (e.g., `#ifdef`ing the body of a function based on target platform), but that of course requires that the user invoke the Slang front end once for each target platform, and target-specific coding in a library will then "infect" code that uses that library, forcing them to invoke the front-end once per target as well.
+
+We are especially sensitive to this problem in the compiler itself, because we have to author and maintain the Slang standard modules, which need to (1) expose the capabilities of many platforms and (2) work across all those platforms. It would be very unfortunate if we had to build different copies of our standard modules per-target.
+
+The intention in Slang is to solve both of these problems with a system of *capabilities*.
+
+What is a capability?
+---------------------
+
+For our purposes a capability is a discrete feature that a compilation target either does or does not support.
+We could imagine defining a capability for the presence of texture sampling operations with implicit gradients; this capability would be supported when generating fragment shader kernel code, but not when generating code for other stages.
+
+Let's imagine a language syntax that the standard modules could use to define some *atomic* capabilities:
+
+```
+capability implicit_gradient_texture_fetches;
+```
+We can then imagine using attributes to indicate that a function requires a certain capability:
+
+```
+struct Texture2D
+{
+	...
+
+	// Implicit-gradient sampling operation.
+	[availableFor(implicit_gradient_texture_fetches)]
+	float4 Sample(SamplerState s, float2 uv);
+}
+```
+
+(Note that the `[availableFor(...)]` syntax is just a straw-man to write up examples, and a better name would be desirable if/when we implement this stuff.)
+
+Given those declarations, we could then check when compiling code if the user is trying to call `Texture2D.Sample` in code compiled for a target that *doesn't* support implicit-gradient texture fetches, and issue an appropriate error.
+The details on how to sequence this all in the compiler will be covered later.
+
+Derived Capabilities
+--------------------
+
+Once we can define atomic capabilities, the next step is to be able to define *derived* capabilities.
+Let's imagine that we extend our `capability` syntax so that we can define a new capability that automatically implies one or more other capabilities:
+
+```
+capability fragment : implicit_gradient_texture_fetches;
+```
+
+Here we've said that whenever the `fragment` capability is available, we can safely assume that the `implicit_gradient_texture_fetches` capability is available (but not vice versa).
+
+Given even a rudimentary tool like that, we can start to build up capabilities that relate closely to the "profiles" in things like D3D:
+
+```
+capability d3d;
+capability sm_5_0 : d3d;
+capability sm_5_1 : sm_5_0;
+capability sm_6_0 : sm_5_1;
+...
+
+capability d3d11 : d3d, sm_5_0;
+capability d3d12 : d3d, sm_6_0;
+
+capability khronos;
+capability glsl_400 : khronos;
+capability glsl_410 : glsl_400;
+...
+
+capability vulkan : khronos, glsl_450;
+capability opengl : khronos;
+```
+
+Here we are saying that `sm_5_1` supports everything `sm_5_0` supports, and potentially more. We are saying that `d3d12` supports `sm_6_0` but maybe not, e.g., `sm_6_3`.
+We are expressing the fact that having a `glsl_*` capability means you are on some Khronos API target, but that it doesn't specify which one.
+(The exact details of these declarations obviously aren't the point; getting a good hierarchy of capabilities will take time.)
+
+Capability Composition
+----------------------
+
+Sometimes we'll want to give a distinct name to a specific combination of capabilities, but not say that it supports anything new:
+
+```
+capability ps_5_1 = sm_5_1 & fragment;
+```
+
+Here we are saying that the `ps_5_1` capability is *equivalent* to the combination of `sm_5_1` and `fragment` (that is, if you support both `sm_5_1` and `fragment` then you support `ps_5_1` and vice versa).
+
+Compositions should be allowed in `[availableFor(...)]` attributes (e.g., `[availableFor(vulkan & glsl_450)]`), but pre-defined compositions should be favored when possible.
+
+When composing things with `&` it is safe for the compiler to filter out redundancies based on what it knows so that, e.g., `ps_5_0 & fragment` resolves to just `ps_5_0`.
+
+Once we have an `&` operator for capabilities, it is easy to see that "derived" capabilities are really syntax sugar, so that a derived capability like:
+
+```
+capability A : B, C
+```
+
+could have been written instead as :
+
+```
+capability A_atomic
+capability A = A_atomic & B & C
+```
+
+Where the `A_atomic` capability guarantees that `A` implies `B` and `C` but not vice versa.
+
+It is also useful to think of an `|` operator on capabilities.
+In particular if a function has multiple `[availableFor(...)]` attributes:
+
+```
+[availableFor(vulkan & fragment)]
+[availableFor(d3d12 & fragment)]
+void myFunc();
+```
+
+This function should be equivalent to one with just a single `[availableFor((vulkan & fragment) | (d3d12 & fragment))]` which is equivalent to `[availableFor((vulkan | d3d12) & fragment)]`.
+Simplification should generally push toward "disjunctive normal form," though, rather than pursue simplifications like that.
+Note that we do *not* include negation, so that capabilities are not general Boolean expressions.
+
+Validation
+----------
+
+For a given function definition `F`, the front end will scan its body and see what it calls, and compose the capabilities required by the called functions using `&` (simplifying along the way). Call the resulting capability (in disjunctive normal form) `R`.
+
+If `F` doesn't have an `[availableFor(...)]` attribute, then we can derive its *effective* `[availableFor(...)]` capability as `R` (this probably needs to be expressed as an iterative dataflow problem over the call graph, to handle cycles).
+
+If `F` *does* have one or more `[availableFor(...)]` clauses that amount to a declared capability `C` (again in disjunctive normal form), then we can check that `C` implies `R` and error out if it is not the case.
+A reasonable implementation would track which calls introduced which requirements, and be able to explain *why* `C` does not capture the stated requirements.
+
+For a shader entry point, we should check it as if it had an `[availableFor(...)]` that is the OR of all the specified target profiles (e.g., `sm_5_0 | glsl_450 | ...`) ANDed with the specified stage (e.g., `fragment`).
+Any error here should be reported to the user.
+If an entry point has an explicit `[availableFor(...)]` then we should AND that onto the profile computed above, so that the user can restrict certain entry points to certain profiles.
+
+In order to support separate compilation, the functions that are exported from a module should probably either have explicit availability attributes, or else they will be compiled against a kind of "default capability" used for the whole module.
+Downstream code that consumes such a module would see declarations with explicit capabilities only.
+Picking an appropriate "default capability" to use when compiling modules is an important challenge; it would in practice define the "min spec" to use when compiling.
+
+Capability Overriding
+---------------------
+
+It should be possible to define multiple versions of a function, having different `[availableFor(...)]` attributes:
+
+```
+[availableFor(vulkan)] void myFunc() { ... }
+
+[availableFor(d3d12)] void myFunc() { ... }
+```
+
+For front-end checking, these should be treated as if they were a single definition of `myFunc` with an ORed capability (e.g., `vulkan | d3d12`).
+Overload resolution will pick the "best" candidate at a call site based *only* on the signatures of the function (note that this differs greatly from how profile-specific function overloading works in Cg).
+
+The front-end will then generate initial IR code for each definition of `myFunc`.
+Each of the IR functions will have the *same* mangled name, but different bodies, and each will have appropriate IR decorations to indicate the capabilities it requires.
+
+The choice of which definition to use is then put off until IR linking for a particular target.
+At that point we can look at all the IR functions matching a given mangled name, filter them according to the capabilities of the target, and then select the "best" one.
+
+In general a definition `A` of an IR symbol is better than another definition `B` if the capabilities on `A` imply those on `B` but not vice versa.
+(In practice this probably needs to be "the capabilities on `A` intersected with those of the target," and similarly for `B`)
+
+This approach allows us to defer profile-based choices of functions to very late in the process. The one big "gotcha" to be aware of is when functions are overloaded based on pipeline stage, where we would then have to be careful when generating DXIL or SPIR-V modules with multiple entry points (as a single function `f` might need to be specialized twice if it calls a stage-overloaded function `g`).
+
+Capabilities in Other Places
+----------------------------
+
+So far I've talked about capabilities on functions, but they should also be allowed on other declarations including:
+
+- Types, to indicate that code using that type needs the given capability
+- Interface conformances, to indicate that a type only conforms to the interface when the capabilities are available
+- Struct fields, to indicate that the field is only present in the type when the capabilities are present
+- Extension declarations, to indicate that everything in them requires the specified capabilities
+
+We should also provide a way to specify that a `register` or other layout modifier is only applicable for specific targets/stages. Such a capability nominally exists in HLSL today, but it would be much more useful if it could be applied to specify target-API-specific bindings.
+
+Only functions should support overloading based on capability. In all other cases there can only be one definition of an entity, and capabilities just decide when it is available.
+
+API Extensions as Capabilities
+------------------------------
+
+One clear use case for capabilities is to represent optional extensions, including cases where a feature is "built-in" in D3D but requires an extension in Vulkan:
+
+```
+capability KHR_secret_sauce : vulkan;
+
+[availableFor(sm_7_0)] // always available for D3D Shader Model 7.0
+[availableFor(KHR_secret_sauce)] // Need the "secret sauce" extension for Vulkan
+void improveShadows();
+```
+
+When generating code for Vulkan, we should be able to tell the user that the `improveShadows()` function requires the given extension. The user should be able to express compositions of capabilities in their `-profile` option (and similarly for the API):
+
+```
+slangc code.slang -profile vulkan+KHR_secret_sauce
+```
+(Note that for the command line, it is beneficial to use `+` instead of `&` to avoid conflicts with shell interpreters)
+
+An important question is whether the compiler should automatically infer required extensions without them being specified, so that it produces SPIR-V that requires extensions the user didn't ask for.
+The argument against such inference is that users should opt in to non-standard capabilities they are using, but it would be unfortunate if this in turn requires verbose command lines when invoking the compiler.
+It should be possible to indicate the capabilities that a module or entry point should be compiled to use without command-line complications.
+
+(A related challenge is when a capability can be provided by two different extensions: how should the compiler select the "right" one to use?)
+
+Disjoint Capabilities
+---------------------
+
+Certain compositions of capabilities make no sense. If a user declared a function as needing `vulkan & d3d12` they should probably get an error message.
+
+Knowing that certain capabilities are disjoint can also help improve the overall user experience.
+If a function requires `(vulkan & extensionA) | (d3d12 & featureB)` and we know we are compiling for `vulkan` we should be able to give the user a pointed error message saying they need to ask for `extensionA`, because adding `featureB` isn't going to do any good.
+
+As a first-pass model we could have a notion of `abstract` capabilities that are used to model the root of hierarchies of disjoint capabilities:
+
+```
+abstract capability api;
+
+abstract capability d3d : api;
+capability d3d11 : d3d;
+capability d3d12 : d3d;
+
+abstract capability khronos : api;
+capability vulkan : khronos;
+capability opengl : khronos;
+```
+
+As a straw man:  we could have a rule that to decide if non-abstract capabilities `A` and `B` are disjoint, we look for their common ancestor in the tree of capabilities.
+If the common ancestor is abstract, they are disjoint, and if not they are not disjoint.
+We'd also know that if the user tries to compile for a profile that includes an abstract capability but *not* some concrete capability derived from it, then that is an error (we can't generate code for just `d3d`).
+
+The above is an over-simplification because we don't have a *tree* of capabilities, but a full *graph*, so we'd need an approach that works for the full case.
+
+Interaction with Generics/Interfaces
+------------------------------------
+
+It should be possible for an interface requirement to have a capability requirement attached to it.
+This would mean that users of the interface can only use the method/type/whatever when the capability is present (just like for any other function):
+
+```
+interface ITexture
+{
+	float4 sampleLevel(float2 uv, float lod);
+
+	[availableFor(fragment)]
+	float4 sample(float2 uv); // can only call this from fragment code
+}
+```
+When implementing an interface, any capability constraints we put on a member that satisfies an interface requirement would need to guarantee that either:
+
+- the capabilities on our method are implied by those on the requirement (we don't require more), or
+
+- the capabilities on the method are implied by those on the type itself, or its conformance to the interface (you can't use the conformance without the capabilities), or
+
+- the capabilities are already implied by those the whole module is being compiled for
+
+In each case, you need to be sure that `YourType` can't be passed as a generic argument to some function that uses just the `ITexture` interface above and have them call a method on your type from a profile that doesn't have the required capabilities.
+
+Interaction with Heterogeneity
+------------------------------
+
+If Slang eventually supports generating CPU code as well as shaders, it should use capabilities to handle the CPU/GPU split similar to how they can be used to separate out vertex- and fragment-shader functionality.
+Something like a `cpu` profile that works as a catch-all for typical host CPU capabilities would be nice, and could be used as a convenient way to mark "host" functions in a file that is otherwise compiled for a "default profile" that assumes GPU capabilities.
+
+Conclusion
+----------
+
+Overall, the hope is that in many cases developers will be able to use capability-based partitioning and overloading of APIs to build code that only has to pass through the Slang front-end once, but that can then go through back-end code generation for each target.
+In cases where this can't be achieved, the way that capability-based overloading is built into the Slang IR design means that we should be able to merge multiple target-specific definitions into one IR module, so that a module can employ target-specific specializations while still presenting a single API to consumers.
diff --git a/external/slang/share/doc/slang/design/casting.md b/external/slang/share/doc/slang/design/casting.md
new file mode 100644
index 00000000..6eafea1a
--- /dev/null
+++ b/external/slang/share/doc/slang/design/casting.md
@@ -0,0 +1,150 @@
+Casting in the Slang Compiler
+=============================
+
+The following discussion is about casting within the C++ implementation of the slang compiler. 
+
+C++'s built in mechanisms for casting (principally dynamic_cast) are problematic within the slang compiler codebase. Code using 'dynamic_cast' requires that RTTI information is available, and that a type that uses it must have a vtbl (have at least one virtual member). Some problems with this...
+
+* There are types which we want to 'dynamic_cast' that do not have, and we do not want to have a Vtbl (for example Slang::IRInst). 
+* There are types which a 'dynamic_cast' doesn't do quite what we want (for example casting on Type* derived types typically wants to work on their canonical type)
+* We may want to replace use of dynamic_cast in the future for speed/space or other reasons
+* It is common in the code base when using a 'smart pointer' type to cast it, but still return a smart pointer 
+
+To deal with these issues we need casting within Slang to follow its own methodology. In summary it is as follows...
+
+* Use 'as' free function to do a typical 'dynamic like' cast. 
+    * 'as' doesn't guarantee the returned pointer points to the same object.
+    * For example with Type* it *actually* does the cast on the canonical type which is often a different object. 
+* If you want to *literally* do a dynamic cast use 'dynamicCast' free function. 
+    * This guarantees the returned pointer points to the same object (like normal dynamic_cast)
+* If you want to return a smart pointer from a cast from a smart pointer use the .as or .dynamicCast *methods*
+* If you want to determine if an 'as' cast is possible on a smart pointer use the .is method
+    * Doing so will produce more efficient code because a new smart pointer does not need to be constructed
+
+These functions will also work with types that do not have Vtbl - like IRInst derived types. 
+
+Both 'as' and 'dynamicCast' handle the case if the pointer is a nullptr, by returning a nullptr. If the cast succeeds the cast pointer is returned otherwise nullptr is returned. If a cast is performed with a free function it always returns a raw pointer. 
+
+So why have 'as' and 'dynamicCast' - they seem sort of similar? The primary difference is dynamicCast *must* always return a pointer to the same object, whilst 'as' *can* return a pointer to a different object if that is the desired 'normal' casting behavior for the type. This is the case for Type* when using 'as' it may return a different object - the 'canonical type' for the Type*. For a concrete example take 'NamedExpressionType', its canonical type is the type the name relates to. If you use 'as' on it - it will produce a pointer to a different object, an object that will not be castable back into a NamedExpressionType.
+
+Also keep in mind that 'as' behavior is based on the pointer type being cast from. For any pointer to a type derived from Type it will cast the canonical type. **BUT** if the pointer is pointing to a Type derived *object*, but the pointer type is *not* derived from Type (like say RefObject*), then 'as' will behave like dynamicCast. 
+
+All this being said 'as' in usage is seen as the 'default' way to do a 'dynamic like' cast with this special behaviour appropriate for the type when necessary.
+
+By having the free function and method versions of 'as' and 'dynamicCast', you can choose if you want a 'raw' or 'smart' pointer type returned from the cast. If you just want to test if something is a certain type, then using as/dynamicCast free functions is the faster way to do it. If you *know* that a raw pointer is ok, because the object will remain in scope, then again using the free function is better because it does less work. But as the examples following show, care is needed because if you get it wrong the object might go out of scope and leave the raw pointer pointing to a deleted object. When in doubt the safe choice is to typically use .as (or .dynamicCast if appropriate) methods. 
+
+The following example shows the different types of casting...
+
+```C++
+
+void someFunction(Decl* decl, Type* type)
+{
+    RefPtr<Decl> declRefPtr(decl);
+    RefPtr<Type> typeRefPtr(type);
+
+    // Use of as
+    {
+        // Casting with as on a free function returns a raw pointer
+        GenericDecl* genericDeclRaw0 = as<GenericDecl>(decl);
+        // Free function again returns a raw pointer
+        GenericDecl* genericDeclRaw1 = as<GenericDecl>(declRefPtr);
+
+        // Using the as *method* returns a smart pointer holding the cast result
+        RefPtr<GenericDecl> genericDeclRefPtr0 = declRefPtr.as<GenericDecl>();
+        
+        // Of course you can use auto with either
+        auto genericDeclRefPtr1 = declRefPtr.as<GenericDecl>();
+        
+        auto genericDeclRaw2 = as<GenericDecl>(declRefPtr);
+    }
+    
+    // Currently using as on anything not cast *from* Type is the same as dynamicCast.
+    // But on Type* sometimes you may want to control the cast
+    {
+        // With a NamedExpressionType sometimes you don't want 'as' behaviour - if we want to see the information about the name (not the thing 
+        // it relates to (the canonical type))
+        NamedExpressionType* namedExpressionRawPtr = dynamicCast<NamedExpressionType>(type);
+        
+        
+        // Returns the smart pointer 
+        auto namedExpressionRefPtr = typeRefPtr.as<NamedExpressionType>();
+    }
+}
+```
+
+It is important to be aware of what style of cast you use where. Take for example the following function ...
+```C++
+    RefPtr<Expr> substitute(RefPtr<Expr> expr) const
+    {
+        return DeclRefBase::Substitute(expr);
+    }
+``` 
+    
+If you want to do a cast on it, you need to be careful especially about scope, for example...
+
+```C++
+    RefPtr<Expr> expr = ...;
+    
+    {
+        // Whoops! This is a problem. When using the free function, the cast is to a *raw* pointer, so obj 
+        // receives a raw pointer. When the RefPtr returned from Substitute goes out of scope (when the statement is left)
+        // the ref will be removed and if the ref count was 1 destroyed. Now obj points to a freed object and so a crash is
+        // likely to follow in the future! 
+        
+        auto obj = as<RefObject>(substitute(expr));
+    }
+    // So how do we avoid this? Well it depends what the function is returning and the scope. If it's returning a smart pointer, 
+    // you could use the .as method
+    {
+        // This can only compile if it is a smart pointer (raw pointers don't have an as method)
+        auto obj = substitute(expr).as<RefObject>();
+    }
+
+    // Another option is to put the created thing in a smart pointer so you know it's in scope
+    {
+        RefPtr<Expr> sub = substitute(expr);
+        // Ok as long as sub is in scope
+        auto obj = as<RefObject>(sub);
+       
+    }
+ 
+    // More awkwardly you could use free function, but assign to a smart pointer, thus maintaining scope
+    {
+        RefPtr<RefObject> obj = as<RefObject>(substitute(expr));
+    }
+
+```
+
+The following code shows that the change in behavior of 'as' is based on the source *pointer* type **NOT** the *object* type...
+
+```C++
+    // Derives from Type
+    NamedExpressionType* exprType = ...;
+
+    
+    // Will be the Type* of the *canonical* type, because the pointer is Type derived and we are using as!
+    Type* type0 = as<Type>(exprType);
+    // It's going to be pointing to a different object, because type0 is the cast of the *canonical* type, because exprType derives from Type
+    SLANG_ASSERT(type0 != exprType);
+    
+    // If I do a dynamicCast the result is either nullptr or a pointer that *must* point to the same object
+    Type* type1 = dynamicCast<Type>(exprType);
+    SLANG_ASSERT(type1 == exprType);
+    
+    
+    // Here, the pointer is pointing to a NamedExpressionType derived object. Which derives from Type. BUT our pointer here does *not* derive from type.
+    RefObject* refObj = exprType;
+    
+    // 'as' just looks at the from type, and it doesn't derive from Type (it's just RefObject), so it does regular as, which is dynamicCast
+    Type* type2 = as<Type>(refObj);
+    
+    SLANG_ASSERT(type2 == exprType);
+    
+    // Finally... 
+    
+    // Is true even though exprType is a NamedExpressionType, because the cast is on the canonical type
+    SLANG_ASSERT(as<NamedExpressionType>(exprType) == nullptr);
+    
+    // dynamicCast is always the same object returned, so must match
+    SLANG_ASSERT(dynamicCast<NamedExpressionType>(exprType) == exprType);
+```
diff --git a/external/slang/share/doc/slang/design/coding-conventions.md b/external/slang/share/doc/slang/design/coding-conventions.md
new file mode 100644
index 00000000..bc540783
--- /dev/null
+++ b/external/slang/share/doc/slang/design/coding-conventions.md
@@ -0,0 +1,282 @@
+Slang Project Coding Conventions
+================================
+
+Principles
+----------
+
+This document attempts to establish conventions to be used in the Slang codebase.
+We have two goals for this convention.
+
+The first goal is to make the code look relatively consistent so that it is easy to navigate and understand for contributors.
+Having varying styles across different modules, files, functions, or lines of code makes the overall design and intention of the codebase harder to follow.
+
+The second goal is to minimize the scope and complexity of diffs when multiple maintainers work together on the codebase.
+In the absence of an enforced style, developers tend to "clean up" code they encounter to match their personal preferences, and in so doing create additional diffs that increase the chances of merge conflicts and pain down the line.
+
+Because the Slang codebase has passed through many hands and evolved without a pre-existing convention, these two goals can come into conflict.
+We encourage developers to err on the side of leaving well enough alone (favoring the second goal).
+Don't rewrite or refactor code to match these conventions unless you were already going to have to touch all of those lines of code anyway.
+
+Note that external code that is incorporated into the project is excluded from all of these conventions.
+
+Languages
+---------
+
+### C++
+
+Most code in the Slang project is implemented in C++.
+We currently assume support for some C++11 idioms, but have explicitly avoided adding dependencies on later versions.
+
+As a general rule, be skeptical of "modern C++" ideas unless they are clearly better than simpler alternatives.
+We are not quite in the realm of "Orthodox C++", but some of the same guidelines apply:
+
+* Don't use exceptions for non-fatal errors (and even then support a build flag to opt out of exceptions)
+* Don't use the built-in C++ RTTI system (home-grown is okay)
+* Don't use the C++ variants of C headers (e.g., use `<stdio.h>` rather than `<cstdio>`)
+* Don't use the STL containers
+* Don't use iostreams
+
+The compiler implementation does not follow some of these guidelines at present; that should not be taken as an excuse to further the proliferation of stuff like `dynamic_cast`.
+Do as we say, not as we do.
+
+Some relatively recent C++ features that are okay to use:
+
+* Rvalue references for "move semantics," but only if you are implementing performance-critical containers or other code where this really matters.
+
+* `auto` on local variables, if the expected type is clear in context
+
+* Lambdas are allowed, but think carefully about whether just declaring a subroutine would also work.
+
+* Using `>>` to close multiple levels of templates, instead of `> >` (but did you really need all those templates?)
+
+* `nullptr`
+
+* `enum class`
+
+* Range-based `for` loops
+
+* `override`
+
+* Default member initializers in `class`/`struct` bodies
+
+Templates are suitable in cases where they improve clarity and type safety.
+As a general rule, it is best when templated code is kept minimal, and forwards to a non-templated function that does the real work, to avoid code bloat.
+
+Any use of template metaprogramming would need to prove itself exceptionally useful to pay for the increase in cognitive complexity.
+We don't want to be in the business of maintaining "clever" code.
+
+As a general rule, `const` should be used sparingly and only with things that are logically "value types."
+If you find yourself having to `const`-qualify a lot of member functions in a type that you expect to be used as a heap-allocated object, then something has probably gone wrong.
+
+As a general rule, default to making the implementation of a type `public`, and only encapsulate state or operations with `private` when you find that there are complex semantics or invariants that can't be provided without a heavier hand.
+
+### Slang
+
+The Slang project codebase also includes `.slang` files implementing the Slang core module, as well as various test cases and examples.
+The conventions described here are thus the "official" recommendations for how users should format Slang code.
+
+To the extent possible, we will try to apply the same basic conventions to both C++ and Slang.
+In places where we decide that the two languages merit different rules, we will point it out.
+
+Files and Includes
+------------------
+
+### File Names
+
+All files and directories that are added to the codebase should have names that contain only ASCII lower-case letters, digits, dots (`.`) and dashes (`-`).
+Operating systems still vary greatly in their handling of case sensitivity for file names, and non-ASCII code points are handled with even less consistency; sticking to a restricted subset of ASCII helps avoid some messy interactions between case-insensitive file systems and case-sensitive source-control systems like Git.
+As with all these conventions, files from external projects are exempted from these restrictions.
+
+### Naming of Source and Header Files
+
+In general the C++ codebase should be organized around logical features/modules/subsystem, each of which has a single `.h` file and zero or more `.cpp` files to implement it.
+
+If there is a single `.cpp` file, its name should match the header: e.g., `parser.h` and `parser.cpp`.
+
+If there is more than one `.cpp` file, their names should start with the header name: e.g., `parser.h` and `parser-decls.cpp` and `parser-exprs.cpp`.
+If there are declarations that need to be shared by the `.cpp` files, but shouldn't appear in the public interface, then they can go in a `*-impl.h` header (e.g., `parser-impl.h`).
+
+Use best judgement when deciding what counts as a "feature." One class per file is almost always overkill, but the codebase currently leans too far in the other direction, with some oversized source files.
+
+### Headers
+
+Every header file should have an include guard.
+Within the implementation we can use `#pragma once`, but exported API headers (`slang.h`) should use traditional `#ifdef` style guards (and they should be consumable as both C and C++).
+
+A header should include or forward-declare everything it needs in order to compile.
+It is *not* up to the programmer who `#include`s a header to sort out the dependencies.
+
+Avoid umbrella or "catch-all" headers.
+
+### Source Files
+
+Every source file should start by including the header for its feature/module, before any other includes (this helps ensure that the header correctly includes its dependencies).
+
+Functions that are only needed within that one source file can be marked `static`, but we should avoid using the same name for functions in different files (in order to support lumped/unified builds).
+
+### Includes
+
+In general, includes should be grouped as follows:
+
+* First, the corresponding feature/module header, if we are in a source file
+* Next, any `<>`-enclosed includes for system/OS headers
+* Next, any `""`-enclosed includes for external/third-party code that is stored in the project repository
+* Finally, any includes for other features in the project
+
+Within each group, includes should be sorted alphabetically.
+If this breaks because of ordering issues for system/OS/third-party headers (e.g., `<windows.h>` must be included before `<GL/GL.h>`), then ideally those includes should be mediated by a Slang-project-internal header that features can include.
+
+Namespaces
+----------
+
+Favor fewer namespaces when possible.
+Small programs may not need any.
+
+All standard module code that a Slang user might link against should go in the `Slang` namespace for now, to avoid any possibility of clashes in a static linking scenario.
+The public C API is obviously an exception to this.
+
+
+Code Formatting
+------------------------------
+
+- For C++ files, please format using `clang-format`; `.clang-format` files in
+  the source tree define the style.
+- For CMake files, please format using `gersemi`
+- For shell scripts, please format using `shfmt`
+- For YAML files, please use `prettier`
+
+The formatting for the codebase is overall specified by the
+[`extras/formatting.sh`](./extras/formatting.sh) script.
+
+If you open a pull request and the formatting is incorrect, you can comment
+`/format` and a bot will format your code for you.
+
+Naming
+------
+
+### Casing
+
+Types should in general use `UpperCamelCase`. This includes `struct`s, `class`es, `enum`s and `typedef`s.
+
+Values should in general use `lowerCamelCase`. This includes functions, methods, local variables, global variables, parameters, fields, etc.
+
+Macros should in general use `SCREAMING_SNAKE_CASE`.
+It is important to prefix all macros (e.g., with `SLANG_`) to avoid collisions, since `namespace`s don't affect macros.
+
+In names using camel case, acronyms and initialisms should appear entirely in either upper or lower case (e.g., `D3DThing d3dThing`) and not be capitalized as if they were ordinary words (e.g., `D3dThing d3dThing`).
+Note that this also applies to uses of "ID" as an abbreviation for "identifier" (e.g., use `nodeID` instead of `nodeId`).
+
+### Prefixes
+
+Prefixes based on types (e.g., `p` for pointers) should never be used.
+
+Global variables should have a `g` prefix, e.g. `gCounter`.
+Non-`const` `static` class members can have an `s` prefix if that suits your fancy.
+Of course, both of these should be avoided, so this shouldn't come up often.
+
+Constant data (in the sense of `static const`) should have a `k` prefix.
+
+In contexts where "information hiding" is relevant/important, such as when a type has both `public` and `private` members, or just has certain operations/fields that are considered "implementation details" that most clients should not be using, an `m_` prefix on member variables and a `_` prefix on member functions is allowed (but not required).
+
+In function parameter lists, an `in`, `out`, or `io` prefix can be added to a parameter name to indicate whether a pointer/reference/buffer is intended to be used for input, output, or both input and output.
+For example:
+
+```c++
+void copyData(void* outBuffer, void const* inBuffer, size_t size);
+
+Result lookupThing(Key k, Thing& outThing);
+
+void maybeAppendExtraNames(std::vector<Name>& ioNames);
+```
+
+Public C APIs will prefix all symbol names while following the casing convention (e.g. `SlangModule`, `slangLoadModule`, etc.).
+
+### Enums
+
+C-style `enum` should use the following convention:
+
+```c++
+enum Color
+{
+    kColor_Red,
+    kColor_Green,
+    kColor_Blue,
+
+    kColorCount,
+};
+```
+
+When using `enum class`, drop the `k` and type name as prefix, but retain the `UpperCamelCase` tag names:
+
+```c++
+enum class Color
+{
+    Red,
+    Green,
+    Blue,
+
+    Count,
+};
+```
+
+When defining a set of flags, separate the type definition from the `enum`:
+
+```c++
+typedef unsigned int Axes;
+enum
+{
+    kAxes_None = 0,
+
+    kAxis_X = 1 << 0,
+    kAxis_Y = 1 << 1,
+    kAxis_Z = 1 << 2,
+
+    kAxes_All = kAxis_X | kAxis_Y | kAxis_Z,
+};
+```
+
+Note that the type name reflects the plural case, while the cases that represent individual bits are named with a singular prefix.
+
+In public APIs, all `enum`s should use the style of separating the type definition from the `enum`, and all cases should use `SCREAMING_SNAKE_CASE`:
+
+```c++
+typedef unsigned int SlangAxes;
+enum
+{
+    SLANG_AXES_NONE = 0,
+
+    SLANG_AXIS_X = 1 << 0,
+    SLANG_AXIS_Y = 1 << 1,
+    SLANG_AXIS_Z = 1 << 2,
+
+    SLANG_AXES_ALL = SLANG_AXIS_X | SLANG_AXIS_Y | SLANG_AXIS_Z,
+};
+```
+
+### General
+
+Names should default to the English language and US spellings, to match the dominant conventions of contemporary open-source projects.
+
+Function names should either be named with action verbs (`get`, `set`, `create`, `emit`, `parse`, etc.) or read as questions (`isEnabled`, `shouldEmit`, etc.).
+
+Whenever possible, compiler concepts should be named using the most widely-understood term available: e.g., we use `Token` over `Lexeme`, and `Lexer` over `Scanner` simply because they appear to be the more common names.
+
+Avoid abbreviations and initialisms unless they are already widely established across the codebase; a longer name may be cumbersome to write in the moment, but the code will probably be read many more times than it is written, so clarity should be preferred.
+An important exception to this is common compiler concepts or techniques which may have laboriously long names: e.g., Static Single Assignment (SSA), Sparse Conditional Copy Propagation (SCCP), etc.
+
+One gotcha particular to compiler front-ends is that almost every synonym for "type" has some kind of established technical meaning; most notably the term "kind" has a precise meaning that is relevant in our domain.
+It is common practice in C and C++ to define tagged union types with a selector field called a "type" or "kind," which does not usually match this technical definition.
+If a developer wants to avoid confusion, they are encouraged to use the term "flavor" instead of "type" or "kind" since this term (while a bit silly) is less commonly used in the literature.
+
+Comments and Documentation
+--------------------------
+
+You probably know the drill: comments are good, but an out-of-date comment can be worse than no comment at all.
+Try to write comments that explain the "why" of your code more than the "what."
+When implementing a textbook algorithm or technique, it may help to imagine giving the reviewer of your code a brief tutorial on the topic.
+
+In cases where comments would benefit from formatting, use Markdown syntax.
+We do not currently have a setup for extracting documentation from comments, but if we add one we will ensure that it works with Markdown.
+
+When writing comments, please be aware that your words could be read by many people, from a variety of cultures and backgrounds.
+Default to a plain-spoken and professional tone and avoid using slang, idiom, profanity, etc.
diff --git a/external/slang/share/doc/slang/design/decl-refs.md b/external/slang/share/doc/slang/design/decl-refs.md
new file mode 100644
index 00000000..5c195869
--- /dev/null
+++ b/external/slang/share/doc/slang/design/decl-refs.md
@@ -0,0 +1,166 @@
+Understanding Declaration References (Out of Date)
+====================================
+
+This document is intended as a reference for developers working on the Slang compiler implementation.
+
+As you work on the code, you'll probably notice a lot of places where we use the `DeclRef<T>` type:
+
+* Expressions like `VarExpr` and `MemberExpr` are subclasses of `DeclRefExpr`, which holds a `DeclRef<Decl>`.
+
+* The most common subclass of `Type` is `DeclRefType`, which holds a `DeclRef<Decl>` for the type declaration.
+
+* Named types (references to `typedef`s) hold a `DeclRef<TypedefDecl>`
+
+* The name lookup process relies a lot on `DeclRef<ContainerDecl>`
+
+So what in the world is a `DeclRef`?
+
+The short answer is that a `DeclRef` packages up two things:
+
+1. A pointer to a `Decl` in the parsed program AST
+
+2. A set of "substitutions" to be applied to that decl
+
+Why do we need `DeclRef`s?
+--------------------------
+
+In a compiler for a simple language, we might represent a reference to a declaration as simply a pointer to the AST node for the declaration, or some kind of handle/ID that references that AST node.
+A representation like that will work in simple cases, for example:
+
+```hlsl
+struct Cell { int value; };
+
+Cell a = { 3 };
+int b = a.value + 4;
+```
+
+In this case, the expression node for `a.value` can directly reference the declaration of the field `Cell::value`, and from that we can conclude that the type of the field (and hence the expression) is `int`.
+
+In contrast, things get more complicated as soon as we have a language with generics:
+
+```hlsl
+struct Cell<T> { T value; };
+
+// ...
+
+Cell<int> a = { 3 };
+int b = a.value + 4;
+```
+
+In this case, if we try to have the expression `a.value` only reference `Cell::value`, then the best we can do is conclude that the field has type `T`.
+
+In order to correctly type the `a.value` expression, we need enough additional context to know that it references `Cell<int>::value`, and from that to be able to conclude that a reference to `T` in that context is equivalent to `int`.
+
+We can represent that information as a substitution which maps `T` to `int`:
+
+```
+[ Cell::T => int ]
+```
+
+Then we can encode a reference to `Cell<int>::value` as a reference to the single declaration `Cell::value` with such a substitution applied:
+
+```
+Cell::value [Cell::T => int]
+```
+
+If we then want to query the type of this field, we can first look up the type stored on the AST (which will be a reference to `Cell::T`) and apply the substitutions from our field reference to get:
+
+```
+Cell::T [Cell::T => int]
+```
+
+Of course, we can then simplify the reference by applying the substitutions, to get:
+
+```
+int
+```
+
+How is this implemented?
+------------------------
+
+At the highest level, a `DeclRef` consists of a pointer to a declaration (a `Decl*`) plus a singly-linked list of `Substitution`s.
+These substitutions fill in the missing information for any declarations on the ancestor chain for the declaration.
+
+Each ancestor of a declaration can introduce an expected substitution along the chain:
+
+* Most declarations don't introduce any substitutions: e.g., when referencing a non-generic `struct` we don't need any additional information.
+
+* A surrounding generic declaration requires a `GenericSubstitution` which specifies the type argument to be plugged in for each type parameter of the declaration.
+
+* A surrounding `interface` declaration usually requires a `ThisTypeSubstitution` that identifies the specific type on which an interface member has been looked up.
+
+All of the expected substitutions should be in place in the general case, even when we might not have additional information. E.g., within a generic declaration like this:
+
+```hlsl
+struct Cell<T>
+{
+	void a();
+	void b() { a(); }
+}
+```
+
+The reference to `a` in the body of `b` will be represented as a declaration reference to `Cell::a` with a substitution that maps `[Cell::T => Cell::T]`. This might seem superfluous, but it makes it clear that we are "applying" the generic to arguments (even if they are in some sense placeholder arguments), and not trying to refer to an unspecialized generic.
+
+There are a few places in the compiler where we might currently bend these rules, but experience has shown that failing to include appropriate substitutions is more often than not a source of bugs.
+
+What in the world is a "this type" substitution?
+------------------------------------------------
+
+When using interface-constrained generics, we need a way to invoke methods of the interface on instances of a generic parameter type.
+For example, consider this code:
+
+```hlsl
+interface IVehicle
+{
+	associatedtype Driver;
+	Driver getDriver();
+}
+
+void ticketDriver<V : IVehicle>(V vehicle)
+{
+	V.Driver driver = vehicle.getDriver();
+	sendTicketTo(driver);
+}
+```
+
+In the expression `vehicle.getDriver`, we are referencing the declaration of `IVehicle::getDriver`, and so a naive reading tells us that the return type of the call is `IVehicle.Driver`, but that is an associated type and not a concrete type. It is clear in context that the expression `vehicle.getDriver()` should result in a `V.Driver`.
+
+The way the compiler encodes that is that we treat the expression `vehicle.getDriver` as first "up-casting" the value `vehicle` (of type `V`) to the interface `IVehicle`. We know this is valid because of the generic constraint `V : IVehicle`. The result of the up-cast operation is an expression with a type that references `IVehicle`, but with a substitution to track the fact that the underlying implementation type is `V`. This amounts to something like:
+
+```
+IVehicle [IVehicle.This => V]
+```
+
+where `IVehicle.This` is a way to refer to "the concrete type that is implementing `IVehicle`".
+
+Looking up the `getDriver` method on this up-cast expression yields a reference to:
+
+```
+IVehicle::getDriver [IVehicle.This => V]
+```
+
+And extracting the return type of that method gives us a reference to the type:
+
+```
+IVehicle::Driver [IVehicle.This => V]
+```
+
+which turns out to be exactly what the front end produces when it evaluates the type reference `V.Driver`.
+
+As this example shows, a "this type" substitution allows us to refer to interface members while retaining knowledge of the specific type on which those members were looked up, so that we can compute correct references to things like associated types.
+
+What does any of this mean for me?
+----------------------------------
+
+When working in the Slang compiler code, try to be aware of whether you should be working with a plain `Decl*` or a full `DeclRef`.
+There are many queries like "what is the return type of this function?" that typically only make sense if you are applying them to a `DeclRef`.
+
+The `syntax.h` file defines helpers for most of the existing declaration AST nodes for querying properties that should respect substitutions (the type of a variable, the return type of a function, etc.).
+If you are writing code that is working with a `DeclRef`, try to use these accessors and avoid being tempted to extract the bare declaration and start querying it.
+
+Some things like `Modifier`s aren't (currently) affected by substitutions, so it can make sense to query them on a bare declaration instead of a `DeclRef`.
+
+Conclusion
+----------
+
+Working with `DeclRef`s can be a bit obtuse at first, but they are the most elegant solution we've found to the problems that arise when dealing with generics and interfaces in the compiler front-end. Hopefully this document gives you enough context to see why they are important, and hints at how their representation in the compiler helps us implement some cases that would be tricky otherwise.
diff --git a/external/slang/share/doc/slang/design/existential-types.md b/external/slang/share/doc/slang/design/existential-types.md
new file mode 100644
index 00000000..0f346905
--- /dev/null
+++ b/external/slang/share/doc/slang/design/existential-types.md
@@ -0,0 +1,252 @@
+Existential Types
+=================
+
+This document attempts to provide some background on "existential types" as they pertain to the design and implementation of Slang.
+The features described here are *not* reflected in the current implementation, so this is mostly a sketch of where we can go with the language and compiler.
+
+Background: Generics and Universal Quantification
+-------------------------------------------------
+
+Currently Slang supports using interfaces as generic constraints. Let's use a contrived example:
+
+```hlsl
+interface IImage { float4 getValue(float2 uv); }
+
+float4 offsetImage<T : IImage>(T image, float2 uv)
+{
+	float2 offset = ...;
+	return image.getValue(uv + offset);
+}
+```
+
+Generics like this are a form of "universal quantification" in the terminology of type theory.
+This makes sense, because *for all* types `T` that satisfy the constraints, `offsetImage` provides an implementation of its functionality.
+
+When we think of translating `offsetImage` to code, we might at first only think about how we can specialize it once we have a particular type `T` in mind.
+However, we can also imagine trying to generate one body of code that can implement `offsetImage` for *any* type `T`, given some kind of runtime representation of types.
+For example, we might generate C++ code like:
+
+```c++
+struct IImageWitnessTable { float4 (*getValue)(void* obj, float2 uv); };
+
+float4 offsetImage(Type* T, IImageWitnessTable* W, void* image, float2 uv)
+{
+	float2 offset = ...;
+	return W->getValue(image, uv + offset);
+}
+```
+
+This translation takes the generic parameters and turns them into ordinary runtime parameters: the type `T` becomes a pointer to a run-time type representation, while the constraint that `T : IImage` becomes a "witness table" of function pointers that, we assume, implements the `IImage` interface for `T`. So, the syntax of generics is *not* tied to static specialization, and can admit a purely runtime implementation as well.
+
+Readers who are familiar with how languages like C++ are implemented might see the "witness table" above and realize that it is kind of like a virtual function table, just being passed alongside the object, rather than stored in its first word.
+
+Using Interfaces Like Types
+---------------------------
+
+It is natural for a user to want to write code like the following:
+
+```hlsl
+float4 modulateImage(IImage image, float2 uv)
+{
+	float4 factor = ...;
+	return factor * image.getValue(uv);
+}
+```
+
+Unlike `offsetImage`, `modulateImage` is trying to use the `IImage` interface as a *type* and not just a constraint.
+
+This code appears to be asking for a dynamic implementation rather than specialization (we'll get back to that...) and so we should be able to implement it similarly to our translation of `offsetImage` to C++.
+Something like the following makes a lot of sense:
+
+```c++
+struct IImage { Type* T; IImageWitnessTable* W; void* obj; };
+
+float4 modulateImage(IImage image, float2 uv)
+{
+	float4 factor = ...;
+	return factor * image.W->getValue(image.obj, uv);
+}
+```
+
+Similar to the earlier example, there is a one-to-one mapping of the parameters of the Slang function the user wrote to the parameters of the generated C++ function.
+To make this work, we had to bundle up the information that used to be separate parameters to the generic as a single value of type `IImage`.
+
+Existential Types
+-----------------
+
+It turns out that when we use `IImage` as a type, it is what we'd call an *existential* type.
+That is because if I give you a value `img` of type `IImage` in our C++ model, then you know that *there exists* some type `img.T`, a witness table `img.W` proving the type implements `IImage`, and a value `img.obj` of that type.
+
+Existential types are the bread and butter of object-oriented programming.
+If I give you an `ID3D11Texture2D*` you don't know what its concrete type is, and you just trust me that some concrete type *exists* and that it implements the interface.
+A C++ class or COM component can implement an existential type, with the constraint that the set of interfaces that a given type can support is limited by the way that virtual function tables are intrusively included inside the memory of the object, rather than externalized.
+Many modern languages (e.g., Go) support adapting existing types to new interfaces, so that a "pointer" of interface type is actually a fat pointer: one for the object, and one for the interface dispatch table.
+Our examples so far have assumed that the type `T` needs to be passed around separately from the witness table `W`, but that isn't strictly required in some implementations.
+
+In type theory, the most important operation you can do with an existential type is to "open" it, which means to have a limited scope in which you can refer to the constituent pieces of a "bundled up" value of a type like `IImage`.
+We could imagine "opening" an existential as something like:
+
+```
+void doSomethingCool<T : IImage>(T val);
+
+void myFunc(IImage img)
+{
+	open img as obj:T in
+	{
+		// In this scope we know that `T` is a type conforming to `IImage`,
+		// and `obj` is a value of type `T`.
+		//
+		doSomethingCool<T>(obj);
+	}
+}
+```
+
+Self-Conformance
+----------------
+
+The above code with `doSomethingCool` and `myFunc` invites a much simpler solution:
+
+```
+void doSomethingCool<T : IImage>(T val);
+
+void myFunc(IImage img)
+{
+	doSomethingCool(img);
+}
+```
+
+This seems like an appealing thing for a language to support, but there are some subtle reasons why this isn't possible to support in general.
+If we think about what `doSomethingCool(img)` is asking for, it seems to be trying to invoke the function `doSomethingCool<IImage>`.
+That function only accepts type parameters that implement the `IImage` interface, so we have to ask ourselves:
+
+Does the (existential) type `IImage` implement the `IImage` interface?
+
+Knowing the implementation strategy outlined above, we can re-phrase this question to: can we construct a witness table that implements the `IImage` interface for values of type `IImage`?
+
+For simple interfaces this is sometimes possible, but in the general case there are other desirable language features that get in the way:
+
+* When an interface has associated types, there is no type that can be chosen as the associated type for the interface's existential type. The "obvious" approach of using the constraints on the associated type can lead to unsound logic when interface methods take associated types as parameters.
+
+* When an interface uses the "this type" (e.g., an `IComparable` interface with a `compareTo(ThisType other)` method), it isn't correct to simplify the this type to the interface type (just because you have two `IComparable` values doesn't mean you can compare them - they have to be of the same concrete type!)
+
+* If we allow for `static` methods on interfaces, then what implementation would we use for these methods on the interface's existential type?
+
+Encoding Existentials in the IR
+-------------------------------
+
+Existentials are encoded in the Slang IR quite simply. We have an operation `makeExistential(T, obj, W)` that takes a type `T`, a value `obj` that must have type `T`, and a witness table `W` that shows how `T` conforms to some interface `I`. The result of the `makeExistential` operation is then a value of the type `I`.
+
+Rather than include an IR operation to "open" an existential, we can instead just provide accessors for the pieces of information in an existential: one to extract the type field, one to extract the value, and one to extract the witness table. These would idiomatically be used like:
+
+```
+let e : ISomeInterface = /* some existential */
+let T : Type = extractExistentialType(e);
+let W : WitnessTable = extractExistentialWitnessTable(e);
+let obj : T = extractExistentialValue(e);
+```
+
+Note how the operation to extract `obj` gets its result type from the previously-executed extraction of the type.
+
+Simplifying Code Using Existentials
+-----------------------------------
+
+It might seem like IR code generated using existentials can only be implemented using dynamic dispatch.
+However, within a local scope it is clear that we can simplify expressions whenever `makeExistential` and `extractExistential*` operations are paired.
+For example:
+
+```
+let e : ISomeInterface = makeExistential(A, a, X);
+...
+let B = extractExistentialType(e);
+let b : B = extractExistentialValue(e);
+let Y = extractExistentialWitnessTable(e);
+```
+
+It should be clear in context that we can replace `B` with `A`, `b` with `a`, and `Y` with `X`, after which all of the `extract*` operations and the `makeExistential` operation are dead and can be eliminated.
+
+This kind of simplification works within a single function, as long as there is no conditional logic involving existentials.
+We require further transformation passes to allow specialization in more general cases:
+
+* Copy propagation, redundancy elimination and other dataflow optimizations are needed to simplify use of existentials within functions
+* Type legalization passes, including some amount of scalarization, are needed to "expose" existential-type fields that are otherwise buried in a type
+* Function specialization is needed so that a function with existential parameters is specialized based on the actual types used at call sites
+
+Transformations just like these are already required when working with resource types (textures/samplers) on targets that don't support first-class computation on resources, so it is possible to share some of the same logic.
+Similarly, any effort we put into validation (to ensure that code is written in a way that *can* be simplified) can hopefully be shared between existentials and resources.
+
+Compositions
+------------
+
+So far I've only talked about existential types based on a single interface, but if you look at the encoding as a tuple `(obj, T, W)` there is no real reason that can't be generalized to hold multiple witness tables: `(obj, T, W0, ... WN)`. Interface compositions could be expressed at the language level using the `&` operator on interface (or existential) types.
+
+The IR encoding doesn't need to change much to support compositions: we just need to allow multiple witness tables on `makeExistential` and have an index operand on `extractExistentialWitnessTable` to get at the right one.
+
+The hardest part of supporting composition of interfaces is actually in how to linearize the set of interfaces in a way that is stable, so that changing a function from using `IA & IB` to `IB & IA` doesn't change the order in which witness tables get packed into an existential value.
+
+Why are we passing along the type?
+----------------------------------
+
+I'm glossing over something pretty significant here, which is why anybody would pass around the type as part of the existential value, when none of our examples so far have made use of it.
+This sort of thing isn't very important for languages where interface polymorphism is limited to heap-allocated "reference" types (or values that have been "boxed" into reference types), because the dynamic type of an object can almost always be read out of the object itself.
+
+When dealing with a value type, though, we have to deal with things like making *copies*:
+
+```
+interface IWritable { [mutating] void write(int val); }
+
+struct Cell : IWritable { int data; void write(int val) { data = val; } }
+
+T copyAndClobber<T : IWritable>(T obj)
+{
+	T copy = obj;
+	obj.write(9999);
+	return copy;
+}
+
+void test()
+{
+	Cell cell = { 0 };
+	Cell result = copyAndClobber(cell);
+	// what is in `result.data`?
+}
+```
+
+If we call `copyAndClobber` on a `Cell` value, then does the line `obj.write` overwrite the data in the explicit `copy` that was made?
+It seems clear that a user would expect `copy` to be unaffected in the case where `T` is a value type.
+
+How does that get implemented in our runtime version of things? Let's imagine some C++ translation:
+
+```
+void copyAndClobber(Type* T, IWritableWitnessTable* W, void* obj, void* _returnVal)
+{
+    void* copy = alloca(T->sizeInBytes);
+    T->copyConstruct(copy, obj);
+
+    W->write(obj, 9999);
+    T->moveConstruct(_returnVal, copy);
+}
+```
+
+Because this function returns a value of type `T` and we don't know how big that is, let's assume the caller is passing in a pointer to the storage where we should write the result.
+Now, in order to have a local `copy` of the `obj` value that was passed in, we need to allocate some scratch storage, and only the type `T` can know how many bytes we need.
+Furthermore, when copying `obj` into that storage, or subsequently copying the `copy` variable into the function result, we need the copy/move semantics of type `T` to be provided by somebody.
+
+This is the reason for passing through the type `T` as part of an existential value.
+
+If we only wanted to deal with reference types, this would all be greatly simplified, because the `sizeInBytes` and the copy/move semantics would be fixed: everything is a single pointer.
+
+All of the same issues arise if we're making copies of existential values:
+
+```
+IWritable copyAndClobberExistential(IWritable obj)
+{
+	IWritable copy = obj;
+	obj.write(9999);
+	return copy;
+}
+```
+
+If we want to stay consistent and say that `copy` is an actual copy of `obj` when the underlying type is a value rather than a reference type, then we need the copy/move operations for `IWritable` to handle invoking the copy/move operations of the underlying encapsulated type.
+
+Aside: it should be clear from these examples that implementing generics and existential types with dynamic dispatch has a lot of complexity when we have to deal with value types (because copying requires memory allocation).
+It is likely that a first implementation of dynamic dispatch support for Slang would restrict it to reference types (and would thus add a `class` keyword for defining reference types).
diff --git a/external/slang/share/doc/slang/design/experimental.md b/external/slang/share/doc/slang/design/experimental.md
new file mode 100644
index 00000000..38707ab1
--- /dev/null
+++ b/external/slang/share/doc/slang/design/experimental.md
@@ -0,0 +1,74 @@
+Deploying Experimental API Additions
+====================================
+
+This page intends to provide guidance to Slang developers when extending the Slang API, particularly when working on experimental features.
+It applies to the "COM-lite" Slang API, rather than the deprecated C Slang API (sp* functions).
+
+* Note: This guidance relates to Slang API changes, not to language changes. That is, what Slang does with shader source code across releases is not discussed here.
+
+The goal is to maintain binary compatibility as much as possible between Slang releases, and to aid applications in dealing with changes to Slang.
+
+Slang is distributed as a dynamic library, and there is an expectation from Slang API users that upgrading by installing an updated slang.dll or slang.so will not break their application unnecessarily.
+
+ABI compatibility within the Slang API can be preserved between releases if some rules are followed by developers.
+
+Slang API uses a "COM-lite" structure wherein functionality is exposed through interfaces on objects. If the interfaces never change, ABI compatibility is preserved, but changes happen. When adding or changing interfaces, please observe the following:
+
+1. It is preferred to create *new* COM interfaces when adding new functionality.
+* This maintains ABI compatibility.
+* Applications must acquire access to the new functionality using QueryInterface(), which will gracefully fail if the slang.dll/slang.so does not implement the functionality.
+
+2. Changes to existing virtual methods in COM interfaces should be avoided, as that is an ABI breakage.
+* If a change is required though, change the interface's UUID.
+
+3. New virtual methods _may_ be added (only) to the end of existing COM interface structs.
+* This does not disturb the ABI compatibility of the associated vtable. Old apps can remain unaware of the new function pointers appended to the end of the vtable.
+* A UUID change is not necessary.
+* Note that in the event that a Slang application which uses the added feature is run with an old slang.dll/slang.so, the experience for the user is not as clean as if the added method belongs to a new interface.
+
+Adding Experimental Interfaces
+==============================
+
+When the above recommendations cannot be followed, as with features that are expected to be iterated on or are regarded as temporary, there are additional recommendations.
+
+Interfaces that are expected to change must be marked `_Experimental` in their class name and in their UUID name.
+
+For example,
+
+
+```c++
+/* Experimental interface for doing something cool. This interface is susceptible to ABI breakage. */
+struct ICoolNewFeature_Experimental : public ISlangUnknown
+{
+    SLANG_COM_INTERFACE(0x8e12e8e3, 0x5fcd, 0x433e, { 0xaf, 0xcb, 0x13, 0xa0, 0x88, 0xbc, 0x5e, 0xe5 })
+
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL coolMethod() = 0;
+};
+
+#define SLANG_UUID_ICoolNewFeature_Experimental ICoolNewFeature_Experimental::getTypeGuid()
+```
+
+Note: Use uuidgen to generate IIDs for new interfaces.
+
+Removing Experimental Interfaces
+================================
+
+By the nature of being marked "Experimental", users have been warned that the interfaces are not officially supported and may be removed. You may simply delete the class and UUID, e.g. "ICoolNewFeature_Experimental" struct may be deleted from slang.h along with the definition of SLANG_UUID_ICoolNewFeature_Experimental.
+
+This will show up in applications as QueryInterface failures.
+
+It is nice, but not required, to retain the interface declarations for some time after removing internal support before deleting them from slang.h, so that applications have time to remove their dependence on the unsupported feature while still being able to compile in the interim.
+
+Changing Experimental Interfaces
+================================
+
+Backwards incompatible changes to Slang COM interfaces should be accompanied with a UUID change.
+
+In the event that an old application runs with a new slang library, applications are more capable of gracefully handling an unavailable interface than a changed one. The former may still be functional, or include a helpful error message, whereas the latter is most likely a crash of some sort.
+
+Promoting Experimental Interfaces
+=================================
+
+The class name and the UUID name should be changed in slang.h and in the slang source code, e.g., rename "ICoolNewFeature_Experimental" to just "ICoolNewFeature".
+
+The SLANG_UUID for the interface should be renamed to omit "EXPERIMENTAL" but its value should remain the same. This is because, if there are no backwards incompatible changes that accompany the promotion from experimental to permanent, applications written against the experimental version can continue working against Slang libraries where the interface was promoted to permanent.
diff --git a/external/slang/share/doc/slang/design/interfaces.md b/external/slang/share/doc/slang/design/interfaces.md
new file mode 100644
index 00000000..b0c48432
--- /dev/null
+++ b/external/slang/share/doc/slang/design/interfaces.md
@@ -0,0 +1,486 @@
+Interfaces Design
+=================
+
+This document intends to lay out the proposed design for a few inter-related features in Slang:
+
+- Interfaces
+- Associated Types
+- Generics
+
+Introduction
+------------
+
+The basic problem here is not unique to shader programming: you want to write code that accomplishes one task, while abstracting over how to accomplish another task.
+As an example, we might want to write code to integrate incident radiance over a list of lights, while not concerning ourselves with how to evaluate a reflectance function at each of those lights.
+
+If we were doing this task on a CPU, and performance wasn't critical, we could probably handle this with higher-order functions or an equivalent mechanism like function pointers:
+
+    float4 integrateLighting(
+    	Light[] lights,
+    	float4 (*brdf)(float3 wi, float3 wo, void* userData),
+    	void const* brdfUserData)
+    {
+    	float4 result = 0;
+    	for(/* ... */) {
+    		// ...
+    		result += brdf(wi, wo, brdfUserData);
+    	}
+    	return result;
+    }
+
+Depending on the scenario, we might be able to generate statically specialized code by using templates instead:
+
+    template<typename BRDF>
+    float4 integrateLighting(Light[] lights, BRDF const& brdf)
+    {
+    	// ...
+    	result += brdf(wi, wo);
+    	// ...
+    }
+
+Current shading languages support neither higher-order functions nor templates/generics, so neither of these options is viable.
+Instead, practitioners typically use preprocessor techniques to either stitch together the final code, or to substitute in different function/type definitions to make a definition like `integrateLighting` reusable.
+
+These ad hoc approaches actually work well in practice; we aren't proposing to replace them *just* to make code abstractly "cleaner."
+Rather, we've found that the ad hoc approaches end up interacting poorly with the resource binding model in modern APIs, so that *something* less ad hoc is required to achieve our performance goals.
+At that point, we might as well ensure that the mechanism we introduce is also a good fit for the problem.
+
+Overview
+--------
+
+The basic idea for our approach is as follows:
+
+- Start with the general *semantics* of a generic-based ("template") approach
+
+- Use the accumulated experience of the programming language community to ensure that our generics are humane (in other words: not like C++)
+
+- Explore the possibility of syntax sugar to let people use more traditional OOP-style syntax when it can reduce verbosity without harming understanding
+
+In general, our conceptual model is being ripped off wholesale from Rust and Swift.
+The basic design principle is "when in doubt, do what Swift does."
+
+Interfaces
+----------
+
+An **interface** in Slang is akin to a `protocol` in Swift or a `trait` in Rust.
+The choice of the `interface` keyword is to highlight the overlap with the conceptually similar construct that appeared in Cg, and then later in HLSL.
+
+### Declaring an interface
+
+An interface is a named collection of **requirements**; any type that **implements** the interface must provide definitions that satisfy those requirements.
+
+Here is a simple interface, with one requirement:
+
+    interface Light
+    {
+    	float3 illuminate(float3 P_world);
+    }
+
+The `Light` interface requires a (member) function called `illuminate` with the given signature.
+
+### Declaring that a type implements an interface
+
+A user-defined `struct` type can declare that it implements an interface, by using conventional "inheritance" syntax:
+
+    struct PointLight : Light
+    {
+    	float3 P_light;
+
+    	float3 illuminate(float3 P_world)
+    	{
+    		float distance = length(P_light - P_world);
+    		// ...
+    	}
+    }
+
+It is a static error if a type declares that it implements an interface, but it does not provide all of the requirements:
+
+    struct BadLight : Light
+    {
+    	// ERROR: type 'BadLight' cannot implement 'Light'
+    	// because it does not provide the required 'illuminate' function
+    }
+
+### Interface Inheritance
+
+While this document does not propose general notions of inheritance be added to Slang, it does make sense to allow an interface to inherit from zero or more other interfaces:
+
+    interface InfinitessimalLight : Light
+    {
+    	float3 getDirection(float3 P_world);
+    }
+
+In this case the `InfinitessimalLight` interface inherits from `Light`, and declares one new requirement.
+In order to check that a type implements `InfinitessimalLight`, the compiler will need to check both that it implements `Light` and that it provides the new "direct" requirements in `InfinitessimalLight`.
+
+Declaring that a type implements an interface also implicitly declares that it implements all the interfaces that interface transitively inherits from:
+
+    struct DirectionalLight : InfinitessimalLight
+    {
+    	float3 L;
+    	float3 dir;
+
+    	float3 getDirection(float3 P_world) { return dir; }
+
+    	float3 illuminate(float3 P_world)
+    	{
+    		// Okay, this is the point where I recognize
+    		// that this function definition is not
+    		// actually reasonable for a light...
+    	}
+    }
+
+
+### Interfaces and Extensions
+
+It probably needs its own design document, but Slang currently has very basic support for `extension` declarations that can add members to an existing type.
+These blocks correspond to `extension` blocks in Swift, or `impl` blocks in Rust.
+This can be used to declare that a type implements an interface retroactively:
+
+    extension PointLight : InfinitessimalLight
+    {
+    	float3 getDirection(float3 P_world)
+    	{
+    		return normalize(P_light - P_world);
+    	}
+    }
+
+In this case we've used an extension to declare the `PointLight` also implements `InfinitessimalLight`. For the extension to type-check we need to provide the new required function (the compiler must recognize that the implementation of `Light` was already provided by the original type definition).
+
+There are some subtleties around using extensions to add interface implementations:
+
+- If the type already provides a method that matches a requirement, can the extension "see" it to satisfy new requirements?
+
+- When can one extension "see" members (or interface implementations) added by another?
+
+A first implementation can probably ignore the issue of interface implementations added by extensions, and only support them directly on type definitions.
+
+Generics
+--------
+
+All of the above discussion around interfaces neglected to show how to actually *use* the fact that, e.g., `PointLight` implements the `Light` interface.
+That is intentional, because at the most basic level, interfaces are designed to be used in the context of **generics**.
+
+### Generic Declarations
+
+The Slang compiler currently has some ad hoc support for generic declarations that it uses to implement the HLSL standard module (which has a few generic types).
+The syntax for those is currently very bad, and it makes sense to converge on the style for generic declarations used by C# and Swift:
+
+    float myGenericFunc<T>(T someValue);
+
+Types can also be generic:
+
+    struct MyStruct<T> { float a; T b; }
+
+Ideally we should also allow interfaces and interface requirements to be generic, but there will probably be some limits due to implementation complexity.
+
+### Type Constraints
+
+Unlike C++, Slang needs to be able to type-check the body of a generic function ahead of time, so it can't rely on `T` having particular members:
+
+    // This generic is okay, because it doesn't assume anything about `T`
+    // (other than the fact that it can be passed as input/output)
+    T okayGeneric<T>(T a) { return a; }
+
+    // This generic is not okay, because it assumes that `T` supports
+    // certain operators, and we have no way of knowing if this is true:
+    T notOkayGeneric<T>(T a) { return a + a; }
+
+In order to rely on non-trivial operations in a generic parameter type like `T`, the user must **constrain** the type parameter using an interface:
+
+    float3 mySurfaceShader<L : Light>(L aLight)
+    {
+    	return aLight.illuminate(...);
+    }
+
+In this example, we have constrained the type parameter `L` so that it must implement the interface `Light`.
+As a result, in the body of the function, the compiler can recognize that `aLight`, which is of type `L`, must implement `Light` and thus have a member `illuminate`.
+
+When calling a function with a constrained type parameter, the compiler must check that the actual type argument (whether provided explicitly or inferred) implements the interface given in the constraint:
+
+    mySurfaceShader<PointLight>(myPointLight);  // OK
+    mySurfaceShader(myPointLight);				// equivalent to previous
+    mySurfaceShader(3.0f); // ERROR: `float` does not implement `Light`
+
+Note that in the erroneous case, the error is reported at the call site, rather than in the body of the callee (as it would be for C++ templates).
+
+For cases where we must constrain a type parameter to implement multiple interfaces, we can join the interface types with `&`:
+
+	interface Foo { void foo(); }
+	interface Bar { void bar(); }
+
+    void myFunc<T : Foo & Bar>(T val)
+    {
+    	val.foo();
+    	val.bar();
+    }
+
+If we end up with very complicated type constraints, then it makes sense to support a "`where` clause" that allows requirements to be stated outside of the generic parameter list:
+
+    void myFunc<T>(T val)
+        where T : Foo,
+        	  T : Bar
+    {}
+
+Both the use of `&` and `where` are advanced features that we might cut due to implementation complexity.
+
+### Value Parameters
+
+Because HLSL has generics like `vector<float,3>` that already take non-type parameters, the language will need *some* degree of support for generic parameters that aren't types (at least integers need to be supported).
+We need syntax for this that doesn't bloat the common case.
+
+In this case, I think that what I've used in the current Slang implementation is reasonable, where a value parameter needs a `let` prefix:
+
+    void someFunc<
+    	T, 					// type parameter
+    	T : X, 				// type parameter with constraint
+    	T = Y, 				// type parameter with default
+    	T : X = Y, 			// type parameter with constraint and default
+    	let N : int,		// value parameter (type must be explicit)
+    	let N : int = 3>	// value parameter with default
+    	()
+    { ... }
+
+We should also extend the `where` clauses to support inequality constraints on (integer) value parameters to enforce rules about what ranges of integers are valid.
+The front-end should issue error messages if it can statically determine these constraints are violated, but it should probably defer full checking until the IR (maybe... we need to think about how much of a dependent type system we are willing to have).
+
+Associated Types
+----------------
+
+While the syntax is a bit different, the above mechanisms have approximately the same capabilities as Cg interfaces.
+What the above approach can't handle (and neither can Cg) is a reusable definition of a surface material "pattern" that might blend multiple material layers to derive parameters for a specific BRDF.
+
+That is, suppose we have two BRDFs: one with two parameters, and one with six.
+Different surface patterns may want to target different BRDFs.
+So if we write a `Material` interface like:
+
+    interface Material
+    {
+    	BRDFParams evaluatePattern(float2 uv);
+    }
+
+Then what should `BRDFParams` be? The two-parameter or six-parameter case?
+
+An **associated type** is a concept that solves exactly this problem.
+We don't care *what* the concrete type of `BRDFParams` is, so long as *every* implementation of `Material` has one.
+The exact `BRDFParams` type can be different for each implementation of `Material`; the type is *associated* with a particular implementation.
+
+We will crib our syntax for this entirely from Swift, where it is verbose but explicit:
+
+    interface Material
+    {
+    	associatedtype BRDFParams;
+
+    	BRDFParams evaluatePattern(float2 uv);
+
+    	float3 evaluateBRDF(BRDFParams param, float3 wi, float3 wo);
+    }
+
+In this example we've added an associated type requirement so that every implementation of `Material` must supply a type named `BRDFParams` as a member.
+We've also added a requirement that is a function to evaluate the BRDF given its parameters and incoming/outgoing directions.
+
+Using this declaration one can now define a generic function that works on any material:
+
+    float3 evaluateSurface<M : Material, L : Light>(
+    	M material,
+    	L[] lights,
+    	float3 P_world,
+    	float2 uv)
+    {
+    	M.BRDFParams brdfParams = material.evaluatePattern(uv);
+    	for(...)
+    	{
+    		L light = lights[i];
+    		// ...
+    		float3 reflectance = material.evaluateBRDF(brdfParams, ...);
+    	}
+    }
+
+Some quick notes:
+
+- The use of `associatedtype` (for associated types) and `typealias` (for `typedef`-like definitions) as distinct keywords in Swift was well motivated by their experience (they used to use `typealias` for both). I would avoid having the two cases be syntactically identical.
+
+- Swift has a pretty involved inference system where a type doesn't actually need to explicitly provide a type member with the chosen name. Instead, if you have a required method that takes or returns the associated type, then the compiler can infer what the type is by looking at the signature of the methods that meet other requirements. This is a complex and magical feature, and we shouldn't try to duplicate it.
+
+- Both Rust and Swift call this an "associated type." They are related to "virtual types" in things like Scala (which are in turn related to virtual classes in beta/gbeta). There are similar ideas that arise in Haskell-like languages with type classes (IIRC, the term "functional dependencies" is relevant).
+
+### Alternatives
+
+I want to point out a few alternatives to the `Material` design above, just to show that associated types seem to be an elegant solution compared to the alternatives.
+
+First, note that we could break `Material` into two interfaces, so long as we are allowed to place type constraints on associated types:
+
+    interface BRDF
+    {
+    	float3 evaluate(float3 wi, float3 wo);
+    }
+
+    interface Material
+    {
+    	associatedtype B : BRDF;
+
+    	B evaluatePattern(float2 uv);
+    }
+
+This refactoring might be cleaner if we imagine that a shader library would have a family of reflectance functions (implementing `BRDF`) and then a large library of material patterns (implementing `Material`) - we wouldn't want each and every material to have to implement a dummy `evaluateBRDF` that just forwards to a BRDF instance nested in it.
+
+Looking at that type `B` there, we might start to wonder if we could just replace this with a generic type parameter on the interface:
+
+    interface Material< B : BRDF >
+    {
+    	B evaluatePattern(float2 uv);
+    }
+
+This would change any type that implements `Material`:
+
+    // old:
+    struct MyMaterial : Material
+    {
+    	typealias B = GGX;
+
+    	GGX evaluatePattern(...) { ... }
+    }
+
+    // new:
+    struct MyMaterial : Material<GGX>
+    {
+    	GGX evaluatePattern(...) { ... }
+    }
+
+That doesn't seem so bad, but it ignores the complexity that arises at any use sites, e.g.:
+
+    float3 evaluateSurface<B : BRDF, M : Material<B>, L : Light>(
+    	M material,
+    	L[] lights,
+    	float3 P_world,
+    	float2 uv)
+    { ... }
+
+The type `B` which is logically an implementation detail of `M` now surfaces to the generic parameter list of any function that wants to traffic in materials.
+This reduces the signal/noise ratio for anybody reading the code, and also means that any top-level code that is supposed to be specializing this function (suppose this was a fragment entry point) now needs to understand how to pick apart the `Material` it has on the host side to get the right type parameters.
+
+This kind of issue has existed in the PL community at least as far back as the ML module system (it is tough to name search, but the concepts of "parameterization" vs. "fibration" are relevant here), and the Scala researchers made a clear argument (I think it was in the paper on "un-types") that there is a categorical distinction between the types that are logically the *inputs* to an abstraction, and the types that are logically the *outputs*. Generic type parameters and associated types handle these two distinct roles.
+
+Returning an Interface
+----------------------
+
+The revised `Material` definition:
+
+    interface BRDF
+    {
+    	float3 evaluate(float3 wi, float3 wo);
+    }
+
+    interface Material
+    {
+    	associatedtype B : BRDF;
+
+    	B evaluatePattern(float2 uv);
+    }
+
+has a function `evaluatePattern` that returns a type that implements an interface.
+In the case where the return type is concrete, this isn't a problem (and the nature of associated types means that `B` will be concrete in any actual concrete implementation of `Material`).
+
+There is an open question of whether it is ever necessary (or even helpful) to have a function that returns a value of *some* type known to implement an interface, without having to state that type in the function signature.
+This is a point that has [come up](https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md) in the Rust world, where they have discussed using a keyword like `some` to indicate the existential nature of the result type:
+
+	// A function that returns *some* implementation of `Light`
+	func foo<T>() -> some Light;
+
+The Rust proposal linked above has them trying to work toward `impl` as the keyword, and allowing it in both argument and result positions (to cover both universal and existential quantification).
+
+In general, such a feature would need to have many constraints:
+
+- The concrete return type must be fixed (even if clients of the function should be insulated from the choice), given the actual generic arguments provided.
+
+- If the existential is really going to be sealed, then the caller shouldn't be allowed to assume anything *except* that two calls to the same function with identical generic arguments should yield results of identical type.
+
+Under those constraints, it is pretty easy to see that an existential-returning method like:
+
+    interface Foo<T>
+    {
+    	func foo<U>() -> some Bar;
+    }
+
+can in principle be desugared into:
+
+    interface Foo<T>
+    {
+    	associatedtype B<U> : Bar;
+
+    	func foo<U>() -> B<U>;
+    }
+
+with no particular loss in what can be expressed.
+The same desugaring approach should apply to global-scope functions that want to return an existential type (just with a global `typealias` instead of an `associatedtype`).
+
+
+It might be inconvenient for the user to have to explicitly write the type-level expression that yields the result type (consider cases where C++ template metaprogrammers would use `auto` as a result type), but there is really no added power.
+
+
+Object-Oriented Sugar
+---------------------
+
+Having to explicitly write out generic parameter lists is tedious, especially in the (common) case where we will have exactly one parameter corresponding to each generic type parameter:
+
+	// Why am I repeating myself?!
+	//
+    void foo<L : Light, M : Material, C : Camera>(
+    	     L   light, M   material, C   camera);
+
+The intent seems to be clear if we instead write:
+
+    void foo(Light light, Material material, Camera camera);
+
+We could consider the latter to be sugar for the former, and allow users to write in familiar syntax akin to what was already supported in Cg.
+
+We'd have to be careful with such sugar, though, because there is a real and meaningful difference between saying:
+
+- "`material` has type `Material` which is an interface type"
+- "`material` has type `M` where `M` implements `Material`"
+
+In particular, if we start to work with associated types:
+
+    let b = material.evaluatePattern(...);
+
+It makes sense to say that `b` has type `M.BRDF`.
+It does **not** make sense to say that `b` has type `Material.BRDF`, because there is no such concrete type.
+
+(A third option is to say that `b` has type `material.BRDF`, which is basically the point where you have "virtual types" because we are now saying the type is a member of the *instance* and not of an enclosing *type*)
+
+Note that the issue of having or not having object-oriented sugar is technically orthogonal from whether we allow "existential return types."
+However, allowing the user to think of interfaces in traditional OOP terms leads to it being more likely that they will try to declare:
+
+- functions that return an interface type
+- local variables of interface type (which they might even assign to!)
+- fields of interface type in their `struct`s
+
+All of these complicate the desugaring step, because we would de facto have types/functions that mix up two stages of evaluation: a compile-time type-level step and a run-time value-level step.
+Ultimately, we'd probably need to express these by having a multi-stage IR (with two stages) which we optimize in the staged setting before stage-splitting to get separate type-level and value-level operations (akin to the desugaring for existential return types I described above).
+
+My sense is that a certain amount of multi-stage programming may already be needed to deal with certain HLSL/GLSL idioms. In particular:
+
+- GLSL supports passing unsized arrays (e.g., `int[] a`) to a function, and then having the function use the size of the array (`a.length`) to do loops, etc. These would need to be lowered to distinct SPIR-V code for every array size used (if I understand the restrictions correctly), and so the feature is perhaps best thought of as passing both a compile-time integer parameter and a run-time array parameter (where the size comes from that parameter)
+
+- HLSL and GLSL both have built-in functions where certain parameters are required to be compile-time constants. A feature-complete front-end must detect when calls to these functions are valid, and report errors to the user. In order to make the errors easier to explain to the user, it would be helpful to have an explicit notion of constant-rate computation, and require that the user express explicit constant-rate parameters/expressions.
+
+All of this ties into the question of whether we need/want to support more general kinds of compile-time evaluation for specialization (e.g., statically-determined `if` statements or loops).
+
+Other Languages
+---------------
+
+It is worth double-checking whether implementing all of this from scratch in Slang is a good idea, or if there is somewhere else we can achieve similar results more quickly:
+
+- The Metal shading language has much of what we'd want. It is based on C++ templates, which are maybe not the ideal mechanism, and the compiler is closed-source so we can't easily add functionality. Still, it should be possible to prototype a lot of what we want on top of Metal 2.
+
+- The open-source HLSL compiler doesn't support any of the new ideas here, but it may be that adding them to `dxc` would be faster than adding them to the Slang project code. Using `dxc` is a no-go for some of the other Slang requirements (that come from our users on the Falcor project).
+
+- Swift already supports almost everything on our list of requirements, but as it stands today there is no easy path to using it for low-level GPU code generation. It also fails to meet our goals for incremental adoption, high-level source output, etc.
+
+  In the long run, however, the Swift compiler seems like an attractive intercept for this work, because their long-term roadmap seems like it will close a lot of the gap with what we've done so far.
+
+Conclusion
+----------
+
+This document has described the basic syntax and semantics for three related features -- interfaces, generics, and associated types -- along with some commentary on longer-term directions.
+My expectation is that we will use the syntax as laid down here, unless we have a very good reason to depart from it, and we will prioritize implementation work as needed to get interesting shader library functionality up and running.
diff --git a/external/slang/share/doc/slang/design/ir.md b/external/slang/share/doc/slang/design/ir.md
new file mode 100644
index 00000000..ba156c2f
--- /dev/null
+++ b/external/slang/share/doc/slang/design/ir.md
@@ -0,0 +1,275 @@
+The Design of Slang's Intermediate Representation (IR)
+======================================================
+
+This document details some of the important design choices for Slang's IR.
+
+Goals and Non-Goals
+-------------------
+
+The IR needs to balance many goals which can sometimes come into conflict.
+We will start by enumerating these goals (and related non-goals) explicitly so that we can better motivate specific design choices.
+
+* Obviously it must be simple to lower any Slang source code to the IR. It is, however, a non-goal for the lowering process to be lossless; we do not need to recover source-level program structure from the IR.
+
+* The IR must be amenable to standard dataflow analyses and optimizations. It should be possible to read a paper on a compiler algorithm or technique and apply it to our IR in a straightforward manner, and with the expected asymptotic efficiency.
+
+* As a particular case of analysis and optimization, it should be possible to validate flow-dependent properties of an input function/program (e.g., whether an `[unroll]` loop is actually unrollable) using the IR, and emit meaningful error messages that reference the AST-level names/locations of constructs involved in an error.
+
+* It should be possible to compile modules to the IR separately and then "link" them in a way that depends only on IR-level (not AST-level) constructs. We want to allow changing implementation details of a module without forcing a re-compile of IR code using that module (what counts as "implementation details" is negotiable).
+
+* There should be a way to serialize IR modules in a round-trip fashion preserving all of the structure. As a long-term goal, the serialized format should provide stability across compiler versions (working more as an IL than an IR).
+
+* The IR must be able to encode "generic" (type-parameterized) constructs explicitly, and to express transformations from generic to specialized (or dynamic-dispatch) code in the IR. In particular, it must be possible for a module to make use of a generic defined in another (separately-compiled) module, with validation performed before linking, and specialization performed after.
+
+* The IR must be able to express code that is close to the level of abstraction of shader intermediate languages (ILs) like SPIR-V and DXIL, so that we can minimize the amount of work required (and the number of issues that can arise) when translating the IR to these targets. This can involve lowering and legalization passes to match the constraints of those ILs, but it should not require too much work to be done outside of the IR.
+
+* It should be possible to translate code in the IR back into high-level-language code, including things like structured control-flow constructs.
+
+* Whenever possible, invariants required by the IR should be built into its structure so that they are easier to maintain.
+
+* We should strive to make the IR encoding, both in memory and when serialized, as compact as is practically possible.
+
+Inspirations
+------------
+
+The IR design we currently use takes inspiration from three main sources:
+
+* The LLVM project provides the basic inspiration for the approach to SSA, such as using a typed IR, the decision to use the same object to represent an instruction and the SSA value it produces, and the push to have an extremely simple `replaceAllUsesWith` primitive. It is easy to forget that it is possible to design a compiler with different design decisions; the LLVM ones just happen to both be well-motivated and well-known.
+
+* The Swift IL (SIL) provides the inspiration for our approach for encoding SSA "phi nodes" (blocks with arguments), and also informs some of how we have approached encoding generics and related features like existential types.
+
+* The SPIR-V IL provides the inspiration for the choice to uniformly represent types as instructions, for how to encode "join points" for structured control flow, and for the concept of "decorations" for encoding additional metadata on instructions.
+
+
+Key Design Decisions
+--------------------
+
+### Everything is an Instruction
+
+The Slang IR strives for an extremely high degree of uniformity, so almost every concept in the IR is ultimately just an instruction:
+
+* Ordinary add/sub/mul/etc. operations are instructions, as are function calls, branches, function parameters, etc.
+
+* Basic blocks in functions, as well as functions themselves are "parent instructions" that can have other instructions as children
+
+* Constant values (e.g., even `true` and `false`) are instructions
+
+* Types are instructions too, and can have operands (e.g., a vector type is the `VectorType` instruction applied to operands for the element type and count)
+
+* Generics are encoded entirely using ordinary instructions: a generic is encoded like a function that just happens to do computation at the type level
+
+* It isn't true right now, but eventually decorations will also be instructions, so that they can have operands like any other instruction
+
+* An overall IR module is itself an instruction so that there is a single tree that owns everything
+
+This uniformity greatly simplifies the task of supporting generics, and also means that operations that need to work over all instructions, such as cloning and serialization, can work with a single uniform representation and avoid special-casing particular opcodes.
+
+The decision to use an extremely uniform design, even going so far as to treat types as "ordinary" instructions, is similar to SPIR-V, although we do not enforce many of the constraints SPIR-V does on how type and value instructions can be mixed.
+
+### Instructions Have a Uniform Structure
+
+Every instruction has:
+
+* An opcode
+* A type (the top-level module is the only place where this can be null)
+* Zero or more operands
+* Zero or more decorations
+* Zero or more children
+
+Instructions are not allowed to have any semantically-relevant information that is not in the above list.
+The only exception to this rule is instructions that represent literal constants, which store additional data to represent their value.
+
+The in-memory encoding places a few more restrictions on top of this so that, e.g., currently an instruction can either have operands or children, but not both.
+
+Because everything that could be used as an operand is also an instruction, the operands of an instruction are stored in a highly uniform way as a contiguous array of `IRUse` values (even the type is contiguous with this array, so that it can be treated as an additional operand when required).
+The `IRUse` type maintains explicit links for use-def information, currently in a slightly bloated fashion (there are well-known techniques for reducing the size of this information).
+
+### A Class Hierarchy Mirrored in Opcodes
+
+There is a logical "class hierarchy" for instructions, and we support (but do not mandate) declaring a C++ `struct` type to expose an instruction or group of instructions.
+These `struct` types can be helpful to encode the fact that the program knows an instruction must/should have a particular type (e.g., having a function parameter of type `IRFunction*` prevents users from accidentally passing in an arbitrary `IRInst*` without checking that it is a function first), and can also provide convenience accessors for operands/children.
+
+To make "dynamic cast" operations on this class hierarchy efficient, we arrange for the instruction opcodes for the in-memory IR to guarantee that all the descendants of a particular "base class" will occupy a contiguous range of opcodes. Checking that an instruction is in that range is then a constant-time operation that only looks at its opcode field.
+
+There are some subtleties to how the opcodes are ordered to deal with the fact that some opcodes have a kind of "multiple inheritance" thing going on, but that is a design wart that we should probably remove over time, rather than something we are proud of.
+
+### A Simpler Encoding of SSA
+
+The traditional encoding of SSA form involves placing "phi" instructions at the start of blocks that represent control-flow join points where a variable will take on different values depending on the incoming edge that is taken.
+There are of course benefits to sticking with tradition, but phi instructions also have a few downsides:
+
+- The operands to phi instructions are the one case where the "def dominates use" constraint of SSA appears to be violated. I say "appears" because officially the action of a phi occurs on the incoming edge (not in the target block) and that edge will of course be dominated by the predecessor block. It still creates a special case that programmers need to be careful about. This also complicates serialization in that there is no order in which the blocks/instructions of a function can be emitted that guarantees that every instruction always precedes all of its uses in the stream.
+
+- All of the phi instructions at the start of the block must effectively operate in parallel, so that they all "read" from the correct operand before "writing" to the target variable. Like the above special case, this is only a problem for a phi related to a loop back-edge. It is of course possible to always remember the special interpretation of phi instructions (that they don't actually execute sequentially like every other instruction in a block), but it's another special case.
+
+- The order of operands to a phi instruction needs to be related back to the predecessor blocks, so that one can determine which value is to be used for which incoming edge. Any transformation that modifies the CFG of a function needs to be careful to rewrite phi instructions to match the order in which predecessors are listed, or else the compiler must maintain a side data structure that remembers the mapping (and update it instead).
+
+- Directly interpreting/executing code in an SSA IR with phi instructions is made more difficult because when branching to a block we need to immediately execute any phi instructions based on the block from which we just came. The above issues around phis needing to be executed in parallel, and needing to track how phi operands relate to predecessor blocks also add complexity to an interpreter.
+
+Slang ditches traditional phi functions in favor of an alternative that matches the Swift IL (SIL).
+The idea doesn't really start in Swift, but rather in the existing observation that SSA form IR and a continuation-passing style (CPS) IR are semantically equivalent; one can encode SSA blocks as continuation functions, where the arguments of the continuation stand in for the phi instructions, and a branch to the block becomes just a call.
+
+Like Swift, we do not use an explicit CPS representation, but instead find a middle ground of a traditional SSA IR where instead of phi instructions basic blocks have parameters.
+The first N instructions in a Slang basic block are its parameters, each of which is an `IRParam` instruction.
+
+A block that would have had N phi instructions now has N parameters, but the parameters do not have operands.
+Instead, a branch instruction that targets that block will have N *arguments* to match the parameters, representing the values to be assigned to the parameters when this control-flow edge is taken.
+
+This encoding is equivalent in what it represents to traditional phi instructions, but nicely solves the problems outlined above:
+
+- The phi operands in the successor block are now arguments in the *predecessor* block, so that the "def dominates use" property can be enforced without any special cases.
+
+- The "assignment" of the argument values to parameters is now encoded with a single instruction, so that the simultaneity of all the assignments is more clear. We still need to be careful when leaving SSA form to obey those semantics, but there are no tricky issues when looking at the IR itself.
+
+- There is no special work required to track which phi operands come from which predecessor block, since the operands are attached to the terminator instruction of the predecessor block itself. There is no need to update phi instructions after a CFG change that might affect the predecessor list of a block. The trade-off is that any change in the *number* of parameters of a block now requires changes to the terminator of each predecessor, but that is a less common change (isolated to passes that can introduce or eliminate block parameters/phis).
+
+- It is much more clear how to give an operational semantics to a "branch with arguments" instead of phi instructions: compute the target block, copy the arguments to temporary storage (because of the simultaneity requirement), and then copy the temporaries over the parameters of the target block.
+
+The main caveat of this representation is that it requires branch instructions to have room for arguments to the target block. For an ordinary unconditional branch this is pretty easy: we just put a variable number of arguments after the operand for the target block. For branch instructions like a two-way conditional, we might need to encode two argument lists - one for each target block - and an N-way `switch` branch only gets more complicated.
+
+The Slang IR avoids the problem of needing to store arguments on every branch instruction by banning *critical edges* in IR functions that are using SSA phis/parameters. A critical edge is any edge from a block with multiple successors (meaning it ends in a conditional branch) to one with multiple predecessors (meaning it is a "join point" in the CFG).
+Phi instructions/parameters are only ever needed at join points, and so block arguments are only needed on branches to a join point.
+By ruling out conditional branches that target join points, we avoid the need to encode arguments on conditional branch instructions.
+
+This constraint could be lifted at some point, but it is important to note that there are no programs that cannot be represented as a CFG without critical edges.
+
+### A Simple Encoding of the CFG
+
+A traditional SSA IR represents a function as a bunch of basic blocks of instructions, where each block ends in a *terminator* instruction.
+Terminators are instructions that can branch to another block, and are only allowed at the end of a block.
+The potential targets of a terminator determine the *successors* of the block where it appears, and contribute to the *predecessors* of any target block.
+The successor-to-predecessor edges form a graph over the basic blocks called the control-flow graph (CFG).
+
+A simple representation of a function would store the CFG explicitly as a graph data structure, but in that case the data structure would need to be updated whenever a change is made to the terminator instruction of a block in a way that might change the successor/predecessor relationship.
+
+The Slang IR avoids this maintenance problem by noting an important property.
+If block `P`, with terminator `t`, is a predecessor of `S`, then `t` must have an operand that references `S`.
+In turn, that means that the list of uses of `S` must include `t`.
+
+We can thus scan through the list of predecessors or successors of a block with a reasonably simple algorithm:
+
+* To find the successors of `P`, find its terminator `t`, identify the operands of `t` that represent successor blocks, and iterate over them. This is O(N) in the number of outgoing CFG edges.
+
+* To find the predecessors of `S`, scan through its uses and identify users that are terminator instructions. For each such user if this use is at an operand position that represents a successor, then include the block containing the terminator in the output. This is O(N) in the number of *uses* of a block, but we expect that to be on the same order as the number of predecessors in practice.
+
+Each of these actually iterates over the outgoing/incoming CFG *edges* of a block (which might contain duplicates if one block jumps to another in, e.g., multiple cases of a `switch`).
+Sometimes you actually want the edges, or don't care about repeats, but in the case where you want to avoid duplicates the user needs to build a set to deduplicate the lists.
+
+The clear benefit of this approach is that the predecessor/successor lists arise naturally from the existing encoding of control-flow instructions. It creates a bit of subtle logic when walking the predecessor/successor lists, but that code only needs to be revisited if we make changes to the terminator instructions that have successors.
+
+### Explicit Encoding of Control-Flow Join Points
+
+In order to allow reconstruction of high-level-language source code from a lower-level CFG, we need to encode something about the expected "join point" for a structured branch.
+This is the logical place where control flow is said to "reconverge" after a branch, e.g.:
+
+```hlsl
+if(someCondition) // join point is "D"
+{
+	A;
+}
+else
+{
+	B;
+	if(C) return;
+}
+D;
+```
+
+Note that (unlike what some programming models would say) a join point is *not* necessarily a postdominator of the conditional branch. In the example above the block with `D` does not postdominate the block with `someCondition` nor the one with `B`. It is even possible to construct cases where the high-level join point of a control-flow construct is unreachable (e.g., the block after an infinite loop).
+
+The Slang IR encodes structured control flow by making the join point be an explicit operand of a structured conditional branch operation. Note that a join-point operand is *not* used when computing the successor list of a block, since it does not represent a control-flow edge.
+This is slightly different from SPIR-V where join points ("merge points" in SPIR-V) are encoded using a metadata instruction that precedes a branch. Keeping the information on the instruction itself avoids cases where we move one but not the other of the instructions, or where we might accidentally insert code between the metadata instruction and the terminator it modifies.
+In the future we might consider using a decoration to represent join points.
+
+When using a loop instruction, the join point is also the `break` label. The SPIR-V `OpLoopMerge` includes not only the join point (`break` target) but also a `continue` target. We do not currently represent structured information for `continue` blocks.
+The reason for this is that while we could keep structured information about `continue` blocks, we might not be able to leverage it when generating high-level code, because the syntactic form of a `for` loop (the only construct in C-like languages where `continue` can go somewhere other than the top of the loop body) only allows an *expression* for the continue clause and not a general *statement*, but we cannot guarantee that after optimization the code in an IR-level "continue clause" would constitute a single expression.
+The approach we use today means that the code in "continue clause" might end up being emitted more than once in final code; this is deemed acceptable because it is what `fxc` already does.
+
+When it comes time to re-form higher-level structured control flow from Slang IR, we use the structuring information in the IR to form single-entry "regions" of code that map to existing high-level control-flow constructs (things like `if` statements, loops, `break` or `continue` statements, etc.).
+The current approach we use requires the structuring information to be maintained by all IR transformations, and also currently relies on some invariants about what optimizations are allowed to do (e.g., we had better not introduce multi-level `break`s into the IR).
+
+In the future, it would be good to investigate adapting the "Relooper" algorithm used in Emscripten so that we can recover valid structured control flow from an arbitrary CFG; for now we put off that work.
+If we had a more powerful restructuring algorithm at hand, we could start to support things like multi-level `break`, and also ensure that `continue` clauses don't lead to code duplication any more.
+
+## IR Global and Hoistable Value Deduplication
+
+Types, constants and certain operations on constants are considered "global values" in the Slang IR. Some other insts like `Specialize()` and `Ptr(x)` are considered "hoistable" insts, in that they will be defined at the outermost scope where their operands are available. For example, `Ptr(int)` will always be defined at global scope (as a direct child of `IRModuleInst`) because its only operand, `int`, is defined at global scope. However, if we have `Ptr(T)` where `T` is a generic parameter, then this `Ptr(T)` inst will always be defined in the block of the generic. Global and hoistable values are always deduplicated and we can always assume two hoistable values with different pointer addresses are distinct values.
+
+The `IRBuilder` class is responsible for ensuring the uniqueness of global/hoistable values. If you call any `IRBuilder` method that creates a new hoistable instruction, e.g. `IRBuilder::createIntrinsicInst`, `IRBuilder::emitXXX` or `IRBuilder::getType`, `IRBuilder` will check if an equivalent value already exists, and if so it returns the existing inst instead of creating a new one.
+
+The trickier part here is to always maintain the uniqueness when we modify the IR. When we update the operand of an inst from a non-hoistable-value to a hoistable-value, we may need to hoist `inst` itself as a result. For example, consider the following code:
+```
+%1 = IntType
+%p = Ptr(%1)
+%2 = func {
+   %x = ...;
+   %3 = Ptr(%x);
+   %4 = ArrayType(%3);
+   %5 = Var (type: %4);
+   ...
+}
+```
+
+Now consider the scenario where we need to replace the operand in `Ptr(x)` with `int` (where `x` is some non-constant value). We will get a `Ptr(int)`, which is now a global value and should be deduplicated:
+```
+%1 = IntType
+%p = Ptr(%1)
+%2 = func {
+   %x = ...;
+   //%3 now becomes %p.
+   %4 = ArrayType(%p);
+   %5 = Var (type: %4);
+   ...
+}
+```
+Note this code is now breaking the invariant that hoistable insts are always defined at the top-most scope, because `%4` is no longer dependent on any local insts in the function, and should be hoisted to the global scope after replacing `%3` with `%p`. This means that we need to continue to perform hoisting of `%4`, resulting in this final code:
+```
+%1 = IntType
+%p = Ptr(%1)
+%4 = ArrayType(%p); // hoisted to global scope
+%2 = func {
+   %x = ...;
+   %5 = Var (type: %4);
+   ...
+}
+```
+
+As illustrated above, because we need to maintain the invariants of global/hoistable values, replacing an operand of an inst can have wide-spread effect on the IR.
+
+To help ensure these invariants, we introduce the `IRBuilder.replaceOperand(inst, operandIndex, newOperand)` method to perform all the cascading modifications after replacing an operand. However the `IRInst.setOperand(idx, newOperand)` will not perform the cascading modifications, and using `setOperand` to modify the operand of a hoistable inst will trigger a runtime assertion error.
+
+Similarly, `inst->replaceUsesWith` will also perform any cascading modifications to ensure the uniqueness of hoistable values. Because of this, we need to be particularly careful when using a loop to iterate the IR linked list or def-use linked list and call `replaceUsesWith` or `replaceOperand` inside the loop.
+
+Consider the following code:
+
+```
+IRInst* nextInst = nullptr;
+for (auto inst = func->getFirstChild(); inst; inst = nextInst)
+{
+     nextInst = inst->getNextInst(); // save a copy of nextInst
+     // ...
+     inst->replaceUsesWith(someNewInst); // Warning: this may be unsafe, because nextInst could have been moved to parent->parent!
+}
+```
+
+Now imagine this code is running on the `func` defined above, and that we are now at `inst == %3` and want to replace `inst` with `Ptr(int)`. Before calling `replaceUsesWith`, we have stored `inst->nextInst` to `nextInst`, so `nextInst` is now `%4` (the array type). Now after we call `replaceUsesWith`, `%4` is hoisted to global scope, so in the next iteration, we will start to process `%4` and follow its `next` pointer to `%2`, and we will be processing `func` instead of continuing to walk the child list!
+
+Because of this, we should never be calling `replaceOperand` or `replaceUsesWith` when we are walking the IR linked list. If we want to do so, we must create a temporary work list and add all the insts to the work list before we make any modifications. The `IRInst::getModifiableChildren` utility function will return a temporary work list for safe iteration on the children. The same can be said of the def-use linked list. There are `traverseUses` and `traverseUsers` utility functions defined in `slang-ir.h` to help with walking the def-use list safely.
+
+Another detail to keep in mind is that any local references to an inst may become out-of-date after a call to `replaceOperand` or `replaceUsesWith`. Consider the following code:
+```
+IRBuilder builder;
+auto x = builder.emitXXX(); // x is some non-hoistable value.
+auto ptr = builder.getPtrType(x);  // create ptr(x).
+x->replaceUsesWith(intType); // this renders `ptr` obsolete!!
+auto var = builder.emitVar(ptr); // use the obsolete inst to create another inst.
+```
+In this example, calling `replaceUsesWith` will cause `ptr` to represent `Ptr(int)`, which may already exist in the global scope. After this call, all uses of `ptr` should be replaced with the global `Ptr(int)` inst instead. `IRBuilder` has provided the mechanism to track all the insts that are removed due to deduplication, and map those removed but not yet deleted insts to the existing inst. When using `ptr` to create a new inst, `IRBuilder` will first check if `ptr` should map to some existing hoistable inst in the global deduplication map and replace it if possible. This means that after the call to `builder.emitVar`, `var->type` is not equal to `ptr`.
+
+### Best Practices
+
+In summary, the best practices when modifying the IR are:
+- Never call `replaceUsesWith` or `replaceOperand` when walking raw linked lists in the IR. Always create a work list and iterate on the work list instead. Use `IRInst::getModifiableChildren` and `traverseUses` when you need to modify the IR while iterating.
+- Never assume that any local reference to an `inst` is up-to-date after a call to `replaceUsesWith` or `replaceOperand`. It is OK to continue using such references as operands/types to create a new inst, but do not assume the created inst will reference the same inst passed in as argument.
+
+
diff --git a/external/slang/share/doc/slang/design/overview.md b/external/slang/share/doc/slang/design/overview.md
new file mode 100644
index 00000000..24c31603
--- /dev/null
+++ b/external/slang/share/doc/slang/design/overview.md
@@ -0,0 +1,259 @@
+An overview of the Slang Compiler
+=================================
+
+This document will attempt to walk through the overall flow of the Slang compiler, as an aid to developers who are trying to get familiar with the codebase and its design.
+More emphasis will be given to places where the compiler design is nontraditional, or might surprise newcomers; things that are straightforward won't get much detail.
+
+High-Level Concepts
+-------------------
+
+Compilation is always performed in the context of a *compile request*, which bundles together the options, input files, and request for code generation.
+Inside the code, there is a type `CompileRequest` to represent this.
+
+The user specifies some number of *translation units* (represented in the code as a `TranslationUnitRequest`) which comprise some number of *sources* (files or strings).
+HLSL follows the traditional C model where a "translation unit" is more or less synonymous with a source file, so when compiling HLSL code the command-line `slangc` will treat each source file as its own translation unit.
+For Slang code, the command-line tool will by default put all source files into a single translation unit (so that they represent a shared namespace).
+
+The user can also specify some number of *entry points* in each translation unit (`EntryPointRequest`), which combines the name of a function to compile with the pipeline stage to compile for.
+
+In a single compile request, we can generate code for zero or more *targets* (represented with `TargetRequest`); a target defines both the format for output code (e.g., DXIL or SPIR-V) and a *profile* that specifies the capability level to assume (e.g., "Shader Model 5.1").
+
+It might not be immediately clear why we have such fine-grained concepts as this, but it ends up being quite important to decide which pieces of the compiler are allowed to depend on which pieces of information (e.g., whether or not a phase of compilation gets to depend on the chosen target).
+
+The "Front End"
+---------------
+
+The job of the Slang front-end is to turn textual source code into a combination of code in our custom intermediate representation (IR) plus layout and binding information for shader parameters.
+
+### Lexing
+
+The first step in the compiler (after a source file has been loaded into memory) is to *lex* it.
+The `Lexer` type is implemented in `lexer.{h,cpp}` and produces `Token`s that represent the contents of the file on-demand as requested by the next phase of compilation.
+
+Each token stores a `TokenCode` that indicates the kind of token, the raw text of the token, and the location in the source code where it is located.
+Source locations use a somewhat clever encoding to avoid being bloated (they are a single integer rather than separate file, line, and column fields).
+
+We don't make any attempt in the lexer to extract the actual value of integer and floating-point literals; we just store the raw text.
+We also don't try to distinguish keywords from identifiers; keywords show up as ordinary identifier tokens.
+
+Much of the complexity (and inefficiency) in the current lexer is derived from the need to support C-isms like backslash line continuation, and special case rules like allowing `<>` to delimit a file name string after a `#include`.
+
+### Preprocessing
+
+The preprocessor (`Preprocessor`) in `preprocessor.{h,cpp}` deals with `#include` constructs, macro expansions, etc.
+It pulls tokens from the lexer as needed (making sure to set flags to control the lexer behavior when required) and uses a limited lookahead to decide what to do with each token.
+
+The preprocessor maintains a stack of input streams, with the original source file at the bottom, and pushes entries for `#include`d files, macros to expand etc.
+
+Macro definitions store a sequence of already-lexed tokens, and expansion simply "replays" these tokens.
+Expansion keeps a notion of an "environment" for looking up identifiers and mapping them to macro definitions.
+Calling through to a function-style macro creates a fresh environment that maps the macro parameter names to pseudo-macros for the arguments.
+
+We still tokenize code in inactive preprocessor conditionals, but don't evaluate preprocessor directives inside inactive blocks (except those that may change the active/inactive state).
+Preprocessor directives are each handled as a callback on the preprocessor state and are looked up by name; adding a new directive (if we ever had a reason to) is a fairly simple task.
+
+One important detail of the preprocessor is that it runs over a full source file at once and produces a flat array of `Token`s, so that there is no direct interaction between the parser and preprocessor.
+
+### Parsing
+
+The parser (`Parser` in `parser.{h,cpp}`) is mostly a straightforward recursive-descent parser.
+Because the input is already tokenized before we start, we can use arbitrary lookahead, although we seldom look ahead more than one token.
+
+Traditionally, parsing of C-like languages requires context-sensitive parsing techniques to distinguish types from values, and deal with stuff like the C++ "most vexing parse."
+Slang instead uses heuristic approaches: for example, when we encounter an `<` after an identifier, we first try parsing a generic argument list with a closing `>` and then look at the next token to determine if this looks like a generic application (in which case we continue from there) or not (in which case we backtrack).
+
+There are still some cases where we use lookup in the current environment to see if something is a type or a value, but officially we strive to support out-of-order declarations like most modern languages.
+In order to achieve that goal we will eventually move to a model where we parse the bodies of declarations and functions in a later pass, after we have resolved names in the global scope.
+
+One important choice in the parser is that we strive to avoid hard-coding keywords as much as possible.
+We already track an environment for C-like parsing, and we simply extend that so that we also look up declaration and statement keywords in the environment.
+This means that most of the language "keywords" in Slang aren't keywords at all, and instead are just identifiers that happen to be bound to syntax in the default environment.
+Syntax declarations are associated with a callback that is invoked to parse the construct they name.
+
+The design of treating syntax as ordinary declarations has a long-term motivation (we'd like to support a flexible macro system) but it also has short-term practical benefits.
+It is easy for us to add new modifier keywords to the language without touching the lexer or parser (just adding them to the core module), and we also don't have to worry about any of Slang's extended constructs (e.g., `import`) breaking existing HLSL code that just happens to use one of those new keywords as a local variable name.
+
+What the parser produces is an abstract syntax tree (AST).
+The AST currently uses a strongly-typed C++ class hierarchy with a "visitor" API generated via some ugly macro magic.
+Dynamic casting using C++ RTTI is used in many places to check the class of an AST node; we aren't happy with this but also haven't had time to implement a better/faster solution.
+
+In the parsed AST, both types and expressions use the same representation (because in an expression like `A(B)` it is possible that `A` will resolve to a type, or to a function, and we don't know which yet).
+
+One slightly odd design choice in the parser is that it attaches lexical scoping information to the syntax nodes for identifiers, and any other AST node that needs access to the scope/environment where it was defined. This is a choice we will probably change at some point, but it is deeply ingrained right now.
+
+### Semantic Checking
+
+The semantic checking step (`check.{h,cpp}`) is, not surprisingly, the most complicated and messiest bit of the compiler today.
+The basic premise is simple: recursively walk the entire AST and apply semantic checking to each construct.
+
+Semantic checking applies to one translation unit at a time.
+It has access to the list of entry points for the translation unit (so it can validate them), but it is *not* allowed to depend on the compilation target(s) the user might have selected.
+
+Semantic checking of an expression or type term can yield the same AST node, with type information added, or it can return newly constructed AST nodes (e.g., when an implicit cast needs to be inserted).
+Unchecked identifiers or member references are always resolved to have a pointer to the exact declaration node they are referencing.
+
+Types are represented with a distinct class hierarchy from AST nodes, which is also used for a general notion of compile-time values which can be used to instantiate generic types/functions/etc.
+An expression that ends up referring to a type will have a `TypeType` as its type, which will hold the actual type that the expression represents.
+
+The most complicated thing about semantic checking is that we strive to support out-of-order declarations, which means we may need to check a function declaration later in the file before checking a function body early in the file.
+In turn, that function declaration might depend on a reference to a nested type declared somewhere else, etc.
+We currently solve this issue by doing some amount of on-demand checking; when we have a reference to a function declaration and we need to know its type, we will first check if the function has been through semantic checking yet, and if not we will go ahead and recursively type check that function before we proceed.
+
+This kind of unfounded recursion can lead to real problems (especially when the user might write code with circular dependencies), so we have made some attempts to more strictly "phase" the semantic checking, but those efforts have not yet been done systematically.
+
+When code involves generics and/or interfaces, the semantic checking phase is responsible for ensuring that when a type claims to implement an interface it provides all of the requirements of that interface, and it records the mapping from requirements to their implementations for later use. Similarly, the body of a generic is checked to make sure it uses type parameters in ways that are consistent with their constraints, and the AST is amended to make it explicit when an interface requirement is being employed.
+
+### Lowering and Mandatory Optimizations
+
+The lowering step (`lower-to-ir.{h,cpp}`) is responsible for converting semantically valid ASTs into an intermediate representation that is more suitable for specialization, optimization, and code generation.
+The main thing that happens at this step is that a lot of the "sugar" in a high-level language gets baked out. For example:
+
+- A "member function" in a type will turn into an ordinary function that takes an initial `this` parameter
+- A `struct` type nested in another `struct` will turn into an ordinary top-level `struct`
+- Compound expressions will turn into sequences of instructions that bake the order of evaluation
+- High-level control-flow statements will get resolved to a control-flow graph (CFG) of basic blocks
+
+The lowering step is done once for each translation unit, and like semantic checking it does *not* depend on any particular compilation target.
+During this step we attach "mangled" names to any imported or exported symbols, so that each function overload, etc. has a unique name.
+
+After IR code has been generated for a translation unit (now called a "module") we next perform a set of "mandatory" optimizations, including SSA promotion and simple copy propagation and elimination of dead control-flow paths.
+These optimizations are not primarily motivated by a desire to speed up code, but rather to ensure that certain "obvious" simplifications have been performed before the next step of validation.
+
+After the IR has been "optimized" we perform certain validation/checking tasks that would have been difficult or impossible to perform on the AST.
+For example, we can validate that control flow never reaches the end of a non-`void` function, and issue an error otherwise.
+There are other validation tasks that can/should be performed at this step, although not all of them are currently implemented:
+
+- We should check that any `[unroll]` loops can actually be unrolled, by ensuring that their termination conditions can be resolved to a compile-time constant (even if we don't know the constant yet)
+
+- We should check that any resource types are being used in ways that can be statically resolved (e.g., that the code never conditionally computes a resource to reference), since this is a requirement for all our current targets
+
+- We should check that the operands to any operation that requires a compile-time constant (e.g., the texel offset argument to certain `Sample()` calls) are passed values that end up being compile-time constants
+
+The goal is to eliminate any possible sources of failure in low-level code generation, without needing to have a global view of all the code in a program.
+Any error conditions we have to push off until later start to limit the value of our separate compilation support.
+
+### Parameter Binding and Type Layout
+
+The next phase of parameter binding (`parameter-binding.{h,cpp}`) is independent of IR generation, and proceeds based on the AST that came out of semantic checking.
+Parameter binding is the task of figuring out what locations/bindings/offsets should be given to all shader parameters referenced by the user's code.
+
+Parameter binding is done once for each target (because, e.g., Vulkan may bind parameters differently than D3D12), and it is done for the whole compile request (all translation units) rather than one at a time.
+This is because when users compile something like HLSL vertex and fragment shaders in distinct translation units, they will often share the "same" parameter via a header, and we need to ensure that it gets just one location.
+
+At a high level, parameter binding starts by computing the *type layout* of each shader parameter.
+A type layout describes the amount of registers/bindings/bytes/etc. that a type consumes, and also encodes the information needed to compute offsets/registers for individual `struct` fields or array elements.
+
+Once we know how much space each parameter consumes, we then inspect any explicit binding information (e.g., `register` modifiers) that is relevant for the target, and build a data structure to record what binding ranges are already consumed.
+Finally, we go through any parameters without explicit binding information and assign them the next available range of the appropriate size (in a first-fit fashion).
+
+The parameter binding/layout information is what the Slang reflection API exposes. It is layered directly over the Slang AST so that it accurately reflects the program as the user wrote it, and not the result of lowering that program to our IR.
+
+This document describes parameter binding as a "front end" activity, but in practice it is something that could be done in the front-end, the back-end or both.
+When shader code involves generic type parameters, complete layout information cannot be generated until the values of these parameters are fully known, and in practice that might not happen until the back end.
+
+### Serialization
+
+It is not yet fully implemented, but our intention is that the last thing the front-end does is to serialize the following information:
+
+- A stripped-down version of the checked AST for each translation unit including declarations/types, but not function bodies
+
+- The IR code for each translation unit
+
+- The binding/layout information for each target
+
+The above information is enough to type-check a subsequent module that `import`s code compiled in the front-end, to link against its IR code, or to load and reflect type and binding information.
+
+
+The "Back End"
+--------------
+
+The Slang back end logically starts with the user specifying:
+
+- An IR module, plus any necessary modules to link in and provide its dependencies
+
+- An entry point in that module, plus arguments for any generic parameters that entry point needs
+
+- A compilation target (e.g., SPIR-V for Vulkan)
+
+- Parameter binding/layout information for that module and entry point, computed for the chosen target
+
+We eventually want to support compiling multiple entry points in one pass of the back end, but for now it assumes a single entry point at a time.
+
+### Linking and Target Specialization
+
+The first step we perform is to copy the chosen entry point and anything it depends on, recursively into a "fresh" IR module.
+We make a copy of things so that any optimization/transformation passes we do for one target don't alter the code the front-end produced in ways that affect other targets.
+
+While copying IR code into the fresh module, we have cases where there might be multiple definitions of the same function or other entity.
+In those cases, we apply "target specialization" to pick the definition that is the best for the chosen target.
+This step is where we can select between, say, a built-in definition of the `saturate` function for D3D targets, vs. a hand-written one in a Slang standard module to use for GLSL-based targets.
+
+### API Legalization
+
+If we are targeting a GLSL-based platform, we need to translate "varying" shader entry point parameters into global variables used for cross-stage data passing.
+We also need to translate any "system value" semantics into uses of the special built-in `gl_*` variables.
+
+We currently handle this kind of API-specific legalization quite early in the process, performing it right after linking.
+
+### Generic Specialization
+
+Once the concrete values for generic parameters are known, we can set about specializing code to the known types.
+We do this by cloning a function/type/whatever and substituting in the concrete arguments for the parameters.
+This process can be continued as specializing one function may reveal opportunities to specialize others.
+
+During this step we also specialize away lookup of interface requirements through their witness tables, once generic witness-table parameters have been replaced with concrete witness tables.
+
+At the end of specialization, we should have code that makes no use of user-defined generics or interfaces.
+
+### Type Legalization
+
+While HLSL and Slang allow a single `struct` type to contain both "ordinary" data like a `float3` and "resources" like a `Texture2D`, the rules for GLSL and SPIR-V are more restrictive.
+There are some additional wrinkles that arise for such "mixed" types, so we prefer to always "legalize" the types in the user's code by replacing an aggregate type like:
+
+```hlsl
+struct Material { float4 baseColor; Texture2D detailMap; };
+Material gMaterial;
+```
+
+with separate declarations for ordinary and resource fields:
+
+```hlsl
+struct Material { float4 baseColor; }
+
+Material gMaterial;
+Texture2D gMaterial_detailMap;
+```
+
+Changing the "shape" of a type like this (so that a single variable becomes more than one) needs to be done consistently across all declarations/functions in the program (hence why we do it after specialization, so that all concrete types are known).
+
+### Other Optimizations
+
+We don't currently apply many other optimizations on the IR code in the back-end, under the assumption that the lower-level compilers below Slang will do some of the "heavy lifting."
+
+That said, there are certain optimizations that Slang must do eventually, for semantic completeness. One of the most important examples of these is implementing the semantics of the `[unroll]` attribute, since we can't always rely on downstream compilers to have a capable unrolling implementation.
+
+We expect that over time it will be valuable for Slang to support a wider array of optimization passes, as long as they are ones that are considered "safe" to do above the driver interface, because they won't interfere with downstream optimization opportunities.
+
+### Emission
+
+Once we have transformed the IR code into something that should be legal for the chosen target, we emit high-level source code in either HLSL or GLSL.
+
+The emit logic is mostly just a scan over the IR code to emit a high-level declaration for each item: an IR structure type becomes a `struct` declaration, an IR function becomes a function definition, etc.
+
+In order to make the generated code a bit more readable, the Slang compiler currently does *not* emit declarations using their mangled names and instead tries to emit everything using a name based on how it was originally declared.
+
+To improve the readability of function bodies, the emit logic tries to find consecutive sequences of IR instructions that it can emit as a single high-level language expression. This reduces the number of temporaries in the output code, but we need to be careful about inserting parentheses to respect operator precedence, and also to not accidentally change the order of evaluation of code.
+
+When emitting a function body, we need to get from the low-level control flow graph (CFG) to high-level structured control-flow statements like `if`s and loops. We currently do this on a per-function basis during code emission, using an ad hoc algorithm based on control-flow structured information we stored in the IR.
+A future version of the compiler might implement something more complete like the "Relooper" algorithm used by Emscripten.
+
+### Downstream Compiler Execution
+
+Once we have source code, we can invoke downstream compilers like fxc, dxc, and glslang to generate binary code (and optionally to disassemble that code for console output).
+
+The Slang compiler also supports a "pass through" mode where it skips most of the steps outlined so far and just passes text along to these downstream compilers directly. This is primarily intended as a debugging aid for developers working on Slang, since it lets you use the same command-line arguments to invoke both Slang compilation and compilation with these other compilers.
+
+Conclusion
+----------
+
+Hopefully this whirlwind introduction to the flow of the Slang compiler gives some idea of how the project fits together, and makes it easier to dive into the code and start being productive.
diff --git a/external/slang/share/doc/slang/design/parsing.md b/external/slang/share/doc/slang/design/parsing.md
new file mode 100644
index 00000000..9027e06d
--- /dev/null
+++ b/external/slang/share/doc/slang/design/parsing.md
@@ -0,0 +1,68 @@
+# Resolving Ambiguity in Slang's Parser
+
+A typical text-book style compiler front-end usually features explicit stages: tokenization, parsing, and semantic checking. Slang's original design follows this pattern, but the design has a drawback that it cannot effectively disambiguate the syntax due to lack of semantic info during parsing.
+
+For example, without knowing what `X` is, it is impossible to tell whether `X<a&&b>(5)` means calling a generic function `X` with argument `5`, or computing the logical `AND` between condition `X < a` and `b > 5`.
+
+Slang initially addresses this problem with a heuristic: if the compiler sees `IDENTIFIER` followed by `<`, it will try to parse the expression as a generic specialization first, and if that succeeds, it checks the token after the closing `>` to see if the following token is one of the possible "generic specialization followers". In this example, the next token is `(`, which is a "generic specialization follower", so the compiler determines that the expression being parsed is very likely a generic function call, and it will parse the expression as such. For reference, the full set of "generic specialization followers" are: `::`, `.`, `(`, `)`, `[`, `]`, `:`, `,`, `?`, `;`, `==`, `!=`, `>` and `>>`.
+
+This simplistic heuristic originated in the C# compiler, which works well there since C# doesn't allow generic value arguments, therefore things like `X<a&&b>...` or `X<a<y>...` can never be valid generic specializations. This isn't the case for Slang, where generic arguments can be int or boolean values, so `a&&b` and `a<y` are valid as generic arguments. Although using the same heuristic here works most of the time, it is still causing a lot of confusion to the users when the heuristic fails.
+
+The ambiguity problem can be systematically solved if the parser has access to semantic info. If the parser knows that `X` is / isn't a generic, then it can parse the expression accordingly without any guess work. The key challenge is to make such semantic info available while we are still parsing.
+
+## Two-stage Parsing
+
+Slang solves this problem by breaking parsing into two stages: the decl parsing stage, and body parsing stage. Initially, we will parse the user source in the decl parsing stage. In this stage, we parse all decls, such as `struct`s, variables, functions etc. as usual, except that when we are about to parse the body of a function, we will just collect all tokens enclosed by `{` and `}` and store them in a raw list as an `UnparsedStmt` AST node. By deferring the parsing of function bodies, we no longer need to guess whether a `<` token inside a function body means generic specialization or less-than comparison.
+
+After the decl parsing stage, we have the AST that represents the decl structure but not the function bodies. With this initial AST, we can start semantic checking. Once we reach the `UnparsedStmt` nodes, the semantic visitor will spawn a new `Parser` and start to parse the tokens stored in the `UnparsedStmt` node. When we spawn the parser in a semantic visitor, we initialize the parser to be in `Body` parsing stage, and pass a pointer to the semantic visitor to the parser. This way, we are triggering the second parsing stage from the semantic visitor.
+
+During the second parsing stage, whenever we see a `<` and need to disambiguate, we will use the semantic visitor to check the expression that has been parsed so far before `<`. If we are able to type check the expression and find it to be a `DeclRefExpr` referencing a generic decl, or an `OverloadedExpr` where one of the candidates is a generic decl, then we know `<` should be parsed as a generic specialization instead of `operator <`. If the expression before `<` checks to be a reference to a variable or a property, we should parse it as the comparison operator. The reason we are still parsing `<` as generic specialization when the expression before it is a non-generic function or type, is to allow us to provide better error messages instead of just a "syntax error" somewhere down the line: in this case the user is most likely treating the non-generic type or function as a generic one by mistake, so we should diagnose as such. In the case that we are unable to properly check the preceding expression or it checks to something else that we don't know, the compiler will fall back to the heuristic based method for disambiguation.
+
+Note that in the second stage, parsing and semantic checking are interleaved organically. We no longer have a clean boundary between parsing and checking. However, the checking that happens in the second stage is on-demand and checks only necessary parts of the code to determine the type of the expression preceding the `<` token. Any other code irrelevant to disambiguation purposes is left unchecked. Once the function body is fully parsed, the semantic visitor working on the function will make sure every node of the parsed AST is visited.
+
+This two stage parsing technique should work well to correctly disambiguate code inside a function body. However the current implementation is not 100% bulletproof. Expressions at decl level, such as default values for struct members or function parameters, are still fully parsed in the first stage using the heuristic based method. However this should be a lesser problem in practice, because the default values are typically simple expressions and the chances of running into wrongly disambiguated case is much lower than in function bodies.
+
+## Scope of Local Variables
+
+Another issue linked with parsing is to correctly support the scope of local variables. A local variable should only be visible to code after its declaration within the same `{}` block. Consider this example:
+
+```cpp
+static int input = 100;
+int f()
+{
+    input = 2; // global `input` is now 2
+    int input = input + 1; // local `input` is now 3
+    input = input + 2; // local `input` is now 5
+    return input; // returns 5.
+}
+```
+
+In Slang's implementation, we are creating a `ScopeDecl` container node for each `BlockStatement`, and variable declarations inside the block are added to the same `ScopeDecl`. This creates a problem for two stage parsing: to allow any expression to check during disambiguation, we need to insert variables into the scope as soon as they are parsed, but this means that when we are doing the "full checking" after the entire body is parsed, all variables are already registered in scope and discoverable when we are checking the earlier statements in the block. This means that the compiler cannot report an error if the user attempts to use a variable that is defined later in the block. In the example above, it means that when we are checking the first statement `input = 2`, the lookup logic for `input` will find the local variable instead of the global variable, thus generating the wrong code.
+
+One way to solve this problem is instead of registering all local variables to the same scope owned by the containing `BlockStmt`, we make each local variable declaration own its own scope, that is ended at the end of the owning block. This way, all statements following the local variable declaration become the children of the local variable `DeclStmt`, effectively parsing the above example as:
+
+```cpp
+static int input = 100;
+int f()
+{
+    input = 2; // global `input` is now 2
+    {
+        int input = input + 1; // local `input` is now 3
+        input = input + 2; // local `input` is now 5
+        return input; // returns 5.
+    }
+}
+
+```
+
+This will ensure the scope data-structure matches the semantic scope of the variable, and allow the compiler to produce the correct diagnostics.
+
+However, expressing scope this way creates long nested chains in the AST, and leads to inefficient lookup and deep ASTs that risk overflowing the stack. Instead, Slang stays with the design to put all variables in the same block registered to the same `ScopeDecl`, but uses a separate state on each `VarDecl` called `hiddenFromLookup` to track whether or not the decl should be visible to lookup. During parsing, all decls are set to visible by default, so they can be used for disambiguation purpose. Once parsing is fully done and we are about to check a `BlockStmt`, we will first visit all `DeclStmt`s in the block, mark each as `invisible`, then continue checking the children statements. When checking encounters a `DeclStmt`, it will then mark the decl as `visible`, allowing it to be found by lookup logic for code after the declaration site. This solution allows us to respect the semantic scope of local variables without actually forming a long chain of scopes for a sequence of statements.
+
+## Future Work: Extend Staged Parsing to Decl Scopes
+
+We can further extend this to properly support expressions in global/decl scopes, such as default value expressions for struct members, or the type expressions for functions and global/member variables. To do so, we will use a different strategy for parsing expressions in the first parsing stage. Instead of parsing the expression directly, we should identify the token boundary of an expression without detailed understanding of the syntax. We will parse all expressions into `UnparsedExpr` nodes, which contain unparsed tokens for each expression. By doing so, the first parsing stage will give us an AST that is detailed enough to identify the names of types and functions, and whether or not they are generic. Then we can perform the semantic checking on the initial AST, and use the semantic checking to drive the parsing and checking of any `UnparsedExpr` and `UnparsedStmt`s.
+
+## Future Work: ScopeRef
+
+We can get rid of the `hiddenFromLookup` flag and use a more immutable representation of AST nodes if we introduce the concept of a `ScopeRef` that is a `Scope*` + `endIndex` to mark the boundary of the referenced scope. This way, different statements in a block can have different `ScopeRef` to the same scope but different ending member index. If we are looking up through a `ScopeRef` and find a variable in the scope that has an index greater than `endIndex`, we should treat the variable as invisible and report an error. This is cleaner, allowing better error messages, and avoids having to maintain mutable state flags on Decls.
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/design/semantic-checking.md b/external/slang/share/doc/slang/design/semantic-checking.md
new file mode 100644
index 00000000..10ddd514
--- /dev/null
+++ b/external/slang/share/doc/slang/design/semantic-checking.md
@@ -0,0 +1,216 @@
+Semantic Checking
+=================
+
+The semantic checking logic in the Slang compiler is located in `source/slang/slang-check*`.
+Semantic checking is applied in the front end after parsing, and before lowering of code to the IR.
+
+The main job of the semantic checking stage is to detect and forbid code that has errors in it.
+The errors and other diagnostics reported are intended to be of benefit to the user, but semantic checking is also important for the overall function of the compiler.
+Stages of compilation after semantic checking (e.g., lowering to the IR) are allowed to *assume* that the code they operate on is semantically valid, and may assert-fail or even crash on invalid code.
+Semantic checking is thus not an optional step, and there is no meaningful way to turn it off.
+
+Semantic Checking can be broken into three main kinds of work, and we will discuss how each is implemented in the following sections:
+
+* Checking of "terms" which include expressions and type expressions
+
+* Checking of statements
+
+* Checking of declarations
+
+Checking Terms
+--------------
+
+### Some Terminology for Terms
+
+We use the word "term" to refer generically to something that can be evaluated to produce a result, but where we do not yet know if the result will be a type or a value. For example, `Texture2D` might be a term that results in a type, while `main` might be a term that results in a value (of function type), but both start out as a `NameExpr` in the AST. Thus the AST uses the class hierarchy under `Expr` to represent terms, whether they evaluate to values or types.
+
+There is also the `Type` hierarchy, but it is important to understand that `Type` represents types as their logical immutable selves, while `Expr`s that evaluate to types are *type expressions* which can be concretely pointed to in the user's code. Type expressions have source locations, because they represent something the user wrote in their code, while `Type`s don't have singular locations by default.
+
+The codebase uses the notion of a `TypeRepr` for those `Expr`s that should only ever evaluate to types, and there is also a `TypeExp` type that is meant to package up a `Type` with an optional `Expr` for a type expression that produced it. The names of these implementation types aren't great, and should probably not be spread further.
+
+A value-bearing `Expr` will eventually be given a `Type` that describes the type of value it produces.
+An `Expr` that evaluates to a type will eventually be given a `Type` that uses the `TypeType` subclass to indicate the specific type it evaluated to.
+The `TypeType` idea is kind of a kludge to represent "kinds" (the "types of types") in our system.
+More correctly, we should say that every `Expr` gets a *classifier*, with the classifiers for value expressions being `Type`s and the classifiers for type expressions being kinds, but we haven't had time or inclination to fix the model yet.
+
+### The Big Picture
+
+Checking of terms is largely done as an ad hoc postorder traversal of the AST.
+That is, in order to check a compound expression like `f(a)` we first need to check `f` and `a` before we can check the function call.
+
+When checking an expression there are four main things that have to be done:
+
+1. Recursively check all sub-expressions.
+
+2. Detect and diagnose any errors (or warnings) in the current expression.
+
+3. Optionally construct a new expression to replace the current expression (or one of its sub-expressions) in cases where the syntactic form of the input doesn't match the desired semantics (e.g., make an implicit type conversion explicit in the AST).
+
+4. Determine the correct type for the result expression, and store it so that it can be used by subsequent checking.
+
+Those steps may end up being interleaved in practice.
+
+### Handling Errors Gracefully
+
+If an error is detected in a sub-expression, then there are a few issues that need to be dealt with:
+
+* We need to ensure that an erroneous sub-expression can't crash the compiler when it goes on to check a parent expression. For example, leaving the type of an expression as null when it has errors is asking for trouble.
+
+* We ideally want to continue to diagnose other unrelated errors in the same expression, statement, function, or file. That means that we shouldn't just bail out of semantic checking entirely (e.g., by throwing an exception).
+
+* We don't want to produce "cascading" errors where, e.g., an error in `a` causes us to also report an error in `a + b` because no suitable operator overload was found.
+
+We tackle all of these problems by introducing the `ErrorType` and `ErrorExpr` classes.
+If we can't determine a correct type for an expression (say, because it has an error) then we will assign it the type `ErrorType`.
+If we can't reasonably form an expression to return *at all* then we will return an `ErrorExpr` (which has type `ErrorType`).
+
+These classes are designed to make sure that subsequent code won't crash on them (since we have non-null objects), but to help avoid cascading errors.
+Some semantic checking logic will detect `ErrorType`s on sub-expressions and skip its own checking logic (e.g., this happens for function overload resolution), producing an `ErrorType` further up.
+In other cases, expressions with `ErrorType` can be silently consumed.
+For example, an erroneous expression is implicitly convertible to *any* type, which means that assignment of an error expression to a local variable will always succeed, regardless of variable's type.
+
+### Overload Resolution
+
+One of the most involved parts of expression checking is overload resolution, which occurs when there is an expression of the form `f(...)` where `f` could refer to multiple function declarations.
+
+Our basic approach to overload resolution is to iterate over all the candidates and add them to an `OverloadResolveContext`.
+The context is responsible for keeping track of the "best" candidate(s) seen so far.
+
+Traditionally a language defines rules for which overloads are "better" than others that focus only on candidates that actually apply to the call site.
+This is the right way to define language semantics, but it can produce sub-optimal diagnostics when *no* candidate was actually applicable.
+
+For example, suppose the user wrote `f(a,b)` and there are 100 functions named `f`, but none works for the argument types of `a` and `b`.
+A naive approach might just say "no overload applicable to arguments with such-and-such types."
+A more advanced compiler might try to list all 100 candidates, but that wouldn't be helpful.
+If it turns out that of the 100 candidates, only 10 of them have two parameters, then it might be much more helpful to list only the 10 candidates that were even remotely applicable at the call site.
+
+The Slang compiler strives to provide better diagnostics on overload resolution by breaking the checking of a candidate callee into multiple phases, and recording the earliest phase at which a problem was detected (if any).
+Candidates that made it through more phases of checking without errors are considered "better" than other candidates, even if they ultimately aren't applicable.
+
+### Type Conversions
+
+Conversion of values from one type to another can occur both explicitly (e.g., `(int) foo`) and implicitly (e.g., `while(foo)` implicitly converts `foo` to a `bool`).
+
+Type conversion is also tied into overload resolution, since some conversions get ranked as "better" than others when deciding between candidates (e.g., converting an `int` to a `float` is preferred over converting it to a `double`).
+
+We try to bottleneck all kinds of type conversion through a single code path so that the various kinds of conversion can be handled equivalently.
+
+### L-Values
+
+An *l-value* is an expression that can be used as the destination of an assignment, or for read-modify-write operations.
+
+We track the l-value-ness of expressions using `QualType` which basically represents a `Type` plus a bit to note whether something is an l-value or not.
+(This type could eventually be compressed down to be stored as a single pointer, but we haven't gotten to that yet)
+We do not currently have a concept like the `const` qualifier in C/C++, that would be visible to the language user.
+
+Propagation of l-value-ness is handled in an ad hoc fashion in the small number of expression cases that can ever produce l-values.
+The default behavior is that expressions are not l-values and the implicit conversion from `Type` to `QualType` reflects this.
+
+Checking Statements
+-------------------
+
+Checking of statements is relatively simpler than checking expressions.
+Statements do not produce values, so they don't get assigned types/classifiers.
+We do not currently have cases where a statement needs to be transformed into an elaborated form as part of checking (e.g., to make implicit behavior explicit), so statement checking operates "in place" rather than optionally producing new AST nodes.
+
+The most interesting part of statement checking is that it requires information about the lexical context.
+Checking a `return` statement requires knowing the surrounding function and its declared result type.
+Checking a `break` statement requires knowing about any surrounding loop or `switch` statements.
+
+We represent the surrounding function explicitly on the `SemanticsStmtVisitor` type, and also use a linked list of `OuterStmtInfo` threaded up through the stack to track lexically enclosing statements.
+
+Note that semantic checking of statements at the AST level does *not* encompass certain flow-sensitive checks.
+For example, the logic in `slang-check-stmt.cpp` does not check for or diagnose any of:
+
+* Functions that fail to `return` a value along some control flow paths
+
+* Unreachable code
+
+* Variables used without being initialized first
+
+All of the above are instead intended to be handled at the IR level (where dataflow analysis is easier) during the "mandatory" optimization passes that follow IR lowering.
+
+Checking Declarations
+---------------------
+
+Checking of declarations is the most complicated and involved part of semantic checking.
+
+### The Problem
+
+Simple approaches to semantic checking of declarations fall into two camps:
+
+1. One can define a total ordering on declarations (usually textual order in the source file) and only allow dependencies to follow that order, so that checking can follow the same order. This is the style of C/C++, which is inherited from the legacy of traditional single-pass compilers.
+
+2. One can define a total ordering on *phases* of semantic checking, so that every declaration in the file is checked at phase N before any is checked at phase N+1. E.g., the types of all variables and functions must be determined before any expressions that use those variables/functions can be checked. This is the style of, e.g., Java and C#, which put a premium on defining context-free languages that don't dictate order of declaration.
+
+Slang tries to bridge these two worlds: it has inherited features from HLSL that were inspired by C/C++, while it also strives to support out-of-order declarations like Java/C#.
+Unsurprisingly, this leads to unique challenges.
+
+Supporting out-of-order declarations means that there is no simple total order on declarations (we can have mutually recursive function or type declarations), and supporting generics with value parameters means there is no simple total order on phases.
+For that last part observe that:
+
+* Resolving an overloaded function call requires knowing the types of the parameters for candidate functions.
+
+* Determining the type of a parameter requires checking type expressions.
+
+* Type expressions may contain value arguments to generics, so checking type expressions requires checking value expressions.
+
+* Value expressions can include function calls (e.g., operator invocations), which then require overload resolution to type-check.
+
+### The Solution
+
+Our declaration checking logic takes the idea of phase-based checking as a starting point, but instead of a global ordering on phases we use a per-declaration order.
+
+Each declaration in the Slang AST will have a `DeclCheckState` that represents "how checked" that declaration is.
+We can apply semantic checking logic to a declaration `D` to raise its state to some desired state `S`.
+
+By default, the logic in `slang-check-decl.cpp` will do a kind of "breadth-first" checking strategy where it will try to raise all declarations to the one state before moving on to the next.
+In many cases this will reproduce the behavior of a Java or C#-style compiler with strict phases.
+
+The main difference for Slang is that whenever, during the checking of some declaration `D`, we discover that we need information from some other declaration `E` that would depend on `E` being in state `S`, we manually call a routine `ensureDecl(E,S)` whose job is to ensure that `E` has been checked enough for us to proceed.
+
+The `ensureDecl` operation will often be a no-op, if the declaration has already been checked previously, but in cases where the declaration *hasn't* been checked yet it will cause the compiler to recursively re-enter semantic checking and try to check `E` until it reaches the desired state.
+
+In pathological cases, this method can result in unbounded recursion in the type checker. The breadth-first strategy helps to make such cases less likely, and introducing more phases to semantic checking can also help reduce problems.
+In the long run we may need to investigate options that don't rely on unbounded recursion.
+
+### The Rules
+
+As a programmer contributing to the semantic checking infrastructure, the declaration-checking strategy requires following a few rules:
+
+* If a piece of code is about to rely on some property of a declaration that might be null/absent/wrong if checking hasn't been applied, it should use `ensureDecl` to make sure the declaration in question has been checked enough for that property to be available.
+
+* If adding some `ensureDecl`s leads to an internal compiler error because of circularity in semantic checking, then either the `ensureDecl`s were misplaced, or they were too strong (you asked for more checking than was necessary), or in the worst case we need to add more phases (more `DeclCheckState`s) to separate out the checking steps and break the apparent cycle.
+
+* In very rare cases, semantic checking for a declaration may want to use `SetCheckState` to update the state of the declaration itself before recursively `ensureDecl`ing its child declarations, but this must be done carefully because it means you are claiming that the declaration is in some state `S`, while not having completed the checking that is associated with state `S`.
+
+* It should *never* be necessary to modify `checkModuleDecl` so that it performs certain kinds of semantic analysis on certain declarations before others (e.g., iterate over all the `AggTypeDecl`s before all the `FuncDecl`s). If you find yourself tempted to modify it in such a way, then add more `DeclCheckState`s to reflect the desired ordering. It is okay to have phases of checking that only apply to a subset of declarations.
+
+* Every statement and expression/term should be checked once and only once. If something is being checked twice and leading to failures, the right thing is to fix the source of the problem in declaration checking, rather than make the expression/statement checking be defensive against this case.
+
+Name Lookup
+-----------
+
+Lookup is the process of resolving the contextual meaning of names either in a lexical scope (e.g., the user wrote `foo` in a function body - what does it refer to?) or in the scope of some type (e.g., the user wrote `obj.foo` for some value `obj` of type `T` - what does it refer to?).
+
+Lookup can be tied to semantic analysis quite deeply.
+In order to know what a member reference like `obj.foo` refers to, we not only need to know the type of `obj`, but we may also need to know what interfaces that type conforms to (e.g., it might be a type parameter `T` with a constraint `T : IFoo`).
+In order to support lookup in the presence of our declaration-checking strategy described above, the lookup logic may be passed a `SemanticsVisitor` that it can use to `ensureDecl()` declarations before it relies on their properties.
+
+However, lookup also currently gets used during parsing, and in those cases it may need to be applied without access to the semantics-checking infrastructure (since we currently separate parsing and semantic analysis).
+In those cases a null `SemanticsVisitor` is passed in, and the lookup process will avoid using lookup approaches that rely on derived semantic information.
+This is fine in practice because the main thing that gets looked up during parsing are names of `SyntaxDecl`s (which are all global) and also global type/function/variable names.
+
+
+Known Issues
+------------
+
+The largest known issue for the semantic checking logic is that there are currently dependencies between parsing and semantic checking.
+Just like a C/C++ parser, the Slang parser sometimes needs to disambiguate whether an identifier refers to a type or value to make forward progress, and that would in general require semantic analysis.
+
+Ideally the way forward is some combination of the following two strategies:
+
+* We should strive to make parsing at the "global scope" fully context-insensitive (e.g., by using similar lookahead heuristics to C#). We are already close to this goal today, but will need to be careful that we do not introduce regressions compared to the old parser (perhaps a "compatibility" mode for legacy HLSL code is needed?)
+
+* We should delay the parsing of nested scopes (both function and type bodies bracketed with `{}`) until later steps of the compiler. Ideally, parsing of function bodies can be done in a context-sensitive manner that interleaves with semantic checking, closer to the traditional C/C++ model (since we don't care about out-of-order declarations in function bodies).
+
diff --git a/external/slang/share/doc/slang/design/serialization.md b/external/slang/share/doc/slang/design/serialization.md
new file mode 100644
index 00000000..008fd6da
--- /dev/null
+++ b/external/slang/share/doc/slang/design/serialization.md
@@ -0,0 +1,331 @@
+Serialization
+=============
+
+Slang has a collection of serialization components. This document will be used to discuss serialization around IR/AST and modules as it currently exists. A separate document will describe the future serialization plans.
+
+All of the serialization aspects here focus on binary serialization. 
+
+The major components are
+
+* IR Serialization
+* AST/Generalized Serialization
+* SourceLoc Serialization
+* Riff container
+* C++ Extractor
+
+Generalized Serialization
+=========================
+
+Generalized serialization is the mechanism used to save 'arbitrary' C++ structures. It is currently used for serializing the AST. Although not necessary, generalized serialization is typically helped out by the `C++ extractor`, which can perform rudimentary parsing of C++ source, and extract class-like types and their fields. The extraction then produces header files that contain macros that can then be used to drive serialization. 
+
+It's worth discussing briefly what the philosophy is behind the generalized serialization system. To talk about this design it is worth talking a little about serialization in general and the issues involved. Let's say we have a collection of C++ class instances that contain fields. Some of those fields might be pointers. Others of the fields might be a templated container type like a Dictionary<K,V>. We want to take all of these instances, write them to a file, such that when we read the file back we will have the equivalent objects with equivalent relationships. 
+
+We could imagine a mechanism that saved off each instance, by writing off the address of the object, and then the in memory representation for all the instances that can be reached. When reading back the objects would be at different locations in memory. If we knew where the pointers were, we could use a map of old pointers to the new instances and fix them up. Problems with this simple mechanism occur because...
+
+* If we try to read back on a different machine, with a different pointer size, the object layout will be incompatible
+* If we try to read back on the same machine where the source is compiled by a different compiler, the object layout might be incompatible (say bool or enum are different size)
+* Endianness might be different
+* Knowing where all the pointers are and what they point to and therefore what to serialize is far from simple. 
+* The alignment of types might be different across different processors and different compilers 
+
+The implementation makes a distinction between the 'native' types, the regular C++ in memory types and 'serial' types. Each serializable C++ type has an associated 'serial' type - with the distinction that it can be written out and (with perhaps some other data) read back in to recreate the C++ type. The serial type can be a C++ type, but is such that it can be written and read from disk and still represent the same data. 
+
+The approach taken in Slang is to have each 'native' type (ie the C++ type) that is being serialized have a serializable 'dual' type. The serial type can be an explicit C++ type, or it might be implicit (ie not have a C++ type) and calculated at Slang startup. 
+
+The important point here is that the Serial type must be writable on one target/process and readable correctly on another. 
+
+The easy cases are types that have an alignment and representation that will work over all targets. These would be most built in types - integrals 8,16,32 and float32. Note that int64 and double are *not* so trivial, because some targets require 8 byte alignment - so they must be specially defined to have 8 byte alignment. 
+
+Another odd case is bool - it has been on some compilers 32 bits, and on others 8 bits. Thus we need to potentially convert.
+
+For this and other types it is therefore necessary to have functions that can convert to and from the serialized dual representation.
+
+## Generalized Field Conversion
+
+For types that contain fields, it would be somewhat laborious to have to write all of the conversion functions by hand. To avoid this we use the macro output of the C++ extractor to automatically generate the appropriate functions. 
+
+Take DeclRefExpr from the AST hierarchy - the extractor produces a macro something like...
+
+```
+#define SLANG_FIELDS_ASTNode_DeclRefExpr(_x_, _param_)\
+    _x_(scope, (RefPtr<Scope>), _param_)\
+    _x_(declRef, (DeclRef<Decl>), _param_)\
+    _x_(name, (Name*), _param_)
+``` 
+
+DeclRefExpr derives from Expr and this might hold other fields and so forth. 
+
+The macros can generate the appropriate conversion functions *if* we have the conversion functions for the field types. Field type conversions can be specified via a special macro that describes how the conversion to and from the type work. To make the association between the native and serial type, as well as provide the functions to convert, we use the template
+
+```
+template <typename T>
+struct SerialTypeInfo;
+```
+and specialize it for each native type. The specialization holds
+
+* SerialType - The type that will be used to represent the native type
+* NativeType - The native type
+* SerialAlignment - A value that holds what kind of alignment the SerialType needs to be serializable (it may be different from SLANG_ALIGN_OF(SerialType)!)
+* toSerial - A function that with the help of SerialWriter convert the NativeType into the SerialType
+* toNative - A function that with the help of SerialReader convert the SerialType into the NativeType
+
+It is useful to have a structure that can hold the type information, so it can be stored. That is achieved with
+
+```
+template <typename T>
+struct SerialGetType;
+```
+
+This template can be specialized for a specific native types - but all it holds is just a function getType, which returns a `SerialType*`, which just holds the information held in the SerialTypeInfo template, but additionally including the size of the SerialType.
+
+So we need to define a specialized SerialTypeInfo for each type that can be a field in a NodeBase/RefObject derived type. We don't need to define anything explicitly for the NodeBase derived types, as we will just generate the layout from the fields. How do we know the fields? We just used the macros generated from the C++ extractor.
+
+So first a few things to observe...
+
+1) Some types don't need any conversion to be serializable - int8_t, or float the bits can just be written out and read in (1)
+2) Some types need a conversion but it's very simple - for example an enum without explicit size, being written as an explicit size
+3) Some types can be written out but would not be directly readable or usable with different targets/processors, so need converting
+4) Some types require complex conversions that require programmer code - like Dictionary/List
+
+For types that need no conversion (1), we can just use the template SerialIdentityTypeInfo
+
+```
+template <>
+struct SerialTypeInfo<SomeType> : public SerialIdentityTypeInfo<SomeType> {};
+```
+
+This specialization means that SomeType can be written out and read in across targets/compilers without problems.
+
+For (2) we have another template that will do the conversion for us
+
+```
+template <typename NATIVE_T, typename SERIAL_T>
+struct SerialConvertTypeInfo;
+```
+
+That we can use as above, and specify the native and serial types.
+
+For (3) there are a few scenarios. For any field in a serial type we must store in the serialized type such that the representation will work across all processors/compilers. So one problematic type is `bool`. It's not specified how it's laid out in memory - and some compilers have stored it as a word. Most recently it's been stored as a byte. To make sure bool is ok for serialization therefore we store as a uint8_t.
+
+Another example would be double. It's 64 bits, but on some arches/compilers its SLANG_ALIGN_OF is 4 and on others it's 8. On some architectures a non aligned read will lead to a fault, on others it might be very slow. To work around this problem therefore we have to ensure double has the alignment that will work across all targets - and that alignment is 8. In that specific case that issue is handled via SerialBasicTypeInfo, which makes the SerialAlignment the sizeof the type.
+
+For (4) there are a few things to say. First a type can always implement a custom version of how to do a conversion by specializing `SerialTypeInfo`. But there remains another nagging issue - types which allocate/use other memory that changes at runtime. Clearly we cannot define 'any size of memory' in a fixed SerialType defined in a specialization of SerialTypeInfo. The mechanism to work around this is to allow arbitrary arrays to be stored, that can be accessed via a SerialIndex. This will be discussed more once we discuss a little more about the file system, and SerialIndex. 
+
+## Struct value types
+
+There is a mechanism to allow the simple serialization of 'value' struct types. For this to work it requires
+
+* The fields of the struct are serializable and public
+* The super class (if there is one) is serializable
+
+If this is the case, it is not necessary to write a `SerialTypeInfo<T>` specialization, the C++ extractor and its reflection can generate the specialization for you. The steps needed
+
+* Place SLANG_VALUE_CLASS(your type) in the definition of your struct 
+* Make sure that the header containing the struct definition is included in the ones C++ extractor examines
+* Instead of implementing SerialTypeInfo for your type use the macro SLANG_VALUE_TYPE_INFO(your type)
+
+If there are problems, try looking at the contents of `slang-generated-value.h` and `slang-generated-value-macro.h`.
+
+It should be noted that currently because of limitations in the C++ extractor, all of the types must be defined in the same scope.
+
+Also because value types are always fields in generalized serialization, they do not need to be identified with a sub type, even though C++ extractor does generate a ValueType enum.
+
+## Generalized Serialization Format
+
+The serialization format used is 'stream-like' with each 'object' stored in order. Each object is given an index starting from 1. 0 is used to be in effect nullptr. The stream looks like
+
+```
+SerialInfo::Entry (for index 1)
+Payload for type in entry
+
+SerialInfo::Entry (for index 2)
+Payload for type in entry
+
+... 
+... 
+```
+
+When writing, we have an array that maps each index to a pointer to the associated header. We also have a map that maps native pointers to their indices. The Payload *is* the SerialType for thing saved. The payload directly follows the Entry data. Each object in this list can only be a few types of things
+
+* NodeBase derived type
+* RefObject derived type
+* String
+* Array
+
+The actual Entry followed by the payloads are allocated and stored when writing in a MemoryArena. When we want to write into a stream, we can just iterate over each entry in order and write it out.
+
+You may have spotted a problem here - that some Entry types can be stored without alignment (for example a string - which stores the length VarInt encoded followed by the characters). Others require an alignment - for example a NodeBase derived type that contains an int64_t will *require* 8 byte alignment. As a feature of the serialization format, we want to be able to just map the data into memory, and be able to access all the SerialType as is on the CPU. For that to work we *require* that the payload for each entry has the right alignment for the associated SerialType.
+
+To achieve this we store in the Entry its alignment requirement *AND* the next entry's alignment. With this, when we read, as we are stepping through the entries we can find where the next Entry starts. Because the payload comes directly after the Entry - the Entry's size must be a multiple of the largest alignment the payload can have.
+
+For the code that does the conversion between native and serial types it uses either the SerialWriter or SerialReader. This provides the mechanism to turn a pointer into a serializable `SerialIndex` and vice versa. There are some special functions for converting string like types back and forth.
+
+The final mechanism is that of 'Arrays'. An array allows reading or writing a chunk of data associated with a `SerialIndex`. The chunk of data *must* hold data that is serializable. If the array holds pointers - then the serialized array must hold an array of `SerialIndex` values that represent those pointers. When reading back in `SerialIndex` is converted back to a pointer.
+
+Arrays are the escape hatch that allows for more complex types to serialize. Dictionaries for example are saved as a serial type that is two SerialIndices one to a keys array and one to a values array.
+
+Note that writing has two phases, serializing out into an SerialWriter, and then secondly writing out to a stream. 
+
+## Object/Reference Types
+
+When talking about Object/Reference types this means types that can be referenced natively as pointers. Currently that means `NodeBase` and `SerialRefObject` derived types. 
+
+The SerialTypeInfo mechanism is generally for *fields* of object types. For derived types we use the C++ extractor's field list to work out the native fields' offsets and types. With this we can then calculate the layout for NodeBase types such that they follow the requirements for serialization - such as alignment and so forth.
+
+This information is held in the SerialClasses, which for a given TypeKind/SubType gives a SerialClassInfo, that specifies fields for just that type. 
+
+It is trivial to work out the SubType for a NodeBase derived class - it's just the astTypeNode member in the `NodeBase` type. For a SerialRefObject it is determined by first calling 
+
+```
+const ReflectClassInfo* getClassInfo() const;
+```
+
+Then the m_classID in the `ReflectClassInfo` is the subtype.
+
+## Reading
+
+Due to the care taken in writing, reading is relatively simple. We can just take the contents of the file and put it in memory, as long as in memory it has an alignment of at least MAX_ALIGNMENT. Then we can build up an entries table by stepping through the data and writing the pointer.
+
+The toNative functions take a SerialReader - this allows the implementation to ask for pointers and arrays from other parts of the serialized data. It also allows for types to be lazily reconstructed if necessary.
+
+Lazy reconstruction may be useful in the future to partially reconstruct a sub part of the serialized data. In the current implementation, lazy evaluation is used on Strings. The m_objects array holds all of the recreated native 'objects'. Since the objects can be derived from different base classes the associated Entry will describe what it really is.
+
+For the String type, we initially store the object pointer as null. If a string is requested from that index, we see if the object pointer is null; if it is, we have to construct the StringRepresentation that will be used. An extra wrinkle is that we allow accessing of a serialized String as a Name or a string or an UnownedSubString. Fortunately a Name just holds a string, and a Name remains in scope as long as its NamePool does, which is passed in.
+
+### Serial type replacement
+
+In generalized serialization systems such as with Java there is a mechanism for reference types to replace their representation on writing, and then on reading replace the read type with the actual type. Write replacement is already used when serializing out modules via the `SerialFilter` mechanism. The actual implementation is `ModuleSerialFilter`, if an object is referenced in a different module that is explicitly specified, it is replaced with `ImportExternalDecl`, that names the actual definition to use. 
+
+Currently when deserializing, the `ImportExternalDecl` is *not* turned back into the item it references. This means there are likely pointers which point to invalid objects. 
+
+If we wanted to do a replacement on reconstruction, the reading process would need to change.
+
+We could modify reading as follows.
+
+1) Don't construct anything at the start
+2) Find 'root's they must be created and deserialized first
+  . Any read/writeReplace is a root
+  . Any marked (like SourceLocData) is a root. (When deconstructed it also needs to add information to the Reader)
+  . The root of the objects (note we could just deserialize first to last if not already constructed)
+3) During deserialization pointer references are constructed on demand
+4) Extra code is needed to make sure there aren't cycles. Any object is either Pre/Created/Deserialized.
+
+### Other reading issues
+
+As touched on elsewhere SourceLoc information has to be carefully handled. Within the generalized serialization we have the additional problem that we probably don't want to attach SourceLoc or other types explicitly to the SerialReader/SerialWriter. The mechanism to work around this is via the `SerialExtraObjects` structure. This allows types to optionally be available to the Reader/Writer without it having to explicitly know anything about the type.
+
+For all types supporting this mechanism they *require* that they are added to the `SerialExtraType` enum, and that they embed a static kExtraType field in the type. This solution is not as flexible as perhaps using a string map or something of that sort, but it does make lookup very fast and simple which is likely significant as many types contain the SourceLoc type for example.
+
+## Identifying Types
+
+How a NodeBase derived type identifies itself is not directly compatible with how a SerialRefObject represents itself. The NodeBase derived type uses `ASTNodeType` enum. The SerialRefObject uses a `RefObjectType` enum. Thus to uniquely identify a type we typically actually need two bits of information the `SerialTypeKind` as well as the `SerialSubType`. 
+
+```
+enum class SerialTypeKind : uint8_t
+{
+    Unknown,
+
+    String,             ///< String                         
+    Array,              ///< Array
+
+    NodeBase,           ///< NodeBase derived
+    RefObject,          ///< RefObject derived types
+
+    CountOf,
+};
+```
+
+String and Array are special cases described elsewhere. 
+
+If the `SerialTypeKind` is `NodeBase`, then the `SerialSubType` *is* the ASTNodeType. If the `SerialTypeKind` is `RefObject` then the `SerialSubType` *is* RefObjectType. 
+
+`SerialClasses` holds the information on how to serialize non-field Serial types. For each `SerialTypeKind`/`SerialSubType` it holds a `SerialClass`. The SerialClass holds the size of the type, the amount of fields, and the field information. The fields themselves contain a `SerialFieldType` - this holds the pointers to the functions to convert to and from `native` to `serial` types. 
+
+In order to set up all types in a SerialClass without tying SerialClasses to an implementation the class `SerialClassesUtil` is used to set up Slang serialized types in a `SerialClasses` instance. 
+
+IR Serialization
+================
+
+Currently IR serialization is handled via a separate mechanism to 'generalized' serialization.
+
+This mechanism is *much* simpler than generalized serialization, because by design the IR types are very homogeneous in style. There are a few special cases, but in general an instruction consists of
+
+* Its type
+* A SourceLoc
+* 0 or more operands.
+* 0 or more children. 
+
+Within the IR instructions are pointers to IRInst derived types. As previously discussed serializing pointers directly is generally not a good idea. To work around this the pointers are turned into 32 bit indices. Additionally we know that an instruction can belong to at most one other instruction. 
+
+When serializing out special handling is made for child instructions - their indices are made to be a contiguous range of indices for all instructions that belong to each parent. The indices are ordered into the same order as the children are held in the parent. By using this mechanism it is not necessary to directly save off the indices that belong to a parent, only the range of indices. 
+
+The actual serialization mechanism is similar to the generalized mechanism - referenced objects are saved off in order of their indices. What is different is that the encoding fixes the size of the Inst to `IRSerialData`. This can hold up to two operands; if the instruction has more than two operands then one of the UInt32 is the operand count and the other is an offset to a list of operands. It probably makes sense to alter this in the future to stream the instruction's payload directly. 
+
+IR serialization allows a simple compression mechanism, that works because much of the IR serialized data is UInt32 data, that can use a variable byte encoding.
+
+AST Serialization
+=================
+
+AST serialization uses the generalized serialization mechanism. 
+
+When serializing out an AST module it is typical to want to just serialize out the definitions within that module. Without this, the generalized serializer will crawl over the whole of the AST structure serializing every thing that can be reached - including the whole of the core module.
+
+The filter `ModuleSerialFilter` can be used when writing the AST module, it will replace any references to elements outside of the current module with a `ImportExternalDecl`. This contains a mangled name to the item being referenced in another module. 
+
+When serializing back in, it may be possible to turn these references into the actual element, if the module containing the definition has been loaded. This probably can't work in general though, as if we have two modules that reference items in the other, then it isn't possible to fix up on load. 
+
+A way around this would be to not replace on reading (or only replace items that can be found). Then go through the `ImportExternalDecl` elements doing the lookup, and potentially loading other modules. There are several issues here though 
+
+* On first loading pointers that have been replaced will claim to be a type they are typically *NOT*
+* Once we have determined what `ImportExternalDecl` should be replaced with, how do we replace it?
+
+On the first point, this is perhaps undesirable (on a variety of levels - such as debugging), but isn't as terrible as it could be, as the actual type identification is managed by Slang via the `astTypeNode`. So there is a simple way of identifying what the type actually is.
+
+On the second point - this isn't so simple. If we had an indirection, we could do the replacement quickly and trivially, without having to fix up all the pointers. We probably don't want to add such an indirection into the pointer based system so choices are
+
+* Store where all the pointers are, and fix them up
+* Traverse the hierarchy replacing pointers
+
+Within the current mechanism storing where all the pointers are is not so simple - it would require the setting of any pointer to record where that pointer is stored, and for that to remain the location. Doing so would require setting all pointers to go through some recording mechanism. Pointers held in containers - like the Dictionary - may not be directly available. Moreover, even if they *were*, doing so may break the container's invariants - for example replacing a key's pointer may change its hash.
+
+Traversing the hierarchy would be something akin to the serialization process. It would require special handling for field types to do the replacement. There would need to be special handling for struct value types. 
+
+SourceLoc Serialization
+=======================
+
+SourceLoc serialization presents several problems. Firstly we have two distinct serialization mechanisms that need to use it - IR serialization and generalized serialization. That being the case it cannot be saved directly in either, even though it may be referenced by either. 
+
+To keep things simple for now we build up SourceLoc information for both IR and general serialization via their writers adding their information into a SerialSourceLocWriter. Then we can save this information into a RIFF section, that can be loaded before either general or IR deserialization is used.  
+
+When reading, the SourceLoc information has to be located and deserialized before any AST or IR deserialization. The SourceLoc data can then be turned into a SerialSourceLocReader, which is then either set on the `SerialReader`'s `SerialExtraObjects`, or passed to the `IRSerialReader`.
+
+Riff Container
+==============
+
+[Riff](https://en.wikipedia.org/wiki/Resource_Interchange_File_Format) is used as a mechanism to store binary sections. The format allows for a hierarchy of `chunks` that hold binary data. How the data is interpreted depends on the [FOURCC](https://en.wikipedia.org/wiki/FourCC) associated with each chunk. 
+
+As previously touched on there are multiple different mechanisms used for serialization. IR serialization, generalized serialization, SourceLoc serialization - there are also other uses, such as serializing of entry point information. Riff is used to combine all of these incompatible binary parts together such that they can be stored together.
+
+The handling of these riff containers is held within the `SerialContainerUtil` class. 
+
+C++ Extractor
+=============
+
+The C++ Extractor is the tool `slang-cpp-extractor` that can be used to examine C++ files to extract class definitions and associated fields. The files it generates contain, in the form of macros, information about each class as well as reflected fields. These generated files can then be used to implement serialization without having to explicitly specify fields in C++ source code.
+
+Issues
+======
+
+* No support for forward/backward compatibility. 
+  * Adding fields/classes will typically break compatibility
+* Binary files do not contain data to describe themselves
+  * It is *not* possible to write a stand alone tool that can dump any serialized file - its interpretation depends on the version of Slang it was written from
+* The Riff mechanism used for container usage is somewhat ad-hoc
+* Re-referencing AST nodes from other modules does not happen automatically on deserialization
+* There are several mechanisms used for serialization that are not directly compatible
+
+## C++ extractor issues
+
+* All types (and typedefs) that are serialized must be defined in the same scope - child types don't work correctly 
+* When using value serialization all the members that are serializable must be public
+* The types output in slang fields do not correctly take into account scope (this is a similar issue to the issue above)
diff --git a/external/slang/share/doc/slang/design/stdlib-intrinsics.md b/external/slang/share/doc/slang/design/stdlib-intrinsics.md
new file mode 100644
index 00000000..2ea50cd5
--- /dev/null
+++ b/external/slang/share/doc/slang/design/stdlib-intrinsics.md
@@ -0,0 +1,254 @@
+Core Module Intrinsics
+======================
+
+The following document aims to cover a variety of systems used to add target specific features. They are most extensively used in the slang core module.
+
+**NOTE!** These features should *not* be considered stable! They can be used in regular slang code to add features, but they risk breaking with any Slang version change. Additionally the features implementation can be very particular to what is required for a specific feature set, so might not work as expected in all scenarios.
+
+As these features are in flux, it is quite possible this document is behind the current features available within the Slang code base.
+
+If you want to add support for a feature for a target to Slang, implementing it as a part of the Slang standard modules is typically a good way to progress. Depending on the extension/feature it may not be possible to add support exclusively via changes to the standard module alone. That said most support for target specific extensions and features involve at least some changes to the slang standard modules including the core module, and typically using the mechanisms described here.
+
+## Core Module
+
+The main place these features are used are within the slang core module. This is implemented with a set of slang files within the slang project
+
+* core.meta.slang 
+* hlsl.meta.slang
+* diff.meta.slang
+
+Looking at these files will demonstrate the features in use. 
+
+Most of the intrinsics and attributes have names that indicate that they are not for normal use. This is typically via a `__` prefix.
+
+The `.meta.slang` files look largely like Slang source files, but their contents can also be generated programmatically with C++ code. A section of code can drop into `C++` code if it is preceded by `${{{{`. The C++ section is closed with a closing `}}}}`. This mechanism is typically used to generate different versions of a similar code sequence. Values from the C++ code can be accessed via the `$()`, where the contents of the brackets specifies something that can be calculated from within the C++ code.
+
+As an example, to initialize a Slang variable from a value computed in C++ we could write...
+
+```slang
+
+// Slang code
+${{{{
+// C++ code, calling out to a C++ function getTime, the result is held in variable cppTime
+int cppTime = getTime();
+}}}}
+
+// Back to Slang code, can access the C++ variable previously defined as cppTime. Due to $().
+// The code inside the $() is executed on the C++ side, so can do calculations. In practice it would be easier
+// to just use $(getTime() + 1), but this demonstrates variables are accessible.
+int slangTime = $(cppTime + 1);
+```
+
+# Attributes
+
+## [__readNone]
+
+A `[__readNone]` indicates a function that computes its results strictly based on argument values, without reading or writing through any pointer arguments, or any other state that could be observed by a caller.
+
+## [__NoSideEffect]
+
+Specifies a function declaration has no observable side effects. 
+
+## [__unsafeForceInlineEarly]
+
+Inlines the contained code, but does so at a very early stage. Being earlier allows some kinds of inlining transformations to work, that wouldn't work with regular inlining. It also means it must be used with *care*, because it may produce unexpected results for more complex scenarios.  
+
+## [__NonCopyableType]
+
+Marks a type to be non-copyable, causing the SSA pass to skip turning variables of the type into SSA values.
+
+## [__AlwaysFoldIntoUseSiteAttribute]
+
+A call to the decorated function should always be folded into its use site.
+
+## [KnownBuiltin("name")]
+
+A `[KnownBuiltin("name")]` attribute allows the compiler to identify this declaration during compilation, despite obfuscation or linkage removing optimizations
+
+# Intrinsics
+
+<a id="target-intrinsic"></a>
+## __target_intrinsic(target, expansion)
+
+This is a widely used and somewhat complicated intrinsic. Placed on a declaration it describes how the declaration should be emitted for a target. The complexity is that `expansion` is applied via a variety of rules. `target` is a "target capability", commonly it's just the emit target for the intrinsic, so one of...
+
+* hlsl
+* glsl
+* cuda - CUDA
+* cpp - C++ output (used for exe, shared-library or host-callable)
+
+* spirv - Used for slangs SPIR-V direct mechanism
+
+A function definition can have a `target_intrinsic` *and* a body. In that case, the body will be used for targets where the `target_intrinsic` isn't defined. 
+
+If the intrinsic can be emitted as is, the expansion need not be specified. If only the *name* needs to be changed (params can be passed as is), only the name to be expanded to needs to be specified *without* `()`. In this scenario it is not necessary to specify as a string in quotes, and just the identifier name can be used.
+
+Currently `HLSL` has a special handling in that it is *assumed* if a declaration exists that it can be emitted verbatim to HLSL.  
+
+The target can also be a capability atom. The atoms are listed in "slang-capability-defs.h".
+
+What is perhaps of importance here is that for some features for a specific target can have multiple ways of achieving the same effect - for example "GL_NV_ray_tracing" and "GL_EXT_ray_tracing" are two different ray tracing extensions available for Vulkan through GLSL. The `-profile` option can disambiguate which extension is actually desired, and the capability with that name on the `target_intrinsic` specifies how to implement that feature for that specific extension.
+
+The expansion mechanism is implemented in "slang-intrinsic-expand.cpp" which will be most up to date.
+
+The `expansion` value can be a string or an identifier. If it is an identifier, it will just be emitted as is, replacing the name of the declaration the intrinsic is associated with.
+
+Sections of the `expansion` string that are to be replaced are prefixed by the `$` sigil.
+
+* $0-9 - Indicates the parameter at that index. For a method call $0 is `this`.
+* $T0-9 - The type for the param at the index. If the type is a texture resource derived type, returns the *element* type.
+* $TR - The return type
+* $G0-9 - Replaced by the type/value at that index of specialization
+* $S0-9 - The scalar type of the generic at the index.
+* $p - Used on texturing operations. Produces the combined texture sampler arguments as needed for GLSL.
+* $C - The $C intrinsic is a mechanism to change the name of an invocation depending on if there is a format conversion required between the type associated by the resource and the backing ImageFormat. Currently this is only implemented on CUDA, where there are specialized versions of the RWTexture writes that will do a format conversion.
+* $E - Sometimes accesses need to be scaled. For example in CUDA the x coordinate for surface access is byte addressed. $E will return the byte size of the *backing element*.
+* $c - When doing texture access in GLSL the result may need to be cast. In particular if the underlying texture is 'half' based, GLSL only accesses (read/write) as float. So we need to cast to a half type on output. When storing into a texture it is still the case the value written must be half - but we don't need to do any casting there as half is coerced to float without a problem.
+* $z - If we are calling a D3D texturing operation in the form t.Foo(s, ...), where `t` is a Texture&lt;T&gt;, then this is the step where we try to properly swizzle the output of the equivalent GLSL call into the right shape.
+* $N0-9 - Extract the element count from a vector argument so that we can use it in the constructed expression.
+* $V0-9 - Take an argument of some scalar/vector type and pad it out to a 4-vector with the same element type (this is the inverse of `$z`).
+* $a - We have an operation that needs to lower to either `atomic*` or `imageAtomic*` for GLSL, depending on whether its first operand is a subscript into an array. This `$a` is the first `a` in `atomic`, so we will replace it accordingly.
+* $A - We have an operand that represents the destination of an atomic operation in GLSL, and it should be lowered based on whether it is an ordinary l-value, or an image subscript. In the image subscript case this operand will turn into multiple arguments to the `imageAtomic*` function.
+* $XP - Ray tracing ray payload
+* $XC - Ray tracing callable payload
+* $XH - Ray tracing hit object attribute
+* $P - Type-based prefix as used for CUDA and C++ targets (I8 for int8_t, F32 - float etc)
+
+## __attributeTarget(astClassName)
+
+For an attribute, specifies the AST class (and derived class) the attribute can be applied to.
+
+## __builtin
+
+Identifies the declaration as being "builtin".
+
+## __builtin_requirement(requirementKind)
+
+A modifier that indicates a built-in associated type requirement (e.g., `Differential`). The requirement is one of `BuiltinRequirementKind`.
+
+The requirement value can just be specified via the `$()` mechanism. 
+
+## __builtin_type(tag)
+
+Specifies a builtin type - the integer value of one of the enumeration BaseType.
+
+## __magic_type(clsName, tag)
+
+Used before a type declaration. The clsName is the name of the class that is used to represent the type in the AST in Slang *C++* code. The tag is an optional integer value that is in addition and meaningful in the context of the class type.
+
+## __intrinsic_type(op)
+
+Used to specify the IR opcode associated with a type. The IR opcode is listed as something like `$(kIROp_HLSLByteAddressBufferType)`, which will expand to the integer value of the opcode (because the opcode value is an enum value that is visible from C++). It is possible to just write the opcode number, but that is generally inadvisable as the ids for ops are not stable. If a code change in Slang C++ adds or removes an opcode the number is likely to be incorrect.
+
+As an example from the core module
+
+```slang
+__magic_type(HLSLByteAddressBufferType)
+__intrinsic_type($(kIROp_HLSLByteAddressBufferType))
+struct ByteAddressBuffer
+{
+    // ...
+};
+```
+
+# General
+
+## __generic<>
+
+Is an alternate syntax for specifying a declaration that is generic. The more commonly used form is to list the generic parameters in `<>` after the name of the declaration.
+
+## attribute_syntax
+
+Attribute syntax provides a mechanism to introduce an attribute type in Slang.
+
+Right now the basic form is:
+
+```
+attribute_syntax [name(paramName: paramType, ...)] : syntaxClass;
+```
+
+There can be 0 or more params associated with the attribute; if there are none, the () are not needed.
+
+* `name` gives the name of the attribute to define.
+* `paramName` is the name of a param that is specified when the attribute is used
+* `paramType` is the type of the value associated with the param 
+* `syntaxClass` is the name of an AST node class that we expect this attribute to create when checked.
+
+For example 
+
+```
+__attributeTarget(FuncDecl)
+attribute_syntax [CudaDeviceExport] : CudaDeviceExportAttribute;
+```
+
+Defines an attribute `CudaDeviceExport` which can only be applied to FuncDecl or derived AST types. Once semantically checked will be turned into a `CudaDeviceExportAttribute` attribute in the AST.
+
+With a parameter
+
+```
+__attributeTarget(InterfaceDecl)
+attribute_syntax [anyValueSize(size:int)] : AnyValueSizeAttribute;
+```
+
+Defines an attribute `anyValueSize` that can be applied to `InterfaceDecl` and derived types. It takes a single parameter called `size` of `int` type.
+
+## Ref<T>
+
+Allows returning or passing a value "by reference".
+
+# GLSL/Vulkan specific
+
+## __glsl_version(version)
+
+Used to specify the GLSL version number that is required for the subsequent declaration. When Slang emits GLSL source, the version at the start of the file, will be the largest version seen that emitted code uses.
+
+For example
+
+```slang
+__glsl_version(430)
+```
+
+## __glsl_extension
+
+Specifies the GLSL extension that is required for the declaration to work. A declaration that has the intrinsic, when output to GLSL, will additionally add `#extension` to the GLSL or SPIR-V output.  
+
+Multiple extensions can be applied to a declaration if that is applicable, if there are multiple ways of implementing that can be emitted in the same manner (see the section around [target](#target-intrinsic) for more details).
+
+## __spirv_version
+
+When a declaration with this intrinsic is used for a SPIR-V target, the highest value seen will be taken to be the SPIR-V version required. For compilation through GLSLANG, the value is passed down to GLSLANG specifying the SPIR-V version being targeted. 
+
+Example 
+
+```
+__spirv_version(1.3)
+```
+
+## vk::spirv_instruction
+
+Provides a way to use a limited subset of the `GL_EXT_spirv_intrinsics` extension.  
+
+```
+vk::spirv_instruction(op, set)
+```
+
+Op is the integer *value* for the op. The `set` is an optional string which specifies the instruction set the op is associated with. 
+For example
+
+```
+__specialized_for_target(glsl)
+[[vk::spirv_instruction(1, "NonSemantic.DebugBreak")]]
+void debugBreak();
+``` 
+
+# CUDA specific 
+
+## __cuda_sm_version
+
+When declaration is used with this intrinsic for a CUDA target, the highest shader model seen will be passed down to the downstream CUDA compile (NVRTC).
+
+# NVAPI 
+
+## [__requiresNVAPI]
+
+If declaration is reached during a compilation for an applicable target (D3D11/12), will indicate that [NVAPI support](../nvapi-support.md) is required for declaration to work. 
diff --git a/external/slang/docs/doc-system.md b/external/slang/share/doc/slang/doc-system.md
similarity index 96%
rename from external/slang/docs/doc-system.md
rename to external/slang/share/doc/slang/doc-system.md
index 56635e4e..d552bf1e 100644
--- a/external/slang/docs/doc-system.md
+++ b/external/slang/share/doc/slang/doc-system.md
@@ -85,10 +85,10 @@ Currently the Slang doc system does not support any of the 'advanced' doxygen do
 
 Currently the only documentation style supported is a single file 'markdown' output. Future versions will support splitting into multiple files and linking between them. Also future versions may also support other documentation formats/standards.
 
-It is possible to generate documentation for Slangs internal `stdlib`. This can be achieved with `slangc` via
+It is possible to generate documentation for the slang core module. This can be achieved with `slangc` via
 
 ```
-slangc -doc -compile-stdlib
+slangc -doc -compile-core-module
 ```
 
 The documentation will be written to a file `stdlib-doc.md`.
diff --git a/external/slang/docs/faq.md b/external/slang/share/doc/slang/faq.md
similarity index 97%
rename from external/slang/docs/faq.md
rename to external/slang/share/doc/slang/faq.md
index 5f021d7d..824d9967 100644
--- a/external/slang/docs/faq.md
+++ b/external/slang/share/doc/slang/faq.md
@@ -34,7 +34,7 @@ The implementation of Slang has so far focused heavily on the needs of Falcor.
 
 ### Won't we all just be using C/C++ for shaders soon?
 
-The great thing about both Vulkan and D3D12 moving to publicly-documented binary intermediate langugaes (SPIR-V and DXIL, respectively) is that there is plenty of room for language innovation on top of these interfaces.
+The great thing about both Vulkan and D3D12 moving to publicly-documented binary intermediate languages (SPIR-V and DXIL, respectively) is that there is plenty of room for language innovation on top of these interfaces.
 
 Having support for writing GPU shaders in a reasonably-complete C/C++ language would be great.
 We are supportive of efforts in the "C++ for shaders" direction.
diff --git a/external/slang/share/doc/slang/gfx-user-guide/01-getting-started.md b/external/slang/share/doc/slang/gfx-user-guide/01-getting-started.md
new file mode 100644
index 00000000..ae270450
--- /dev/null
+++ b/external/slang/share/doc/slang/gfx-user-guide/01-getting-started.md
@@ -0,0 +1,264 @@
+---
+layout: user-guide
+---
+
+Getting Started with Slang Graphics Layer
+============================================
+
+[//]: # (ShortTitle: Getting Started)
+
+In this article, we provide instructions on installing the graphics layer into your application, and demonstrate the basic use of the graphics layer via a simple compute shader example. We will use the same [hello-world.slang](https://github.com/shader-slang/slang/blob/master/examples/hello-world/hello-world.slang) shader from the `hello-world` example in the [Slang getting started tutorial](../user-guide/01-get-started.html).
+
+Installation
+------------------
+
+### Obtain Release Package
+
+The Slang graphics library is implemented in `gfx.dll` (`libgfx.so` on Unix systems). Since Slang is tightly integrated into the graphics layer, you need to include both `slang.dll` and `gfx.dll` in your application. Official Slang releases provide prebuilt binaries for both libraries as well as the header files to use them. If you prefer to build the libraries yourself, please follow the [build instructions](../building).
+
+### Install Header Files
+Once you have built or obtained a Slang release, make the following header files from the release package accessible to your application:
+- `slang-gfx.h`
+- `slang.h`
+- `slang-com-ptr.h`
+- `slang-com-helper.h`
+
+### Linking the Library
+On Windows (with `msvc`), make sure that `gfx.lib` is provided as linker input via the `Linker->Input->Additional Dependencies` project configuration. On Unix systems, make sure to pass `-lgfx` when compiling your application.
+
+Creating a GPU Device
+---------------------------
+
+To start using the graphics layer, create an `IDevice` object by calling `gfxCreateDevice`. The `IDevice` interface is the main entry-point to interact with the graphics layer. It represents a GPU device context where all interactions with the GPU take place.
+
+```cpp
+#include "slang-gfx.h"
+
+using namespace gfx;
+
+IDevice* gDevice = nullptr;
+
+void initGfx()
+{
+    IDevice::Desc deviceDesc = {};
+    gfxCreateDevice(&deviceDesc, &gDevice);
+}
+```
+
+The `IDevice::Desc` struct passed to `gfxCreateDevice` defines many configurations on how a device shall be created. Most notably, the `deviceType` field specifies what underlying graphics API to use. By default, `gfxCreateDevice` will attempt to use the best API available on current platform. On Windows, the layer will prefer to use `D3D12` but will also try to use `Vulkan`, `D3D11`, `OpenGL` in order, in case the former API isn't available. On Unix systems, it will always default to `Vulkan` since this is the only API that supports full Graphics capabilities. A user can always specify the `deviceType` field to force the layer to use a specific API. If the device creation succeeds, `gfxCreateDevice` will return `SLANG_OK(0)`.
+
+Similar to the Slang API, objects created by the graphics layer also conform to the COM standard. The user is responsible for calling the `release` method on every object returned to the user by the layer to prevent memory leaks.
+
+Enabling the Debug Layer
+--------------------------
+
+The Slang Graphics Layer provides a debug layer that can be enabled to perform additional validations to ensure correctness. To enable the debug layer, simply call `gfxEnableDebugLayer` before calling `gfxCreateDevice`.
+
+To receive diagnostic messages, you need to create a class that implements the `IDebugCallback` interface, and call `gfxSetDebugCallback` to provide the callback instance to the graphics layer. For example:
+
+```cpp
+struct MyDebugCallback : public IDebugCallback
+{
+    virtual SLANG_NO_THROW void SLANG_MCALL handleMessage(
+        DebugMessageType type,
+        DebugMessageSource source,
+        const char* message) override
+    {
+        printf("%s\n", message);
+    }
+};
+
+MyDebugCallback gCallback;
+void initGfx()
+{
+    gfxEnableDebugLayer();
+    gfxSetDebugCallback(&gCallback);
+
+    IDevice::Desc deviceDesc = {};
+    gfxCreateDevice(&deviceDesc, &gDevice);
+}
+```
+
+
+Creating a Command Queue
+------------------------------
+A command queue is where the GPU device takes commands from the application to execute. To create a command queue, call `IDevice::createCommandQueue`.
+```cpp
+ICommandQueue* gQueue = nullptr;
+
+ICommandQueue::Desc queueDesc = {ICommandQueue::QueueType::Graphics};
+device->createCommandQueue(queueDesc, &gQueue);
+```
+
+Allocating a Command Buffer
+------------------------------
+A command buffer is treated as a _transient_ resource by the graphics layer. A transient resource is required by the GPU during execution of a task, and is no longer needed when the execution has completed. The Slang graphics layer provides an `ITransientResourceHeap` object to efficiently manage the life cycle of transient resources. In order to allocate a command buffer, we need to create an `ITransientResourceHeap` object first by calling `IDevice::createTransientResourceHeap`.
+
+```cpp
+ITransientResourceHeap* gTransientHeap;
+
+ITransientResourceHeap::Desc transientHeapDesc = {};
+transientHeapDesc.constantBufferSize = 4096;
+device->createTransientResourceHeap(transientHeapDesc, &gTransientHeap);
+```
+
+With a `TransientResourceHeap`, we can call `createCommandBuffer` method to allocate a command buffer:
+
+```cpp
+ICommandBuffer* commandBuffer;
+gTransientHeap->createCommandBuffer(&commandBuffer);
+```
+
+A user should regularly call `ITransientResourceHeap::synchronizeAndReset` to recycle all previously allocated transient resources. A standard practice is to create two `TransientResourceHeap`s in a double-buffered renderer, and alternate the transient heap on each frame to allocate command buffers and other transient resources. With this setup, the application can call `synchronizeAndReset` at start of each frame on the corresponding transient resource heap to make sure all transient resources are timely recycled.
+
+Creating Buffer Resource
+------------------------------
+We need to create the buffer resources used by our `hello-world` shader as input and output. This can be done via the `IDevice::createBufferResource` method. When creating a resource, the user must specify a resource state that the resource will be in by default, as well as all allowed resource states the resource can be in. Resource states in the graphics layer follow the same model as resource states in D3D12, and the user can also assume the same automatic resource promotion/demotion behavior as in D3D12.
+
+```cpp
+const int numberCount = 4;
+float initialData[] = {0.0f, 1.0f, 2.0f, 3.0f};
+IBufferResource::Desc bufferDesc = {};
+bufferDesc.sizeInBytes = numberCount * sizeof(float);
+bufferDesc.format = Format::Unknown;
+bufferDesc.elementSize = sizeof(float);
+bufferDesc.defaultState = ResourceState::UnorderedAccess;
+bufferDesc.allowedStates = ResourceStateSet(ResourceState::UnorderedAccess,
+                                            ResourceState::ShaderResource);
+IBufferResource* inputBuffer0;
+SLANG_RETURN_ON_FAIL(device->createBufferResource(
+    bufferDesc,
+    (void*)initialData,
+    &inputBuffer0));
+```
+
+
+Creating a Pipeline State
+---------------------------
+
+A pipeline state object encapsulates the shader program to execute on the GPU device, as well as other fixed-function states for graphics rendering. In this example, we will be compiling and running a simple compute shader written in Slang. To do that we need to create a compute pipeline state from a Slang `IComponentType`. We refer the reader to the [Slang getting started tutorial](../user-guide/01-get-started.html) on how to create a Slang `IComponentType` from a shader file. The following source creates a Graphics layer `IPipelineState` object from a shader module represented by a `slang::IComponentType` object:
+
+```cpp
+void createComputePipelineFromShader(
+    IComponentType* slangProgram, 
+    IPipelineState*& outPipelineState)
+{
+    // The `IComponentType` parameter that represents the compute
+    // kernel, we can use it to create a `IShaderProgram` object in the graphics
+    // layer.
+    IShaderProgram* shaderProgram = nullptr;
+    IShaderProgram::Desc programDesc = {};
+    programDesc.pipelineType = PipelineType::Compute;
+    programDesc.slangProgram = slangProgram;
+    gDevice->createShaderProgram(programDesc, &shaderProgram);
+    
+    // Create a compute pipeline state from `shaderProgram`.
+    ComputePipelineStateDesc pipelineDesc = {};
+    pipelineDesc.program = shaderProgram;
+    gDevice->createComputePipelineState(pipelineDesc, &outPipelineState);
+
+    // Since we no longer need to use `shaderProgram` after creating
+    // a pipeline state, we should release it to prevent memory leaks.
+    shaderProgram->release();
+}
+```
+
+Recording Commands to Run a Compute Shader
+------------------------------------
+
+[//]: # (ShortTitle: Recording Commands)
+
+Now that we have created all the resources and allocated a command buffer, we can start recording commands to
+set the compute pipeline state, bind shader parameters, and dispatch a kernel launch.
+
+Since we are only using compute commands, we begin the recording by calling `ICommandBuffer::encodeComputeCommands`. This method returns a transient `IComputeCommandEncoder` object for accepting actual compute commands.
+
+```cpp
+IComputeCommandEncoder* encoder = commandBuffer->encodeComputeCommands();
+```
+
+The first command is to bind the pipeline state we created earlier:
+
+```cpp
+IShaderObject* rootObject = encoder->bindPipeline(pipelineState);
+```
+
+Binding a pipeline state yields a transient `IShaderObject` object. We can use the `IShaderObject` instance to bind shader parameters. For the `hello-world` shader, we need to bind three parameters: `buffer0`, `buffer1` and `result`.
+
+```cpp
+// Create a resource view for buffer0.
+IBufferView* buffer0View;
+{
+    IResourceView::Desc viewDesc = {};
+    viewDesc.type = IResourceView::Type::ShaderResource;
+    viewDesc.format = Format::Unknown;
+    SLANG_RETURN_ON_FAIL(device->createBufferView(inputBuffer0, viewDesc, &buffer0View));
+}
+// Bind the resource view to shader.
+rootObject->setResource(ShaderOffset{0,0,0}, buffer0View);
+
+// Create a resource view for buffer1.
+IBufferView* buffer1View;
+{
+    IResourceView::Desc viewDesc = {};
+    viewDesc.type = IResourceView::Type::ShaderResource;
+    viewDesc.format = Format::Unknown;
+    SLANG_RETURN_ON_FAIL(device->createBufferView(inputBuffer1, viewDesc, &buffer1View));
+}
+// Bind the resource view to shader.
+rootObject->setResource(ShaderOffset{0,1,0}, buffer1View);
+
+// Create a resource view for resultBuffer.
+IBufferView* resultView;
+{
+    IResourceView::Desc viewDesc = {};
+    viewDesc.type = IResourceView::Type::UnorderedAccess;
+    viewDesc.format = Format::Unknown;
+    SLANG_RETURN_ON_FAIL(device->createBufferView(resultBuffer, viewDesc, &resultView));
+}
+rootObject->setResource(ShaderOffset{0,2,0}, resultView);
+```
+
+> #### Note
+> Since `rootObject` is a transient object returned by the command encoder, it is automatically released
+> with the command encoder. Calling `release` on `rootObject` is OK but not needed.
+
+After binding all shader parameters, we can now dispatch the kernel:
+
+```cpp
+encoder->dispatchCompute(1, 1, 1);
+```
+
+> #### Note
+> Command encoders are transient objects managed by a command buffer; they are automatically released
+> with the command buffer. Calling `release` on `encoder` is OK but not needed.
+
+When we are done recording commands, we need to close the command encoder and the command buffer.
+
+```cpp
+encoder->endEncoding();
+commandBuffer->close();
+```
+
+Now we are ready to submit the command buffer to the command queue, and wait for the GPU execution to finish.
+```cpp
+gQueue->executeCommandBuffer(commandBuffer);
+gQueue->wait();
+```
+
+Cleaning Up
+----------------
+
+At the end of our example, we need to make sure all created objects are released by calling the `release` method:
+
+```cpp
+commandBuffer->release();
+gQueue->release();
+gTransientHeap->release();
+inputBuffer0->release();
+buffer0View->release();
+...
+gDevice->release();
+```
+
+The order of calls to `release` does not matter, as long as all objects are released by the user.
diff --git a/external/slang/share/doc/slang/gfx-user-guide/index.md b/external/slang/share/doc/slang/gfx-user-guide/index.md
new file mode 100644
index 00000000..4671a1a3
--- /dev/null
+++ b/external/slang/share/doc/slang/gfx-user-guide/index.md
@@ -0,0 +1,25 @@
+---
+layout: user-guide
+---
+
+Slang Graphics Layer
+=============
+
+The Slang Graphics Layer is an abstraction library of graphics APIs to support cross-platform applications that utilize GPU graphics/compute capabilities. The Slang Graphics Layer tightly integrates the Slang shading language to provide the most complete cross-platform GPU application development experience. The Slang language and compilation API is designed to work best when the application assumes several best practices in terms of shader specialization and parameter binding. The Slang Graphics Layer is following exactly the same best practices supported by Slang's compilation model. Outside of shader-related areas, the graphics layer's interface is designed to closely follow the modern graphics API models in Direct3D 12, Vulkan and Metal, such that the layer's only purpose is to abstract the differences between these underlying APIs instead of providing a higher-level abstraction that simplifies the interface. This design philosophy allows users to benefit from the ideas in the Slang shading language without giving up precise control on other aspects of the graphics API.
+
+The current support status of operating system and graphics APIs is shown in the following matrix.
+
+|               | Windows            | Linux              |
+| :------------ | :----------------: | :----------------: |
+| Direct3D 12   | Yes                | No                 |
+| Direct3D 11   | Yes                | No                 |
+| Vulkan        | Yes                | Yes                |
+| OpenGL        | Yes                | No                 |
+| CPU emulation | Yes (Compute Only) | Yes (Compute Only) |
+| CUDA          | Yes (Compute Only) | Yes (Compute Only) |
+
+
+> #### Note
+> The graphics layer is still under active development and we intend to add more platforms and APIs in the future.
+
+In this documentation, we will walk through various parts of the library and demonstrate how it can be used in your application.
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/gfx-user-guide/nav.html b/external/slang/share/doc/slang/gfx-user-guide/nav.html
new file mode 100644
index 00000000..97757326
--- /dev/null
+++ b/external/slang/share/doc/slang/gfx-user-guide/nav.html
@@ -0,0 +1,5 @@
+<nav>
+    <li><a href="../../">Docs</a></li>
+    <li><a href="index.html">Slang Graphics Layer</a></li>
+
+</nav>
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/gfx-user-guide/toc.html b/external/slang/share/doc/slang/gfx-user-guide/toc.html
new file mode 100644
index 00000000..be64cca1
--- /dev/null
+++ b/external/slang/share/doc/slang/gfx-user-guide/toc.html
@@ -0,0 +1,18 @@
+<ul class="toc_root_list"><li data-link="index"><span>Slang Graphics Layer</span>
+<ul class="toc_list">
+<li data-link="01-getting-started"><span>Getting Started</span>
+<ul class="toc_list">
+<li data-link="01-getting-started#installation"><span>Installation</span></li>
+<li data-link="01-getting-started#creating-a-gpu-device"><span>Creating a GPU Device</span></li>
+<li data-link="01-getting-started#enabling-the-debug-layer"><span>Enabling the Debug Layer</span></li>
+<li data-link="01-getting-started#creating-a-command-queue"><span>Creating a Command Queue</span></li>
+<li data-link="01-getting-started#allocating-a-command-buffer"><span>Allocating a Command Buffer</span></li>
+<li data-link="01-getting-started#creating-buffer-resource"><span>Creating Buffer Resource</span></li>
+<li data-link="01-getting-started#creating-a-pipeline-state"><span>Creating a Pipeline State</span></li>
+<li data-link="01-getting-started#recording-commands-to-run-a-compute-shader"><span>Recording Commands</span></li>
+<li data-link="01-getting-started#cleaning-up"><span>Cleaning Up</span></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/gfx-user-guide/unsupported-formats.md b/external/slang/share/doc/slang/gfx-user-guide/unsupported-formats.md
new file mode 100644
index 00000000..f93567a9
--- /dev/null
+++ b/external/slang/share/doc/slang/gfx-user-guide/unsupported-formats.md
@@ -0,0 +1,266 @@
+Unsupported Formats
+======================
+
+GFX currently does not support the following listed D3D and Vulkan formats.
+With the exception of `D24_UNORM_S8_UINT`, these formats have been omitted as
+their counterpart API does not have a corresponding format. `D24_UNORM_S8_UINT`
+has been omitted as it is only supported by Nvidia.
+
+- `DXGI_FORMAT_R32G8X24_TYPELESS`
+- `DXGI_FORMAT_D32_FLOAT_S8X24_UINT`
+- `DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS`
+- `DXGI_FORMAT_X32_TYPELESS_G8X24_UINT`
+- `DXGI_FORMAT_R24G8_TYPELESS`
+- `DXGI_FORMAT_D24_UNORM_S8_UINT`
+- `DXGI_FORMAT_R24_UNORM_X8_TYPELESS`
+- `DXGI_FORMAT_X24_TYPELESS_G8_UINT`
+- `DXGI_FORMAT_A8_UNORM`
+- `DXGI_FORMAT_R1_UNORM`
+- `DXGI_FORMAT_R8G8_B8G8_UNORM`
+- `DXGI_FORMAT_G8R8_G8B8_UNORM`
+- `DXGI_FORMAT_BC1_TYPELESS`
+- `DXGI_FORMAT_BC2_TYPELESS`
+- `DXGI_FORMAT_BC3_TYPELESS`
+- `DXGI_FORMAT_BC4_TYPELESS`
+- `DXGI_FORMAT_BC5_TYPELESS`
+- `DXGI_FORMAT_B8G8R8X8_UNORM`
+- `DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM`
+- `DXGI_FORMAT_B8G8R8X8_TYPELESS`
+- `DXGI_FORMAT_B8G8R8X8_UNORM_SRGB`
+- `DXGI_FORMAT_BC6H_TYPELESS`
+- `DXGI_FORMAT_BC7_TYPELESS`
+- `DXGI_FORMAT_AYUV`
+- `DXGI_FORMAT_Y410`
+- `DXGI_FORMAT_Y416`
+- `DXGI_FORMAT_NV12`
+- `DXGI_FORMAT_P010`
+- `DXGI_FORMAT_P016`
+- `DXGI_FORMAT_420_OPAQUE`
+- `DXGI_FORMAT_YUY2`
+- `DXGI_FORMAT_Y210`
+- `DXGI_FORMAT_Y216`
+- `DXGI_FORMAT_NV11`
+- `DXGI_FORMAT_AI44`
+- `DXGI_FORMAT_IA44`
+- `DXGI_FORMAT_P8`
+- `DXGI_FORMAT_A8P8`
+- `DXGI_FORMAT_P208`
+- `DXGI_FORMAT_V208`
+- `DXGI_FORMAT_V408`
+- `DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE`
+- `DXGI_FORMAT_SAMPLER_FEEDBACK_MIP_REGION_USED_OPAQUE`
+- `VK_FORMAT_R4G4_UNORM_PACK8`
+- `VK_FORMAT_R4G4B4A4_UNORM_PACK16`
+- `VK_FORMAT_B4G4R4A4_UNORM_PACK16`
+- `VK_FORMAT_B5G6R5_UNORM_PACK16`
+- `VK_FORMAT_R5G5B5A1_UNORM_PACK16`
+- `VK_FORMAT_B5G5R5A1_UNORM_PACK16`
+- `VK_FORMAT_R8_USCALED`
+- `VK_FORMAT_R8_SSCALED`
+- `VK_FORMAT_R8_SRGB`
+- `VK_FORMAT_R8G8_USCALED`
+- `VK_FORMAT_R8G8_SSCALED`
+- `VK_FORMAT_R8G8_SRGB`
+- `VK_FORMAT_R8G8B8_UNORM`
+- `VK_FORMAT_R8G8B8_SNORM`
+- `VK_FORMAT_R8G8B8_USCALED`
+- `VK_FORMAT_R8G8B8_SSCALED`
+- `VK_FORMAT_R8G8B8_UINT`
+- `VK_FORMAT_R8G8B8_SINT`
+- `VK_FORMAT_R8G8B8_SRGB`
+- `VK_FORMAT_B8G8R8_UNORM`
+- `VK_FORMAT_B8G8R8_SNORM`
+- `VK_FORMAT_B8G8R8_USCALED`
+- `VK_FORMAT_B8G8R8_SSCALED`
+- `VK_FORMAT_B8G8R8_UINT`
+- `VK_FORMAT_B8G8R8_SINT`
+- `VK_FORMAT_B8G8R8_SRGB`
+- `VK_FORMAT_R8G8B8A8_USCALED`
+- `VK_FORMAT_R8G8B8A8_SSCALED`
+- `VK_FORMAT_B8G8R8A8_SNORM`
+- `VK_FORMAT_B8G8R8A8_USCALED`
+- `VK_FORMAT_B8G8R8A8_SSCALED`
+- `VK_FORMAT_B8G8R8A8_UINT`
+- `VK_FORMAT_B8G8R8A8_SINT`
+- `VK_FORMAT_A8B8G8R8_UNORM_PACK32`
+- `VK_FORMAT_A8B8G8R8_SNORM_PACK32`
+- `VK_FORMAT_A8B8G8R8_USCALED_PACK32`
+- `VK_FORMAT_A8B8G8R8_SSCALED_PACK32`
+- `VK_FORMAT_A8B8G8R8_UINT_PACK32`
+- `VK_FORMAT_A8B8G8R8_SINT_PACK32`
+- `VK_FORMAT_A8B8G8R8_SRGB_PACK32`
+- `VK_FORMAT_A2R10G10B10_UNORM_PACK32`
+- `VK_FORMAT_A2R10G10B10_SNORM_PACK32`
+- `VK_FORMAT_A2R10G10B10_USCALED_PACK32`
+- `VK_FORMAT_A2R10G10B10_SSCALED_PACK32`
+- `VK_FORMAT_A2R10G10B10_UINT_PACK32`
+- `VK_FORMAT_A2R10G10B10_SINT_PACK32`
+- `VK_FORMAT_A2B10G10R10_SNORM_PACK32`
+- `VK_FORMAT_A2B10G10R10_USCALED_PACK32`
+- `VK_FORMAT_A2B10G10R10_SSCALED_PACK32`
+- `VK_FORMAT_A2B10G10R10_SINT_PACK32`
+- `VK_FORMAT_R16_USCALED`
+- `VK_FORMAT_R16_SSCALED`
+- `VK_FORMAT_R16G16_USCALED`
+- `VK_FORMAT_R16G16_SSCALED`
+- `VK_FORMAT_R16G16B16_UNORM`
+- `VK_FORMAT_R16G16B16_SNORM`
+- `VK_FORMAT_R16G16B16_USCALED`
+- `VK_FORMAT_R16G16B16_SSCALED`
+- `VK_FORMAT_R16G16B16_UINT`
+- `VK_FORMAT_R16G16B16_SINT`
+- `VK_FORMAT_R16G16B16_SFLOAT`
+- `VK_FORMAT_R16G16B16A16_USCALED`
+- `VK_FORMAT_R16G16B16A16_SSCALED`
+- `VK_FORMAT_R64_UINT`
+- `VK_FORMAT_R64_SINT`
+- `VK_FORMAT_R64_SFLOAT`
+- `VK_FORMAT_R64G64_UINT`
+- `VK_FORMAT_R64G64_SINT`
+- `VK_FORMAT_R64G64_SFLOAT`
+- `VK_FORMAT_R64G64B64_UINT`
+- `VK_FORMAT_R64G64B64_SINT`
+- `VK_FORMAT_R64G64B64_SFLOAT`
+- `VK_FORMAT_R64G64B64A64_UINT`
+- `VK_FORMAT_R64G64B64A64_SINT`
+- `VK_FORMAT_R64G64B64A64_SFLOAT`
+- `VK_FORMAT_X8_D24_UNORM_PACK32`
+- `VK_FORMAT_S8_UINT`
+- `VK_FORMAT_D16_UNORM_S8_UINT`
+- `VK_FORMAT_D24_UNORM_S8_UINT`
+- `VK_FORMAT_D32_SFLOAT_S8_UINT`
+- `VK_FORMAT_BC1_RGB_UNORM_BLOCK`
+- `VK_FORMAT_BC1_RGB_SRGB_BLOCK`
+- `VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK`
+- `VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK`
+- `VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK`
+- `VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK`
+- `VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK`
+- `VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK`
+- `VK_FORMAT_EAC_R11_UNORM_BLOCK`
+- `VK_FORMAT_EAC_R11_SNORM_BLOCK`
+- `VK_FORMAT_EAC_R11G11_UNORM_BLOCK`
+- `VK_FORMAT_EAC_R11G11_SNORM_BLOCK`
+- `VK_FORMAT_ASTC_4x4_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_4x4_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_5x4_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_5x4_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_5x5_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_5x5_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_6x5_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_6x5_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_6x6_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_6x6_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_8x5_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_8x5_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_8x6_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_8x6_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_8x8_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_8x8_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_10x5_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_10x5_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_10x6_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_10x6_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_10x8_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_10x8_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_10x10_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_10x10_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_12x10_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_12x10_SRGB_BLOCK`
+- `VK_FORMAT_ASTC_12x12_UNORM_BLOCK`
+- `VK_FORMAT_ASTC_12x12_SRGB_BLOCK`
+- `VK_FORMAT_G8B8G8R8_422_UNORM`
+- `VK_FORMAT_B8G8R8G8_422_UNORM`
+- `VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM`
+- `VK_FORMAT_G8_B8R8_2PLANE_420_UNORM`
+- `VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM`
+- `VK_FORMAT_G8_B8R8_2PLANE_422_UNORM`
+- `VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM`
+- `VK_FORMAT_R10X6_UNORM_PACK16`
+- `VK_FORMAT_R10X6G10X6_UNORM_2PACK16`
+- `VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16`
+- `VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16`
+- `VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16`
+- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16`
+- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16`
+- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16`
+- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16`
+- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16`
+- `VK_FORMAT_R12X4_UNORM_PACK16`
+- `VK_FORMAT_R12X4G12X4_UNORM_2PACK16`
+- `VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16`
+- `VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16`
+- `VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16`
+- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16`
+- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16`
+- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16`
+- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16`
+- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16`
+- `VK_FORMAT_G16B16G16R16_422_UNORM`
+- `VK_FORMAT_B16G16R16G16_422_UNORM`
+- `VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM`
+- `VK_FORMAT_G16_B16R16_2PLANE_420_UNORM`
+- `VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM`
+- `VK_FORMAT_G16_B16R16_2PLANE_422_UNORM`
+- `VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM`
+- `VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG`
+- `VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG`
+- `VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG`
+- `VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG`
+- `VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG`
+- `VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG`
+- `VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG`
+- `VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG`
+- `VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT`
+- `VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT`
+- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT`
+- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT`
+- `VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT`
+- `VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT`
+- `VK_FORMAT_G8B8G8R8_422_UNORM_KHR`
+- `VK_FORMAT_B8G8R8G8_422_UNORM_KHR`
+- `VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM_KHR`
+- `VK_FORMAT_G8_B8R8_2PLANE_420_UNORM_KHR`
+- `VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM_KHR`
+- `VK_FORMAT_G8_B8R8_2PLANE_422_UNORM_KHR`
+- `VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM_KHR`
+- `VK_FORMAT_R10X6_UNORM_PACK16_KHR`
+- `VK_FORMAT_R10X6G10X6_UNORM_2PACK16_KHR`
+- `VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16_KHR`
+- `VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16_KHR`
+- `VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16_KHR`
+- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16_KHR`
+- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16_KHR`
+- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16_KHR`
+- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16_KHR`
+- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16_KHR`
+- `VK_FORMAT_R12X4_UNORM_PACK16_KHR`
+- `VK_FORMAT_R12X4G12X4_UNORM_2PACK16_KHR`
+- `VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16_KHR`
+- `VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16_KHR`
+- `VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16_KHR`
+- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16_KHR`
+- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16_KHR`
+- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16_KHR`
+- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16_KHR`
+- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16_KHR`
+- `VK_FORMAT_G16B16G16R16_422_UNORM_KHR`
+- `VK_FORMAT_B16G16R16G16_422_UNORM_KHR`
+- `VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM_KHR`
+- `VK_FORMAT_G16_B16R16_2PLANE_420_UNORM_KHR`
+- `VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM_KHR`
+- `VK_FORMAT_G16_B16R16_2PLANE_422_UNORM_KHR`
+- `VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM_KHR`
diff --git a/external/slang/share/doc/slang/gpu-feature/derivatives-in-compute/derivatives-in-compute.md b/external/slang/share/doc/slang/gpu-feature/derivatives-in-compute/derivatives-in-compute.md
new file mode 100644
index 00000000..038ea148
--- /dev/null
+++ b/external/slang/share/doc/slang/gpu-feature/derivatives-in-compute/derivatives-in-compute.md
@@ -0,0 +1,9 @@
+### Derivatives In Compute 
+An entry point may be decorated with `[DerivativeGroupQuad]` or `[DerivativeGroupLinear]` to specify how to use derivatives in compute shaders.
+
+GLSL syntax may also be used, but is not recommended (`derivative_group_quadsNV`/`derivative_group_linearNV`).
+
+Targets:
+* **_SPIRV:_** Enables `DerivativeGroupQuadsNV` or `DerivativeGroupLinearNV`.
+* **_GLSL:_** Enables `derivative_group_quadsNV` or `derivative_group_linearNV`.
+* **_HLSL:_** Does nothing. `sm_6_6` is required to use derivatives in compute shaders. HLSL uses an equivalent of `DerivativeGroupQuad`.
diff --git a/external/slang/share/doc/slang/gpu-feature/texture/footprint-queries.md b/external/slang/share/doc/slang/gpu-feature/texture/footprint-queries.md
new file mode 100644
index 00000000..6e76414c
--- /dev/null
+++ b/external/slang/share/doc/slang/gpu-feature/texture/footprint-queries.md
@@ -0,0 +1,205 @@
+Texture Footprint Queries
+=========================
+
+Slang supports querying the *footprint* of a texture sampling operation: the texels that would be accessed when performing that operation.
+This feature is supported on Vulkan via the `GL_NV_shader_texture_footprint` extension, and on D3D12 via the `NvFootprint*` functions exposed by NVAPI.
+
+# Background
+
+There are many GPU rendering techniques that involve generating a texture (e.g., by rendering to it) and then sampling from that texture in a 3D rendering pass, such that it is difficult to predict *a priori* which parts of the texture will be accessed, or not.
+As one example, consider rendering a shadow map that will be accessed when shading a g-buffer.
+Depending on the geometry that was rendered into the g-buffer, and the occlusion that might exist, some parts of the shadow map might not be needed at all.
+
+In principle, an application could use a compute pass on the g-buffer to compute, for each pixel, the part of the shadow-map texture that it will access - its footprint.
+The application could then aggregate these footprints into a stencil mask or other data structure that could be used to optimize the rendering pass that generates the shadow map.
+
+Unfortunately, it is almost impossible for applications to accurately and reliably predict the texel data that particular sampling operations will require, once non-trivial texture filtering modes are considered.
+Sampling operations support a wide variety of state that affects the lookup and filtering of texels. For example:
+
+* When bilinear filtering is enabled, a sampling operation typically accesses the four texels closest to the sampling location and blends them.
+
+* When trilinear filtering is enabled, a sampling operation may access texels at two different mip levels.
+
+* When anisotropic filtering is enabled, a sampling operation may take up to N *taps* (where N is the maximum supported degree of anisotropy), each of which may itself access a neighborhood of texels to produce a filtered value for that tap.
+
+* When sampling a cube map, a sampling operation may straddle the "seam" between two or even three cube faces.
+
+Texture footprint queries are intended to solve this problem by providing application developers with a primitive that can query the footprint of a texture sampling operation using the exact same sampler state and texture coordinates that will be used when sampling the texture later.
+
+# Slang Shader API
+
+Rather than exactly mirror the Vulkan GLSL extension or the NVAPI functions, the Slang core module provides a single common interface that can map to either of those implementations.
+
+## Basics
+
+A typical 2D texture sampling operation is performed using the `Sample()` method on `Texture2D`:
+
+```hlsl
+Texture2D<float4> texture = ...;
+SamplerState sampler = ...;
+float2 coords = ...;
+
+// Sample a 2D texture
+float4 color = texture.Sample(
+    sampler, coords);
+```
+
+To query the footprint that would be accessed by this operation, we can use an operation like:
+
+```hlsl
+uint granularity = ...;
+TextureFootprint2D footprint = texture.queryFootprintCoarse(granularity,
+    sampler, coords);
+```
+
+Note that the same arguments used to call `Sample` above are passed to `queryFootprintCoarse` in the exact same order, following the additional `granularity` argument.
+The returned `footprint` encodes a conservative footprint of the texels that would be accessed by the equivalent `Sample` operation above.
+
+Texture footprints are encoded in terms of blocks of texels, and the size of those blocks determines the *granularity* of the footprint.
+The `granularity` argument to `queryFootprintCoarse` above indicates the granularity of blocks that the application requests.
+
+In cases where a filtering operation might access two mip levels - one coarse and one fine - a footprint query only returns information about one of the two levels.
+The application selects between these options by calling either `queryFootprintCoarse` or `queryFootprintFine`.
+
+## Variations
+
+A wide range of footprint queries are provided, corresponding to various cases of texture sampling operations with different parameters.
+For 2D textures, the following functions are supported:
+
+```hlsl
+TextureFootprint2D Texture2D.queryFootprintCoarse(
+    uint granularity, SamplerState sampler, float2 coords);
+TextureFootprint2D Texture2D.queryFootprintFine(
+    uint granularity, SamplerState sampler, float2 coords);
+TextureFootprint2D Texture2D.queryFootprintCoarseBias(
+    uint granularity, SamplerState sampler, float2 coords,
+    float lodBias);
+TextureFootprint2D Texture2D.queryFootprintFineBias(
+    uint granularity, SamplerState sampler, float2 coords,
+    float lodBias);
+TextureFootprint2D Texture2D.queryFootprintCoarseLevel(
+    uint granularity, SamplerState sampler, float2 coords,
+    float lod);
+TextureFootprint2D Texture2D.queryFootprintFineLevel(
+    uint granularity, SamplerState sampler, float2 coords,
+    float lod);
+TextureFootprint2D Texture2D.queryFootprintCoarseGrad(
+    uint granularity, SamplerState sampler, float2 coords,
+    float2 dx, float2 dy);
+TextureFootprint2D Texture2D.queryFootprintFineGrad(
+    uint granularity, SamplerState sampler, float2 coords,
+    float2 dx, float2 dy);
+
+// Vulkan-only:
+TextureFootprint2D Texture2D.queryFootprintCoarseClamp(
+    uint granularity, SamplerState sampler, float2 coords,
+    float lodClamp);
+TextureFootprint2D Texture2D.queryFootprintFineClamp(
+    uint granularity, SamplerState sampler, float2 coords,
+    float lodClamp);
+TextureFootprint2D Texture2D.queryFootprintCoarseBiasClamp(
+    uint granularity, SamplerState sampler, float2 coords,
+    float lodBias,
+    float lodClamp);
+TextureFootprint2D Texture2D.queryFootprintFineBiasClamp(
+    uint granularity, SamplerState sampler, float2 coords,
+    float lodBias,
+    float lodClamp);
+TextureFootprint2D Texture2D.queryFootprintCoarseGradClamp(
+    uint granularity, SamplerState sampler, float2 coords,
+    float2 dx, float2 dy,
+    float lodClamp);
+TextureFootprint2D Texture2D.queryFootprintFineGradClamp(
+    uint granularity, SamplerState sampler, float2 coords,
+    float2 dx, float2 dy,
+    float lodClamp);
+```
+
+For 3D textures, the following functions are supported:
+
+```hlsl
+TextureFootprint3D Texture3D.queryFootprintCoarse(
+    uint granularity, SamplerState sampler, float3 coords);
+TextureFootprint3D Texture3D.queryFootprintFine(
+    uint granularity, SamplerState sampler, float3 coords);
+TextureFootprint3D Texture3D.queryFootprintCoarseBias(
+    uint granularity, SamplerState sampler, float3 coords,
+    float lodBias);
+TextureFootprint3D Texture3D.queryFootprintFineBias(
+    uint granularity, SamplerState sampler, float3 coords,
+    float lodBias);
+TextureFootprint3D Texture3D.queryFootprintCoarseLevel(
+    uint granularity, SamplerState sampler, float3 coords,
+    float lod);
+TextureFootprint3D Texture3D.queryFootprintFineLevel(
+    uint granularity, SamplerState sampler, float3 coords,
+    float lod);
+
+// Vulkan-only:
+TextureFootprint3D Texture3D.queryFootprintCoarseClamp(
+    uint granularity, SamplerState sampler, float3 coords,
+    float lodClamp);
+TextureFootprint3D Texture3D.queryFootprintFineClamp(
+    uint granularity, SamplerState sampler, float3 coords,
+    float lodClamp);
+TextureFootprint3D Texture3D.queryFootprintCoarseBiasClamp(
+    uint granularity, SamplerState sampler, float3 coords,
+    float lodBias,
+    float lodClamp);
+TextureFootprint3D Texture3D.queryFootprintFineBiasClamp(
+    uint granularity, SamplerState sampler, float3 coords,
+    float lodBias,
+    float lodClamp);
+```
+
+## Footprint Types
+
+Footprint queries on 2D and 3D textures return values of type `TextureFootprint2D` and `TextureFootprint3D`, respectively, which are built-in `struct`s defined in the Slang core module:
+
+```
+struct TextureFootprint2D
+{
+    typealias Anchor        = uint2;
+    typealias Offset        = uint2;
+    typealias Mask          = uint2;
+    typealias LOD           = uint;
+    typealias Granularity   = uint;
+
+    property anchor         : Anchor        { get; }
+    property offset         : Offset        { get; }
+    property mask           : Mask          { get; }
+    property lod            : LOD           { get; }
+    property granularity    : Granularity   { get; }
+    property isSingleLevel  : bool          { get; }
+}
+
+struct TextureFootprint3D
+{
+    typealias Anchor        = uint3;
+    typealias Offset        = uint3;
+    typealias Mask          = uint2;
+    typealias LOD           = uint;
+    typealias Granularity   = uint;
+
+    property anchor         : Anchor        { get; }
+    property offset         : Offset        { get; }
+    property mask           : Mask          { get; }
+    property lod            : LOD           { get; }
+    property granularity    : Granularity   { get; }
+    property isSingleLevel  : bool          { get; }
+}
+```
+
+A footprint is encoded in terms of *texel groups*, where the `granularity` determines the size of those groups.
+When possible, the returned footprint will match the granularity passed into the query operation, but a larger granularity may be selected in cases where the footprint is too large to encode at the requested granularity.
+
+The `anchor` property specifies an anchor point in the texture, in the vicinity of the footprint. Its components are in multiples of 8 texel groups.
+
+The `offset` property specifies how the bits in `mask` map to texel groups in the vicinity of the `anchor` point.
+
+The `mask` property is a 64-bit bitfield (encoded as a `uint2`), where each bit represents footprint coverage of one texel group, within an 8x8 (for 2D textures) or 4x4x4 (for 3D textures) neighborhood of texel groups.
+
+The `lod` property indicates the mipmap level that would be accessed by the sampling operation.
+
+The `isSingleLevel` property indicates if the sampling operation is known to access only a single mip level.
+Note that this property will always be `false` when using the D3D/NVAPI path.
diff --git a/external/slang/docs/language-guide.md b/external/slang/share/doc/slang/language-guide.md
similarity index 95%
rename from external/slang/docs/language-guide.md
rename to external/slang/share/doc/slang/language-guide.md
index 4790794c..d445051d 100644
--- a/external/slang/docs/language-guide.md
+++ b/external/slang/share/doc/slang/language-guide.md
@@ -26,7 +26,7 @@ New Features
 
 ### Import Declarations
 
-In order to support better software modularity, and also to deal with the issue of how to integrate shader libraries written in Slang into other langauges, Slang introduces an `import` declaration construct.
+In order to support better software modularity, and also to deal with the issue of how to integrate shader libraries written in Slang into other languages, Slang introduces an `import` declaration construct.
 
 The basic idea is that if you write a file `foo.slang` like this:
 
@@ -53,7 +53,7 @@ When it comes time to generate output code, Slang will output any declarations f
 
 A few other details worth knowing about `import` declarations:
 
-* The name you use on the `import` line gets translated into a file name with some very simple rules. An underscore (`_`) in the name turns into a dash (`-`) in the file name, and dot separators (`.`) turn into directory seprators (`/`). After these substitutions, `.slang` is added to the end of the name.
+* The name you use on the `import` line gets translated into a file name with some very simple rules. An underscore (`_`) in the name turns into a dash (`-`) in the file name, and dot separators (`.`) turn into directory separators (`/`). After these substitutions, `.slang` is added to the end of the name.
 
 * If there are multiple `import` declarations naming the same file, it will only be imported once. This is also true for nested imports.
 
@@ -61,12 +61,12 @@ A few other details worth knowing about `import` declarations:
 
 * If file `A.slang` imports `B.slang`, and then some other file does `import A;`, then only the names from `A.slang` are brought into scope, not those from `B.slang`. This behavior can be controlled by having `A.slang` use `__exported import B;` to also re-export the declarations it imports from `B`.
 
-* An import is *not* like a `#include`, and so the file that does the `import` can't see preprocessor macros defined in the imported file (and vice versa). Think of `import foo;` as closer to `using namspace foo;` in C++ (perhaps without the same baggage).
+* An import is *not* like a `#include`, and so the file that does the `import` can't see preprocessor macros defined in the imported file (and vice versa). Think of `import foo;` as closer to `using namespace foo;` in C++ (perhaps without the same baggage).
 
 ### Explicit Parameter Blocks
 
-One of the most important new features of modern APIs like Direct3D 12 and Vulkan is an interface for providing shader parameters using efficient *parameter blocks* that can be stored in GPU memory (these are implemented as descritpor tables/sets in D3D12/Vulkan, and "attribute buffers" in Metal).
-However, HLSL and GLSL don't support explicit syntax for parmaeter blocks, and so shader programmers are left to manually pack parameters into blocks either using `register`/`layout` modifiers, or with API-based remapping (in the D3D12 case).
+One of the most important new features of modern APIs like Direct3D 12 and Vulkan is an interface for providing shader parameters using efficient *parameter blocks* that can be stored in GPU memory (these are implemented as descriptor tables/sets in D3D12/Vulkan, and "attribute buffers" in Metal).
+However, HLSL and GLSL don't support explicit syntax for parameter blocks, and so shader programmers are left to manually pack parameters into blocks either using `register`/`layout` modifiers, or with API-based remapping (in the D3D12 case).
 
 Slang supports a simple and explicit syntax for exploiting parameter blocks:
 
@@ -190,7 +190,7 @@ interface IMaterial
 What is the type `???` that `evalPattern` should return? We know that it needs to be a type that supports `IBRDF`, but *which* type?
 One material might want to use `DisneyBRDF` while another wants to use `KajiyaKay`.
 
-The solution in Slang, as in modern languages like Swift and Rust, is to use *associated types* to express the depdence of the BRDF type on the material type:
+The solution in Slang, as in modern languages like Swift and Rust, is to use *associated types* to express the dependence of the BRDF type on the material type:
 
 ```hlsl
 interface IMaterial
diff --git a/external/slang/share/doc/slang/language-reference/01-introduction.md b/external/slang/share/doc/slang/language-reference/01-introduction.md
new file mode 100644
index 00000000..fe606398
--- /dev/null
+++ b/external/slang/share/doc/slang/language-reference/01-introduction.md
@@ -0,0 +1,35 @@
+> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
+
+Introduction
+============
+
+Slang is a programming language primarily designed for use in *shader programming*, by which we mean performance-oriented GPU programming for real-time graphics.
+
+Overview
+--------
+
+This document aims to provide a detailed reference for the Slang language and its supported constructs.
+
+The Slang compiler *implementation* may deviate from the language as documented here, in a few key ways:
+
+* The implementation is necessarily imperfect, and can have bugs
+
+* The implementation may not fully support constructs documented here, or their capabilities may not be as complete as what is documented
+
+* The implementation may support certain constructs that are experimental, deprecated, or are otherwise intentionally undocumented
+
+Where possible, this document will call out known deviations between the language as defined here and the implementation in the compiler.
+
+Terminology
+-----------
+
+> Note: This section is not yet complete.
+>
+> This section should detail how the document uses terms like "may" and "must," if we intend for those to be used in a manner consistent with [RFC 2119](https://www.ietf.org/rfc/rfc2119.txt). 
+
+Typographical Conventions
+-------------------------
+
+> Note: This section is not yet complete.
+>
+> This section should clarify how the document displays code fragments, grammar productions, etc.
diff --git a/external/slang/share/doc/slang/language-reference/02-lexical-structure.md b/external/slang/share/doc/slang/language-reference/02-lexical-structure.md
new file mode 100644
index 00000000..c966ab6e
--- /dev/null
+++ b/external/slang/share/doc/slang/language-reference/02-lexical-structure.md
@@ -0,0 +1,121 @@
+> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
+
+Lexical Structure
+=================
+
+Source Units
+------------
+
+A _source unit_ comprises a sequence of zero or more _characters_ which for purposes of this document are defined as Unicode scalars (code points).
+
+Encoding
+--------
+
+Implementations *may* accept source units stored as files on disk, buffers in memory, or any appropriate implementation-specified means.
+When source units are stored as byte sequences, they *should* be encoded using UTF-8.
+Implementations *may* support additional implementation-specified encodings.
+
+Whitespace
+----------
+
+_Horizontal whitespace_ consists of space (U+0020) and horizontal tab (U+0009).
+
+A _line break_ consists of a line feed (U+000A), carriage return (U+000D) or a carriage return followed by a line feed (U+000D, U+000A).
+Line breaks are used as line separators rather than terminators; it is not necessary for a source unit to end with a line break.
+
+Escaped Line Breaks
+-------------------
+
+An _escaped line break_ comprises a backslash (`\`, U+005C) followed immediately by a line break.
+
+Comments
+--------
+
+A _comment_ is either a line comment or a block comment:
+
+```hlsl
+// a line comment
+/* a block comment */
+```
+
+A _line comment_ comprises two forward slashes (`/`, U+002F) followed by zero or more characters that do not contain a line break.
+A line comment extends up to, but does not include, a subsequent line break or the end of the source unit.
+
+A _block comment_ begins with a forward slash (`/`, U+002F) followed by an asterisk (`*`, U+002A).
+A block comment is terminated by the next instance of an asterisk followed by a forward slash (`*/`).
+A block comment contains all characters between where it begins and where it terminates, including any line breaks.
+Block comments do not nest.
+It is an error if a block comment that begins in a source unit is not terminated in that source unit.
+
+Phases
+------
+
+Compilation of a source unit proceeds _as if_ the following steps are executed in order:
+
+1. Line numbering (for subsequent diagnostic messages) is noted based on the locations of line breaks
+
+2. Escaped line breaks are eliminated. No new characters are inserted to replace them. Any new escaped line breaks introduced by this step are not eliminated.
+
+3. Each comment is replaced with a single space (U+0020)
+
+4. The source unit is _lexed_ into a sequence of tokens according to the lexical grammar in this chapter
+
+5. The lexed sequence of tokens is _preprocessed_ to produce a new sequence of tokens (Chapter 3)
+
+6. Subsequent processing is performed on the preprocessed sequence of tokens
+
+Identifiers
+-----------
+
+An _identifier_ begins with an uppercase or lowercase ASCII letter (`A` through `Z`, `a` through `z`), or an underscore (`_`).
+After the first character, ASCII digits (`0` through `9`) may also be used in an identifier.
+
+The identifier consisting of a single underscore (`_`) is reserved by the language and must not be used by programs.
+Otherwise, there are no fixed keywords or reserved words.
+Words that name a built-in language construct can also be used as user-defined identifiers and will shadow the built-in definitions in the scope of their definition.
+
+Literals
+--------
+
+### Integer Literals
+
+An _integer literal_ consists of an optional radix specifier followed by digits and an optional suffix.
+
+The _radix specifier_ may be:
+
+* `0x` or `0X` to specify a hexadecimal literal (radix 16)
+* `0b` or `0B` to specify a binary literal (radix 2)
+
+When no radix specifier is present a radix of 10 is used.
+
+Octal literals (radix 8) are not supported.
+A `0` prefix on an integer literal does *not* specify an octal literal as it does in C.
+Implementations *may* warn on integer literals with a `0` prefix in case users expect C behavior.
+
+The _digits_ of an integer literal may include ASCII `0` through `9`.
+In the case of a hexadecimal literal, digits may include the letters `A` through `F` (and `a` through `f`) which represent digit values of 10 through 15.
+It is an error for an integer literal to include a digit with a value greater than or equal to the radix.
+The digits of an integer literal may also include underscore (`_`) characters, which are ignored and have no semantic impact.
+
+The _suffix_ on an integer literal may be used to indicate the desired type of the literal:
+
+* A `u` suffix indicates the `uint` type
+* An `l` or `ll` suffix indicates the `int64_t` type
+* A `ul` or `ull` suffix indicates the `uint64_t` type
+
+### Floating-Point Literals
+
+> Note: This section is not yet complete.
+
+### String Literals
+
+> Note: This section is not yet complete.
+
+### Character Literals
+
+> Note: This section is not yet complete.
+
+Operators and Punctuation
+-------------------------
+
+> Note: This section is not yet complete.
diff --git a/external/slang/share/doc/slang/language-reference/03-preprocessor.md b/external/slang/share/doc/slang/language-reference/03-preprocessor.md
new file mode 100644
index 00000000..de579efe
--- /dev/null
+++ b/external/slang/share/doc/slang/language-reference/03-preprocessor.md
@@ -0,0 +1,19 @@
+> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
+
+Preprocessor
+============
+
+Slang supports a C-style preprocessor with the following directives:
+
+* `#include`
+* `#define`
+* `#undef`
+* `#if`, `#ifdef`, `#ifndef`
+* `#else`, `#elif`
+* `#endif`
+* `#error`
+* `#warning`
+* `#line`
+* `#pragma`
+
+> Note: This section is not yet complete.
diff --git a/external/slang/share/doc/slang/language-reference/04-types.md b/external/slang/share/doc/slang/language-reference/04-types.md
new file mode 100644
index 00000000..3ccc7bdc
--- /dev/null
+++ b/external/slang/share/doc/slang/language-reference/04-types.md
@@ -0,0 +1,339 @@
+> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
+
+Types
+=====
+
+This section defines the kinds of types supported by Slang.
+
+Types in Slang do not necessarily prescribe a single _layout_ in memory.
+The discussion of each type will specify any guarantees about layout it provides; any details of layout not specified here may depend on the target platform, compiler options, and context in which a type is used.
+
+Void Type
+---------
+
+The type `void` contains no data and has a single, unnamed, value.
+
+A `void` value takes up no space, and thus does not affect the layout of types.
+Formally, a `void` value behaves as if it has a size of zero bytes, and one-byte alignment.
+
+Scalar Types
+------------
+
+### Boolean Type
+
+The type `bool` is used to represent Boolean truth values: `true` and `false`.
+
+The size of a `bool` varies across target platforms; programs that need to ensure a matching in-memory layout between targets should not use `bool` for in-memory data structures.
+On all platforms, the `bool` type must be _naturally aligned_ (its alignment is its size).
+
+### Integer Types
+
+The following integer types are defined:
+
+| Name          | Description |
+|---------------|-------------|
+| `int8_t`      | 8-bit signed integer |
+| `int16_t`     | 16-bit signed integer |
+| `int`         | 32-bit signed integer |
+| `int64_t`     | 64-bit signed integer |
+| `uint8_t`     | 8-bit unsigned integer |
+| `uint16_t`    | 16-bit unsigned integer |
+| `uint`        | 32-bit unsigned integer |
+| `uint64_t`    | 64-bit unsigned integer |
+
+All signed integers use two's complement representation.
+All arithmetic operations on integers (both signed and unsigned) wrap on overflow/underflow.
+
+All target platforms must support the `int` and `uint` types.
+Specific [target platforms](../target-compatibility.md) may not support the other integer types.
+
+All integer types are stored in memory with their natural size and alignment on all targets that support them.
+
+### Floating-Point Types
+
+The following floating-point types are defined:
+
+| Name          | Description                   |
+|---------------|-------------------------------|
+| `half`        | 16-bit floating-point number (1 sign bit, 5 exponent bits, 10 fraction bits) |
+| `float`       | 32-bit floating-point number (1 sign bit, 8 exponent bits, 23 fraction bits) |
+| `double`      | 64-bit floating-point number (1 sign bit, 11 exponent bits, 52 fraction bits) |
+
+All floating-point types are laid out in memory using the matching IEEE 754 standard format (`binary16`, `binary32`, `binary64`).
+Target platforms may define their own rules for rounding, precision, denormals, infinities, and not-a-number values.
+
+All target platforms must support the `float` type.
+Specific [targets](../target-compatibility.md) may not support the other floating-point types.
+
+All floating-point types are stored in memory with their natural size and alignment on all targets that support them.
+
+Vector Types
+------------
+
+A vector type is written as `vector<T, N>` and represents an `N`-element vector with elements of type `T`.
+The _element type_ `T` must be one of the built-in scalar types, and the _element count_ `N` must be a specialization-time constant integer.
+The element count must be between 2 and 4, inclusive.
+
+A vector type allows subscripting of its elements like an array, but also supports element-wise arithmetic on its elements.
+_Element-wise arithmetic_ means mapping unary and binary operators over the elements of a vector to produce a vector of results:
+
+```hlsl
+vector<int,4> a = { 1, 2, 30, 40 };
+vector<int,4> b = { 10, 20, 3, 4 };
+
+-a; // yields { -1, -2, -30, -40 }
+a + b; // yields { 11, 22, 33, 44 }
+b / a; // yields { 10, 10, 0, 0 }
+a > b; // yields { false, false, true, true }
+```
+
+A vector type is laid out in memory as `N` contiguous values of type `T` with no padding.
+The alignment of a vector type may vary by target platforms.
+The alignment of `vector<T,N>` will be at least the alignment of `T` and may be at most `N` times the alignment of `T`.
+
+As a convenience, Slang defines built-in type aliases for vectors of the built-in scalar types.
+E.g., declarations equivalent to the following are provided by the Slang core module:
+
+```hlsl
+typealias float4 = vector<float, 4>;
+typealias int8_t3 = vector<int8_t, 3>;
+```
+
+### Legacy Syntax
+
+For compatibility with older codebases, the generic `vector` type includes default values for `T` and `N`, being declared as:
+
+```hlsl
+struct vector<T = float, let N : int = 4> { ... }
+```
+
+This means that the bare name `vector` may be used as a type equivalent to `float4`:
+
+```hlsl
+// All of these variables have the same type
+vector a;
+float4 b;
+vector<float> c;
+vector<float, 4> d;
+```
+
+Matrix Types
+------------
+
+A matrix type is written as `matrix<T, R, C>` and represents a matrix of `R` rows and `C` columns, with elements of type `T`.
+The element type `T` must be one of the built-in scalar types.
+The _row count_ `R` and _column count_ `C` must be specialization-time constant integers.
+The row count and column count must each be between 2 and 4, inclusive.
+
+A matrix type allows subscripting of its rows, similar to an `R`-element array of `vector<T,C>` elements.
+A matrix type also supports element-wise arithmetic.
+
+Matrix types support both _row-major_ and _column-major_ memory layout.
+Implementations may support command-line flags or API options to control the default layout to use for matrices.
+
+> Note: Slang currently does *not* support the HLSL `row_major` and `column_major` modifiers to set the layout used for specific declarations.
+
+Under row-major layout, a matrix is laid out in memory equivalently to an `R`-element array of `vector<T,C>` elements.
+
+Under column-major layout, a matrix is laid out in memory equivalently to the row-major layout of its transpose.
+This means it will be laid out equivalently to a `C`-element array of `vector<T,R>` elements.
+
+As a convenience, Slang defines built-in type aliases for matrices of the built-in scalar types.
+E.g., declarations equivalent to the following are provided by the Slang core module:
+
+```hlsl
+typealias float3x4 = matrix<float, 3, 4>;
+typealias int64_t4x2 = matrix<int64_t, 4, 2>;
+```
+
+> Note: For programmers using OpenGL or Vulkan as their graphics API, and/or who are used to the GLSL language,
+> it is important to recognize that the equivalent of a GLSL `mat3x4` is a Slang `float3x4`.
+> This is despite the fact that GLSL defines a `mat3x4` as having 3 *columns* and 4 *rows*, while a Slang `float3x4` is defined as having 3 rows and 4 columns.
+> This convention means that wherever Slang refers to "rows" or "columns" of a matrix, the equivalent terms in the GLSL, SPIR-V, OpenGL, and Vulkan specifications are "column" and "row" respectively (*including* in the compound terms of "row-major" and "column-major").
+> While it may seem that this choice of convention is confusing, it is necessary to ensure that subscripting with `[]` can be efficiently implemented on all target platforms.
+> This decision in the Slang language is consistent with the compilation of HLSL to SPIR-V performed by other compilers.
+
+### Legacy Syntax
+
+For compatibility with older codebases, the generic `matrix` type includes default values for `T`, `R`, and `C`, being declared as:
+
+```hlsl
+struct matrix<T = float, let R : int = 4, let C : int = 4> { ... }
+```
+
+This means that the bare name `matrix` may be used as a type equivalent to `float4x4`:
+
+```hlsl
+// All of these variables have the same type
+matrix a;
+float4x4 b;
+matrix<float, 4, 4> c;
+```
+
+Structure Types
+---------------
+
+Structure types are introduced with `struct` declarations, and consist of an ordered sequence of named and typed fields:
+
+```hlsl
+struct S
+{
+    float2 f;
+    int3 i;
+}
+```
+
+### Standard Layout
+
+The _standard layout_ for a structure type uses the following algorithm:
+
+* Initialize variables `size` and `alignment` to zero and one, respectively
+* For each field `f` of the structure type:
+  * Update `alignment` to be the maximum of `alignment` and the alignment of `f`
+  * Set `size` to the smallest multiple of `alignment` not less than `size`
+  * Set the offset of field `f` to `size`
+  * Add the size of `f` to `size`
+
+When this algorithm completes, `size` and `alignment` will be the size and alignment of the structure type.
+
+Most target platforms do not use the standard layout directly, but it provides a baseline for defining other layout algorithms.
+Any layout for structure types must guarantee an alignment at least as large as the standard layout.
+
+### C-Style Layout
+
+C-style layout for structure types differs from standard layout by adding an additional final step:
+
+* Set `size` to the smallest multiple of `alignment` not less than `size`
+
+This mirrors the layout rules used by typical C/C++ compilers.
+
+### D3D Constant Buffer Layout
+
+D3D constant buffer layout is similar to standard layout with two differences:
+
+* The initial alignment is 16 instead of one
+
+* If a field would have _improper straddle_, where the interval `(fieldOffset, fieldOffset+fieldSize)` (exclusive on both sides) contains any multiple of 16, *and* the field offset is not already a multiple of 16, then the offset of the field is adjusted to the next multiple of 16
+
+Array Types
+-----------
+
+An _array type_ is either a statically-sized or dynamically-sized array type.
+
+A known-size array type is written `T[N]` where `T` is a type and `N` is a specialization-time constant integer.
+This type represents an array of exactly `N` values of type `T`.
+
+An unknown-size array type is written `T[]` where `T` is a type.
+This type represents an array of some fixed, but statically unknown, size.
+
+> Note: Unlike in C and C++, arrays in Slang are always value types, meaning that assignment and parameter passing of arrays copies their elements.
+
+### Declaration Syntax
+
+For variable and parameter declarations using traditional syntax, a variable of array type may be declared by using the element type `T` as a type specifier (before the variable name) and the `[N]` to specify the element count after the variable name:
+
+```hlsl
+int a[10];
+```
+
+Alternatively, the array type itself may be used as the type specifier:
+
+```hlsl
+int[10] a;
+```
+
+When using the `var` or `let` keyword to declare a variable, the array type must not be split:
+
+```hlsl
+var a : int[10];
+```
+
+> Note: when declaring arrays of arrays (often thought of as "multidimensional arrays") a programmer must be careful about the difference between the two declaration syntaxes.
+> The following two declarations are equivalent:
+>
+> ```hlsl
+> int[3][5] a;
+> int a[5][3];
+> ```
+>
+> In each case, `a` is a five-element array of three-element arrays of `int`s.
+> However, one declaration orders the element counts as `[3][5]` and the other as `[5][3]`.
+
+### Element Count Inference
+
+When a variable is declared with an unknown-size array type, and also includes an initial-value expression:
+
+```hlsl
+int a[] = { 0xA, 0xB, 0xC, 0xD };
+```
+
+The compiler will attempt to infer an element count based on the type and/or structure of the initial-value expression.
+In the above case, the compiler will infer an element count of 4 from the structure of the initializer-list expression.
+Thus the preceding declaration is equivalent to:
+
+```hlsl
+int a[4] = { 0xA, 0xB, 0xC, 0xD };
+```
+
+A variable declared in this fashion semantically has a known-size array type and not an unknown-size array type; the use of an unknown-size array type for the declaration is just a convenience feature.
+
+### Standard Layout
+
+The _stride_ of a type is the smallest multiple of its alignment not less than its size.
+
+Using the standard layout for an array type `T[]` or `T[N]`:
+
+* The _element stride_ of the array type is the stride of its element type `T`
+* Element `i` of the array starts at an offset that is `i` times the element stride of the array
+* The alignment of the array type is the alignment of `T`
+* The size of an unknown-size array type is unknown
+* The size of a known-size array with zero elements is zero
+* The size of a known-size array with a nonzero number `N` of elements is the size of `T` plus `N - 1` times the element stride of the array
+
+### C-Style Layout
+
+The C-style layout of an array type differs from the standard layout in that the size of a known-size array with a nonzero number `N` of elements is `N` times the element stride of the array.
+
+### D3D Constant Buffer Layout
+
+The D3D constant buffer layout of an array differs from the standard layout in that the element stride of the array is set to the smallest multiple of the alignment of `T` that is not less than the stride of `T`.
+
+This Type
+---------
+
+Within the body of a structure or interface declaration, the keyword `This` may be used to refer to the enclosing type.
+Inside of a structure type declaration, `This` refers to the structure type itself.
+Inside of an interface declaration, `This` refers to the concrete type that is conforming to the interface (that is, the type of `this`).
+
+Opaque Types
+------------
+
+_Opaque_ types are built-in types that (depending on the target platform) may not have a well-defined size or representation in memory.
+Similar languages may refer to these as "resource types" or "object types."
+
+The full list of opaque types supported by Slang can be found in the core module reference, but important examples are:
+
+* Texture types such as `Texture2D<T>`, `TextureCubeArray<T>`, and `RWTexture2DMS<T>`
+* Sampler state types: `SamplerState` and `SamplerComparisonState`
+* Buffer types like `ConstantBuffer<T>` and `StructuredBuffer<T>`
+* Parameter blocks: `ParameterBlock<T>`
+
+Layout for opaque types depends on the target platform, and no specific guarantees can be made about layout rules across platforms.
+
+Known and Unknown Size
+----------------------
+
+Every type has either known or unknown size.
+Types with unknown size arise in a few ways:
+
+* An unknown-size array type has unknown size
+
+* A structure type has unknown size if any field type has unknown size
+
+The use of types with unknown size is restricted as follows:
+
+* A type with unknown size cannot be used as the element type of an array
+
+* A type with unknown size can only be used as the last field of a structure type
+
+* A type with unknown size cannot be used as a generic argument to specialize a user-defined type, function, etc. Specific built-in generic types/functions may support unknown-size types, and this will be documented on the specific type/function.
diff --git a/external/slang/share/doc/slang/language-reference/05-expressions.md b/external/slang/share/doc/slang/language-reference/05-expressions.md
new file mode 100644
index 00000000..64bee737
--- /dev/null
+++ b/external/slang/share/doc/slang/language-reference/05-expressions.md
@@ -0,0 +1,353 @@
+> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
+
+Expressions
+===========
+
+Expressions are terms that can be _evaluated_ to produce values.
+This section provides a list of the kinds of expressions that may be used in a Slang program.
+
+In general, the order of evaluation of a Slang expression proceeds from left to right.
+Where specific expressions do not follow this order of evaluation, it will be noted.
+
+Some expressions can yield _l-values_, which allows them to be used on the left-hand-side of assignment, or as arguments for `out` or `in out` parameters.
+
+Literal Expressions
+-------------------
+
+Literal expressions are never l-values.
+
+### Integer Literal Expressions
+
+An integer literal expression consists of a single integer literal token:
+
+```hlsl
+123
+```
+
+An unsuffixed integer literal expression always has type `int`.
+
+### Floating-Point Literal Expressions
+
+A floating-point literal expression consists of a single floating-point literal token:
+
+```hlsl
+1.23
+```
+
+An unsuffixed floating-point literal expression always has type `float`.
+
+### Boolean Literal Expressions
+
+Boolean literal expressions use the keywords `true` and `false`.
+
+### String Literal Expressions
+
+A string literal expression consists of one or more string literal tokens in a row:
+
+```hlsl
+"This" "is one" "string"
+```
+
+Identifier Expression
+---------------------
+
+An _identifier expression_ consists of a single identifier:
+
+```hlsl
+someName
+```
+
+When evaluated, this expression looks up `someName` in the environment of the expression and yields the value of a declaration with a matching name.
+
+An identifier expression is an l-value if the declaration it refers to is mutable.
+
+### Overloading
+
+It is possible for an identifier expression to be _overloaded_, such that it refers to one or more candidate declarations with the same name.
+If the expression appears in a context where the correct declaration to use can be disambiguated, then that declaration is used as the result of the name expression; otherwise use of an overloaded name is an error at the use site.
+
+### Implicit Lookup
+
+It is possible for a name expression to refer to nested declarations in two ways:
+
+* In the body of a method, a reference to `someName` may resolve to `this.someName`, using the implicit `this` parameter of the method
+
+* When a global-scope `cbuffer` or `tbuffer` declaration is used, `someName` may refer to a field declared inside the `cbuffer` or `tbuffer`
+
+Member Expression
+-----------------
+
+A _member expression_ consists of a base expression followed by a dot (`.`) and an identifier naming a member to be accessed:
+
+```hlsl
+base.m
+```
+
+When `base` is a structure type, this expression looks up the field or other member named by `m`.
+Just as for an identifier expression, the result of a member expression may be overloaded, and might be disambiguated based on how it is used.
+
+A member expression is an l-value if the base expression is an l-value and the member it refers to is mutable.
+
+### Implicit Dereference
+
+If the base expression of a member reference is a _pointer-like type_ such as `ConstantBuffer<T>`, then a member reference expression will implicitly dereference the base expression to refer to the pointed-to value (e.g., in the case of `ConstantBuffer<T>` this is the buffer contents of type `T`).
+
+### Vector Swizzles
+
+When the base expression of a member expression is of a vector type `vector<T,N>` then a member expression is a _vector swizzle expression_.
+The member name must conform to these constraints:
+
+* The member name must comprise between one and four ASCII characters
+* The characters must come either from the set (`x`, `y`, `z`, `w`) or (`r`, `g`, `b`, `a`), corresponding to element indices of (0, 1, 2, 3)
+* The element index corresponding to each character must be less than `N`
+
+If the member name of a swizzle consists of a single character, then the expression has type `T` and is equivalent to a subscript expression with the corresponding element index.
+
+If the member name of a swizzle consists of `M` characters, then the result is a `vector<T,M>` built from the elements of the base vector with the corresponding indices.
+
+A vector swizzle expression is an l-value if the base expression was an l-value and the list of indices corresponding to the characters of the member name contains no duplicates.
+
+### Matrix Swizzles
+
+> Note: The Slang implementation currently doesn't support matrix swizzles.
+
+### Static Member Expressions
+
+When the base expression of a member expression is a type instead of a value, the result is a _static member expression_.
+A static member expression can refer to a static field or static method of a structure type.
+A static member expression can also refer to a case of an enumeration type.
+
+A static member expression (but not a member expression in general) may use the token `::` instead of `.` to separate the base and member name:
+
+```hlsl
+// These are equivalent
+Color.Red
+Color::Red
+```
+
+This Expression
+---------------
+
+A _this expression_ consists of the keyword `this` and refers to the implicit instance of the enclosing type that is being operated on in instance methods, subscripts, and initializers.
+
+The type of `this` is `This`.
+
+Parenthesized Expression
+----------------------
+
+An expression wrapped in parentheses `()` is a _parenthesized expression_ and evaluates to the same value as the wrapped expression.
+
+Call Expression
+---------------
+
+A _call expression_ consists of a base expression and a list of argument expressions, separated by commas and enclosed in `()`:
+
+```hlsl
+myFunction( 1.0f, 20 )
+```
+
+When the base expression (e.g., `myFunction`) is overloaded, a call expression can disambiguate the overloaded expression based on the number and types of arguments present.
+
+The base expression of a call may be a member reference expression:
+
+```hlsl
+myObject.myFunc( 1.0f )
+```
+
+In this case the base expression of the member reference (e.g., `myObject` in this case) is used as the argument for the implicit `this` parameter of the callee.
+
+### Mutability
+
+If a `[mutating]` instance method is being called, the argument for the implicit `this` parameter must be an l-value.
+
+The argument expressions corresponding to any `out` or `in out` parameters of the callee must be l-values.
+
+A call expression is never an l-value.
+
+### Initializer Expressions
+
+When the base expression of a call is a type instead of a value, the expression is an initializer expression:
+
+```hlsl
+float2(1.0f, 2.0f)
+```
+
+An initializer expression initializes an instance of the specified type using the given arguments.
+
+An initializer expression with only a single argument is treated as a cast expression:
+
+```hlsl
+// these are equivalent
+int(1.0f)
+(int) 1.0f
+```
+
+Subscript Expression
+--------------------
+
+A _subscript expression_ consists of a base expression and a list of argument expressions, separated by commas and enclosed in `[]`:
+
+```hlsl
+myVector[someIndex]
+```
+
+A subscript expression invokes one of the subscript declarations in the type of the base expression. Which subscript declaration is invoked is resolved based on the number and types of the arguments.
+
+A subscript expression is an l-value if the base expression is an l-value and if the subscript declaration it refers to has a setter or by-reference accessor.
+
+Subscripts may be formed on the built-in vector, matrix, and array types.
+
+
+Initializer List Expression
+---------------------------
+
+An _initializer list expression_ comprises zero or more expressions, separated by commas, enclosed in `{}`:
+
+```
+{ 1, "hello", 2.0f }
+```
+
+An initializer list expression may only be used directly as the initial-value expression of a variable or parameter declaration; initializer lists are not allowed as arbitrary sub-expressions.
+
+> Note: This section will need to be updated with the detailed rules for how expressions in the initializer list are used to initialize values of each kind of type.
+
+Cast Expression
+---------------
+
+A _cast expression_ attempts to coerce a single value (the base expression) to a desired type (the target type):
+
+```hlsl
+(int) 1.0f
+```
+
+A cast expression can perform both built-in type conversions and invoke any single-argument initializers of the target type.
+
+### Compatibility Feature
+
+As a compatibility feature for older code, Slang supports using a cast where the base expression is an integer literal zero and the target type is a user-defined structure type:
+
+```hlsl
+MyStruct s = (MyStruct) 0;
+```
+
+The semantics of such a cast are equivalent to initialization from an empty initializer list:
+
+```hlsl
+MyStruct s = {};
+```
+
+Assignment Expression
+---------------------
+
+An _assignment expression_ consists of a left-hand-side expression, an equals sign (`=`), and a right-hand-side expression:
+
+```hlsl
+myVar = someValue
+```
+
+The semantics of an assignment expression are to:
+
+* Evaluate the left-hand side to produce an l-value,
+* Evaluate the right-hand side to produce a value
+* Store the value of the right-hand side to the l-value of the left-hand side
+* Yield the l-value of the left-hand-side
+
+Operator Expressions
+--------------------
+
+### Prefix Operator Expressions
+
+The following prefix operators are supported:
+
+| Operator 	| Description |
+|-----------|-------------|
+| `+`		| identity |
+| `-`		| arithmetic negation |
+| `~` 		| bit-wise Boolean negation |
+| `!`		| Boolean negation |
+| `++`		| increment in place |
+| `--`		| decrement in place |
+
+A prefix operator expression like `+val` is equivalent to a call expression to a function of the matching name `operator+(val)`, except that lookup for the function only considers functions marked with the `__prefix` keyword.
+
+The built-in prefix `++` and `--` operators require that their operand is an l-value, and work as follows:
+
+* Evaluate the operand to produce an l-value
+* Read from the l-value to yield an _old value_
+* Increment or decrement the value to yield a _new value_
+* Write the new value to the l-value
+* Yield the new value
+
+### Postfix Operator Expressions
+
+The following postfix operators are supported:
+
+| Operator 	| Description |
+|-----------|-------------|
+| `++`		| increment in place |
+| `--`		| decrement in place |
+
+A postfix operator expression like `val++` is equivalent to a call expression to a function of the matching name `operator++(val)`, except that lookup for the function only considers functions marked with the `__postfix` keyword.
+
+The built-in postfix `++` and `--` operators require that their operand is an l-value, and work as follows:
+
+* Evaluate the operand to produce an l-value
+* Read from the l-value to yield an _old value_
+* Increment or decrement the value to yield a _new value_
+* Write the new value to the l-value
+* Yield the old value
+
+### Infix Operator Expressions
+
+The following infix binary operators are supported:
+
+| Operator 	| Kind        | Description |
+|-----------|-------------|-------------|
+| `*`		| Multiplicative 	| multiplication |
+| `/`		| Multiplicative 	| division |
+| `%`		| Multiplicative 	| remainder of division |
+| `+`		| Additive 			| addition |
+| `-`		| Additive 			| subtraction |
+| `<<`		| Shift 			| left shift |
+| `>>`		| Shift 			| right shift |
+| `<` 		| Relational 		| less than |
+| `>`		| Relational 		| greater than |
+| `<=`		| Relational 		| less than or equal to |
+| `>=`		| Relational 		| greater than or equal to |
+| `==`		| Equality 			| equal to |
+| `!=`		| Equality 			| not equal to |
+| `&`		| BitAnd 			| bitwise and |
+| `^`		| BitXor			| bitwise exclusive or |
+| `\|`		| BitOr 			| bitwise or |
+| `&&`		| And 				| logical and |
+| `\|\|`	| Or 				| logical or |
+| `+=`		| Assignment  		| compound add/assign |
+| `-=`      | Assignment  		| compound subtract/assign |
+| `*=`      | Assignment  		| compound multiply/assign |
+| `/=`      | Assignment  		| compound divide/assign |
+| `%=`      | Assignment  		| compound remainder/assign |
+| `<<=`     | Assignment  		| compound left shift/assign |
+| `>>=`     | Assignment  		| compound right shift/assign |
+| `&=`      | Assignment  		| compound bitwise and/assign |
+| `\|=`     | Assignment  		| compound bitwise or/assign |
+| `^=`      | Assignment  		| compound bitwise xor/assign |
+| `=`       | Assignment  		| assignment |
+| `,`		| Sequencing  		| sequence |
+
+With the exception of the assignment operator (`=`), an infix operator expression like `left + right` is equivalent to a call expression to a function of the matching name `operator+(left, right)`.
+
+### Conditional Expression
+
+The conditional operator, `?:`, is used to select between two expressions based on the value of a condition:
+
+```hlsl
+useNegative ? -1.0f : 1.0f
+```
+
+The condition may be either a single value of type `bool`, or a vector of `bool`.
+When a vector of `bool` is used, the two values being selected between must be vectors, and selection is performed component-wise.
+
+> Note: Unlike C, C++, GLSL, and most other C-family languages, Slang currently follows the precedent of HLSL where `?:` does not short-circuit.
+>
+> This decision may change (for the scalar case) in a future version of the language.
+> Programmers are encouraged to write code that does not depend on whether or not `?:` short-circuits.
diff --git a/external/slang/share/doc/slang/language-reference/06-statements.md b/external/slang/share/doc/slang/language-reference/06-statements.md
new file mode 100644
index 00000000..5c3b77ad
--- /dev/null
+++ b/external/slang/share/doc/slang/language-reference/06-statements.md
@@ -0,0 +1,237 @@
+> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
+
+Statements
+==========
+
+Statements are used to define the bodies of functions and determine order of evaluation and control flow for an entire program.
+Statements are distinct from expressions in that statements do not yield results and do not have types.
+
+This section lists the kinds of statements supported by Slang.
+
+Expression Statement
+--------------------
+
+An expression statement consists of an expression followed by a semicolon:
+
+```hlsl
+doSomething();
+a[10] = b + 1;
+```
+
+An implementation may warn on an expression statement that has no effect on the results of execution.
+
+Declaration Statement
+---------------------
+
+A declaration may be used as a statement:
+
+```hlsl
+let x = 10;
+var y = x + 1;
+int z = y - x;
+```
+
+> Note: Currently only variable declarations are allowed in statement contexts, but other kinds of declarations may be enabled in the future.
+
+Block Statement
+---------------
+
+A block statement consists of zero or more statements wrapped in curly braces `{}`:
+
+```hlsl
+{
+	int x = 10;
+	doSomething(x);
+}
+```
+
+A block statement provides local scoping to declarations.
+Declarations in a block are visible to later statements in the same block, but not to statements or expressions outside of the block.
+
+Empty Statement
+---------------
+
+A single semicolon (`;`) may be used as an empty statement equivalent to an empty block statement `{}`.
+
+Conditional Statements
+----------------------
+
+### If Statement
+
+An _if statement_ consists of the `if` keyword and a conditional expression in parentheses, followed by a statement to execute if the condition is true:
+
+```hlsl
+if(somethingShouldHappen)
+    doSomething();
+```
+
+An if statement may optionally include an _else clause_ consisting of the keyword `else` followed by a statement to execute if the condition is false:
+
+```hlsl
+if(somethingShouldHappen)
+ 	doSomething();
+else
+	doNothing();
+```
+
+### Switch Statement
+
+A _switch statement_ consists of the `switch` keyword followed by an expression wrapped in parentheses and a _body statement_:
+
+```hlsl
+switch(someValue)
+{
+	...
+}
+```
+
+The body of a switch statement must be a block statement, and its body must consist of switch case clauses.
+A _switch case clause_ consists of one or more case labels or default labels, followed by one or more statements:
+
+```hlsl
+// this is a switch case clause
+case 0:
+case 1:
+    doBasicThing();
+    break;
+
+// this is another switch case clause
+default:
+    doAnotherThing();
+    break;
+```
+
+A _case label_ consists of the keyword `case` followed by an expression and a colon (`:`).
+The expression must evaluate to a compile-time constant integer.
+
+A _default label_ consists of the keyword `default` followed by a colon (`:`).
+
+It is an error for a case label or default label to appear anywhere other than the body of a `switch` statement.
+It is an error for a statement to appear inside the body of a `switch` statement that is not part of a switch case clause.
+
+Each switch case clause must exit the `switch` statement via a `break` or other control transfer statement.
+"Fall-through" from one switch case clause to another is not allowed.
+
+Loop Statements
+---------------
+
+### For Statement
+
+A _for statement_ uses the following form:
+
+```hlsl
+for( <initial statement> ; <condition expression> ; <side effect expression> ) <body statement>
+```
+
+The _initial statement_ is optional, but may declare a variable whose scope is limited to the for statement.
+
+The _condition expression_ is optional. If present it must be an expression that can be coerced to type `bool`. If absent, a true value is used as the condition.
+
+The _side effect expression_ is optional. If present it will be executed for its effects before testing the condition for every loop iteration after the first.
+
+The _body statement_ is a statement that will be executed for each iteration of the loop.
+
+### While Statement
+
+A _while statement_ uses the following form:
+
+```hlsl
+while( <condition expression> ) <body statement>
+```
+
+and is equivalent to a `for` loop of the form:
+
+```hlsl
+for( ; <condition expression> ; ) <body statement>
+```
+
+### Do-While Statement
+
+A _do-while statement_ uses the following form:
+
+```hlsl
+do <body statement> while( <condition expression> )
+```
+
+and is equivalent to a `for` loop of the form:
+
+```hlsl
+for(;;)
+{
+	<body statement>
+	if(<condition expression>) continue; else break;
+}
+```
+
+Control Transfer Statements
+---------------------------
+
+### Break Statement
+
+A `break` statement transfers control to after the end of the closest lexically enclosing switch statement or loop statement:
+
+```hlsl
+break;
+```
+
+### Continue Statement
+
+A `continue` statement transfers control to the start of the next iteration of a loop statement.
+In a for statement with a side effect expression, the side effect expression is evaluated when `continue` is used:
+
+```hlsl
+continue;
+```
+
+### Return Statement
+
+A `return` statement transfers control out of the current function.
+
+In the body of a function with a `void` result type, the `return` keyword may be followed immediately by a semicolon:
+
+```hlsl
+return;
+```
+
+Otherwise, the `return` keyword must be followed by an expression to use as the value to return to the caller:
+
+```hlsl
+return someValue;
+```
+
+The value returned must be able to coerce to the result type of the lexically enclosing function.
+
+### Discard Statement
+
+A `discard` statement can only be used in the context of a fragment shader, in which case it causes the current invocation to terminate and the graphics system to discard the corresponding fragment so that it does not get combined with the framebuffer pixel at its coordinates.
+
+Operations with side effects that were executed by the invocation before a `discard` will still be performed and their results will become visible according to the rules of the platform.
+
+Compile-Time For Statement
+--------------------------
+
+A _compile-time for statement_ is used as an alternative to preprocessor techniques for loop unrolling.
+It looks like:
+
+```hlsl
+$for( <name> in Range(<initial-value>, <upper-bound>)) <body statement>
+```
+
+The _initial value_ and _upper bound_ expressions must be compile-time constant integers.
+The semantics of a compile-time for statement are as if it were expanded into:
+
+```hlsl
+{
+	let <name> = <initial-value>;
+	<body statement>
+}
+{
+	let <name> = <initial-value> + 1;
+	<body statement>
+}
+...
+{
+	let <name> = <upper-bound> - 1;
+	<body statement>
+}
+```
diff --git a/external/slang/share/doc/slang/language-reference/07-declarations.md b/external/slang/share/doc/slang/language-reference/07-declarations.md
new file mode 100644
index 00000000..2c6e6bdb
--- /dev/null
+++ b/external/slang/share/doc/slang/language-reference/07-declarations.md
@@ -0,0 +1,770 @@
+> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
+
+Declarations
+============
+
+Modules
+-------
+
+A module consists of one or more source units that are compiled together.
+The global declarations in those source units comprise the body of the module.
+
+In general, the order of declarations within a source unit does not matter; declarations can refer to other declarations (of types, functions, variables, etc.) later in the same source unit.
+Declarations (other than `import` declarations) may freely be defined in any source unit in a module; declarations in one source unit of a module may freely refer to declarations in other source units.
+
+Imports
+-------
+
+An import declaration is introduced with the keyword `import`:
+
+```hlsl
+import Shadowing;
+```
+
+An import declaration searches for a module matching the name given in the declaration, and brings the declarations in that module into scope in the current source unit.
+
+> Note: an `import` declaration only applies to the scope of the current source unit, and does *not* import the chosen module so that it is visible to other source units of the current module.
+
+The name of the module being imported may use a compound name:
+
+```hlsl
+import MyApp.Shadowing;
+```
+
+The mechanism used to search for a module is implementation-specific.
+
+> Note: The current Slang implementation searches for a module by translating the specified module name into a file path by:
+>
+> * Replacing any dot (`.`) separators in a compound name with path separators (e.g., `/`)
+>
+> * Replacing any underscores (`_`) in the name with hyphens (`-`)
+>
+> * Appending the extension `.slang`
+>
+> The implementation then looks for a file matching this path on any of its configured search paths.
+> If such a file is found it is loaded as a module comprising a single source unit.
+
+The declarations of an imported module become visible to the current module, but they are not made visible to code that later imports the current module.
+
+> Note: An experimental feature exists for an "exported" import declaration:
+>
+> ```hlsl
+> // inside A.slang
+> __exported import Shadowing;
+> ```
+>
+> This example imports the declarations from `Shadowing` into the current module (module `A`),
+> and also sets up information so that if other code declares `import A` then it can see
+> both the declarations in `A` and those in `Shadowing`.
+
+> Note: Mixing `import` declarations and traditional preprocessor-based (`#include`) modularity
+> in a codebase can lead to surprising results.
+>
+> Some things to be aware of:
+>
+> * Preprocessor definitions in your module do *not* affect the code of modules you `import`.
+>
+> * Preprocessor definitions in a module you `import` do *not* affect your code
+>
+> * The above caveats also apply to "include guards" and `#pragma once`, since they operate at the granularity of a source unit (not across modules)
+>
+> * If you `import` two modules, and they both `#include` the same file, then those two modules may end up with duplicate declarations with the same name.
+>
+> As a general rule, be wary of preprocessor use inside of code meant to be an `import`able module.
+
+Variables
+---------
+
+Variables are declared using the keywords `let` and `var`:
+
+```hlsl
+let x = 7;
+var y = 9.0;
+```
+
+A `let` declaration introduces an immutable variable, which may not be assigned to or used as the argument for an `in out` or `out` parameter.
+A `var` declaration introduces a mutable variable.
+
+An explicit type may be given for a variable by placing it after the variable name and a colon (`:`):
+
+```hlsl
+let x : int = 7;
+var y : float = 9.0;
+```
+
+If no type is specified for a variable, then a type will be inferred from the initial-value expression.
+It is an error to declare a variable that has neither a type specifier nor an initial-value expression.
+It is an error to declare a variable with `let` without an initial-value expression.
+
+A variable declared with `var` may be declared without an initial-value expression if it has an explicit type specifier:
+
+```
+var y : float;
+```
+
+In this case the variable is _uninitialized_ at the point of declaration, and must be explicitly initialized by assigning to it.
+Code that uses the value of an uninitialized variable may produce arbitrary results, or even exhibit undefined behavior depending on the type of the variable.
+Implementations *may* issue an error or warning for code that might make use of an uninitialized variable.
+
+### Traditional Syntax
+
+Variables may also be declared with traditional C-style syntax:
+
+```hlsl
+const int x = 7;
+float y = 9.0;
+```
+
+For traditional variable declarations a type must be specified.
+
+> Note: Slang does not support an `auto` type specifier like C++.
+
+Traditional variable declarations are immutable if they are declared with the `const` modifier, and are otherwise mutable.
+
+### Variables at Global Scope
+
+Variables declared at global scope may be either a global constant, a static global variable, or a global shader parameter.
+
+#### Global Constants
+
+A variable declared at global scope and marked with `static` and `const` is a _global constant_.
+
+A global constant must have an initial-value expression, and that initial-value expression must be a compile-time constant expression.
+
+#### Static Global Variables
+
+A variable declared at global scope and marked with `static` (but not with `const`) is a _static global variable_.
+
+A static global variable provides storage for each invocation executing an entry point.
+Assignments to a static global variable from one invocation do not affect the value seen by other invocations.
+
+> Note: the semantics of a static global variable are similar to a "thread-local" variable in other programming models.
+
+A static global variable may include an initial-value expression; if an initial-value expression is included it is guaranteed to be evaluated and assigned to the variable before any other expression that references the variable is evaluated.
+There is no guarantee that the initial-value expression for a static global variable is evaluated before entry point execution begins, or even that the initial-value expression is evaluated at all (in cases where the variable might not be referenced at runtime).
+
+> Note: the above rules mean that an implementation may perform dead code elimination on static global variables, and may choose between eager and lazy initialization of those variables at its discretion.
+
+#### Global Shader Parameters
+
+A variable declared at global scope and not marked with `static` (even if marked with `const`) is a _global shader parameter_.
+
+Global shader parameters are used to pass arguments from application code into invocations of an entry point.
+The mechanisms for parameter passing are specific to each target platform.
+
+> Note: Currently only global shader parameters of opaque types or arrays of opaque types are supported.
+
+A global shader parameter may include an initial-value expression, but such an expression does not affect the semantics of the compiled program.
+
+> Note: Initial-value expressions on global shader parameters are only useful to set up "default values" that can be read via reflection information and used by application code.
+
+### Variables at Function Scope
+
+Variables declared at _function scope_ (in the body of a function, initializer, subscript accessor, etc.) may be either a function-scope constant, function-scope static variable, or a local variable.
+
+#### Function-Scope Constants
+
+A variable declared at function scope and marked with both `static` and `const` is a _function-scope constant_.
+Semantically, a function-scope constant behaves like a global constant except that its name is only visible in the local scope.
+
+#### Function-Scope Static Variables
+
+A variable declared at function scope and marked with `static` (but not `const`) is a _function-scope static variable_.
+Semantically, a function-scope static variable behaves like a global static variable except that its name is only visible in the local scope.
+
+The initial-value expression for a function-scope static variable may refer to non-static variables in the body of the function.
+In these cases initialization of the variable is guaranteed not to occur until at least the first time the function body is evaluated for a given invocation.
+
+#### Local Variables
+
+A variable declared at function scope and not marked with `static` (even if marked with `const`) is a _local variable_.
+A local variable has unique storage for each _activation_ of a function by an invocation.
+When a function is called recursively, each call produces a distinct activation with its own copies of local variables.
+
+Functions
+---------
+
+Functions are declared using the `func` keyword:
+
+```hlsl
+func add(x: int, y: float) -> float { return float(x) + y; }
+```
+
+Parameters
+----------
+
+The parameters of the function are declared as `name: type` pairs.
+
+Parameters may be given a _default value_ by including an initial-value-expression clause:
+
+```hlsl
+func add(x: int, y: float = 1.0f) { ... }
+```
+
+Parameters may be marked with a _direction_ which affects how data is passed between caller and callee:
+
+```hlsl
+func add(x: in out int, y : float) { x += ... }
+```
+
+The available directions are:
+
+* `in` (the default) indicates typical pass-by-value (copy-in) semantics. The callee receives a *copy* of the argument passed by the caller.
+
+* `out` indicates copy-out semantics. The callee writes to the parameter and then a copy of that value is assigned to the argument of the caller after the call returns.
+
+* `in out` or `inout` indicates pass-by-value-result (copy-in and copy-out) semantics. The callee receives a copy of the argument passed by the caller, it may manipulate the copy, and then when the call returns the final value is copied back to the argument of the caller.
+
+An implementation may assume that at every call site the arguments for `out` or `in out` parameters never alias.
+Under those assumptions, the `out` and `inout` cases may be optimized to use pass-by-reference instead of copy-in and copy-out.
+
+> Note: Applications that rely on the precise order in which write-back for `out` and `in out` parameters is performed are already on shaky semantic ground.
+
+Body
+----
+
+The _body_ of a function declaration consists of statements enclosed in curly braces `{}`.
+
+In some cases a function declaration does not include a body, and in these cases the declaration must be terminated with a semicolon (`;`):
+
+```hlsl
+func getCount() -> int;
+```
+
+> Note: Slang does not require "forward declaration" of functions, although
+> forward declarations are supported as a compatibility feature.
+>
+> The only place where a function declaration without a definition should be
+> required is in the body of an `interface` declaration.
+
+
+The result type of a function may be specified after the parameter list using a _result type clause_ consisting of an arrow (`->`) followed by a type.
+If the function result type is `void`, the result type clause may be elided:
+
+```hlsl
+func modify(x: in out int) { x++; }
+```
+
+
+### Traditional Syntax
+
+Functions can also be declared with traditional C-style syntax:
+
+```hlsl
+float add(int x, float y) { return float(x) + y; }
+
+void modify(in out int x) { x ++; }
+```
+
+> Note: Currently traditional syntax must be used for shader entry point functions,
+> because only the traditional syntax currently supports attaching semantics to
+> parameters.
+
+### Entry Points
+
+An _entry point_ is a function that will be used as the starting point of execution for one or more invocations of a shader.
+
+
+
+Structure Types
+---------------
+
+Structure types are declared using the `struct` keyword:
+
+```hlsl
+struct Person
+{
+    var age : int;
+    float height;
+
+    int getAge() { return age; }
+    func getHeight() -> float { return this.height; }
+    static func getPopulation() -> int { ... }
+}
+```
+
+The body of a structure type declaration may include variable, type, function, and initializer declarations.
+
+### Fields
+
+Variable declarations in the body of a structure type declaration are also referred to as _fields_.
+
+A field that is marked `static` is shared between all instances of the type, and is semantically like a global variable marked `static`.
+
+A non-`static` field is also called an _instance field_.
+
+### Methods
+
+Function declarations in the body of a structure type declaration are also referred to as _methods_.
+
+A method declaration may be marked `static`.
+A `static` method must be invoked on the type itself (e.g., `Person.getPopulation()`).
+
+A non-`static` method is also referred to as an _instance method_.
+Instance methods must be invoked on an instance of the type (e.g., `somePerson.getAge()`).
+The body of an instance method has access to an implicit `this` parameter which refers to the instance on which the method was invoked.
+
+By default the `this` parameter of an instance method acts as an immutable variable.
+An instance method with the `[mutating]` attribute receives a mutable `this` parameter, and can only be invoked on a mutable value of the structure type.
+
+### Inheritance
+
+A structure type declaration may include an _inheritance clause_ that consists of a colon (`:`) followed by a comma-separated list of types that the structure type inherits from:
+
+```
+struct Person : IHasAge, IHasName
+{ .... }
+```
+
+When a structure type declares that it inherits from an interface, the programmer asserts that the structure type implements the required members of the interface.
+
+### Syntax Details
+
+A structure declaration does *not* need to be terminated with a semicolon:
+
+```hlsl
+// A terminating semicolon is allowed
+struct Stuff { ... };
+
+// The semicolon is not required
+struct Things { ... }
+```
+
+When a structure declaration ends without a semicolon, the closing curly brace (`}`) must be the last non-comment, non-whitespace token on its line.
+
+For compatibility with C-style code, a structure type declaration may be used as the type specifier in a traditional-style variable declaration:
+
+```hlsl
+struct Association
+{
+    int from;
+    int to;
+} associations[] =
+{
+    { 1, 1 },
+    { 2, 4 },
+    { 3, 9 },
+};
+```
+
+If a structure type declaration will be used as part of a variable declaration, then the next token of the variable declaration must appear on the same line as the closing curly brace (`}`) of the structure type declaration.
+The whole variable declaration must be terminated with a semicolon (`;`) as normal.
+
+
+Enumeration Types
+-----------------
+
+Enumeration type declarations are introduced with the `enum` keyword:
+
+```hlsl
+enum Color
+{
+    Red,
+    Green = 3,
+    Blue,
+}
+```
+
+### Cases
+
+The body of an enumeration type declaration consists of a comma-separated list of case declarations.
+An optional trailing comma may terminate the list of cases.
+
+A _case declaration_ consists of the name of the case, along with an optional initial-value expression that specifies the _tag value_ for that case.
+If the first case declaration in the body elides an initial-value expression, the value `0` is used for the tag value.
+If any other case declaration elides an initial-value expression, its tag value is one greater than the tag value of the immediately preceding case declaration.
+
+An enumeration case is referred to as if it were a `static` member of the enumeration type (e.g., `Color.Red`).
+
+### Inheritance
+
+An enumeration type declaration may include an inheritance clause:
+
+```hlsl
+enum Color : uint
+{ ... }
+```
+
+The inheritance clause of an enumeration declaration may currently only be used to specify a single type to be used as the _tag type_ of the enumeration type.
+The tag type of an enumeration must be a built-in scalar integer type.
+The tag value of each enumeration case will be a value of the tag type.
+
+If no explicit tag type is specified, the type `int` is used instead.
+
+> Note: The current Slang implementation has bugs that prevent explicit tag types from working correctly.
+
+### Conversions
+
+A value of an enumeration type can be implicitly converted to a value of its tag type:
+
+```hlsl
+int r = Color.Red;
+```
+
+Values of the tag type can be explicitly converted to the enumeration type:
+
+```hlsl
+Color red = Color(r);
+```
+
+Type Aliases
+------------
+
+A type alias is declared using the `typealias` keyword:
+
+```hlsl
+typealias Height  = int;
+```
+
+A type alias defines a name that will be equivalent to the type to the right of `=`.
+
+### Traditional Syntax
+
+Type aliases can also be declared with traditional C-style syntax:
+
+```hlsl
+typedef int Height;
+```
+
+Constant Buffers and Texture Buffers
+------------------------------------
+
+As a compatibility feature, the `cbuffer` and `tbuffer` keywords can be used to introduce variable declarations.
+
+A declaration of the form:
+
+```hlsl
+cbuffer Name
+{
+    F field;
+    // ...
+}
+```
+
+is equivalent to a declaration of the form:
+
+```hlsl
+struct AnonType
+{
+    F field;
+    // ...
+}
+__transparent ConstantBuffer<AnonType> anonVar;
+```
+
+In this expansion, `AnonType` and `anonVar` are fresh names generated for the expansion that cannot collide with any name in user code, and the modifier `__transparent` makes it so that an unqualified reference to `field` can implicitly resolve to `anonVar.field`.
+
+The keyword `tbuffer` uses an equivalent expansion, but with `TextureBuffer<T>` used instead of `ConstantBuffer<T>`.
+
+Interfaces
+----------
+
+An interface is declared using the `interface` keyword:
+
+```hlsl
+interface IRandom
+{
+    uint next();
+}
+```
+
+The body of an interface declaration may contain function, initializer, subscript, and associated type declarations.
+Each declaration in the body of an interface introduces a _requirement_ of the interface.
+Types that declare conformance to the interface must provide matching implementations of the requirements.
+
+Functions, initializers, and subscripts declared inside an interface must not have bodies; default implementations of interface requirements are not currently supported.
+
+An interface declaration may have an inheritance clause:
+
+```hlsl
+interface IBase
+{
+    int getBase();
+}
+
+interface IDerived : IBase
+{
+    int getDerived();
+}
+```
+
+The inheritance clause for an interface must only list other interfaces.
+If an interface `I` lists another interface `J` in its inheritance clause, then `J` is a _base interface_ of `I`.
+In order to conform to `I`, a type must also conform to `J`.
+
+Associated Types
+----------------
+
+An associated type declaration is introduced with `associatedtype`:
+
+```hlsl
+associatedtype Iterator;
+```
+
+An associated type declaration introduces a type into the signature of an interface, without specifying the exact concrete type to use.
+An associated type is an interface requirement, and different implementations of an interface may provide different types that satisfy the same associated type interface requirement:
+
+```
+interface IContainer
+{
+    associatedtype Iterator;
+    ...
+}
+
+struct MyArray : IContainer
+{
+    typealias Iterator = Int;
+    ...
+}
+
+struct MyLinkedList : IContainer
+{
+    struct Iterator { ... }
+    ...
+}
+```
+
+It is an error to declare an associated type anywhere other than the body of an interface declaration.
+
+An associated type declaration may have an inheritance clause.
+The inheritance clause of an associated type may only list interfaces; these are the _required interfaces_ for the associated type.
+A concrete type that is used to satisfy an associated type requirement must conform to all of the required interfaces of the associated type.
+
+Initializers
+------------
+
+An initializer declaration is introduced with the `__init` keyword:
+
+```hlsl
+struct MyVector
+{
+    float x, y;
+
+    __init(float s)
+    {
+        x = s;
+        y = s;
+    }
+}
+```
+
+> Note: Initializer declarations are a non-finalized and unstable feature, as indicated by the double-underscore (`__`) prefix on the keyword.
+> Arbitrary changes to the syntax and semantics of initializers may be introduced in future versions of Slang.
+
+An initializer declaration may only appear in the body of an interface or a structure type.
+An initializer defines a method for initializing an instance of the enclosing type.
+
+> Note: A C++ programmer might think of an initializer declaration as similar to a C++ _constructor_.
+
+An initializer has a parameter list and body just like a function declaration.
+An initializer must not include a result type clause; the result type of an initializer is always the enclosing type.
+
+An initializer is invoked by calling the enclosing type as if it were a function.
+E.g., in the example above, the initializer in `MyVector` can be invoked as `MyVector(1.0f)`.
+
+
+An initializer has access to an implicit `this` variable that is the instance being initialized; an initializer must not be marked `static`.
+The `this` variable of an initializer is always mutable; an initializer need not, and must not, be marked `[mutating]`.
+
+> Note: Slang currently does not enforce that a type with an initializer can only be initialized using its initializers.
+> It is possible for user code to declare a variable of type `MyVector` above, and explicitly write to the `x` and `y` fields to initialize it.
+> A future version of the language may close up this loophole.
+
+> Note: Slang does not provide any equivalent to C++ _destructors_ which run automatically when an instance goes out of scope.
+
+Subscripts
+----------
+
+A subscript declaration is introduced with the `__subscript` keyword:
+
+```hlsl
+struct MyVector
+{
+    ...
+
+    __subscript(int index) -> float
+    {
+        get { return index == 0 ? x : y; }
+    }
+}
+```
+
+> Note: subscript declarations are a non-finalized and unstable feature, as indicated by the double-underscore (`__`) prefix on the keyword.
+> Arbitrary changes to the syntax and semantics of subscript declarations may be introduced in future versions of Slang.
+
+A subscript declaration introduces a way for a user-defined type to support subscripting with the `[]` braces:
+
+```hlsl
+MyVector v = ...;
+float f = v[0];
+```
+
+A subscript declaration lists one or more parameters inside parentheses, followed by a result type clause starting with `->`.
+The result type clause of a subscript declaration cannot be elided.
+
+The body of a subscript declaration consists of _accessor declarations_.
+Currently only `get` accessor declarations are supported for user code.
+
+A `get` accessor declaration introduces a _getter_ for the subscript.
+The body of a getter is a code block like a function body, and must return the appropriate value for a subscript operation.
+The body of a getter can access the parameters of the enclosing subscript, as well as an implicit `this` parameter of the type that encloses the accessor.
+The `this` parameter of a getter is immutable; `[mutating]` getters are not currently supported.
+
+Extensions
+----------
+
+An extension declaration is introduced with the `extension` keyword:
+
+```hlsl
+extension MyVector
+{
+    float getLength() { return sqrt(x*x + y*y); }
+    static int getDimensionality() { return 2; }
+}
+```
+
+An extension declaration adds behavior to an existing type.
+In the example above, the `MyVector` type is extended with an instance method `getLength()`, and a static method `getDimensionality()`.
+
+An extension declaration names the type being extended after the `extension` keyword.
+The body of an extension declaration may include type declarations, functions, initializers, and subscripts.
+
+> Note: The body of an extension may *not* include variable declarations.
+> An extension cannot introduce members that would change the in-memory layout of the type being extended.
+
+The members of an extension are accessed through the type that is being extended.
+For example, for the above extension of `MyVector`, the introduced methods are accessed as follows:
+
+```hlsl
+MyVector v = ...;
+
+float f = v.getLength();
+int n = MyVector.getDimensionality();
+```
+
+An extension declaration need not be placed in the same module as the type being extended; it is possible to extend a type from third-party or standard module code.
+The members of an extension are only visible inside of modules that `import` the module declaring the extension;
+extension members are *not* automatically visible wherever the type being extended is visible.
+
+An extension declaration may include an inheritance clause:
+
+```hlsl
+extension MyVector : IPrintable
+{
+    ...
+}
+```
+
+The inheritance clause of an extension declaration may only include interfaces.
+When an extension declaration lists an interface in its inheritance clause, it asserts that the extension introduces a new conformance, such that the type being extended now conforms to the given interface.
+The extension must ensure that the type being extended satisfies all the requirements of the interface.
+Interface requirements may be satisfied by the members of the extension, members of the original type, or members introduced through other extensions visible at the point where the conformance was declared.
+
+It is an error for overlapping conformances (that is, of the same type to the same interface) to be visible at the same point.
+This includes cases where two extensions declare the same conformance, as well as those where the original type and an extension both declare the same conformance.
+The conflicting conformances may come from the same module or different modules.
+
+In order to avoid problems with conflicting conformances, when a module `M` introduces a conformance of type `T` to interface `I`, one of the following should be true:
+
+* the type `T` is declared in module `M`, or
+* the type `I` is declared in module `M`
+
+Any conformance that does not follow these rules (that is, where both `T` and `I` are imported into module `M`) is called a _retroactive_ conformance, and there is no way to guarantee that another module `N` will not introduce the same conformance.
+The runtime behavior of programs that include overlapping retroactive conformances is currently undefined.
+
+Currently, extension declarations can only apply to structure types; extensions cannot apply to enumeration types or interfaces.
+
+Generics
+--------
+
+Many kinds of declarations can be made _generic_: structure types, interfaces, extensions, functions, initializers, and subscripts.
+
+A generic declaration introduces a _generic parameter list_ enclosed in angle brackets `<>`:
+
+```hlsl
+T myFunction<T>(T left, T right, bool condition)
+{
+    return condition ? left : right;
+}
+```
+
+### Generic Parameters
+
+A generic parameter list can include one or more parameters separated by commas.
+The allowed forms for generic parameters are:
+
+* A single identifier like `T` is used to declare a _generic type parameter_ with no constraints.
+
+* A clause like `T : IFoo` is used to introduce a generic type parameter `T` where the parameter is _constrained_ so that it must conform to the `IFoo` interface.
+
+* A clause like `let N : int` is used to introduce a generic value parameter `N`, which takes on values of type `int`.
+
+> Note: The syntax for generic value parameters is provisional and subject to possible change in the future.
+
+Generic parameters may declare a default value with `=`:
+
+```hlsl
+T anotherFunction<T = float, let N : int = 4>(vector<T,N> v);
+```
+
+For generic type parameters, the default value is a type to use if no argument is specified.
+For generic value parameters, the default value is a value of the same type to use if no argument is specified.
+
+### Explicit Specialization
+
+A generic is _specialized_ by applying it to _generic arguments_ listed inside angle brackets `<>`:
+
+```hlsl
+anotherFunction<int, 3>
+```
+
+Specialization produces a reference to the declaration with all generic parameters bound to concrete arguments.
+
+When specializing a generic, generic type parameters must be matched with type arguments that conform to the constraints on the parameter, if any.
+Generic value parameters must be matched with value arguments of the appropriate type, and that are specialization-time constants.
+
+An explicitly specialized function, type, etc. may be used wherever a non-generic function, type, etc. is expected:
+
+```hlsl
+int i = anotherFunction<int,3>( int3(99) );
+```
+
+### Implicit Specialization
+
+If a generic function/type/etc. is used where a non-generic function/type/etc. is expected, the compiler attempts _implicit specialization_.
+Implicit specialization infers generic arguments from the context at the use site, as well as any default values specified for generic parameters.
+
+For example, if a programmer writes:
+
+```hlsl
+int i = anotherFunction( int3(99) );
+```
+
+The compiler will infer the generic arguments `<int, 3>` from the way that `anotherFunction` was applied to a value of type `int3`.
+
+> Note: Inference for generic arguments currently only takes the types of value arguments into account.
+> The expected result type does not currently affect inference.
+
+### Syntax Details
+
+The following examples show how generic declarations of different kinds are written:
+
+```
+T genericFunction<T>(T value);
+func genericFunction<T>(value: T) -> T;
+
+__init<T>(T value);
+
+__subscript<T>(T value) -> X { ... }
+
+struct GenericType<T>
+{
+    T field;
+}
+
+interface IGenericInterface<T> : IBase<T>
+{
+}
+```
+
+> Note: Currently there is no user-exposed syntax for writing a generic extension.
diff --git a/external/slang/share/doc/slang/language-reference/08-attributes.md b/external/slang/share/doc/slang/language-reference/08-attributes.md
new file mode 100644
index 00000000..f4d900d3
--- /dev/null
+++ b/external/slang/share/doc/slang/language-reference/08-attributes.md
@@ -0,0 +1,32 @@
+> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
+
+Attributes
+==========
+
+> Note: This section is not yet complete.
+
+## [[vk::spirv_instruction]]
+
+**SPIR-V only**
+
+This attribute is only available for Vulkan SPIR-V output.
+
+The attribute allows access to SPIR-V intrinsics, by supplying a function declaration with the appropriate signature for the SPIR-V op and no body. The intrinsic takes a single parameter which is the integer value for the SPIR-V op.
+
+In the example below, the `add` function uses this mechanism to directly invoke the SPIR-V integer add 'op', which is 128 in this case.
+
+```HLSL
+// 128 is OpIAdd in SPIR-V
+[[vk::spirv_instruction(128)]]
+uint add(uint a, uint b);
+
+RWStructuredBuffer<uint> resultBuffer;
+
+[numthreads(4,1,1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    uint threadId = dispatchThreadID.x;
+    resultBuffer[threadId] = add(threadId, threadId);
+}
+```
+
diff --git a/external/slang/share/doc/slang/language-reference/README.md b/external/slang/share/doc/slang/language-reference/README.md
new file mode 100644
index 00000000..b3a7954b
--- /dev/null
+++ b/external/slang/share/doc/slang/language-reference/README.md
@@ -0,0 +1,16 @@
+> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
+
+Slang Language Reference
+========================
+
+Contents
+--------
+
+* [1 - Introduction](01-introduction.md)
+* [2 - Lexical Structure](02-lexical-structure.md)
+* [3 - Preprocessor](03-preprocessor.md)
+* [4 - Types](04-types.md)
+* [5 - Expressions](05-expressions.md)
+* [6 - Statements](06-statements.md)
+* [7 - Declarations](07-declarations.md)
+* [8 - Attributes](08-attributes.md)
diff --git a/external/slang/docs/layout.md b/external/slang/share/doc/slang/layout.md
similarity index 98%
rename from external/slang/docs/layout.md
rename to external/slang/share/doc/slang/layout.md
index 12144c15..75e4b986 100644
--- a/external/slang/docs/layout.md
+++ b/external/slang/share/doc/slang/layout.md
@@ -46,7 +46,7 @@ The order of parameters in the user's code is derived by "walking" through the c
 
 * Walk through each source file of a translation unit in the order they were added/listed
 
-* Walk through global-scope shader paramter declarations (global variables, `cbuffer`s, etc.) in the order they are listed in the (preprocessed) file.
+* Walk through global-scope shader parameter declarations (global variables, `cbuffer`s, etc.) in the order they are listed in the (preprocessed) file.
 
 * After all global parameters for a translation unit have been walked, walk through any entry points in the translation unit.
 
@@ -64,7 +64,7 @@ Computing Resource Requirements
 
 Each shader parameter computes its resource requirements based on its type, and how it is declared.
 
-* Global-scope parameters, entry point `uniform` parameters, and `cbuffer` decalrations all use the "default" layout rules
+* Global-scope parameters, entry point `uniform` parameters, and `cbuffer` declarations all use the "default" layout rules
 
 * Entry point non-`uniform` parameters use "varying" layout rules, either input or output
 
diff --git a/external/slang/docs/nvapi-support.md b/external/slang/share/doc/slang/nvapi-support.md
similarity index 95%
rename from external/slang/docs/nvapi-support.md
rename to external/slang/share/doc/slang/nvapi-support.md
index ac3a2e94..cb96f65f 100644
--- a/external/slang/docs/nvapi-support.md
+++ b/external/slang/share/doc/slang/nvapi-support.md
@@ -46,7 +46,7 @@ Thus causing the prelude to include nvHLSLExtns.h, and specifying the slot and p
 
 The actual values for the slot and optionally the space, are found by Slang examining the values of those values at the end of preprocessing input Slang source files. 
 
-This means that if you compile Slang source that has implicit use NVAPI, the slot and optionally the space must be defined. This can be achieved with a command line -D, throught the API or through having suitable `#define`s in the Slang source code.
+This means that if you compile Slang source that has implicit use NVAPI, the slot and optionally the space must be defined. This can be achieved with a command line -D, through the API or through having suitable `#define`s in the Slang source code.
 
 It is worth noting if you *replace* the default HLSL prelude, and use NVAPI then it will be necessary to have something like the default HLSL prelude part of your custom prelude.
 
@@ -63,7 +63,7 @@ The astute reader may have noticed that the default Slang HLSL prelude *does* co
 #endif
 ```
 
-This means that the *downstream* compiler (such as DXC and FXC) must be able to handle this include. Include paths can be specified for downstream compilers via the [-X mechanism](command-line-slangc.md#downstream-arguments). So for example...
+This means that the *downstream* compiler (such as DXC and FXC) must be able to handle this include. Include paths can be specified for downstream compilers via the [-X mechanism](user-guide/08-compiling.md#downstream-arguments). So for example...
 
 ```
 -Xfxc -IpathTo/nvapi -Xdxc -IpathTo/nvapi
diff --git a/external/slang/docs/repro.md b/external/slang/share/doc/slang/repro.md
similarity index 82%
rename from external/slang/docs/repro.md
rename to external/slang/share/doc/slang/repro.md
index 54ad8b97..4d469fa2 100644
--- a/external/slang/docs/repro.md
+++ b/external/slang/share/doc/slang/repro.md
@@ -3,9 +3,9 @@ Slang Compilation Reproduction
 
 Slang has both API and command line support for reproducing compilations, so called 'repro' functionality.
 
-One use of the feature is if a compilation fails, or produces an unexpected or wrong result, it provides a simple to use mechanism where the compilation can be repeated or 'reproduced', most often on another machine. Instead of having to describe all the options, and make sure all of the files that are used are copied, and in such a way that it repeats the result, all that is required is for the compilation to be run on the host machine with repro capture enabled, and then that 'repro' used for a compilation on the test machine. There are also some mechanisms where the contents of the orginal compilation can be altered.
+One use of the feature is if a compilation fails, or produces an unexpected or wrong result, it provides a simple to use mechanism where the compilation can be repeated or 'reproduced', most often on another machine. Instead of having to describe all the options, and make sure all of the files that are used are copied, and in such a way that it repeats the result, all that is required is for the compilation to be run on the host machine with repro capture enabled, and then that 'repro' used for a compilation on the test machine. There are also some mechanisms where the contents of the original compilation can be altered.
 
-The actual data saved is the contents of the SlangCompileReqest. Currently no state is saved from the SlangSession. Saving and loading a SlangCompileRequest into a new SlangCompileRequest should provide two SlangCompileRequests with the same state, and with the second compile request having access to all the files contents the original request had directly in memory. 
+The actual data saved is the contents of the SlangCompileRequest. Currently no state is saved from the SlangSession. Saving and loading a SlangCompileRequest into a new SlangCompileRequest should provide two SlangCompileRequests with the same state, and with the second compile request having access to all the files contents the original request had directly in memory.
 
 There are a few command line options
 
@@ -26,9 +26,9 @@ First it is worth just describing what is required to reproduce a compilation. M
 
 In order to capture a complete repro file typically a compilation has to be attempted. The state before compilation can be recorded (through the API for example), but it may not be enough to repeat a compilation, as files referenced by the compilation would not yet have been accessed. The repro feature records all of these accesses and contents of such files such that compilation can either be completed or at least to the same point as was reached on the host machine. 
 
-One of the more subtle issues around reproducing a compilation is around filenames. Using the API, a client can specify source files without names, or multiple files with the same name. If files are loaded via `ISlangFileSystem`, they are typically part of a hiearchical file system. This could mean they are referenced relatively. This means there can be distinct files with the same name but differenciated by directory. The files may not easily be reconstructed back into a similar hieararchical file system - as depending on the include paths (or perhaps other mechanisms) the 'files' and their contents could be arranged in a manner very hard to replicate. To work around this the repro feature does not attempt to replicate a hierarchical file system. Instead it gives every file a unique name based on their original name. If there are multiple files with the same name it will 'uniquify' them by appending an index. Doing so means that the contents of the file system can just be held as a flat collection of files. This is not enough to enable repeating the compilation though, as we now need Slang to know which files to reference when they are requested, as they are now no longer part of a hierarchical file system and their names may have been altered. To achieve this the repro functionality stores off a map of all path requests to their contents (or lack there of). Doing so means that the file system still appears to Slang as it did in the original compilation, even with all the files being actually stored using the simpler 'flat' arrangement. 
+One of the more subtle issues around reproducing a compilation is around filenames. Using the API, a client can specify source files without names, or multiple files with the same name. If files are loaded via `ISlangFileSystem`, they are typically part of a hierarchical file system. This could mean they are referenced relatively. This means there can be distinct files with the same name but differentiated by directory. The files may not easily be reconstructed back into a similar hierarchical file system - as depending on the include paths (or perhaps other mechanisms) the 'files' and their contents could be arranged in a manner very hard to replicate. To work around this the repro feature does not attempt to replicate a hierarchical file system. Instead it gives every file a unique name based on their original name. If there are multiple files with the same name it will 'uniquify' them by appending an index. Doing so means that the contents of the file system can just be held as a flat collection of files. This is not enough to enable repeating the compilation though, as we now need Slang to know which files to reference when they are requested, as they are now no longer part of a hierarchical file system and their names may have been altered. To achieve this the repro functionality stores off a map of all path requests to their contents (or lack thereof). Doing so means that the file system still appears to Slang as it did in the original compilation, even with all the files being actually stored using the simpler 'flat' arrangement.
 
-This means that when a repro is 'extracted' it does so to a directory which holds the files with their unique 'flat' names. The name of the directory is the name of the repro file without it's extension, or if it has no extension, with the postfix '-files'. This directory will be referered to from now on as the `repro directory`.
+This means that when a repro is 'extracted' it does so to a directory which holds the files with their unique 'flat' names. The name of the directory is the name of the repro file without its extension, or if it has no extension, with the postfix '-files'. This directory will be referred to from now on as the `repro directory`.
 
 When a repro is loaded, before files are loaded from the repro itself, they will first be looked for via their unique names in the `repro directory`. If they are not there the contents of the repro file will be used. If they are there, their contents will be used instead of the contents in the repro. This provides a simple mechanism to be able to alter the source in a repro. The steps more concretely would be...
 
@@ -85,6 +85,6 @@ The function `spExtractRepro` allows for extracting the files used in a request
     
 The function `spLoadReproAsFileSystem` creates a file system that can access the contents of the repro with the same paths that were used on the originating system. The ISlangFileSystemExt produced can be set on a request and used for compilation.    
     
-Repro files are currently stored in a binary format. This format is sensitive to changes in the API, as well as internal state within a SlangCompileRequest. This means that the functionality can only be guarenteed to work with exactly the same version of Slang on the same version of compiler. In practice things are typically not so draconian, and future versions will aim to provide a more clear slang repro versioning system, and work will be performed to make more generally usable. 
+Repro files are currently stored in a binary format. This format is sensitive to changes in the API, as well as internal state within a SlangCompileRequest. This means that the functionality can only be guaranteed to work with exactly the same version of Slang on the same version of compiler. In practice things are typically not so draconian, and future versions will aim to provide a more clear slang repro versioning system, and work will be performed to make it more generally usable.
 
-Finally this version of the repo system does not take into account endianess at all. The system the repro is saved from must have the same endianess as the system loaded on.
+Finally this version of the repro system does not take into account endianness at all. The system the repro is saved from must have the same endianness as the system it is loaded on.
diff --git a/external/slang/share/doc/slang/scripts/Program.cs b/external/slang/share/doc/slang/scripts/Program.cs
new file mode 100644
index 00000000..8a87f153
--- /dev/null
+++ b/external/slang/share/doc/slang/scripts/Program.cs
@@ -0,0 +1,235 @@
+﻿using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+namespace toc
+{
+    public class Builder
+    {
+        // Converts a heading title into the anchor ID used in TOC links.
+        // The title is trimmed and lower-cased; ASCII letters, digits, '-' and '_' are kept,
+        // every space becomes '-', and all other characters are dropped.
+        public static string getAnchorId(string title)
+        {
+            StringBuilder sb = new StringBuilder();
+            // Lower-case with the invariant culture. A culture-sensitive ToLower() may map an
+            // ASCII letter outside 'a'..'z' (e.g. 'I' -> dotless 'ı' under tr-TR), and the
+            // filter below would then silently drop it from the anchor.
+            title = title.Trim().ToLowerInvariant();
+
+            foreach (var ch in title)
+            {
+                bool isLowerLetter = ch >= 'a' && ch <= 'z';
+                bool isDigit = ch >= '0' && ch <= '9';
+                if (isLowerLetter || isDigit || ch == '-' || ch == '_')
+                    sb.Append(ch);
+                else if (ch == ' ')
+                    sb.Append('-');
+            }
+            return sb.ToString();
+        }
+
+        // One entry of the table of contents; Run() creates one Node per markdown file.
+        public class Node
+        {
+            // Leading '-'-separated segments of the file name that Run() accepted as prefix
+            // parts; isChild() compares these lists to decide parent/child relationships.
+            public List<string> fileNamePrefix = new List<string>();
+            // Main title of the document as found in the file.
+            public string title;
+            // Text shown in the TOC: the title, or the "ShortTitle" override following it.
+            public string shortTitle;
+            // Value emitted in the `data-link` attribute: the file name without extension,
+            // or the last path component of a `permalink:` line when the file has one.
+            public string fileID;
+            // Section headings of the document, in file order.
+            public List<string> sections = new List<string>();
+            // Display text for each entry of `sections` (same index).
+            public List<string> sectionShortTitles = new List<string>();
+            // Nodes nested below this one in the TOC.
+            public List<Node> children = new List<Node>();
+        }
+
+        // Appends the <li> element for `n` to `sb`. A node with children gets a nested list
+        // of those children; otherwise, a node with sections gets a nested list linking to
+        // each section anchor. Sections are not listed for nodes that have children.
+        public static void buildTOC(StringBuilder sb, Node n)
+        {
+            sb.AppendFormat("<li data-link=\"{0}\"><span>{1}</span>\n", n.fileID, n.shortTitle);
+
+            bool hasChildren = n.children.Count > 0;
+            bool hasSections = n.sections.Count > 0;
+            if (hasChildren || hasSections)
+            {
+                sb.AppendLine("<ul class=\"toc_list\">");
+                if (hasChildren)
+                {
+                    foreach (var child in n.children)
+                    {
+                        buildTOC(sb, child);
+                    }
+                }
+                else
+                {
+                    int index = 0;
+                    foreach (var sectionTitle in n.sections)
+                    {
+                        var displayText = n.sectionShortTitles[index];
+                        sb.AppendFormat("<li data-link=\"{0}#{1}\"><span>{2}</span></li>\n", n.fileID, getAnchorId(sectionTitle), displayText);
+                        index++;
+                    }
+                }
+                sb.AppendLine("</ul>");
+            }
+
+            sb.AppendLine("</li>");
+        }
+        // Renders the whole TOC rooted at `n` as an HTML string wrapped in the root <ul>.
+        public static string buildTOC(Node n)
+        {
+            var html = new StringBuilder(@"<ul class=""toc_root_list"">");
+            buildTOC(html, n);
+            html.Append(@"</ul>");
+            return html.ToString();
+        }
+
+        // Returns true when `parent`'s file-name prefix list is strictly shorter than
+        // `child`'s and matches its leading entries. A parent with an empty prefix list
+        // therefore matches every child that has at least one prefix entry.
+        public static bool isChild(Node parent, Node child)
+        {
+            var parentPrefix = parent.fileNamePrefix;
+            var childPrefix = child.fileNamePrefix;
+            if (parentPrefix.Count >= childPrefix.Count)
+                return false;
+
+            for (int idx = 0; idx < parentPrefix.Count; idx++)
+            {
+                if (parentPrefix[idx] != childPrefix[idx])
+                    return false;
+            }
+            return true;
+        }
+
+        // Returns the first line after index `i` that is not empty or whitespace-only,
+        // or "" when no such line exists.
+        public static string getNextNonEmptyLine(string[] lines, int i)
+        {
+            for (int idx = i + 1; idx < lines.Length; idx++)
+            {
+                string candidate = lines[idx];
+                if (candidate.Trim().Length == 0)
+                    continue;
+                return candidate;
+            }
+            return "";
+        }
+        // Start of the markdown comment line that overrides the TOC text of the heading
+        // above it, e.g. `[//]: # (ShortTitle: Intro)`.
+        const string shortTitlePrefix = "[//]: # (ShortTitle: ";
+
+        // Returns the short title declared on the first non-empty line after `line`,
+        // or `originalTitle` when that line is not a ShortTitle marker.
+        public static string maybeGetShortTitleImpl(string originalTitle, string[] lines, int line)
+        {
+            // Ignore trailing whitespace so that the closing ')' is reliably the last character.
+            string nextLine = getNextNonEmptyLine(lines, line).TrimEnd();
+            if (!nextLine.StartsWith(shortTitlePrefix, StringComparison.Ordinal))
+                return originalTitle;
+
+            string shortTitle = nextLine.Substring(shortTitlePrefix.Length);
+            // Strip the closing ')' only when it is actually present; a marker without it
+            // must neither throw nor lose the last character of the title.
+            if (shortTitle.EndsWith(")", StringComparison.Ordinal))
+                shortTitle = shortTitle.Substring(0, shortTitle.Length - 1);
+            return shortTitle.Trim();
+        }
+
+        // Replaces '<' and '>' with their HTML entities; all other characters are copied
+        // unchanged ('&' and quotes are not escaped).
+        public static string escapeString(string input)
+        {
+            var escaped = new StringBuilder();
+            for (int idx = 0; idx < input.Length; idx++)
+            {
+                char current = input[idx];
+                switch (current)
+                {
+                    case '<':
+                        escaped.Append("&lt;");
+                        break;
+                    case '>':
+                        escaped.Append("&gt;");
+                        break;
+                    default:
+                        escaped.Append(current);
+                        break;
+                }
+            }
+            return escaped.ToString();
+        }
+        // Resolves the TOC text for a heading (short-title override or the heading itself)
+        // and escapes '<' / '>' so it can be embedded in the generated HTML.
+        public static string maybeGetShortTitle(string originalTitle, string[] lines, int line)
+        {
+            return escapeString(maybeGetShortTitleImpl(originalTitle, lines, line));
+        }
+        // Builds the table of contents for the `*.md` files directly inside `path`.
+        //
+        // Each file becomes a Node (title, sections, file ID). Nodes are nested according to
+        // the leading '-'-separated prefix segments of their file names, and the tree rooted
+        // at the node whose file ID is "index" is written to `toc.html` in `path`.
+        //
+        // Returns a log describing the files visited, any problems found and the output path.
+        public static string Run(string path)
+        {
+            StringBuilder outputSB = new StringBuilder();
+            outputSB.AppendFormat("Building table of contents from {0}...\n", path);
+            var files = System.IO.Directory.EnumerateFiles(path, "*.md").OrderBy(f => System.IO.Path.GetFileName(f));
+            List<Node> nodes = new List<Node>();
+            foreach (var f in files)
+            {
+                var content = File.ReadAllLines(f);
+                Node node = new Node();
+                node.fileID = Path.GetFileNameWithoutExtension(f);
+                outputSB.AppendFormat("  {0}.md\n", node.fileID);
+                bool mainTitleFound = false;
+                // Start at line 0 so a "# Title" heading on the very first line is not missed.
+                // The underline-style checks refer to the previous line and so require i > 0.
+                for (int i = 0; i < content.Length; i++)
+                {
+                    var line = content[i];
+                    // "Title" followed by "===": main title.
+                    if (i > 0 && line.StartsWith("==="))
+                    {
+                        mainTitleFound = true;
+                        node.title = content[i - 1];
+                        node.shortTitle = maybeGetShortTitle(node.title, content, i);
+                    }
+                    // "Section" followed by "---": section; ignored before the main title
+                    // (this also skips "---" lines that precede the title).
+                    if (i > 0 && line.StartsWith("---"))
+                    {
+                        if (!mainTitleFound) continue;
+                        node.sections.Add(content[i - 1]);
+                        node.sectionShortTitles.Add(maybeGetShortTitle(content[i - 1], content, i));
+                    }
+                    // "# Title": main title, only the first one counts.
+                    if (line.StartsWith("#") && !line.StartsWith("##") && node.title == null)
+                    {
+                        mainTitleFound = true;
+                        node.title = line.Substring(1).Trim();
+                        node.shortTitle = maybeGetShortTitle(node.title, content, i);
+                    }
+                    // "## Section": section; ignored before the main title.
+                    if (line.StartsWith("##") && !line.StartsWith("###"))
+                    {
+                        if (!mainTitleFound) continue;
+                        var sectionStr = line.Substring(2).Trim();
+                        node.sections.Add(sectionStr);
+                        node.sectionShortTitles.Add(maybeGetShortTitle(sectionStr, content, i));
+                    }
+                    // A "permalink:" line overrides the file ID with its last path component.
+                    if (line.StartsWith("permalink:"))
+                    {
+                        var permaPath = line.Substring("permalink:".Length).Trim();
+                        node.fileID = Path.GetFileName(permaPath);
+                    }
+                }
+                if (node.title == null)
+                {
+                    outputSB.AppendFormat("Error: {0} does not define a title.\n", f);
+                    node.title = "Untitled";
+                    // buildTOC renders shortTitle, so the fallback must be set there as well.
+                    node.shortTitle = node.title;
+                }
+                // Collect the leading file-name segments that form the hierarchy prefix.
+                // A segment qualifies when it is two characters long and its second
+                // character is a digit; collection stops at the first other segment.
+                var titleSecs = Path.GetFileName(f).Split('-');
+                foreach (var s in titleSecs)
+                {
+                    if (s.Length == 2 && s[1] >= '0' && s[1] <= '9')
+                    {
+                        node.fileNamePrefix.Add(s);
+                    }
+                    else
+                    {
+                        break;
+                    }
+                }
+                // Find parent node: the most recently added top-level node whose prefix is a
+                // proper prefix of this node's prefix.
+                Node parent = null;
+                for (int l = nodes.Count - 1; l >= 0; l--)
+                {
+                    var n = nodes[l];
+                    if (isChild(n, node))
+                    {
+                        parent = n;
+                        break;
+                    }
+                }
+                if (parent != null)
+                    parent.children.Add(node);
+                else
+                {
+                    // No parent: adopt any existing top-level nodes that belong under this
+                    // node, then add this node to the top level.
+                    foreach (var other in nodes)
+                    {
+                        if (isChild(node, other))
+                        {
+                            node.children.Add(other);
+                        }
+                    }
+                    foreach (var c in node.children)
+                    {
+                        nodes.Remove(c);
+                    }
+                    nodes.Add(node);
+                }
+            }
+            var root = nodes.Find(x => x.fileID == "index");
+            if (root != null)
+            {
+                var html = buildTOC(root);
+                var outPath = Path.Combine(path, "toc.html");
+                File.WriteAllText(outPath, html);
+                outputSB.AppendFormat("Output written to: {0}\n", outPath);
+            }
+            else
+            {
+                // Report the failure instead of silently producing no output.
+                outputSB.Append("Error: no top-level page with file ID 'index' was found; toc.html was not written.\n");
+            }
+            return outputSB.ToString();
+        }
+    }
+}
diff --git a/external/slang/share/doc/slang/scripts/release-note.sh b/external/slang/share/doc/slang/scripts/release-note.sh
new file mode 100644
index 00000000..d63ee873
--- /dev/null
+++ b/external/slang/share/doc/slang/scripts/release-note.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+# This script generates a release note.
+# It prints the breaking changes first, followed by the full list of changes.
+# The content is mostly based on `git log --oneline --since 202X-YY-ZZ`.
+
+# Usage: the script takes one command-line argument, which is passed to the `--since` option of `git log`.
+# For example, the following command prints the commit titles between 2024-07-01 and today.
+# ```
+# docs/scripts/release-note.sh 2024-07-01
+# ```
+
+# This script is supposed to work on all Windows based shell systems including WSL and git-bash.
+# If you make any modifications, please test them, because CI doesn't test this script.
+
+# `$verbose` is executed as a command (`true`/`false`), so the messages guarded by it
+# are printed (to stderr) only while this is set to true.
+verbose=true
+$verbose && echo "Reminder: PLEASE make sure your local repo is up-to-date before running the script." >&2
+
+# Locate the GitHub CLI executable: first on PATH, then in the default install
+# location as seen through the /mnt/c, /c and /cygdrive/c mount points.
+gh=""
+for candidate in "$(command -v gh.exe 2>/dev/null)" "/mnt/c/Program Files/GitHub CLI/gh.exe" "/c/Program Files/GitHub CLI/gh.exe" "/cygdrive/c/Program Files/GitHub CLI/gh.exe"; do
+  if [ -x "$candidate" ]; then
+    gh="$candidate"
+    break
+  fi
+done
+if [ -z "$gh" ]; then
+  # Diagnostics go to stderr so they never end up in the generated release note.
+  echo "File not found: gh.exe" >&2
+  echo "gh.exe can be downloaded from https://cli.github.com" >&2
+  exit 1
+fi
+$verbose && echo "gh.exe is found from: $gh" >&2
+
+# The single required argument is forwarded to `git log --since`.
+if [ -z "$1" ]; then
+  # Diagnostics go to stderr so they never end up in the generated release note.
+  echo "This script requires 'since' information for git-log command." >&2
+  echo "Usage: $0 2024-07-30" >&2
+  exit 1
+fi
+since="$1"
+
+# Quote the value so that dates containing spaces (e.g. "2 weeks ago") stay one argument.
+commits="$(git log --oneline --since="$since")"
+# `echo "" | wc -l` reports 1, so an empty log has to be counted explicitly as 0.
+if [ -z "$commits" ]; then
+  commitsCount=0
+else
+  commitsCount="$(echo "$commits" | wc -l)"
+fi
+
+echo "=== Breaking changes ==="
+# Collect, one per line, the commit titles whose PR carries the "pr: breaking change" label.
+breakingChanges=""
+for i in $(seq $commitsCount); do
+  line="$(echo "$commits" | head -n "$i" | tail -n 1)"
+
+  # Get PR number from the git commit title.
+  # `sed -n ... p` prints only when the "(#NNNN)" pattern matched, so a title that merely
+  # mentions "#NNNN" elsewhere yields an empty result instead of the whole line.
+  pr="$(echo "$line" | sed -n 's|.* (#\([1-9][0-9][0-9][0-9][0-9]*\)).*|\1|p')"
+  [ -z "$pr" ] && continue
+
+  # Check if the PR is marked as a breaking change
+  if "$gh" issue view "$pr" --json labels | grep -q 'pr: breaking change'; then
+    # Terminate every entry with a newline so multiple entries are not run together.
+    breakingChanges+="$line"$'\n'
+  fi
+done
+if [ -z "$breakingChanges" ]; then
+  echo "No breaking changes"
+else
+  # Each entry already ends with a newline.
+  printf '%s' "$breakingChanges"
+fi
+echo ""
+
+echo "=== All changes for this release ==="
+# Print every commit title, prefixed with "[BREAKING]" when its PR has the breaking-change
+# label and suffixed with "(#issue:goal)" when the linked issue has a "goal:" label.
+for i in $(seq $commitsCount); do
+  line="$(echo "$commits" | head -n "$i" | tail -n 1)"
+  # An empty commit list still yields one empty "line"; do not print a blank entry for it.
+  [ -z "$line" ] && continue
+
+  result="$line"
+  # Single-iteration loop: `break` acts as an early exit from the annotation steps.
+  for dummy in 1; do
+    # Get PR number from the git commit title.
+    # `sed -n ... p` prints only when the "(#NNNN)" pattern matched, so a title that merely
+    # mentions "#NNNN" elsewhere yields an empty result instead of the whole line.
+    pr="$(echo "$line" | sed -n 's|.* (#\([1-9][0-9][0-9][0-9][0-9]*\)).*|\1|p')"
+    [ -z "$pr" ] && break
+
+    # Mark breaking changes with "[BREAKING]"
+    if "$gh" issue view "$pr" --json labels | grep -q 'pr: breaking change'; then
+      result="[BREAKING] $line"
+    fi
+
+    # Get the issue number for the PR
+    body="$("$gh" issue view "$pr" --json body)"
+    [ -z "$body" ] && break
+    issue="$(echo "$body" | grep '#[1-9][0-9][0-9][0-9][0-9]*' | sed 's|.*\#\([1-9][0-9][0-9][0-9][0-9]*\).*|\1|')"
+    [ -z "$issue" ] && break
+
+    # Get the labels of the issue
+    label="$("$gh" issue view "$issue" --json labels)"
+    [ -z "$label" ] && break
+
+    # Get the goal type from the labels
+    goal="$(echo "$label" | grep '"goal:' | sed 's|.*"goal:\([^"]*\)".*|\1|')"
+    [ -z "$goal" ] && break
+
+    result+=" (#$issue:$goal)"
+  done
+  echo "$result"
+done
diff --git a/external/slang/docs/shader-execution-reordering.md b/external/slang/share/doc/slang/shader-execution-reordering.md
similarity index 100%
rename from external/slang/docs/shader-execution-reordering.md
rename to external/slang/share/doc/slang/shader-execution-reordering.md
diff --git a/external/slang/docs/shader-playground.md b/external/slang/share/doc/slang/shader-playground.md
similarity index 100%
rename from external/slang/docs/shader-playground.md
rename to external/slang/share/doc/slang/shader-playground.md
diff --git a/external/slang/docs/stdlib-doc.md b/external/slang/share/doc/slang/stdlib-doc.md
similarity index 99%
rename from external/slang/docs/stdlib-doc.md
rename to external/slang/share/doc/slang/stdlib-doc.md
index 1431fdfb..a3b69cbe 100644
--- a/external/slang/docs/stdlib-doc.md
+++ b/external/slang/share/doc/slang/stdlib-doc.md
@@ -60418,7 +60418,7 @@ matrix<T,N,M> fwidth<T, N:int, M:int>(matrix<T,N,M> x);
  This function can be applied to scalars, vectors, and matrices of
  built-in scalar types.
 
- Note: these functions are not curently implemented for Vulkan/SPIR-V output.
+ Note: these functions are not currently implemented for Vulkan/SPIR-V output.
 
 ## Signature 
 
diff --git a/external/slang/share/doc/slang/stdlib-docgen.md b/external/slang/share/doc/slang/stdlib-docgen.md
new file mode 100644
index 00000000..37c4d30e
--- /dev/null
+++ b/external/slang/share/doc/slang/stdlib-docgen.md
@@ -0,0 +1,123 @@
+# Slang Core Module Documentation Generation Tool
+
+Slang's core module reference (https://shader-slang.com/stdlib-reference) is generated by `slangc` from the source of the core module.
+This page covers how `slangc` can be used to generate this documentation.
+
+## Generating Documentation
+
+Follow these steps to generate the core module reference documentation and view the generated markdown files locally:
+
+```
+# clone stdlib-reference repo
+git clone https://github.com/shader-slang/stdlib-reference
+cd stdlib-reference
+
+# delete existing pages
+rm -rf ./interfaces
+rm -rf ./types
+rm -rf ./global-decls
+rm -rf ./attributes
+
+# generate updated pages
+slangc -compile-core-module -doc
+
+# optional: move generated toc.html to `_includes`
+mv toc.html ./_includes/stdlib-reference-toc.html
+```
+
+`slangc` will read the `config.txt` file in the stdlib-reference repository, and then generate all the markdown files
+located in `types`, `attributes`, `interfaces` and `global-decls` directory.
+
+Note that the `index.md` in root is not generated.
+
+You should review the generated markdown file to make sure it is formatted correctly after making comment edits in the
+`*.meta.slang` files.
+
+
+## Writing and Updating Documentation
+
+The core module documentation is done directly in comments inside `source/slang/*.meta.slang` files.
+A documentation comment should be placed directly above the declaration, either inside a `/**   */` comment block, or
+after `///`. The following directives are allowed in comments:
+
+- `@param paramName description` documents a parameter or a generic parameter.
+- `@remarks` starts the remarks section.
+- `@see` starts the "See also" section.
+- `@return` starts the "Return value" section.
+- `@example` starts the "Example" section.
+- `@category categoryID Category Name` marks the decl to be in a category. The category name is only required for the first time `categoryID` is used, and omitted for the remaining `@category` lines.
+- `@internal` marks the declaration as internal.
+- `@experimental` marks the declaration as experimental.
+- `@deprecated` marks the declaration as deprecated.
+
+You can use markdown syntax in any part of the comment.
+
+For overloaded functions, only document the first overload. List all parameters from all overloads in the same comment block for the first overload. Documentation on the remaining overloads will be ignored by the tool. If an overloaded decl has differing documentation on different overload candidates, the `slangc` tool will emit a warning.
+
+The following code is an example of how `_Texture.Sample` is documented. Notice that only the first overload is documented, and it also includes documentation for parameters which are only present in subsequent overloads, such as `offset`.
+
+```csharp
+    /// Samples the texture at the given location.
+    ///
+    ///@param s The `SamplerState` to use for the sampling operation. This parameter is omitted when `this` is a combined texture sampler type (`isCombined == 0`).
+    ///@param location The location to sample the texture at.
+    ///@param offset Texel offset to apply.
+    ///@param clamp The max level of detail to use.
+    ///@param[out] status The result status of the operation.
+    ///                   This parameter is currently only used when targeting HLSL.
+    ///                   For other targets, the result status is always 0.
+    ///@return The sampled texture value.
+    ///@see `SampleBias`, `SampleLevel`, `SampleGrad`, `SampleCmp`, `SampleCmpLevelZero`.
+    ///@remarks
+    /// The `Sample` function is defined for all read-only texture types, including
+    /// `Texture1D`, `Texture2D`, `Texture3D`, `TextureCube`,
+    /// `Texture1DArray`, `Texture2DArray` and `TextureCubeArray`.
+    ///
+    /// The function is not available for read-write texture types.
+    ///
+    /// For HLSL/D3D targets, the texture element type must be a scalar or vector of float or half types.
+    ///
+    [__readNone]
+    [ForceInline]
+    [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, texture_sm_4_0_fragment)]
+    T Sample(vector<float, Shape.dimensions+isArray> location)
+    {
+        ...
+    }
+
+    [__readNone]
+    [ForceInline]
+    [require(cpp_glsl_hlsl_metal_spirv_wgsl, texture_sm_4_0_fragment)]
+    T Sample(vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset)
+    {
+        ...
+    }
+
+```
+
+Note that, unlike doxygen, each directive marks the start of a new section and applies to all following paragraphs. You don't need to repeat the directive
+for every new paragraph, as the `@remarks` section in the example above shows.
+
+## What to document
+
+- Provide a brief description of the declaration in under three sentences.
+- Document all nuances, including target specific behaviors in the remarks section.
+- Include examples if needed in the examples section.
+- Provide a see also section with links to related declarations.
+
+After updating comments, build `slangc`, and run `slangc -compile-core-module -doc` in `stdlib-reference` directory to update the markdown files for preview.
+Your PR only needs to include changes to *.meta.slang files. Once your PR is merged, slang CI will run `slangc` and push the updated markdown files to
+the `stdlib-reference` repo.
+
+## Hiding a declaration
+
+Use `// @hidden:` to hide all declarations after the line for docgen purpose.
+Use `// @public: ` to stop hiding all declarations after the line. These two special lines act like
+C++'s visibility modifiers: they apply to everything after it.
+
+## How to preview generated html page locally
+
+To preview github pages locally, you need to follow instructions on setting up Jekyll: 
+https://docs.github.com/en/pages/setting-up-a-github-pages-site-with-jekyll/testing-your-github-pages-site-locally-with-jekyll
+
+You will need to use Jekyll to create a Gem file before serving it.
diff --git a/external/slang/docs/target-compatibility.md b/external/slang/share/doc/slang/target-compatibility.md
similarity index 62%
rename from external/slang/docs/target-compatibility.md
rename to external/slang/share/doc/slang/target-compatibility.md
index 0fdeeca8..d9ccb484 100644
--- a/external/slang/docs/target-compatibility.md
+++ b/external/slang/share/doc/slang/target-compatibility.md
@@ -1,66 +1,68 @@
-Slang Target Compatibility 
-==========================
+# Slang Target Compatibility
 
 Shader Model (SM) numbers are D3D Shader Model versions, unless explicitly stated otherwise.
-OpenGL compatibility is not listed here, because OpenGL isn't an officially supported target. 
+OpenGL compatibility is not listed here, because OpenGL isn't an officially supported target.
 
 Items with a + means that the feature is anticipated to be added in the future.
 Items with ^ means there is some discussion about support later in the document for this target.
 
-| Feature                                             |    D3D11     |    D3D12     |     VK     |      CUDA     |    CPU
-|-----------------------------------------------------|--------------|--------------|------------|---------------|---------------
-| [Half Type](#half)                                  |     No       |     Yes ^    |   Yes      |     Yes ^     |    No +
-| Double Type                                         |     Yes      |     Yes      |   Yes      |     Yes       |    Yes
-| Double Intrinsics                                   |     No       |   Limited +  |  Limited   |     Most      |    Yes
-| [u/int8_t Type](#int8_t)                            |     No       |   No         |   Yes ^    |     Yes       |    Yes
-| [u/int16_t Type](#int16_t)                          |     No       |   Yes ^      |   Yes ^    |     Yes       |    Yes
-| [u/int64_t Type](#int64_t)                          |     No       |   Yes ^      |   Yes      |     Yes       |    Yes
-| u/int64_t Intrinsics                                |     No       |   No         |   Yes      |     Yes       |    Yes
-| [int matrix](#int-matrix)                           |     Yes      |   Yes        |   No +     |     Yes       |    Yes
-| [tex.GetDimensions](#tex-get-dimensions)            |     Yes      |   Yes        |   Yes      |     No        |    Yes
-| [SM6.0 Wave Intrinsics](#sm6-wave)                  |     No       |   Yes        |  Partial   |     Yes ^     |    No
-| SM6.0 Quad Intrinsics                               |     No       |   Yes        |   No +     |     No        |    No
-| [SM6.5 Wave Intrinsics](#sm6.5-wave)                |     No       |   Yes ^      |   No +     |     Yes ^     |    No
-| [WaveMask Intrinsics](#wave-mask)                   |     Yes ^    |   Yes ^      |   Yes +    |     Yes       |    No
-| [WaveShuffle](#wave-shuffle)                        |     No       |   Limited ^  |   Yes      |     Yes       |    No
-| [Tesselation](#tesselation)                         |     Yes ^    |   Yes ^      |   No +     |     No        |    No
-| [Graphics Pipeline](#graphics-pipeline)             |     Yes      |   Yes        |   Yes      |     No        |    No
-| [Ray Tracing DXR 1.0](#ray-tracing-1.0)             |     No       |   Yes ^      |   Yes ^    |     No        |    No
-| Ray Tracing DXR 1.1                                 |     No       |   Yes        |   No +     |     No        |    No
-| [Native Bindless](#native-bindless)                 |     No       |    No        |   No       |     Yes       |    Yes
-| [Buffer bounds](#buffer-bounds)                     |     Yes      |   Yes        |   Yes      |   Limited ^   |    Limited ^
-| [Resource bounds](#resource-bounds)                 |     Yes      |   Yes        |   Yes      | Yes (optional)|    Yes
-| Atomics                                             |     Yes      |   Yes        |   Yes      |     Yes       |    Yes
-| Group shared mem/Barriers                           |     Yes      |   Yes        |   Yes      |     Yes       |    No + 
-| [TextureArray.Sample float](#tex-array-sample-float)|     Yes      |   Yes        |   Yes      |     No        |    Yes
-| [Separate Sampler](#separate-sampler)               |     Yes      |   Yes        |   Yes      |     No        |    Yes
-| [tex.Load](#tex-load)                               |     Yes      |   Yes        |   Yes      |  Limited ^    |    Yes
-| [Full bool](#full-bool)                             |     Yes      |   Yes        |   Yes      |     No        |    Yes ^ 
-| [Mesh Shader](#mesh-shader)                         |     No       |   Yes        |   Yes      |     No        |    No
-| [`[unroll]`](#unroll]                               |     Yes      |   Yes        |   Yes ^    |     Yes       |    Limited + 
-| Atomics                                             |     Yes      |   Yes        |   Yes      |     Yes       |    No + 
-| [Atomics on RWBuffer](#rwbuffer-atomics)            |     Yes      |   Yes        |   Yes      |     No        |    No + 
-| [Sampler Feedback](#sampler-feedback)               |     No       |   Yes        |   No +     |     No        |    Yes ^
-| [RWByteAddressBuffer Atomic](#byte-address-atomic)  |     No       |   Yes ^      |   Yes ^    |     Yes       |    No +
-| [Shader Execution Reordering](#ser)                 |     No       |   Yes ^      |   Yes ^    |     No        |    No 
-| [debugBreak](#debug-break)                          |     No       |   No         |   Yes      |     Yes       |    Yes
-| [realtime clock](#realtime-clock)                   |     No       |   Yes ^      |   Yes      |     Yes       |    No
+| Feature                                              | D3D11 | D3D12     | VK      | CUDA           | Metal | CPU       |
+| ---------------------------------------------------- | ----- | --------- | ------- | -------------- | ----- | --------- |
+| [Half Type](#half)                                   | No    | Yes ^     | Yes     | Yes ^          | Yes   | No +      |
+| Double Type                                          | Yes   | Yes       | Yes     | Yes            | No    | Yes       |
+| Double Intrinsics                                    | No    | Limited + | Limited | Most           | No    | Yes       |
+| [u/int8_t Type](#int8_t)                             | No    | No        | Yes ^   | Yes            | Yes   | Yes       |
+| [u/int16_t Type](#int16_t)                           | No    | Yes ^     | Yes ^   | Yes            | Yes   | Yes       |
+| [u/int64_t Type](#int64_t)                           | No    | Yes ^     | Yes     | Yes            | Yes   | Yes       |
+| u/int64_t Intrinsics                                 | No    | No        | Yes     | Yes            | Yes   | Yes       |
+| [int matrix](#int-matrix)                            | Yes   | Yes       | No +    | Yes            | No    | Yes       |
+| [tex.GetDimensions](#tex-get-dimensions)             | Yes   | Yes       | Yes     | No             | Yes   | Yes       |
+| [SM6.0 Wave Intrinsics](#sm6-wave)                   | No    | Yes       | Partial | Yes ^          | No    | No        |
+| SM6.0 Quad Intrinsics                                | No    | Yes       | No +    | No             | No    | No        |
+| [SM6.5 Wave Intrinsics](#sm6.5-wave)                 | No    | Yes ^     | No +    | Yes ^          | No    | No        |
+| [WaveMask Intrinsics](#wave-mask)                    | Yes ^ | Yes ^     | Yes +   | Yes            | No    | No        |
+| [WaveShuffle](#wave-shuffle)                         | No    | Limited ^ | Yes     | Yes            | No    | No        |
+| [Tesselation](#tesselation)                          | Yes ^ | Yes ^     | No +    | No             | No    | No        |
+| [Graphics Pipeline](#graphics-pipeline)              | Yes   | Yes       | Yes     | No             | Yes   | No        |
+| [Ray Tracing DXR 1.0](#ray-tracing-1.0)              | No    | Yes ^     | Yes ^   | No             | No    | No        |
+| Ray Tracing DXR 1.1                                  | No    | Yes       | No +    | No             | No    | No        |
+| [Native Bindless](#native-bindless)                  | No    | No        | No      | Yes            | No    | Yes       |
+| [Buffer bounds](#buffer-bounds)                      | Yes   | Yes       | Yes     | Limited ^      | No ^  | Limited ^ |
+| [Resource bounds](#resource-bounds)                  | Yes   | Yes       | Yes     | Yes (optional) | Yes   | Yes       |
+| Atomics                                              | Yes   | Yes       | Yes     | Yes            | Yes   | Yes       |
+| Group shared mem/Barriers                            | Yes   | Yes       | Yes     | Yes            | Yes   | No +      |
+| [TextureArray.Sample float](#tex-array-sample-float) | Yes   | Yes       | Yes     | No             | Yes   | Yes       |
+| [Separate Sampler](#separate-sampler)                | Yes   | Yes       | Yes     | No             | Yes   | Yes       |
+| [tex.Load](#tex-load)                                | Yes   | Yes       | Yes     | Limited ^      | Yes   | Yes       |
+| [Full bool](#full-bool)                              | Yes   | Yes       | Yes     | No             | Yes   | Yes ^     |
+| [Mesh Shader](#mesh-shader)                          | No    | Yes       | Yes     | No             | Yes   | No        |
+| [`[unroll]`](#unroll)                                | Yes   | Yes       | Yes ^   | Yes            | No ^  | Limited + |
+| Atomics                                              | Yes   | Yes       | Yes     | Yes            | Yes   | No +      |
+| [Atomics on RWBuffer](#rwbuffer-atomics)             | Yes   | Yes       | Yes     | No             | Yes   | No +      |
+| [Sampler Feedback](#sampler-feedback)                | No    | Yes       | No +    | No             | No    | Yes ^     |
+| [RWByteAddressBuffer Atomic](#byte-address-atomic)   | No    | Yes ^     | Yes ^   | Yes            | Yes   | No +      |
+| [Shader Execution Reordering](#ser)                  | No    | Yes ^     | Yes ^   | No             | No    | No        |
+| [debugBreak](#debug-break)                           | No    | No        | Yes     | Yes            | No    | Yes       |
+| [realtime clock](#realtime-clock)                    | No    | Yes ^     | Yes     | Yes            | No    | No        |
 
 <a id="half"></a>
+
 ## Half Type
 
 There appears to be a problem writing to a StructuredBuffer containing half on D3D12. D3D12 also appears to have problems doing calculations with half.
 
-In order for half to work in CUDA, NVRTC must be able to include `cuda_fp16.h` and related files. Please read the [CUDA target documentation](cuda-target.md) for more details. 
+In order for half to work in CUDA, NVRTC must be able to include `cuda_fp16.h` and related files. Please read the [CUDA target documentation](cuda-target.md) for more details.
 
 <a id="int8_t"></a>
+
 ## u/int8_t Type
 
-Not currently supported in D3D11/D3D12 because not supported in HLSL/DXIL/DXBC. 
+Not currently supported in D3D11/D3D12 because not supported in HLSL/DXIL/DXBC.
 
 Supported in Vulkan via the extensions `GL_EXT_shader_explicit_arithmetic_types` and `GL_EXT_shader_8bit_storage`.
 
 <a id="int16_t"></a>
+
 ## u/int16_t Type
 
 Requires SM6.2 which requires DXIL and therefore DXC and D3D12. For DXC this is discussed [here](https://github.com/Microsoft/DirectXShaderCompiler/wiki/16-Bit-Scalar-Types).
@@ -68,78 +70,89 @@ Requires SM6.2 which requires DXIL and therefore DXC and D3D12. For DXC this is
 Supported in Vulkan via the extensions `GL_EXT_shader_explicit_arithmetic_types` and `GL_EXT_shader_16bit_storage`.
 
 <a id="int64_t"></a>
+
 ## u/int64_t Type
 
 Requires SM6.0 which requires DXIL for D3D12. Therefore not available with DXBC on D3D11 or D3D12.
 
 <a id="int-matrix"></a>
+
 ## int matrix
 
-Means can use matrix types containing integer types. 
+Means can use matrix types containing integer types.
 
 <a id="tex-get-dimensions"></a>
+
 ## tex.GetDimensions
 
 tex.GetDimensions is the GetDimensions method on 'texture' objects. This is not supported on CUDA as CUDA has no equivalent functionality to get these values. GetDimensions work on Buffer resource types on CUDA.
 
 <a id="sm6-wave"></a>
+
 ## SM6.0 Wave Intrinsics
 
-CUDA has premliminary support for Wave Intrinsics, introduced in [PR #1352](https://github.com/shader-slang/slang/pull/1352). Slang synthesizes the 'WaveMask' based on program flow and the implied 'programmer view' of exectution. This support is built on top of WaveMask intrinsics with Wave Intrinsics being replaced with WaveMask Intrinsic calls with Slang generating the code to calculate the appropriate WaveMasks.
+CUDA has preliminary support for Wave Intrinsics, introduced in [PR #1352](https://github.com/shader-slang/slang/pull/1352). Slang synthesizes the 'WaveMask' based on program flow and the implied 'programmer view' of execution. This support is built on top of WaveMask intrinsics with Wave Intrinsics being replaced with WaveMask Intrinsic calls with Slang generating the code to calculate the appropriate WaveMasks.
 
 Please read [PR #1352](https://github.com/shader-slang/slang/pull/1352) for a better description of the status.
 
 <a id="sm6.5-wave"></a>
+
 ## SM6.5 Wave Intrinsics
 
-SM6.5 Wave Intrinsics are supported, but requires a downstream DXC compiler that supports SM6.5. As it stands the DXC shipping with windows does not. 
+SM6.5 Wave Intrinsics are supported, but require a downstream DXC compiler that supports SM6.5. As it stands, the DXC shipping with Windows does not.
 
 <a id="wave-mask"></a>
+
 ## WaveMask Intrinsics
 
-In order to map better to the CUDA sync/mask model Slang supports 'WaveMask' intrinsics. They operate in broadly the same way as the Wave intrinsics, but require the programmer to specify the lanes that are involved. To write code that uses wave intrinsics acrosss targets including CUDA, currently the WaveMask intrinsics must be used. For this to work, the masks passed to the WaveMask functions should exactly match the 'Active lanes' concept that HLSL uses, otherwise the result is undefined. 
+In order to map better to the CUDA sync/mask model Slang supports 'WaveMask' intrinsics. They operate in broadly the same way as the Wave intrinsics, but require the programmer to specify the lanes that are involved. To write code that uses wave intrinsics across targets including CUDA, currently the WaveMask intrinsics must be used. For this to work, the masks passed to the WaveMask functions should exactly match the 'Active lanes' concept that HLSL uses, otherwise the result is undefined.
 
 The WaveMask intrinsics are not part of HLSL and are only available on Slang.
 
 <a id="wave-shuffle"></a>
+
 ## WaveShuffle
 
-`WaveShuffle` and `WaveBroadcastLaneAt` are Slang specific intrinsic additions to expand the options available around `WaveReadLaneAt`. 
+`WaveShuffle` and `WaveBroadcastLaneAt` are Slang specific intrinsic additions to expand the options available around `WaveReadLaneAt`.
 
-To be clear this means they will not compile directly on 'standard' HLSL compilers such as `dxc`, but Slang HLSL *output* (which will not contain these intrinsics) can (and typically is) compiled via dxc.
+To be clear this means they will not compile directly on 'standard' HLSL compilers such as `dxc`, but Slang HLSL _output_ (which will not contain these intrinsics) can (and typically is) compiled via dxc.
 
 The difference between them can be summarized as follows
 
-* WaveBroadcastLaneAt - laneId must be a compile time constant 
-* WaveReadLaneAt - laneId can be dynamic but *MUST* be the same value across the Wave ie 'dynamically uniform' across the Wave
-* WaveShuffle - laneId can be truly dynamic (NOTE! That it is not strictly truly available currently on all targets, specifically HLSL)
+- WaveBroadcastLaneAt - laneId must be a compile time constant
+- WaveReadLaneAt - laneId can be dynamic but _MUST_ be the same value across the Wave ie 'dynamically uniform' across the Wave
+- WaveShuffle - laneId can be truly dynamic (NOTE! That it is not strictly truly available currently on all targets, specifically HLSL)
 
 Other than the different restrictions on laneId they act identically to WaveReadLaneAt.
 
 `WaveBroadcastLaneAt` and `WaveReadLaneAt` will work on all targets that support wave intrinsics, with the only current restriction being that on GLSL targets, only scalars and vectors are supported.
 
-`WaveShuffle` will always work on CUDA/Vulkan. 
+`WaveShuffle` will always work on CUDA/Vulkan.
 
-On HLSL based targets currently `WaveShuffle` will be converted into `WaveReadLaneAt`. Strictly speaking this means it *requires* the `laneId` to be `dynamically uniform` across the Wave. In practice some hardware supports the loosened usage, and others does not. In the future this may be fixed in Slang and/or HLSL to work across all hardware. For now if you use `WaveShuffle` on HLSL based targets it will be necessary to confirm that `WaveReadLaneAt` has the loosened behavior for all the hardware intended. If target hardware does not support the loosened restrictions it's behavior is undefined. 
+On HLSL based targets currently `WaveShuffle` will be converted into `WaveReadLaneAt`. Strictly speaking this means it _requires_ the `laneId` to be `dynamically uniform` across the Wave. In practice some hardware supports the loosened usage, and others do not. In the future this may be fixed in Slang and/or HLSL to work across all hardware. For now if you use `WaveShuffle` on HLSL based targets it will be necessary to confirm that `WaveReadLaneAt` has the loosened behavior for all the hardware intended. If target hardware does not support the loosened restrictions its behavior is undefined.
 
 <a id="tesselation"></a>
+
 ## Tesselation
 
-Although tesselation stages should work on D3D11 and D3D12 they are not tested within our test framework, and may have problems. 
+Although tesselation stages should work on D3D11 and D3D12 they are not tested within our test framework, and may have problems.
 
 <a id="native-bindless"></a>
-## Native Bindless  
 
-Bindless is possible on targets that support it - but is not the default behavior for those targets, and typically require significant effort in Slang code. 
+## Native Bindless
+
+Bindless is possible on targets that support it - but is not the default behavior for those targets, and typically requires significant effort in Slang code.
 
 'Native Bindless' targets use a form of 'bindless' for all targets. On CUDA this requires the target to use 'texture object' style binding and for the device to have 'compute capability 3.0' or higher.
 
 <a id="resource-bounds"></a>
-## Resource bounds 
+
+## Resource bounds
 
 For CUDA this is optional as can be controlled via the SLANG_CUDA_BOUNDARY_MODE macro in the `slang-cuda-prelude.h`. By default it's behavior is `cudaBoundaryModeZero`.
 
 <a id="buffer-bounds"></a>
+
 ## Buffer Bounds
 
 This is the feature when accessing outside of the bounds of a Buffer there is well defined behavior - on read returning all 0s, and on write, the write being ignored.
@@ -148,24 +161,30 @@ On CPU there is only bounds checking on debug compilation of C++ code. This will
 
 On CUDA out of bounds accesses default to element 0 (!). The behavior can be controlled via the SLANG_CUDA_BOUND_CHECK macro in the `slang-cuda-prelude.h`. This behavior may seem a little strange - and it requires a buffer that has at least one member to not do something nasty. It is really a 'least worst' answer to a difficult problem and is better than out of range accesses or worse writes.
 
+In Metal, accessing a buffer out of bounds is undefined behavior.
+
 <a id="tex-array-sample-float"></a>
-## TextureArray.Sample float 
+
+## TextureArray.Sample float
 
 When using 'Sample' on a TextureArray, CUDA treats the array index parameter as an int, even though it is passed as a float.
 
 <a id="separate-sampler"></a>
+
 ## Separate Sampler
 
-This feature means that a multiple Samplers can be used with a Texture. In terms of the HLSL code this can be seen as the 'SamplerState' being a parameter passed to the 'Sample' method on a texture object. 
+This feature means that multiple Samplers can be used with a Texture. In terms of the HLSL code this can be seen as the 'SamplerState' being a parameter passed to the 'Sample' method on a texture object.
 
 On CUDA the SamplerState is ignored, because on this target a 'texture object' is the Texture and Sampler combination.
 
 <a id="graphics-pipeline"></a>
+
 ## Graphics Pipeline
 
-CPU and CUDA only currently support compute shaders. 
+CPU and CUDA only currently support compute shaders.
 
 <a id="ray-tracing-1.0"></a>
+
 ## Ray Tracing DXR 1.0
 
 Vulkan does not support a local root signature, but there is the concept of a 'shader record'. In Slang a single constant buffer can be marked as a shader record with the `[[vk::shader_record]]` attribute, for example:
@@ -175,53 +194,61 @@ Vulkan does not support a local root signature, but there is the concept of a 's
 cbuffer ShaderRecord
 {
 	uint shaderRecordID;
-} 
+}
 ```
 
-In practice to write shader code that works across D3D12 and VK you should have a single constant buffer marked as 'shader record' for VK and then on D3D that constant buffer should be bound in the local root signature on D3D. 
+In practice to write shader code that works across D3D12 and VK you should have a single constant buffer marked as 'shader record' for VK and then on D3D that constant buffer should be bound in the local root signature on D3D.
 
 <a id="tex-load"></a>
+
 ## tex.Load
 
-tex.Load is only supported on CUDA for Texture1D. Additionally CUDA only allows such access for linear memory, meaning the bound texture can also not have mip maps. Load *is* allowed on RWTexture types of other dimensions including 1D on CUDA.
+tex.Load is only supported on CUDA for Texture1D. Additionally CUDA only allows such access for linear memory, meaning the bound texture can also not have mip maps. Load _is_ allowed on RWTexture types of other dimensions including 1D on CUDA.
 
 <a id="full-bool"></a>
+
 ## Full bool
 
-Means fully featured bool support. CUDA has issues around bool because there isn't a vector bool type built in. Currently bool aliases to an int vector type. 
+Means fully featured bool support. CUDA has issues around bool because there isn't a vector bool type built in. Currently bool aliases to an int vector type.
 
-On CPU there are some issues in so far as bool's size is not well defined in size an alignment. Most C++ compilers now use a byte to represent a bool. In the past it has been backed by an int on some compilers. 
+On CPU there are some issues in so far as bool's size is not well defined in size and alignment. Most C++ compilers now use a byte to represent a bool. In the past it has been backed by an int on some compilers.
 
 <a id="unroll"></a>
+
 ## `[unroll]`
 
-The unroll attribute allows for unrolling `for` loops. At the moment the feature is dependent on downstream compiler support which is mixed. In the longer term the intention is for Slang to contain it's own loop unroller - and therefore not be dependent on the feature on downstream compilers. 
+The unroll attribute allows for unrolling `for` loops. At the moment the feature is dependent on downstream compiler support which is mixed. In the longer term the intention is for Slang to contain its own loop unroller - and therefore not be dependent on the feature on downstream compilers.
 
-On C++ this attribute becomes SLANG_UNROLL which is defined in the prelude. This can be predefined if there is a suitable mechanism, if there isn't a definition SLANG_UNROLL will be an empty definition. 
+On C++ this attribute becomes SLANG_UNROLL which is defined in the prelude. This can be predefined if there is a suitable mechanism, if there isn't a definition SLANG_UNROLL will be an empty definition.
 
 On GLSL and VK targets loop unrolling uses the [GL_EXT_control_flow_attributes](https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GL_EXT_control_flow_attributes.txt) extension.
 
+Metal Shading Language does not support loop unrolling.
+
 Slang does have a cross target mechanism to [unroll loops](language-reference/06-statements.md), in the section `Compile-Time For Statement`.
 
 <a id="rwbuffer-atomics"></a>
+
 ## Atomics on RWBuffer
 
 For VK the GLSL output from Slang seems plausible, but VK binding fails in tests harness.
 
-On CUDA RWBuffer becomes CUsurfObject, which is a 'texture' type and does not support atomics. 
+On CUDA RWBuffer becomes CUsurfObject, which is a 'texture' type and does not support atomics.
 
 On the CPU atomics are not supported, but will be in the future.
 
 <a id="sampler-feedback"></a>
+
 ## Sampler Feedback
 
-The HLSL [sampler feedback feature](https://microsoft.github.io/DirectX-Specs/d3d/SamplerFeedback.html) is available for DirectX12. The features requires shader model 6.5 and therefore a version of [DXC](https://github.com/Microsoft/DirectXShaderCompiler) that supports that model or higher. The Shader Model 6.5 requirement also means only DXIL binary format is supported. 
+The HLSL [sampler feedback feature](https://microsoft.github.io/DirectX-Specs/d3d/SamplerFeedback.html) is available for DirectX12. The feature requires shader model 6.5 and therefore a version of [DXC](https://github.com/Microsoft/DirectXShaderCompiler) that supports that model or higher. The Shader Model 6.5 requirement also means only DXIL binary format is supported.
 
-There doesn't not appear to be a similar feature available in Vulkan yet, but when it is available support should be addeed.
+There does not appear to be a similar feature available in Vulkan yet, but when it is available support should be added.
 
-For CPU targets there is the IFeedbackTexture interface that requires an implemention for use. Slang does not currently include CPU implementations for texture types.  
+For CPU targets there is the IFeedbackTexture interface that requires an implementation for use. Slang does not currently include CPU implementations for texture types.
 
 <a id="byte-address-atomic"></a>
+
 ## RWByteAddressBuffer Atomic
 
 The additional supported methods on RWByteAddressBuffer are...
@@ -246,18 +273,20 @@ uint64_t RWByteAddressBuffer::InterlockedXorU64(uint byteAddress, uint64_t value
 ```
 
 On HLSL based targets this functionality is achieved using [NVAPI](https://developer.nvidia.com/nvapi). Support for NVAPI is described
-in the separate [NVAPI Support](nvapi-support.md) document.  
+in the separate [NVAPI Support](nvapi-support.md) document.
 
 On Vulkan, for float the [`GL_EXT_shader_atomic_float`](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_EXT_shader_atomic_float.html) extension is required. For int64 the [`GL_EXT_shader_atomic_int64`](https://raw.githubusercontent.com/KhronosGroup/GLSL/master/extensions/ext/GL_EXT_shader_atomic_int64.txt) extension is required.
 
-CUDA requires SM6.0 or higher for int64 support. 
+CUDA requires SM6.0 or higher for int64 support.
 
 <a id="mesh-shader"></a>
+
 ## Mesh Shader
 
 There is preliminary [Mesh Shader support](https://github.com/shader-slang/slang/pull/2464).
 
 <a id="ser"></a>
+
 ## Shader Execution Reordering
 
 More information about [Shader Execution Reordering](shader-execution-reordering.md).
@@ -265,20 +294,22 @@ More information about [Shader Execution Reordering](shader-execution-reordering
 Currently support is available in D3D12 via NVAPI, and for Vulkan via the [GL_NV_shader_invocation_reorder](https://github.com/KhronosGroup/GLSL/blob/master/extensions/nv/GLSL_NV_shader_invocation_reorder.txt) extension.
 
 <a id="debug-break"></a>
+
 ## Debug Break
 
 Slang has preliminary support for `debugBreak()` intrinsic. With the appropriate tooling, when `debugBreak` is hit it will cause execution to halt and display in the attached debugger.
 
-Currently this is supported in all targets except HLSL. Note that on some targets if there isn't an appropriate debugging environment the debugBreak might cause execution to fail or potentially it is ignored. 
+This is not supported on HLSL, GLSL, SPIR-V or Metal backends. Note that on some targets if there isn't an appropriate debugging environment the debugBreak might cause execution to fail or potentially it is ignored.
 
-On C++ targets debugBreak is implemented using SLANG_BREAKPOINT defined in "slang-cpp-prelude.h". If there isn't a suitable intrinsic, this will default to attempting to write to `nullptr` leading to a crash. 
+On C++ targets debugBreak is implemented using SLANG_BREAKPOINT defined in "slang-cpp-prelude.h". If there isn't a suitable intrinsic, this will default to attempting to write to `nullptr` leading to a crash.
 
 Some additional details:
 
-* If [slang-llvm](cpu-target.md#slang-llvm) is being used as the downstream compiler (as is typical with `host-callable`), it will crash into the debugger, but may not produce a usable stack trace.
-* For "normal" C++ downstream compilers such as Clang/Gcc/Visual Studio, to break into readable source code, debug information is typically necessary. Disabling optimizations may be useful to break on the appropriate specific line, and have variables inspectable.
+- If [slang-llvm](cpu-target.md#slang-llvm) is being used as the downstream compiler (as is typical with `host-callable`), it will crash into the debugger, but may not produce a usable stack trace.
+- For "normal" C++ downstream compilers such as Clang/Gcc/Visual Studio, to break into readable source code, debug information is typically necessary. Disabling optimizations may be useful to break on the appropriate specific line, and have variables inspectable.
 
 <a id="realtime-clock"></a>
+
 ## Realtime Clock
 
 Realtime clock support is available via the API
@@ -290,10 +321,10 @@ uint getRealtimeClockLow();
 uint2 getRealtimeClock();
 ```
 
-On D3D this is supported through NVAPI via `NvGetSpecial`. 
+On D3D this is supported through NVAPI via `NvGetSpecial`.
 
 On Vulkan this is supported via [VK_KHR_shader_clock extension](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_clock.html)
 
 On CUDA this is supported via [clock](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#time-function).
 
-Currently this is not supported on CPU, although this will potentially be added in the future.
\ No newline at end of file
+Currently this is not supported on CPU, although this will potentially be added in the future.
diff --git a/external/slang/share/doc/slang/update_spirv.md b/external/slang/share/doc/slang/update_spirv.md
new file mode 100644
index 00000000..19dd4741
--- /dev/null
+++ b/external/slang/share/doc/slang/update_spirv.md
@@ -0,0 +1,195 @@
+# Updating external spirv
+
+There are three directories under `external` that are related to SPIR-V:
+- external/spirv-headers
+- external/spirv-tools
+- external/spirv-tools-generated
+
+In order to use the latest or custom SPIR-V, they need to be updated.
+
+
+## Fork `shader-slang/SPIRV-Tools` repo and update it
+
+Currently Slang uses [shader-slang/SPIRV-Tools](https://github.com/shader-slang/SPIRV-Tools) forked from [KhronosGroup/SPIRV-Tools](https://github.com/KhronosGroup/SPIRV-Tools).
+In order for Slang to use the latest changes from `KhronosGroup/SPIRV-Tools`, `shader-slang/SPIRV-Tools` needs to be updated.
+
+1. Fork `shader-slang/SPIRV-Tools` to your personal github organization like `your-name/SPIRV-Tools`.
+1. Clone it on your local machine.
+   ```
+   git clone https://github.com/your-name/SPIRV-Tools.git # replace `your-name` to the actual URL
+   ```
+1. Fetch from `KhronosGroup/SPIRV-Tools`.
+   ```
+   git remote add khronos https://github.com/KhronosGroup/SPIRV-Tools.git
+   git fetch khronos
+   ```
+1. Create a branch for a Pull Request.
+   ```
+   git checkout -b merge/update
+   ```
+1. Rebase to khronos/main
+   ```
+   git rebase khronos/main # use ToT
+   ```
+1. Push to Github.
+   ```
+   git push origin merge/update
+   ```
+
+The steps above will create a branch called `merge/update`. You can use a different name, but this document will use `merge/update`.
+
+
+## Modify `.gitmodules` and use the `merge/update` branch
+
+Before creating a Pull Request for `merge/update`, you should test and make sure everything works.
+
+On the Slang repo side, you need to create a branch for the following changes.
+```
+git clone https://github.com/your-name/slang.git # replace `your-name` to the actual URL
+cd slang
+git checkout -b update_spirv
+```
+
+Open `.gitmodules` and modify the setting to the following,
+```
+[submodule "external/spirv-tools"]
+	path = external/spirv-tools
+	url = https://github.com/your-name/SPIRV-Tools.git
+[submodule "external/spirv-headers"]
+	path = external/spirv-headers
+	url = https://github.com/KhronosGroup/SPIRV-Headers.git
+```
+Note that you need to replace `your-name` with the actual URL from the previous step.
+
+Apply the URL changes with the following commands,
+```
+git submodule sync
+git submodule update --init --recursive
+
+cd external/spirv-headers
+git fetch
+git checkout origin/main # use ToT
+cd ../..
+
+cd external
+cd spirv-tools
+git fetch
+git checkout merge/update # use merge/update branch
+```
+
+
+## Build spirv-tools
+
+The directory `external/spirv-tools-generated` holds a set of files generated from the spirv-tools directory.
+You need to build spirv-tools in order to generate them.
+
+```
+cd external
+cd spirv-tools
+python3.exe utils\git-sync-deps # this step may require you to register your ssh public key to gitlab.khronos.org
+cmake.exe . -B build
+cmake.exe --build build --config Release
+```
+
+
+## Copy the generated files from `spirv-tools` to `spirv-tools-generated`
+
+Copy some of the generated files from `external/spirv-tools/build/` to `external/spirv-tools-generated/`.
+The following files are ones you need to copy at the moment, but the list may change in the future.
+```
+DebugInfo.h
+NonSemanticShaderDebugInfo100.h
+OpenCLDebugInfo100.h
+build-version.inc
+core.insts-unified1.inc
+debuginfo.insts.inc
+enum_string_mapping.inc
+extension_enum.inc
+generators.inc
+glsl.std.450.insts.inc
+nonsemantic.clspvreflection.insts.inc
+nonsemantic.shader.debuginfo.100.insts.inc
+nonsemantic.vkspreflection.insts.inc
+opencl.debuginfo.100.insts.inc
+opencl.std.insts.inc
+operand.kinds-unified1.inc
+spv-amd-gcn-shader.insts.inc
+spv-amd-shader-ballot.insts.inc
+spv-amd-shader-explicit-vertex-parameter.insts.inc
+spv-amd-shader-trinary-minmax.insts.inc
+```
+
+
+## Build Slang and run slang-test
+
+There are many ways to build Slang executables. Refer to the [document](https://github.com/shader-slang/slang/blob/master/docs/building.md) for more detail.
+For a quick reference, you can build with the following commands,
+```
+cmake.exe --preset vs2019
+cmake.exe --build --preset release
+```
+
+After building Slang executables, run `slang-test` to see all tests are passing.
+```
+set SLANG_RUN_SPIRV_VALIDATION=1
+build\Release\bin\slang-test.exe -use-test-server -server-count 8
+```
+
+It is often the case that some of the tests fail because of changes in SPIRV-Headers.
+You need to properly resolve them before proceeding.
+
+
+## Create A Pull Request on `shader-slang/SPIRV-Tools`
+
+After testing is done, you should create a Pull Request on `shader-slang/SPIRV-Tools` repo.
+
+1. The git-push command will show you a URL for creating a Pull Request like the following,
+   > https://github.com/your-name/SPIRV-Tools/pull/new/merge/update # replace `your-name` to the actual URL
+
+   Create a Pull Request.
+1. Wait for all workflows to pass.
+1. Merge the PR and take a note of the commit ID for the next step.
+
+Note that this process will update `shader-slang/SPIRV-Tools` repo, but your merge is not used by `slang` repo yet.
+
+
+## Create a Pull Request on `shader-slang/slang`
+
+After the PR is merged to `shader-slang/SPIRV-Tools`, `slang` needs to start using it.
+
+On the clone of Slang repo, revert the changes in `.gitmodules` if modified.
+```
+# revert the change in .gitmodules
+git checkout .gitmodules
+git submodule sync
+git submodule update --init --recursive
+```
+
+You need to stage and commit the latest commit IDs of spirv-tools and spirv-headers.
+Note that when you want to use new commit IDs of the submodules, you have to stage them with the git-add command on the directory of the submodule itself.
+```
+cd external
+
+# Add changes in spirv-tools-generated
+git add spirv-tools-generated
+
+# Add commit ID of spirv-headers
+cd spirv-headers
+git fetch
+git checkout origin/main # Use ToT
+cd ..
+git add spirv-headers
+
+# Add commit ID of spirv-tools
+cd spirv-tools
+git fetch
+git checkout merge/update # Use merge/update branch
+cd ..
+git add spirv-tools
+
+# Add more if there are other changes to resolve the test failures.
+
+git commit
+git push origin update_spirv
+```
+Once all changes are pushed to GitHub, you can create a Pull Request on `shader-slang/slang`.
diff --git a/external/slang/share/doc/slang/user-guide/00-introduction.md b/external/slang/share/doc/slang/user-guide/00-introduction.md
new file mode 100644
index 00000000..023256b9
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/00-introduction.md
@@ -0,0 +1,103 @@
+---
+layout: user-guide
+permalink: /user-guide/introduction
+---
+
+Introduction
+============
+
+Welcome to the _Slang User's Guide_, an introduction to the Slang language, compiler, and API.
+
+Why use Slang?
+--------------
+
+The Slang system helps real-time graphics developers write cleaner and more maintainable GPU code, without sacrificing run-time performance.
+Slang extends the HLSL language with thoughtfully selected features from modern general-purpose languages that support improved developer productivity and code quality.
+These features have been carefully implemented with an understanding of GPU performance.
+
+Some of the benefits of Slang include:
+
+* Slang is backwards compatible with most existing HLSL code
+
+* _Parameter blocks_ allow shader parameters to be grouped by update rate in order to take advantage of Direct3D 12 descriptor tables and Vulkan descriptor sets, without verbose and error-prone per-parameter markup
+
+* _Interfaces_ and _generics_ provide a first-class alternative to hacky preprocessor-based or string-pasting shader specialization. Preprocessor hacks can be replaced with a well-understood language feature already used in Rust, Swift, C#, Java, and more.
+
+* _Automatic differentiation_ greatly simplifies the implementation of learning-based techniques in shaders. Slang supports automatically generating both forward derivative and backward derivative propagation functions from forward computation code.
+
+* Slang supports a first class _module_ system, which enables true separate compilation and semantic checking of shader code. 
+
+* Slang supports compute, rasterization, and ray-tracing shaders
+
+* The same Slang compiler can generate code for DX bytecode, DXIL, SPIR-V, HLSL, GLSL, CUDA, and more
+
+* Slang provides a robust and feature-complete reflection API, which provides binding/offset/layout information about all shader parameters in a consistent format across all the supported targets
+
+Who is Slang for?
+-----------------
+
+Slang aims to be the best language possible for real-time graphics developers who care about code quality, portability and performance.
+
+### Real-Time Graphics Developers
+
+Slang is primarily intended for developers creating real-time graphics applications that run on end-user/client machines, such as 3D games and digital content creation (DCC) tools.
+
+Slang can still provide value in other scenarios -- offline rather than real-time rendering, non-graphics GPU programming, or for applications that run on a server instead of client machines -- but the system has been designed first and foremost around the requirements of real-time graphics.
+
+### From Hobbyists to Professionals
+
+The Slang language is simple and familiar enough for hobbyist developers to use, but scales up to the demands of professional development teams creating next-generation game renderers.
+
+### Developers of Multi-Platform Applications
+
+The Slang system builds for multiple OSes, supports many graphics APIs, and works with GPUs from multiple hardware vendors.
+The project is completely open-source and patches to support additional platforms are welcome.
+
+Even for developers who only care about a single target platform or graphics API, Slang can provide a better programming experience than the default/native GPU language for that API.
+
+### Developers with an existing investment in HLSL code
+
+One of Slang's key features is its high degree of compatibility with existing HLSL code.
+Developers who are currently responsible for large HLSL codebases but find themselves chafing at the restrictions of that language can incrementally adopt the features of Slang to improve the quality of their codebase over time.
+
+Developers who do not have an existing investment in HLSL code, or who already have a large codebase in some other language will need to carefully consider the trade-offs in migrating to a new language (whether Slang or something else).
+
+Who is this guide for?
+----------------------
+
+The content of this guide is written for real-time graphics programmers with a moderate or higher experience level.
+It assumes the reader has previously used a real-time shading language like HLSL, GLSL, or MetalSL together with an API like Direct3D 11/12, Vulkan, or Metal.
+We also assume that the reader is familiar enough with C/C++ to understand code examples and API signatures in those languages.
+
+If you are new to programming entirely, this guide is unlikely to be helpful.
+If you are an experienced programmer but have never worked in real-time graphics with GPU shaders, you may find some of the terminology or concepts from the domain confusing.
+
+If you've only ever used OpenGL or Direct3D 11 before, some references to concepts in "modern" graphics APIs like D3D12/Vulkan/Metal may be confusing.
+This effect may be particularly pronounced for OpenGL users.
+
+It may be valuable for a user with limited experience with "modern" graphics APIs to work with both this guide and a guide to their chosen API (e.g., Direct3D 12, Vulkan, or Metal) so that concepts in each can reinforce the other.
+
+When introducing Slang language features, this guide may make reference to languages such as Swift, Rust, C#, or Java.
+Readers who almost exclusively use C/C++ may find certain features surprising or confusing, especially if they insist on equating concepts with the closest thing in C++ (assuming "generics `==` templates").
+
+Goals and Non-Goals
+-------------------
+
+The rest of this guide introduces the services provided by the Slang system and explains how to use them to solve challenges in real-time graphics programming.
+When services are introduced one after another, it may be hard to glimpse the bigger picture: why these particular services? Why these implementations? Why these APIs?
+
+Before we dive into actually _using_ Slang, let us step back and highlight some of the key design goals (and non-goals) that motivate the design:
+
+* **Performance**: Real-time graphics demands high performance, which motivates the use of GPUs. Whenever possible, the benefits of using Slang must not come at the cost of performance. When a choice involves a performance trade-off the *user* of the system should be able to make that choice.
+
+* **Productivity**: Modern GPU codebases are large and growing. Productivity in a large codebase is less about _writing_ code quickly, and more about having code that is understandable, maintainable, reusable, and extensible. Language concepts like "modularity" or "separate compilation" are valuable if they foster greater developer productivity.
+
+* **Portability**: Real-time graphics developers need to support a wide variety of hardware, graphics APIs, and operating systems. These platforms differ greatly in the level of functionality they provide. Some systems hand-wave portability concerns out of existence by enforcing a "lowest common denominator" approach and/or raising their "min spec" to exclude older or less capable platforms; our goals differ greatly. We aspire to keep our "min spec" as low as is practical (e.g., supporting Direct3D 11 and not just Direct3D 12), while also allowing each target to expose its distinguishing capabilities.
+
+* **Ease of Adoption**: A language feature or service is worthless if nobody can use it. When possible, the system should be compatible with existing code and approaches. New language features should borrow syntax and semantics from other languages users might be familiar with. APIs and tools might need to support complicated and detailed use-cases, but should also provide conveniences and short-cuts for the most common cases.
+
+* **Predictability**: Code should do what it appears to, consistently, across as many platforms as possible. Whenever possible the compiler should conform to programmer expectation, even in the presence of "undefined behavior." Tools and optimization passes should keep their behavior as predictable as possible; simple tools empower the user to do smart things.
+
+* **Limited Scope**: The Slang system is a language, compiler, and module. It is not an engine, not a renderer, and not a "framework." The Slang system explicitly does *not* assume responsibility for interacting with GPU APIs to load code, allocate resources, bind parameters, or kick off work. While a user *may* use the Slang runtime library in their application, they are not *required* to do so.
+
+The ordering here is significant, with earlier goals generally being more important than later ones.
diff --git a/external/slang/share/doc/slang/user-guide/01-get-started.md b/external/slang/share/doc/slang/user-guide/01-get-started.md
new file mode 100644
index 00000000..7868b422
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/01-get-started.md
@@ -0,0 +1,102 @@
+---
+layout: user-guide
+permalink: /user-guide/get-started
+---
+
+# Getting Started with Slang
+
+Slang enables you to do many powerful things with shader code, including compiling shader code to many different platforms, obtaining reflection information, organizing your shader library in a modern modular fashion, controlling specialization and more. The following sections help you get started with the basics of Slang in a simple example. We will assume Windows as the operating system, but the steps performed here are similar for other platforms.
+
+## Installation
+
+The easiest way to start using Slang is to download a [binary release](https://github.com/shader-slang/slang/releases/) from the github repository. Once you have downloaded and extracted the files from a release package, you can find the `slangc.exe` executable under `/bin/windows-x64/release/`. In this tutorial we will use the `slangc` standalone Slang compiler included in a release package. Note that `slang.dll` and `slang-glslang.dll` must be placed in the same directory as `slangc.exe` as they are required by the standalone executable.
+
+If you are interested in building from source, please refer to the [documentation on building Slang](../building.md). 
+
+## Your first Slang shader
+
+In this section we demonstrate how to write a simple compute shader in Slang that adds numbers from two buffers and writes the results into a third buffer. To start, create a text file named `hello-world.slang` in any directory, and paste the following content in the newly created file:
+
+```hlsl
+// hello-world.slang
+StructuredBuffer<float> buffer0;
+StructuredBuffer<float> buffer1;
+RWStructuredBuffer<float> result;
+
+[shader("compute")]
+[numthreads(1,1,1)]
+void computeMain(uint3 threadId : SV_DispatchThreadID)
+{
+    uint index = threadId.x;
+    result[index] = buffer0[index] + buffer1[index];
+}
+```
+
+> #### Note ####
+> Slang has official language extension support for both [Visual Studio](https://marketplace.visualstudio.com/items?itemName=shader-slang.slang-vs-extension) and [Visual Studio Code](https://marketplace.visualstudio.com/items?itemName=shader-slang.slang-language-extension). The extensions are powered by the Slang compiler to support a wide range of
+> assisting features including auto-completion, function signature hinting, semantic highlighting and more.
+
+As you can see, `hello-world.slang` is no different from a normal HLSL shader file. In fact, Slang is compatible with most HLSL code you would write. On top of HLSL, Slang has added many new language and compiler features that simplify various tasks with shader code, which we will cover in future chapters. For now we will demonstrate one key feature of Slang: cross-compiling to different platforms.
+
+Slang supports compiling shaders into many different targets including Direct3D 11, Direct3D 12, Vulkan, CUDA and C++ (for execution on CPU). You can run `slangc` with the following command line to compile `hello-world.slang` into Vulkan SPIRV:
+
+```bat
+.\slangc.exe hello-world.slang -profile glsl_450 -target spirv -o hello-world.spv -entry computeMain
+```
+
+If you would like to see the equivalent GLSL of the generated SPIRV code, simply change the `-target` argument to `glsl`:
+```bat
+.\slangc.exe hello-world.slang -profile glsl_450 -target glsl -o hello-world.glsl -entry computeMain
+```
+
+The resulting `hello-world.glsl` generated by `slangc` is shown below:
+```glsl
+// hello-world.glsl (generated by slangc)
+#version 450
+layout(row_major) uniform;
+layout(row_major) buffer;
+
+#line 2 0
+layout(std430, binding = 0) readonly buffer _S1 {
+    float _data[];
+} buffer0_0;
+
+#line 3
+layout(std430, binding = 1) readonly buffer _S2 {
+    float _data[];
+} buffer1_0;
+
+#line 4
+layout(std430, binding = 2) buffer _S3 {
+    float _data[];
+} result_0;
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main()
+{
+
+#line 10
+    uint index_0 = gl_GlobalInvocationID.x;
+    float _S4 = ((buffer0_0)._data[(index_0)]);
+
+#line 11
+    float _S5 = ((buffer1_0)._data[(index_0)]);
+
+#line 11
+    float _S6 = _S4 + _S5;
+
+#line 11
+    ((result_0)._data[(index_0)]) = _S6;
+
+#line 8
+    return;
+}
+```
+
+As you can see, things are being translated just as expected to GLSL: the HLSL `StructuredBuffer` and `RWStructuredBuffer` types are mapped to shader storage objects and the `[numthreads]` attribute is translated into the proper `layout(...) in` qualifier on the `main` entry-point.
+
+Note that in the generated GLSL code, all shader parameters are qualified with explicit binding layouts. This is because Slang provides a guarantee that all parameters will have fixed bindings regardless of shader optimization. Without generating explicit binding layout qualifiers, the downstream compiler in the driver may change the binding of a parameter depending on whether any preceding parameters are eliminated during optimization passes. In practice this causes a pain in application code, where developers will need to rely on run-time reflection to determine the binding location of a compiled shader kernel. The issue gets harder to manage when the application also needs to deal with shader specializations. Since Slang will always generate explicit binding locations in its output on all targets as if no parameters are eliminated, the user is assured that parameters always get a deterministic binding location without having to write any manual binding qualifiers in the Slang code themselves. In fact, we strongly encourage users not to qualify their Slang code with explicit binding qualifiers and let the Slang compiler do its work to properly lay out parameters. This is best practice to maintain code modularity and avoid potential binding location conflicts between different shader modules.
+
+## The full example
+
+The full Vulkan example that sets up and runs the `hello-world.slang` shader is located in the [/examples/hello-world](https://github.com/shader-slang/slang/tree/master/examples/hello-world) directory of the Slang repository. The example code initializes a Vulkan context and runs the compiled SPIRV code. The example code demonstrates how to use the Slang API to load and compile shaders.
diff --git a/external/slang/share/doc/slang/user-guide/02-conventional-features.md b/external/slang/share/doc/slang/user-guide/02-conventional-features.md
new file mode 100644
index 00000000..aaeea411
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/02-conventional-features.md
@@ -0,0 +1,1164 @@
+---
+layout: user-guide
+permalink: /user-guide/conventional-features
+---
+
+Conventional Language Features
+==============================
+
+Many of the language concepts in Slang are similar to those in other real-time shading languages like HLSL and GLSL, and also to general-purpose programming languages in the "C family."
+This chapter covers those parts of the Slang language that are _conventional_ and thus unlikely to surprise users who are already familiar with other shading languages, or languages in the C family.
+
+Readers who are comfortable with HLSL variables, types, functions, statements, as well as conventions for shader parameters and entry points may prefer to skip this chapter.
+Readers who are not familiar with HLSL, but who are comfortable with GLSL and/or C/C++, may want to carefully read the sections on types, expressions, shader parameters, and entry points while skimming the others.
+
+Types
+-----
+
+Slang supports conventional shading language types including scalars, vectors, matrices, arrays, structures, enumerations, and resources.
+
+> #### Note ####
+> Slang has limited support for pointers when targeting platforms with native pointer support, including SPIRV, C++, and CUDA.
+
+### Scalar Types
+
+#### Integer Types
+
+The following integer types are provided:
+
+| Name          | Description |
+|---------------|-------------|
+| `int8_t`      | 8-bit signed integer |
+| `int16_t`     | 16-bit signed integer |
+| `int`         | 32-bit signed integer |
+| `int64_t`     | 64-bit signed integer |
+| `uint8_t`     | 8-bit unsigned integer |
+| `uint16_t`    | 16-bit unsigned integer |
+| `uint`        | 32-bit unsigned integer |
+| `uint64_t`    | 64-bit unsigned integer |
+
+All targets support the 32-bit `int` and `uint` types, but support for the other types depends on the capabilities of each target platform.
+
+Integer literals can be both decimal and hexadecimal. An integer literal can be explicitly made unsigned 
+with a `u` suffix, and explicitly made 64-bit with the `ll` suffix. The type of a decimal non-suffixed integer literal is the first integer type from
+the list [`int`, `int64_t`] which can represent the specified literal value. If the value cannot fit, the literal is represented as 
+an `uint64_t` and a warning is given. The type of hexadecimal non-suffixed integer literal is the first type from the list 
+[`int`, `uint`, `int64_t`, `uint64_t`] that can represent the specified literal value. For more information on 64 bit integer literals see the documentation on [64 bit type support](../64bit-type-support.md).
+
+The following floating-point types are provided:
+
+| Name          | Description                  |
+|---------------|------------------------------|
+| `half`        | 16-bit floating-point number |
+| `float`       | 32-bit floating-point number |
+| `double`      | 64-bit floating-point number |
+
+All targets support the 32-bit `float`, but support for the other types depends on the capabilities of each target platform.
+
+### Boolean Type
+
+The type `bool` is used to represent Boolean truth values: `true` and `false`.
+
+For compatibility reasons, the `sizeof(bool)` depends on the target. 
+
+| Target |      sizeof(bool)      |
+|--------| ---------------------- |
+| GLSL   | 4 bytes / 32-bit value |
+| HLSL   | 4 bytes / 32-bit value |
+| CUDA   | 1 byte  /  8-bit value |
+
+> #### Note ####
+> When storing bool types in structures, make sure to either pad host-side data structures accordingly, or store booleans as, eg, `uint8_t`, to guarantee
+> consistency with the host language's boolean type.
+
+#### The Void Type
+
+The type `void` is used as a placeholder to represent the result type of functions that don't return anything.
+
+### Vector Types
+
+Vector types can be written as `vector<T,N>` where `T` is a scalar type and `N` is an integer from 2 to 4 (inclusive).
+The type `vector<T,N>` is a vector of `N` _elements_ (also called _components_) each of type `T`.
+
+As a convenience, pre-defined vector types exist for each scalar type and valid element count, with a name using the formula `<<scalar-type>><<element-count>>`.
+For example, `float3` is a convenient name for `vector<float,3>`.
+
+> Note: Slang doesn't support vectors longer than 4 elements. They map to native vector types on many platforms, including CUDA, and none of these platforms support vectors longer than 4 elements. If needed, you can use an array like `float myArray[8]`.
+
+### Matrix Types
+
+Matrix types can be written as `matrix<T,R,C>` where `T` is a scalar type and both `R` and `C` are integers from 2 to 4 (inclusive).
+The type `matrix<T,R,C>` is a matrix with _elements_ of type `T`, and comprising `R` rows and `C` columns.
+
+As a convenience, pre-defined matrix types exist for each scalar type and valid row/column count, with a name using the formula `<<scalar-type>><<row-count>>x<<column-count>>`.
+For example, a `float3x4` is a convenient name for `matrix<float,3,4>`.
+
+> #### Note ####
+> Readers familiar with GLSL should be aware that a Slang `float3x4` represents a matrix with three rows and four columns, while a GLSL `mat3x4` represents a matrix with three *columns* and four *rows*.
+> In most cases, this difference is immaterial because the subscript expression `m[i]` returns a `float4` (`vec4`) in either language.
+> For now it is enough to be aware that there is a difference in convention between Slang/HLSL/D3D and GLSL/OpenGL.
+
+### Array Types
+
+An array type `T[N]` represents an array of `N` elements of type `T`.
+When declaring a variable with an array type, the `[]` brackets come after the variable name, following the C convention for variable declarations:
+
+```hlsl
+// the type of `a` is `int[3]`
+int a[3];
+```
+
+Sometimes a value with an array type can be declared without an explicit element count.
+In some cases the element count is then inferred from the initial value of a variable:
+
+```hlsl
+// the type of `a` is `int[3]`
+int a[] = { 1, 2, 3 };
+```
+
+In other cases, the result is an _unsized_ array, where the actual element count will be determined later:
+
+```hlsl
+// the type of `b` is `int[]`
+void f( int b[] )
+{ ... }
+```
+
+It is allowed to pass a sized array as argument to an unsized array parameter when calling a function.
+
+Array types have a `getCount()` member function that returns the length of the array.
+
+```hlsl
+int f( int b[] )
+{
+    return b.getCount(); // Note: all arguments to `b` must be resolvable to sized arrays.
+}
+
+void test()
+{
+    int arr[3] = { 1, 2, 3 };
+    int x = f(arr); // OK, passing sized array to unsized array parameter, x will be 3.
+}
+```
+
+Please note that if a function calls `getCount()` method on an unsized array parameter, then all
+calls to that function must provide a sized array argument, otherwise the compiler will not be able
+to resolve the size and will report an error. The following code shows an example of valid and
+invalid cases.
+
+```hlsl
+int f( int b[] )
+{
+    return b.getCount();
+}
+int g( int b[] )
+{
+    return f(b); // transitive calls are allowed.
+}
+uniform int unsizedParam[];
+void test()
+{
+    g(unsizedParam); // Not OK, `unsizedParam` doesn't have a known size at compile time.
+    int arr[3];
+    g(arr); // OK.
+}
+```
+
+There are more limits on how runtime-sized arrays can be used than on arrays of statically-known element count.
+
+> #### Note ####
+> In Slang arrays are _value types_, meaning that assignment, parameter passing, etc. semantically copy values of array type.
+> In some languages -- notably C, C++, C#, and Java -- assignment and parameter passing treat arrays as _reference types_,
+> meaning that these operations assign/pass a reference to the same underlying storage.
+
+### Structure Types
+
+Structure types can be introduced with the `struct` keyword, as in most C-family languages:
+
+```hlsl
+struct MyData
+{
+    int a;
+    float b;
+}
+```
+
+> #### Note ####
+> Unlike C, and like most other C-family languages, the `struct` keyword in Slang introduces a type directly, and there is no need to combine it with a `typedef`.
+
+> #### Note ####
+> Slang allows for a trailing semicolon (`;`) on `struct` declarations, but does not require it.
+
+> #### Note ####
+> Unlike C/C++, `class` is not a valid keyword for GPU code and it is reserved for CPU/host side logic.
+
+Structure types can have constructors. Constructors are defined with the `__init` keyword:
+
+```hlsl
+struct MyData
+{
+     int a;
+     __init() { a = 5; }
+     __init(int t) { a = t; }
+}
+void test()
+{
+     MyData d;  // invokes default constructor, d.a = 5
+     MyData h = MyData(4); // invokes overloaded constructor, h.a = 4
+}
+```
+
+> #### Note ####
+> Slang currently does not allow default values on struct members, but we intend to support them in the future.
+
+### Enumeration Types
+
+Enumeration types can be introduced with the `enum` keyword to provide type-safe constants for a range of values:
+
+```hlsl
+enum Channel
+{
+    Red,
+    Green,
+    Blue
+}
+```
+
+Unlike C/C++, `enum` types in Slang are always scoped by default (like `enum class` in C++). You can write `enum class` in Slang if it makes you happy, but it isn't required. If you want an `enum` type to be unscoped, you can use the `[UnscopedEnum]` attribute:
+```csharp
+[UnscopedEnum]
+enum Channel
+{
+    Red, Green, Blue
+}
+void test(Channel c)
+{
+    if (c == Red) { /*...*/ }
+}
+```
+
+You can specify an explicit underlying integer type for `enum` types:
+```csharp
+enum Channel : uint16_t
+{
+    Red, Green, Blue
+}
+```
+
+By default, the underlying type of an enumeration type is `int`. Enumeration types are implicitly convertible to their underlying type. All enumeration types conform to the builtin `ILogical` interface, which provides operator overloads for bit operations. The following code is allowed:
+
+```csharp
+void test()
+{
+    Channel c = Channel.Red | Channel.Green;
+}
+```
+
+You can explicitly assign values to each enum case:
+```csharp
+enum Channel
+{
+    Red = 5,
+    Green,   // = 6
+    Blue     // = 7
+}
+```
+Slang automatically assigns integer values to enum cases without an explicit value. By default, the value starts from 0 and is incremented by 1 for each
+enum case.
+
+You can override the implicit value assignment behavior with the `[Flags]` attribute, which makes value assignment start from 1 and double for each subsequent case (i.e. successive powers of 2), making it suitable for enums that represent bit flags. For example:
+```csharp
+[Flags]
+enum Channel
+{
+    Red,   //  = 1
+    Green, //  = 2
+    Blue,  //  = 4
+    Alpha, //  = 8
+}
+```
+
+### Opaque Types
+
+The Slang core module defines a large number of _opaque_ types which provide access to objects that are allocated via GPU APIs.
+
+What all opaque types have in common is that they are not "first-class" types on most platforms.
+Opaque types (and structure or array types that contain them) may be limited in the following ways (depending on the platform):
+
+* Functions that return opaque types may not be allowed
+* Global and `static` variables that use opaque types may not be allowed
+* Opaque types may not appear in the element types of buffers, except where explicitly noted as allowed
+
+#### Texture Types
+
+Texture types -- including `Texture2D`, `TextureCubeArray`, `RWTexture2D`, and more -- are used to access formatted data for read, write, and sampling operations.
+Textures can be used to represent simple images, but also support _mipmapping_ as a way to reduce noise when sampling at lower than full resolution.
+The full space of texture types follows the formula:
+
+    <<access>>Texture<<base shape>><<multisampleness>><<arrayness>><<element type>>
+
+where:
+
+* The _access_ can be read-only (no prefix), read-write (`RW`), or read-write with a guarantee of rasterization order for operations on the given resource (`RasterizerOrdered`).
+* The _base shape_ can be `1D`, `2D`, `3D`, or `Cube`.
+* The _multisample-ness_ can be non-multiple-sample, or multi-sampled (`MS`).
+* The _array-ness_  can either be non-arrayed, or arrayed (`Array`).
+* The _element type_ can either be explicitly specified (`<T>`) or left as the default of `float4`
+
+Not all combinations of these options are supported, and some combinations may be unsupported on some targets.
+
+#### Sampler
+
+Sampler types encapsulate parameters that control addressing and filtering for texture-sampling operations.
+There are two sampler types: `SamplerState` and `SamplerComparisonState`.
+`SamplerState` is applicable to most texture sampling operations, while `SamplerComparisonState` is used for "shadow" texture sampling operations which compare texels to a reference value before filtering.
+
+> #### Note ####
+> Some target platforms and graphics APIs do not support separation of textures and sampling state into distinct types in shader code.
+> On these platforms the Slang texture types include their own sampling state, and the sampler types are placeholder types that carry no data.
+
+#### Buffers
+
+There are multiple buffer types supported by modern graphics APIs, with substantially different semantics.
+
+##### Formatted Buffers
+
+Formatted buffers (sometimes referred to as "typed buffers" or "buffer textures") are similar to 1D textures (in that they support format conversion on loads), without support for mipmapping.
+The formula for formatted buffer types is:
+
+    <<access>>Buffer<<arrayness>><<element type>>
+
+Where the _access_, _array-ness_, and _element type_ are the same as for textures, with the difference that _element type_ is not optional.
+
+A buffer type like `Buffer<float4>` represents a GPU resource that stores one or more values that may be fetched as a `float4` (but might internally be stored in another format, like RGBA8).
+
+##### Flat Buffers
+
+Flat buffers differ from formatted buffers in that they do not support format conversion.
+Flat buffers are either _structured_ buffers or _byte-addressed_ buffers.
+
+Structured buffer types like `StructuredBuffer<T>` include an explicit element type `T` that will be loaded and stored from the buffer.
+Byte-addressed buffer types like `ByteAddressBuffer` do not specify any particular element type, and instead allow for values to be loaded or stored from any (suitably aligned) byte offset in the buffer.
+Both structured and byte-addressed buffers can use an _access_ to distinguish between read-only and read-write usage.
+
+##### Constant Buffers
+
+Constant buffers (sometimes also called "uniform buffers") are typically used to pass immutable parameter data from a host application to GPU code.
+The constant buffer type `ConstantBuffer<T>` includes an explicit element type.
+Unlike formatted or flat buffers, a constant buffer conceptually contains only a *single* value of its element type, rather than one or more values.
+
+Expressions
+-----------
+
+Slang supports the following expression forms with nearly identical syntax to HLSL, GLSL, and C/C++:
+
+* Literals: `123`, `4.56`, `false`
+
+> #### Note ####
+> Unlike C/C++, but like HLSL/GLSL, an unsuffixed floating-point literal has the `float` type in Slang, rather than `double`
+
+* Member lookup: `structValue.someField`, `MyEnumType.FirstCase`
+
+* Function calls: `sin(a)`
+
+* Vector/matrix initialization: `int4(1, 2, 3, 4)`
+
+* Casts: `(int)x`, `double(0.0)`
+
+* Subscript (indexing): `a[i]`
+
+* Initializer lists: `int b[] = { 1, 2, 3 };`
+
+* Assignment: `l = r`
+
+* Operators: `-a`, `b + c`, `d++`, `e %= f`
+
+> #### Note ####
+> Like HLSL but unlike most other C-family languages, the `&&` and `||` operators do *not* currently perform "short-circuiting".
+> Instead, they evaluate all of their operands unconditionally.
+> However, the `?:` operator does perform short-circuiting if the condition is a scalar. Use of `?:` where the condition is a vector is deprecated in Slang. The vector version of `?:` operator does *not* perform short-circuiting, and the user is advised to call `select` instead.
+> The default behavior of these operators is likely to change in a future Slang release.
+
+Additional expression forms specific to shading languages follow.
+
+### Operators on Vectors and Matrices
+
+The ordinary unary and binary operators can also be applied to vectors and matrices, where they apply element-wise.
+
+> #### Note ####
+> In GLSL, most operators apply component-wise to vectors and matrices, but the multiplication operator `*` computes the traditional linear-algebraic product of two matrices, or a matrix and a vector.
+> Where a GLSL programmer would write `m * v` to multiply a `mat3x4` by a `vec3`, a Slang programmer should write `mul(v,m)` to multiply a `float3` by a `float3x4`.
+> In this example, the order of operands is reversed to account for the difference in row/column conventions.
+
+### Swizzles
+
+Given a value of vector type, a _swizzle_ expression extracts one or more of the elements of the vector to produce a new vector.
+For example, if `v` is a vector of type `float4`, then `v.xy` is a `float2` consisting of the `x` and `y` elements of `v`.
+Swizzles can reorder elements (`v.yx`) or include duplicate elements (`v.yyy`).
+
+> #### Note ####
+> Unlike GLSL, Slang only supports `xyzw` and `rgba` as swizzle elements, and not the seldom-used `stpq`.
+
+> #### Note ####
+> Unlike HLSL, Slang does not currently support matrix swizzle syntax.
+
+Statements
+----------
+
+Slang supports the following statement forms with nearly identical syntax to HLSL, GLSL, and C/C++:
+
+* Expression statements: `f(a, 3);`, `a = b * c;`
+
+* Local variable declarations: `int x = 99;`
+
+* Blocks: `{ ... }`
+
+* Empty statement: `;`
+
+* `if` statements
+
+* `switch` statements
+
+> #### Note ####
+> Unlike C/C++, `case` and `default` statements must be directly nested under a `switch`, rather than being allowed under nested control flow (Duff's Device and similar idioms are not allowed).
+> In addition, while multiple `case`s can be grouped together, all other forms of "fall through" are unsupported.
+
+* `for` statements
+
+* `while` statements
+
+* `do`-`while` statements
+
+* `break` statements
+
+* `continue` statements
+
+* `return` statements
+
+> #### Note ####
+> Slang does not support the C/C++ `goto` keyword.
+
+> #### Note ####
+> Slang does not support the C++ `throw` keyword.
+
+Additional statement forms specific to shading languages follow.
+
+### Discard Statements
+
+A `discard` statement can be used in the context of a fragment shader to terminate shader execution for the current fragment, and to cause the graphics system to discard the corresponding fragment.
+
+Functions
+---------
+
+Slang supports function definitions with traditional C syntax:
+
+```hlsl
+float addSomeThings(int x, float y)
+{
+    return x + y;
+}
+```
+
+In addition to the traditional C syntax, you can use the modern syntax to define functions with the `func` keyword:
+```swift
+func addSomeThings(x : int, y : float) -> float
+{
+    return x + y;
+}
+```
+
+Slang supports overloading of functions based on parameter types.
+
+Function parameters may be marked with a _direction_ qualifier:
+
+* `in` (the default) indicates a by-value input parameter
+* `out` indicates an output parameter
+* `inout` or `in out` indicates an input/output parameter
+
+> #### Note ####
+> The `out` and `inout` directions are superficially similar to non-`const` reference parameters in C++.
+> In cases that do not involve aliasing of mutable memory, the semantics should be equivalent.
+
+Preprocessor
+------------
+
+Slang supports a C-style preprocessor with the following directives:
+
+* `#include`
+* `#define`
+* `#undef`
+* `#if`, `#ifdef`, `#ifndef`
+* `#else`, `#elif`
+* `#endif`
+* `#error`
+* `#warning`
+* `#line`
+* `#pragma`, including `#pragma once`
+
+Variadic macros are supported by the Slang preprocessor.
+
+> #### Note ####
+> The use of `#include` in new code is discouraged as this functionality has
+> been superseded by the module system. Please refer to
+> [./04-modules-and-access-control.md](./04-modules-and-access-control.md) for details.
+
+Attributes
+----------
+
+_Attributes_ are a general syntax for decorating declarations and statements with additional semantic information or meta-data.
+Attributes are surrounded with square brackets (`[]`) and prefix the declaration or statement they apply to.
+
+For example, an attribute can indicate the programmer's desire that a loop be unrolled as much as possible:
+
+```hlsl
+[unroll]
+for(int i = 0; i < n; i++)
+{ /* ... */ }
+```
+
+> #### Note ####
+> Traditionally, all attributes in HLSL used a single layer of `[]` brackets, matching C#.
+> Later, C++ borrowed the idea from C# but used two layers of brackets (`[[]]`).
+> Some recent extensions to HLSL have used the C++-style double brackets instead of the existing single brackets syntax.
+> Slang tries to support both alternatives uniformly.
+
+Global Variables and Shader Parameters
+--------------------------------------
+
+By default, global-scope variable declarations in Slang represent _shader parameters_ passed from host application code into GPU code.
+Programmers must explicitly mark a global-scope variable with `static` for it not to be treated as a shader parameter, even if the variable is marked `const`:
+
+```hlsl
+// a shader parameter:
+float a;
+
+// also a shader parameter (despite `const`):
+const int b = 2;
+
+// a "thread-local" global variable
+static int c = 3;
+
+// a compile-time constant
+static const int d = 4;
+```
+
+### Global Constants
+
+A global-scope `static const` variable defines a compile-time constant for use in shader code.
+
+### Global-Scope Static Variables
+
+A non-`const` global-scope  `static` variable is conceptually similar to a global variable in C/C++, with the key difference that it has distinct storage per *thread* rather than being truly global.
+Each logical thread of shader execution initiated by the GPU will be allocated fresh storage for these `static` variables, and values written to those variables will be lost when a shader thread terminates.
+
+> #### Note ####
+> Some target platforms do not support `static` global variables in all use cases.
+> Support for `static` global variables should be seen as a legacy feature, and further use is discouraged.
+
+### Global Shader Parameters
+
+Global shader parameters may use any type, including both opaque and non-opaque types:
+
+```hlsl
+ConstantBuffer<MyData> c;
+Texture2D t;
+float4 color;
+```
+
+To avoid confusion, the Slang compiler will warn on any global shader parameter that includes non-opaque types, because it is likely that a user thought they were declaring a global constant or a traditional global variable.
+This warning may be suppressed by marking the parameter as `uniform`:
+
+```hlsl
+// WARNING: this declares a global shader parameter, not a global variable
+int gCounter = 0;
+
+// OK:
+uniform float scaleFactor;
+```
+
+#### Legacy Constant Buffer Syntax
+
+For compatibility with existing HLSL code, Slang also supports global-scope `cbuffer` declarations to introduce constant buffers:
+
+```hlsl
+cbuffer PerFrameCB
+{
+    float4x4 mvp;
+    float4 skyColor;
+    // ...
+}
+```
+
+A `cbuffer` declaration like this is semantically equivalent to a shader parameter declared using the `ConstantBuffer` type:
+
+```hlsl
+struct PerFrameData
+{
+    float4x4 mvp;
+    float4 skyColor;
+    // ...
+}
+ConstantBuffer<PerFrameData> PerFrameCB;
+```
+
+#### Explicit Binding Markup
+
+For compatibility with existing codebases, Slang supports pre-existing markup syntax for associating shader parameters of opaque types with binding information for specific APIs.
+
+Binding information for Direct3D platforms may be specified using `register` syntax:
+
+```hlsl
+Texture2D a : register(t0);
+Texture2D b : register(t1, space0);
+```
+
+Binding information for Vulkan (and OpenGL) may be specified using `[[vk::binding(...)]]` attributes:
+
+```hlsl
+[[vk::binding(0)]]
+Texture2D a;
+
+[[vk::binding(1, 0)]]
+Texture2D b;
+```
+
+A single parameter may use both the D3D-style and Vulkan-style markup, but in each case explicit binding markup only applies to the API family for which it was designed.
+
+> #### Note ####
+> Explicit binding markup is tedious to write and error-prone to maintain.
+> It is almost never required in Slang codebases.
+> The Slang compiler can automatically synthesize bindings in a completely deterministic fashion and in most cases the bindings it generates are what a programmer would have written manually.
+
+Shader Entry Points
+-------------------
+
+An _entry point_ is a function that can be used as the starting point for execution of a GPU thread.
+
+Here is an example of an entry-point function in Slang:
+
+```hlsl
+[shader("vertex")]
+float4 vertexMain(
+    float3 modelPosition : POSITION,
+    uint vertexID : SV_VertexID,
+    uniform float4x4 mvp)
+    : SV_Position
+{ /* ... */ }
+```
+
+In the following sections we will use this example to explain important facets of entry point declarations in Slang.
+
+### Entry Point Attribute and Stages
+
+The `[shader(...)]` attribute is used to mark a function in Slang as a shader entry point, and also to specify which pipeline stage it is meant for.
+In this example, the `vertexMain` shader indicates that it is meant for the `vertex` stage of the traditional rasterization pipeline.
+Rasterization, compute, and ray-tracing pipelines each define their own stages, and new versions of graphics APIs may introduce new stages.
+
+For compatibility with legacy codebases, Slang supports code that leaves off `[shader(...)]` attributes; in these cases application developers must specify the names and stages for their entry points via explicit command-line or API options.
+Such entry points will not be found via `IModule::findEntryPointByName()`. Instead `IModule::findAndCheckEntryPoint()` must be used, and a stage must be specified.
+It is recommended that new codebases always use `[shader(...)]` attributes both to simplify their workflow, and to make code more explicit and "self-documenting."
+
+> #### Note ####
+> In GLSL, a file of shader code may only include one entry point, and all code `#include`d into that file must be compatible with the stage of that entry point. By default, GLSL requires that an entry point be called `main`.
+> Slang allows for multiple entry points to appear in a file, for any combination of stage, and with any valid identifier as a name.
+
+### Parameters
+
+The parameters of an entry-point function represent either _varying_ or _uniform_ inputs.
+Varying inputs are those that may vary over threads invoked as part of the same batch (a draw call, compute dispatch, etc.), while uniform inputs are those that are guaranteed to be the same for all threads in a batch.
+Entry-point parameters in Slang default to varying, but may be explicitly marked `uniform`.
+
+If an entry-point function declares a non-`void` result type, then its result behaves like an anonymous `out` parameter that is varying.
+
+### Binding Semantics
+
+The varying parameters of an entry point must declare a _binding semantic_ to indicate how those parameters should be connected to the execution environment.
+A binding semantic for a parameter may be introduced by suffixing the variable name with a colon (`:`) and an identifier for the chosen binding semantic.
+A binding semantic for a function result is introduced similarly, but comes after the parameter list.
+
+It is not shown in this example, but binding semantics may also be applied to individual `struct` fields, in cases where a varying parameter of `struct` type is used.
+
+#### System-Defined Binding Semantics
+
+In the `vertexMain` entry point, the `vertexID` parameter uses the `SV_VertexID` binding semantic, which is a _system-defined_ binding semantic.
+Standard system-defined semantics are distinguished by the `SV_` prefix.
+
+A system-defined binding semantic on an input parameter indicates that the parameter should receive specific data from the GPU as defined by the pipeline and stage being used.
+For example, in a vertex shader the `SV_VertexID` binding semantic on an input yields the ID of the particular vertex being processed on the current thread.
+
+A system-defined binding semantic on an output parameter or function result indicates that when a shader thread returns from the entry point the value stored in that output should be used by the GPU in a specific way defined by the pipeline and stage being used.
+For example, in a vertex shader the `SV_Position` binding semantic on an output indicates that it represents a clip-space position that should be communicated to the rasterizer.
+
+The set of allowed system-defined binding semantics for inputs and outputs depends on the pipeline and stage of an entry point.
+Some system-defined binding semantics may only be available on specific targets or specific versions of those targets.
+
+> #### Note ####
+> Instead of using ordinary function parameters with system-defined binding semantics, GLSL uses special system-defined global variables with the `gl_` name prefix.
+> Some recent HLSL features have introduced special globally-defined functions that behave similarly to these `gl_` globals.
+
+#### User-Defined Binding Semantics
+
+In the `vertexMain` entry point, the `modelPosition` parameter uses the `POSITION` binding semantic, which is a _user-defined_ binding semantic.
+
+A user-defined binding semantic on an input indicates that the parameter should receive data with a matching binding semantic from a preceding stage.
+A user-defined binding semantic on an output indicates that the parameter should provide data to a parameter with a matching binding semantic in a following stage.
+
+Whether or not inputs and outputs with user-defined binding semantics are allowed depends on the pipeline and stage of an entry point.
+
+Different APIs and different stages within the same API may match up entry point inputs/outputs with user-defined binding semantics in one of two ways:
+
+* By-index matching: user-defined outputs from one stage and inputs to the next are matched up by order of declaration. The types of matching output/input parameters must either be identical or compatible (according to API-specific rules). Some APIs also require that the binding semantics of matching output/input parameters are identical.
+
+* By-name matching: user-defined outputs from one stage and inputs to the next are matched up by their binding semantics. The types of matching output/input parameters must either be identical or compatible (according to API-specific rules). The order of declaration of the parameters need not match.
+
+Because the matching policy may differ across APIs, the only completely safe option is for parameters passed between pipeline stages to match in terms of order, type, *and* binding semantic.
+
+> #### Note ####
+> Instead of using ordinary function parameters for user-defined varying inputs/outputs, GLSL uses global-scope variable declarations marked with the `in` or `out` modifier.
+
+### Entry-Point Uniform Parameters
+
+In the `vertexMain` entry point, the `mvp` parameter is an _entry-point uniform parameter_.
+
+Entry-point uniform parameters are semantically similar to global-scope shader parameters, but do not pollute the global scope.
+
+> #### Note ####
+> GLSL does not support entry-point `uniform` parameters; all shader parameters must be declared at the global scope.
+> Historically, HLSL has supported entry-point `uniform` parameters, but this feature was dropped by recent compilers.
+
+Mixed Shader Entry Points
+--------------------------
+
+Through the `[shader(...)]` syntax, users of Slang can freely combine multiple entry points into the same file. This can be especially convenient for reuse between entry points which have a logical connection.
+
+For example, mixed entry points offer a convenient way for ray tracing applications to concisely define a complete pipeline in one source file, while also providing users with additional opportunities to improve type safety of 
+shared structure definitions:
+
+```hlsl
+struct Payload { float3 color; };
+
+[shader("raygeneration")]
+void rayGenerationProgram() {
+    Payload payload;
+    TraceRay(/*...*/, payload);
+    /* ... */ 
+}
+
+[shader("closesthit")]
+void closestHitProgram(out Payload payload) { 
+    payload.color = {1.0};
+}
+
+[shader("miss")]
+void missProgram(out Payload payload) { 
+    payload.color = {1.0};
+}
+```
+
+> #### Note ####
+> GLSL does not support multiple entry-points; however, SPIR-V does. Vulkan users wanting to take advantage of Slang mixed entry points must pass `-fvk-use-entrypoint-name` and `-emit-spirv-directly` as compiler arguments.
+
+### Mixed Entry-Point Uniform Parameters
+
+Like with the previous `vertexMain` example, mixed entry point setups also support _entry-point uniform parameters_.
+
+However, because of certain systematic differences between entry point types, a uniform being _global_ or _local_ will have very important consequences on the underlying layout and behavior.
+
+For almost all entry point types, D3D12 will use one common root signature to define both global and local uniform parameters.
+Likewise, Vulkan descriptors will bind to a common pipeline layout. For both of these cases, Slang maps uniforms to the common root signature / pipeline layout. 
+
+However, for ray tracing entry points and D3D12, these parameters map to either _global_ root signatures or to _local_ root signatures, with the latter being stored in the shader binding table.
+In Vulkan, D3D12's global root signatures translate to a shared ray tracing pipeline layout, while local root signatures map again to shader binding table records. 
+
+When entry points match a "ray tracing" type, we bind uniforms which are in the _global_ scope to the _global_ root signature (or ray tracing pipeline layout), while uniforms which are _local_ are bound to shader binding table records, which depend on the underlying runtime record indexing. 
+
+Consider the following:
+
+```hlsl
+uniform float3 globalUniform;
+
+[shader("compute")][numThreads(1,2,3)]
+void computeMain1(uniform float3 localUniform1) 
+{ /* ... */ }
+
+[shader("compute")][numThreads(1,2,3)]
+void computeMain2(uniform float3 localUniform2) 
+{ /* ... */ }
+
+[shader("raygeneration")]
+void rayGenerationMain(uniform float3 localUniform3) 
+{ /* ... */ }
+
+[shader("closesthit")]
+void closestHitMain(uniform float3 localUniform4) 
+{ /* ... */ }
+```
+
+In this example, `globalUniform` is appended to the global root signature / pipeline layouts for _both_ compute _and_ ray generation stages for all four entry points. 
+Compute entry points lack "local root signatures" in D3D12, and likewise Vulkan has no concept of "local" vs "global" compute pipeline layouts, so `localUniform1` is "pushed" to the stack of reserved global uniform parameters for use in `computeMain1`. 
+Leaving that entry point scope "pops" that global uniform parameter such that `localUniform2` can reuse the same binding location for `computeMain2`.
+However, local uniforms for ray tracing shaders map to the corresponding "local" hit records in the shader binding table, and so no "push" or "pop" to the global root signature / pipeline layouts occurs for these parameters. 
+
+Auto-Generated Constructors
+----------
+
+### Auto-Generated Constructors - Struct
+
+Slang has the following rules:
+1. Auto-generate a `__init()` if not already defined.
+
+   Assume:
+   ```csharp
+   struct DontGenerateCtor
+   {
+       int a;
+       int b = 5;
+
+       // Since the user has explicitly defined a constructor
+       // here, Slang will not synthesize a conflicting 
+       // constructor.
+       __init()
+       {
+           // b = 5;
+           a = 5;
+           b = 6;
+       }
+   };
+
+   struct GenerateCtor
+   {
+       int a;
+       int b = 5;
+   
+       // Slang will automatically generate an implicit constructor:
+       // __init()
+       // {
+       //     b = 5;
+       // }
+   };
+   ```
+
+2. If all members have equal visibility, auto-generate a 'member-wise constructor' if not conflicting with a user defined constructor.
+   ```csharp
+   struct GenerateCtorInner
+   {
+       int a;
+
+       // Slang will automatically generate an implicit
+       // __init(int in_a)
+       // {
+       //     a = in_a;
+       // }
+   };
+   struct GenerateCtor : GenerateCtorInner
+   {
+       int b;
+       int c = 5;
+
+       // Slang will automatically generate an implicit
+       // __init(int in_a, int in_b, int in_c)
+       // {
+       //     c = 5;
+       //
+       //     this = GenerateCtorInner(in_a);
+       //
+       //     b = in_b;
+       //     c = in_c;
+       // }
+   };
+   ```
+
+3. If not all members have equal visibility, auto-generate a 'member-wise constructor' based on member visibility if not conflicting with a user defined constructor. 
+
+   We generate 3 different visibilities of 'member-wise constructor's in order:
+      1. `public` 'member-wise constructor'
+         - Contains members of visibility: `public`
+         - Do not generate if `internal` or `private` member lacks an init expression
+      2. `internal` 'member-wise constructor'
+         - Contains members of visibility: `internal`, `public`
+         - Do not generate if `private` member lacks an init expression
+      3. `private` 'member-wise constructor'
+         - Contains members of visibility: `private`, `internal`, `public`
+
+   ```csharp
+   struct GenerateCtorInner1
+   {
+       internal int a = 0;
+    
+       // Slang will automatically generate an implicit
+       // internal __init(int in_a)
+       // {
+       //     a = 0;
+       //
+       //     a = in_a;
+       // }
+   };
+   struct GenerateCtor1 : GenerateCtorInner1
+   {
+       internal int b = 0;
+       public int c;
+
+       // Slang will automatically generate an implicit
+       // internal __init(int in_a, int in_b, int in_c)
+       // {
+       //     b = 0;
+       //
+       //     this = GenerateCtorInner1(in_a);
+       //
+       //     b = in_b;
+       //     c = in_c;
+       // }
+       //
+       // public __init(int in_c)
+       // {
+       //     b = 0;
+       //
+       //     this = GenerateCtorInner1();
+       //
+       //     c = in_c;
+       // }
+   };
+
+   struct GenerateCtorInner2
+   {
+       internal int a;
+       // Slang will automatically generate an implicit
+       // internal __init(int in_a)
+       // {
+       //     a = in_a;
+       // }
+   };
+   struct GenerateCtor2 : GenerateCtorInner2
+   {
+       internal int b;
+       public int c;
+
+       /// Note: `internal b` is missing init expression,
+       // Do not generate a `public` 'member-wise' constructor.
+
+       // Slang will automatically generate an implicit
+       // internal __init(int in_a, int in_b, int in_c)
+       // {
+       //     this = GenerateCtorInner2(in_a);
+       //
+       //     b = in_b;
+       //     c = in_c;
+       // }
+   };
+   ```
+
+Initializer Lists
+----------
+Initializer lists are expressions of the form `{...}`.
+
+```csharp
+int myFunc()
+{
+    int a = {}; // Initializer List
+}
+```
+
+### Initializer List's - Scalar
+
+```csharp
+// Equivalent to `int a = 1`
+int a = {1};
+```
+
+### Initializer List's - Vectors
+
+```csharp
+// Equivalent to `float3 a = float3(1,2,3)`
+float3 a = {1, 2, 3};
+```
+
+### Initializer List's - Arrays/Matrixes
+
+#### Array Of Scalar's
+
+```csharp
+// Equivalent to `int[2] a; a[0] = 1; a[1] = 2;`
+int a[2] = {1, 2};
+```
+
+#### Array Of Aggregate's
+
+```csharp
+// Equivalent to `float3 a[2]; a[0] = {1,2,3}; a[1] = {4,5,6};`
+float3 a[2] = { {1,2,3}, {4,5,6} };
+```
+#### Flattened Array Initializer
+
+```csharp
+// Equivalent to `float3 a[2] = { {1,2,3}, {4,5,6} };`
+float3 a[2] = {1,2,3, 4,5,6};
+```
+
+### Initializer Lists - Struct
+
+In most scenarios, using an initializer list to create a struct typed value is equivalent to calling the struct's constructor using the elements in the initializer list as arguments for the constructor, for example:
+```csharp
+struct GenerateCtorInner1
+{
+    internal int a = 0;
+    
+    // Slang will automatically generate an implicit
+    // internal __init(int in_a)
+    // {
+    //     a = 0;
+    //
+    //     a = in_a;
+    // }
+
+    static GenerateCtorInner1 callGenerateCtorInner1()
+    {
+        // Calls `GenerateCtorInner1::__init(1);`
+        return {1};
+    }
+};
+struct GenerateCtor1 : GenerateCtorInner1
+{
+    internal int b = 0;
+    public int c;
+
+    // Slang will automatically generate an implicit
+    // internal __init(int in_a, int in_b, int in_c)
+    // {
+    //     this = GenerateCtorInner1(in_a);
+    //
+    //     b = 0;
+    //
+    //     b = in_b;
+    //     c = in_c;
+    // }
+    //
+    // public __init(int in_c)
+    // {
+    //     this = GenerateCtorInner1();
+    //
+    //     b = 0;
+    //
+    //     c = in_c;
+    // }
+    static GenerateCtor1 callInternalGenerateCtor()
+    {
+        // Calls `GenerateCtor1::__init(1, 2, 3);`
+        return {1, 2, 3};
+    }
+    static GenerateCtor1 callPublicGenerateCtor()
+    {
+        // Calls `GenerateCtor1::__init(1);`
+        return {1}; 
+    }
+};
+
+...
+
+// Calls `{ GenerateCtor1::__init(3), GenerateCtor1::__init(2) }`
+GenerateCtor1 val[2] = { { 3 }, { 2 } };
+```
+
+In addition, Slang also provides compatibility support for C-style initializer lists with `struct`s. C-style initializer lists can use [Partial Initializer List's](#Partial-Initializer-List's) and [Flattened Array Initializer With Struct's](#Flattened-Array-Initializer-With-Struct).
+
+A struct is considered a C-style struct if:
+1. The user never defines a custom constructor with **more than** 0 parameters
+2. All member variables in a `struct` have the same visibility (`public` or `internal` or `private`).
+
+#### Partial Initializer List's
+
+```csharp
+struct Foo
+{
+    int a;
+    int b;
+    int c;
+};
+
+...
+
+// Equivalent to `Foo val; val.a = 1; val.b = 0; val.c = 0;`
+Foo val = {1}; 
+
+// Equivalent to `Foo val; val.a = 2; val.b = 3; val.c = 0;`
+Foo val = {2, 3};
+```
+
+#### Flattened Array Initializer With Struct's
+
+```csharp
+struct Foo
+{
+    int a;
+    int b;
+    int c;
+};
+
+...
+
+// Equivalent to `Foo val[2] = { {0,1,2}, {3,4,5} };`
+Foo val[2] = {0,1,2, 3,4,5};
+```
+
+
+### Initializer Lists - Default Initializer
+
+`{}` will default initialize a value:
+
+#### Non-Struct Type
+
+The value will be zero-initialized:
+```csharp
+// Equivalent to `int val1 = 0;`
+int val1 = {};
+
+// Equivalent to `float3 val2 = float3(0);`
+float3 val2 = {};
+```
+
+#### Struct Type
+
+1. Attempt to call default constructor (`__init()`) of a `struct`
+
+   ```csharp
+   struct Foo
+   {
+       int a;
+       int b;
+       __init()
+       {
+           a = 5;
+           b = 5;
+       }
+   };
+
+   ...
+
+   // Equivalent to `Foo val = Foo();`
+   Foo val = {};
+   ```
+
+2. As a fallback, zero-initialize the struct
+
+   ```csharp
+   struct Foo
+   {
+       int a;
+       int b;
+   };
+
+   ...
+
+   // Equivalent to `Foo val; val.a = 0; val.b = 0;` 
+   Foo val = {};
+   ```
+
+### Initializer Lists - Other features
+
+Slang allows calling a default-initializer inside a default-constructor.
+
+```c#
+__init()
+{
+    this = {}; //zero-initialize `this`
+}
+```
diff --git a/external/slang/share/doc/slang/user-guide/03-convenience-features.md b/external/slang/share/doc/slang/user-guide/03-convenience-features.md
new file mode 100644
index 00000000..29e8fd2a
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/03-convenience-features.md
@@ -0,0 +1,795 @@
+---
+layout: user-guide
+permalink: /user-guide/convenience-features
+---
+
+# Basic Convenience Features
+
+This topic covers a series of nice-to-have language features in Slang. These features are not supported by HLSL but are introduced to Slang to simplify code development. Many of these features are added to Slang per request of our users. 
+
+## Type Inference in Variable Definitions
+Slang supports automatic variable type inference:
+```csharp
+var a = 1; // OK, `a` is an `int`.
+var b = float3(0, 1, 2); // OK, `b` is a `float3`.
+```
+Automatic type inference requires an initialization expression to be present. Without an initial value, the compiler is not able to infer the type of the variable. The following code will result in a compiler error:
+```csharp
+var a; // Error, cannot infer the type of `a`.
+```
+
+You may use the `var` keyword to define a variable in a modern syntax:
+```csharp
+var a : int = 1; // OK.
+var b : int; // OK.
+```
+
+## Immutable Values
+The `var` syntax and the traditional C-style variable definition introduce a _mutable_ variable whose value can be changed after its definition. If you wish to introduce an immutable or constant value, you may use the `let` keyword:
+```rust
+let a = 5; // OK, `a` is `int`.
+let b : int = 5; // OK.
+```
+Attempting to change an immutable value will result in a compiler error:
+```rust
+let a = 5;
+a = 6; // Error, `a` is immutable.
+```
+
+
+## Namespaces
+
+You can use the `namespace` syntax to define symbols in a namespace:
+```csharp
+namespace ns
+{
+    int f();
+}
+```
+
+Slang also supports the abbreviated syntax for defining nested namespaces:
+```csharp
+namespace ns1.ns2
+{
+    int f();
+}
+// equivalent to:
+namespace ns1::ns2
+{
+    int f();
+}
+// equivalent to:
+namespace ns1
+{
+    namespace ns2
+    {
+        int f();
+    }
+}
+```
+
+To access symbols defined in a namespace, you can use their qualified name with namespace prefixes:
+```csharp
+void test()
+{
+    ns1.ns2.f();
+    ns1::ns2::f(); // equivalent syntax.
+}
+```
+
+Symbols defined in the same namespace can access each other without a qualified name, this is true even if the referenced symbol is defined in a different file or module:
+```csharp
+namespace ns
+{
+    int f();
+    int g() { f(); } // OK.
+}
+```
+
+You can also use the `using` keyword to pull symbols defined in a different namespace to
+the current scope, removing the requirement for using fully qualified names.
+```cpp
+namespace ns1.ns2
+{
+    int f();
+}
+
+using ns1.ns2;
+// or:
+using namespace ns1.ns2; // alternative syntax.
+
+void test() { f(); } // OK.
+```
+
+## Member functions
+
+Slang supports defining member functions in `struct`s. For example, it is allowed to write:
+
+```hlsl
+struct Foo
+{
+    int compute(int a, int b)
+    {
+        return a + b;
+    }
+}
+```
+
+You can use the `.` syntax to invoke member functions:
+
+```hlsl
+Foo foo;
+int rs = foo.compute(1,2);
+```
+
+Slang also supports static member functions. For example:
+```
+struct Foo
+{
+    static int staticMethod(int a, int b)
+    {
+        return a + b;
+    }
+}
+```
+
+Static member functions are accessed the same way as other static members, via either the type name or an instance of the type:
+
+```hlsl
+int rs = Foo.staticMethod(a, b);
+```
+
+or
+
+```hlsl
+Foo foo;
+...
+int rs = foo.staticMethod(a,b);
+```
+
+### Mutability of member function
+
+For GPU performance considerations, the `this` argument in a member function is immutable by default. Attempting to modify `this` will result in a compile error. If you intend to define a member function that mutates the object, use `[mutating]` attribute on the member function as shown in the following example.
+
+```hlsl
+struct Foo
+{
+    int count;
+    
+    [mutating]
+    void setCount(int x) { count = x; }
+
+    // This would fail to compile.
+    // void setCount2(int x) { count = x; }
+}
+
+void test()
+{
+    Foo f;
+    f.setCount(1); // Compiles
+}
+```
+
+## Properties
+
+Properties provide a convenient way to access values exposed by a type, where the logic behind accessing the value is defined in `getter` and `setter` function pairs. Slang's `property` feature is similar to C# and Swift. 
+```csharp
+struct MyType
+{
+    uint flag;
+
+    property uint highBits
+    {
+        get { return flag >> 16; }
+        set { flag = (flag & 0xFF) + (newValue << 16); }
+    }
+};
+```
+
+Or equivalently in a "modern" syntax:
+
+```csharp
+struct MyType
+{
+    uint flag;
+
+    property highBits : uint
+    {
+        get { return flag >> 16; }
+        set { flag = (flag & 0xFF) + (newValue << 16); }
+    }
+};
+```
+
+You may also use an explicit parameter for the setter method:
+```csharp
+property uint highBits
+{
+    set(uint x) { flag = (flag & 0xFF) + (x << 16);  }
+}
+```
+
+> #### Note ####
+> Slang currently does not support automatically synthesized `getter` and `setter` methods. For example,
+> the following code is not supported:
+> ```
+> property uint highBits {get;set;} // Not supported yet.
+> ```
+
+## Initializers
+
+### Constructors
+> #### Note ####
+> The syntax for defining constructors is subject to future change.
+
+
+Slang supports defining constructors in `struct` types. You can write:
+```csharp
+struct MyType
+{
+    int myVal;
+    __init(int a, int b)
+    {
+        myVal = a + b;
+    }
+}
+```
+
+You can use a constructor to construct a new instance by using the type name in a function call expression:
+```csharp
+MyType instance = MyType(1,2);  // instance.myVal is 3.
+```
+
+You may also use a C++ style initializer list to invoke a constructor:
+```csharp
+MyType instance = {1, 2};
+```
+
+If a constructor does not define any parameters, it will be recognized as a *default* constructor that will be automatically called at the definition of a variable:
+
+```csharp
+struct MyType
+{
+    int myVal;
+    __init()
+    {
+        myVal = 10;
+    }
+};
+
+int test()
+{
+    MyType test;
+    return test.myVal; // returns 10.
+}
+```
+
+Slang will also implicitly call a *default* constructor of all parents of a derived struct (same as C++):
+```csharp
+struct MyType_Base
+{
+    int myVal1;
+    __init() {myVal1 = 22;}
+}
+
+struct MyType1 : MyType_Base
+{
+    int myVal2;
+    __init()
+    {
+        // implicitly calls `MyType_Base::__init()`
+        myVal2 = 15;
+    }
+}
+testMyType1()
+{
+    MyType1 a;
+    // a.myVal1 == 22
+    // a.myVal2 == 15
+}
+
+struct MyType2 : MyType_Base
+{
+}
+testMyType2()
+{
+    MyType2 b; // implicitly calls `MyType_Base::__init()`
+    // b.myVal1 == 22
+}
+```
+
+### Member Init Expressions
+
+Slang supports member init expressions:
+```csharp
+struct MyType
+{
+    int myVal = 5;
+}
+```
+
+## Operator Overloading
+
+Slang allows defining operator overloads as global methods:
+```csharp
+struct MyType
+{
+    int val;
+    __init(int x) { val = x; }
+}
+
+MyType operator+(MyType a, MyType b)
+{
+    return MyType(a.val + b.val);
+}
+
+int test()
+{
+    MyType rs = MyType(1) + MyType(2);
+    return rs.val; // returns 3.
+}
+```
+Slang currently supports overloading the following operators: `+`, `-`, `*`, `/`, `%`, `&`, `|`, `<`, `>`, `<=`, `>=`, `==`, `!=`, unary `-`, `~` and `!`. Please note that the `&&` and `||` operators are not supported.
+
+In addition, you can overload operator `()` as a member method:
+```csharp
+struct MyFunctor
+{
+    int operator()(float v)
+    {
+        // ...
+    }
+}
+void test()
+{
+    MyFunctor f;
+    int x = f(1.0f); // calls MyFunctor::operator().
+    int y = f.operator()(1.0f); // explicitly calling operator().
+}
+```
+
+## Subscript Operator
+
+Slang allows overriding `operator[]` with `__subscript` syntax:
+```csharp
+struct MyType
+{
+    int val[12];
+    __subscript(int x, int y) -> int
+    {
+        get { return val[x*3 + y]; }
+        set { val[x*3+y] = newValue; }
+    }
+}
+int test()
+{
+    MyType rs;
+    rs[0, 0] = 1;
+    rs[1, 0] = rs[0, 0] + 1;
+    return rs[1, 0]; // returns 2.
+}
+```
+
+## Tuple Types
+
+Tuple types can hold a collection of values of different types.
+Tuple types are defined in Slang with the `Tuple<...>` syntax, and
+constructed with either a constructor or the `makeTuple` function:
+```csharp
+Tuple<int, float, bool> t0 = Tuple<int, float, bool>(5, 2.0f, false);
+Tuple<int, float, bool> t1 = makeTuple(3, 1.0f, true);
+```
+
+Tuple elements can be accessed with `_0`, `_1` member names:
+```csharp
+int i = t0._0; // 5
+bool b = t1._2; // true
+```
+
+You can use the swizzle syntax similar to vectors and matrices to form new
+tuples:
+
+```csharp
+t0._0_0_1 // evaluates to (5, 5, 2.0f)
+```
+
+You can concatenate two tuples:
+
+```csharp
+concat(t0, t1) // evaluates to (5, 2.0f, false, 3, 1.0f, true)
+```
+
+If all element types of a tuple conform to `IComparable`, then the tuple itself
+will conform to `IComparable`, and you can use comparison operators on the tuples
+to compare them:
+
+```csharp
+let cmp = t0 < t1; // false
+```
+
+You can use `countof()` on a tuple type or a tuple value to obtain the number of
+elements in a tuple. This is considered a compile-time constant.
+```csharp
+int n = countof(Tuple<int, float>); // 2
+int n1 = countof(makeTuple(1,2,3)); // 3
+```
+
+All tuple types will be translated to `struct` types, and receive the same layout
+as `struct` types.
+
+## `Optional<T>` type
+
+Slang supports the `Optional<T>` type to represent a value that may not exist.
+The dedicated `none` value can be used for any `Optional<T>` to represent no value.
+`Optional<T>::value` property can be used to retrieve the value.
+
+```csharp
+struct MyType
+{
+    int val;
+}
+
+int useVal(Optional<MyType> p)
+{
+    if (p == none)        // Equivalent to `!p.hasValue`
+        return 0;
+    return p.value.val;
+}
+
+int caller()
+{
+    MyType v;
+    v.val = 0;
+    useVal(v);  // OK to pass `MyType` to `Optional<MyType>`.
+    useVal(none);  // OK to pass `none` to `Optional<MyType>`.
+    return 0;
+}
+```
+
+## `if_let` syntax
+Slang supports `if (let name = expr)` syntax to simplify the code when working with `Optional<T>` values. The syntax is similar to Rust's
+`if let` syntax; the value expression must be an `Optional<T>` type, for example:
+
+```csharp
+Optional<int> getOptInt() { ... }
+
+void test()
+{
+    if (let x = getOptInt())
+    {
+          // if we are here, `getOptInt` returns a value `int`.
+          // and `x` represents the `int` value.
+    }
+}
+```
+
+## `reinterpret<T>` operation
+
+Sometimes it is useful to reinterpret the bits of one type as another type, for example:
+```csharp
+struct MyType
+{
+    int a;
+    float2 b;
+    uint c;
+}
+
+MyType myVal;
+float4 myPackedVector = packMyTypeToFloat4(myVal);
+```
+
+The `packMyTypeToFloat4` function is usually implemented by bit casting each field in the source type and assigning it into the corresponding field in the target type,
+by calling `intAsFloat`, `floatAsInt` and using bit operations to shift things in the right place.
+Instead of writing `packMyTypeToFloat4` function yourself, you can use Slang's builtin `reinterpret<T>` to do just that for you:
+```
+float4 myPackedVector = reinterpret<float4>(myVal);
+```
+
+`reinterpret` can pack any type into any other type as long as the target type is no smaller than the source type.
+
+## Pointers (limited)
+
+Slang supports pointers when generating code for SPIRV, C++ and CUDA targets. The syntax for pointers is similar to C, with the exception that operator `.` can also be used to dereference a member from a pointer. For example:
+```csharp
+struct MyType
+{
+    int a;
+};
+
+int test(MyType* pObj)
+{
+    MyType* pNext = pObj + 1;
+    MyType* pNext2 = &pNext[1];
+    return pNext.a + pNext->a + (*pNext2).a + pNext2[0].a;
+}
+
+cbuffer Constants
+{
+    MyType *ptr;
+};
+
+int validTest()
+{
+    return test(ptr);
+}
+
+int invalidTest()
+{
+    // cannot produce a pointer from a local variable 
+    MyType obj;
+    return test(&obj); // !! ERROR !!
+}
+```
+
+Pointer types can also be specified using the generic syntax: `Ptr<MyType>` is equivalent to `MyType*`.
+
+### Limitations
+
+- Slang supports pointers to global memory, but not shared or local memory. For example, it is invalid to define a pointer to a local variable.
+
+- Slang supports pointers that are defined as shader parameters (e.g. as a constant buffer field).
+
+- Slang can produce pointers using the & operator from data in global memory.
+
+- Slang doesn't support forming pointers to opaque handle types, e.g. `Texture2D`. For handle pointers, use `DescriptorHandle<T>` instead.
+
+- Slang doesn't support coherent load/stores.
+
+- Slang doesn't support custom alignment specification.
+
+- Slang currently does not support pointers to immutable values, i.e. `const T*`.
+
+## `DescriptorHandle` for Bindless Descriptor Access
+
+Slang supports the `DescriptorHandle<T>` type that represents a bindless handle to a resource. This feature provides a portable way of implementing
+the bindless resource idiom. When targeting HLSL, GLSL and SPIRV where descriptor types (e.g. textures, samplers and buffers) are opaque handles,
+`DescriptorHandle<T>` will translate into a `uint2` so it can be defined in any memory location. The underlying `uint2` value is treated as an index
+to access the global descriptor heap or resource array in order to obtain the actual resource handle. On targets where resource handles
+are not opaque handles, `DescriptorHandle<T>` maps to `T` and will have the same size and alignment defined by the target.
+
+`DescriptorHandle<T>` is declared as:
+```slang
+struct DescriptorHandle<T> where T:IOpaqueDescriptor {}
+```
+where `IOpaqueDescriptor` is an interface implemented by all resource types, including textures,
+`ConstantBuffer`, `RaytracingAccelerationStructure`, `SamplerState`, `SamplerComparisonState` and all types of `StructuredBuffer`.
+
+You may also write `Texture2D.Handle` as a short-hand of `DescriptorHandle<Texture2D>`.
+
+`DescriptorHandle<T>` supports `operator *`, `operator ->`, and can implicitly convert to `T`, for example:
+
+```slang
+uniform StructuredBuffer<DescriptorHandle<Texture2D>> textures;
+uniform int textureIndex;
+
+// define a descriptor handle using builtin convenience typealias:
+uniform StructuredBuffer<float4>.Handle output;
+
+[numthreads(1,1,1)]
+void main()
+{
+    output[0] = textures[textureIndex].Load(int3(0));
+
+    // Alternatively, this syntax is also valid:
+    (*output)[0] = textures[textureIndex]->Load(int3(0));
+}
+```
+
+By default, when targeting HLSL, `DescriptorHandle<T>` translates to uses of `ResourceDescriptorHeap[index]` and `SamplerDescriptorHeap[index]`.
+In particular, when combined with combined texture sampler types (e.g. `Sampler2D`), Slang will fetch the texture using the first
+component of the handle, and the sampler state from the second component of the handle. For example:
+
+```
+uniform DescriptorHandle<Sampler2D> s;
+void test()
+{
+    s.Sample(uv);
+}
+```
+
+translates to:
+
+```hlsl
+uniform uint2 s;
+void test()
+{
+    Texture2D(ResourceDescriptorHeap[s.x]).Sample(
+        SamplerState(SamplerDescriptorHeap[s.y]),
+        uv
+    );
+}
+```
+
+When targeting SPIRV, Slang will introduce a global array of descriptors and fetch from the global array.
+The descriptor set ID of the global descriptor array can be configured with the `-bindless-space-index`
+(or `CompilerOptionName::BindlessSpaceIndex` when using the API) option.
+
+> #### Note
+> The default implementation for SPIRV may change in the future if SPIRV is extended to provide what is
+> equivalent to D3D's `ResourceDescriptorHeap` construct.
+
+Users can override the default behavior of converting from bindless handle to resource handle, by providing a
+`getDescriptorFromHandle` in user code. For example:
+
+```slang
+// All texture and buffer handles are defined in descriptor set 100.
+[vk::binding(0, 100)]
+__DynamicResource<__DynamicResourceKind.General> resourceHandles[];
+
+// All sampler handles are defined in descriptor set 101.
+[vk::binding(0, 101)]
+__DynamicResource<__DynamicResourceKind.Sampler> samplerHandles[];
+
+export T getDescriptorFromHandle<T>(DescriptorHandle<T> handle) where T : IOpaqueDescriptor
+{
+    __target_switch
+    {
+    case spirv:
+        if (T.kind == ResourceKind.Sampler)
+            return samplerHandles[((uint2)handle).x].asOpaqueDescriptor<T>();
+        else
+            return resourceHandles[((uint2)handle).x].asOpaqueDescriptor<T>();
+    default:
+        return defaultGetDescriptorFromHandle(handle);
+    }
+}
+```
+
+The user can call `defaultGetDescriptorFromHandle` function from their implementation of
+`getDescriptorFromHandle` to dispatch to the default behavior.
+
+By default, the value of a `DescriptorHandle<T>` object is assumed to be dynamically uniform across all
+execution threads. If this is not the case, the user is required to mark the `DescriptorHandle` as `nonuniform`
+*immediately* before dereferencing it:
+```slang
+void test(DescriptorHandle<Texture2D> t)
+{
+    nonuniform(t)->Sample(...);
+}
+```
+
+If the resource pointer value is not uniform and `nonuniform` is not called, the result may be
+undefined.
+
+Extensions
+--------------------
+Slang allows defining additional methods for a type outside its initial definition. For example, suppose we already have a type defined:
+
+```csharp
+struct MyType
+{
+    int field;
+    int get() { return field; }
+}
+```
+
+You can extend `MyType` with new method members:
+```csharp
+extension MyType
+{
+    float getNewField() { return newField; }
+}
+```
+
+All locations that see the definition of the `extension` can access the new members:
+
+```csharp
+void test()
+{
+    MyType t;
+    float val = t.getNewField();
+}
+```
+
+This feature is similar to extensions in Swift and extension methods in C#.
+
+> #### Note:
+> You can only extend a type with additional methods. Extending with additional data fields is not allowed.
+
+Multi-level break
+-------------------
+
+Slang allows `break` statements with a label to jump into any ancestor control flow break points, and not just the immediate parent.
+Example:
+```
+outer:
+for (int i = 0; i < 5; i++)
+{
+    inner:
+    for (int j = 0; j < 10; j++)
+    {
+        if (someCondition)
+            break outer;
+    }
+}
+```
+
+Force inlining
+-----------------
+Most of the downstream shader compilers will inline all the function calls. However you can instruct Slang compiler to do the inlining
+by using the `[ForceInline]` decoration:
+```
+[ForceInline]
+int f(int x) { return x + 1; }
+```
+
+
+Special Scoping Syntax
+-------------------
+Slang supports three special scoping syntaxes to allow users to mix in custom decorators and content in the shader code. These constructs allow a rendering engine to define custom meta-data in the shader, or map engine-specific block syntax to a meaningful block that is understood by the compiler via proper `#define`s.
+
+### `__ignored_block`
+An ignored block will be parsed and ignored by the compiler:
+```
+__ignored_block
+{
+    arbitrary content in the source file,
+    will be ignored by the compiler as if it is a comment.
+    Can have nested {} here.
+}
+```
+
+### `__transparent_block`
+Symbols defined in a transparent block will be treated as if they are defined
+in the parent scope:
+```csharp
+struct MyType
+{
+    __transparent_block
+    {
+        int myFunc() { return 0; }
+    }
+}
+```
+Is equivalent to:
+```csharp
+struct MyType
+{
+    int myFunc() { return 0; }
+}
+```
+
+### `__file_decl`
+Symbols defined in a `__file_decl` will be treated as if they are defined in
+the global scope. However, symbols defined in different `__file_decl`s are not visible
+to each other. For example:
+```csharp
+__file_decl
+{
+    void f1()
+    {
+    }
+}
+__file_decl
+{
+    void f2()
+    {
+        f1(); // error: f1 is not visible from here.
+    }
+}
+```
+
+User Defined Attributes (Experimental)
+-------------------
+
+In addition to many system defined attributes, users can define their own custom attribute types to be used in the `[UserDefinedAttribute(args...)]` syntax. The following example shows how to define a custom attribute type.
+
+```csharp
+[__AttributeUsage(_AttributeTargets.Var)]
+struct MaxValueAttribute
+{
+    int value;
+    string description;
+};
+
+[MaxValue(12, "the scale factor")]
+uniform int scaleFactor;
+```
+
+In the above code, the `MaxValueAttribute` struct type is decorated with the `[__AttributeUsage]` attribute, which informs that `MaxValueAttribute` type should be interpreted as a definition for a user-defined attribute, `[MaxValue]`, that can be used to decorate all variables or fields. The members of the struct define the argument list for the attribute.
+
+The `scaleFactor` uniform parameter is declared with the user defined `[MaxValue]` attribute, providing two arguments for `value` and `description`.
+
+The `_AttributeTargets` enum is used to restrict the type of decls the attribute can apply to. Possible values of `_AttributeTargets` can be `Function`, `Param`, `Struct` or `Var`.
+
+The usage of user-defined attributes can be queried via Slang's reflection API through `TypeReflection` or `VariableReflection`'s `getUserAttributeCount`, `getUserAttributeByIndex` and `findUserAttributeByName` methods. 
diff --git a/external/slang/share/doc/slang/user-guide/04-modules-and-access-control.md b/external/slang/share/doc/slang/user-guide/04-modules-and-access-control.md
new file mode 100644
index 00000000..0b4d44e0
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/04-modules-and-access-control.md
@@ -0,0 +1,237 @@
+---
+layout: user-guide
+permalink: /user-guide/modules
+---
+
+Modules and Access Control
+===========================
+
+While the preprocessor `#include` is still supported, Slang provides a _module_ system for software engineering benefits such as clean expression of sub component boundaries and dependencies, hiding implementation details, and providing a path towards true separate compilation.
+
+
+## Defining a Module
+
+A module in Slang comprises one or more files. A module must have one and only one primary file that is used as the source-of-truth to uniquely identify the module. The primary file must start with `module` declaration. For example, the following code defines a module named `scene`:
+
+```
+// scene.slang
+
+module scene;
+
+// ...
+```
+
+A module can contain more than one file. The additional files are pulled into the module with the `__include` syntax:
+
+```
+// scene.slang
+
+module scene;
+
+__include "scene-helpers";
+
+```
+```
+// scene-helpers.slang
+
+implementing scene;
+// ...
+```
+
+The files being included into a module must start with `implementing <module-name>` declaration.
+
+Note that the `__include` syntax here has a different meaning than the preprocessor `#include`. `__include` has the following semantics:
+1. The preprocessor state at the point of a file inclusion does not apply to the file being included, and the preprocessor state after parsing the included file will not be visible to the outer "includer" file. For example, `#define`s before a `__include` are not visible to the included file, and `#define`s in the included file are not visible to the file that includes it.
+2. A file will be included into the current module exactly once, no matter how many times a `__include` of that file is encountered.
+3. Circular `__include`s are allowed, given (2).
+4. All files that become part of a module via `__include` can access all other entities defined in the same module, regardless of the order of `__include`s.
+
+This means that the following code is valid:
+
+```
+// a.slang
+implementing m;
+void f_a() {}
+
+// b.slang
+implementing "m"; // alternate syntax.
+__include a; // pulls in `a` to module `m`.
+void f_b() { f_a(); }
+
+// c.slang
+implementing "m.slang"; // alternate syntax.
+
+void f_c()
+{
+    // OK, `c.slang` is part of module `m` because it is `__include`'d by
+    // `m.slang`.
+    f_a(); f_b();
+}
+
+// m.slang
+module m;
+__include m; // OK, a file including itself is allowed and has no effect.
+__include "b"; // Pulls in file b (alternate syntax), and transitively pulls in file a.
+__include "c.slang"; // Pulls in file c, specifying the full file name.
+void test() { f_a(); f_b(); f_c(); }
+```
+
+Note that `module`, `implementing` and `__include` all support two flavors of syntax to refer to a module or a file: either via 
+normal identifier tokens or via string literals. For example, the following flavors are equivalent and will resolve to the same file:
+```
+__include dir.file_name; // `file_name` is translated to "file-name".
+__include "dir/file-name.slang";
+__include "dir/file-name";
+```
+
+Also note that a file is considered a part of a module only if the file can be discovered
+via transitive `__include`s from the primary module file. It is possible to have a dangling
+file with the `implementing` declaration that is not `__include`'d by any other files in
+the module. Such dangling files will not be considered as part of the module and will not
+be compiled. The `implementing` declaration is for the purpose of verification and language server code assisting, and does not carry any other semantics that affect compilation.
+
+> #### Note ####
+> When using the identifier token syntax, Slang will translate any underscores(`_`) to hyphens("-") to obtain the file name.
+
+## Importing a Module
+
+At the global scope of a Slang file, you can use the `import` keyword to import another module by name:
+
+```hlsl
+// MyShader.slang
+
+import YourLibrary;
+```
+
+This `import` declaration will cause the compiler to look for a module named `YourLibrary` and make its declarations visible in the current scope. Similar to `__include`, `import` also supports both the identifier-token and the file-name string syntax.
+
+You can only `import` a primary source file of a module. For example, given:
+```
+// m.slang
+module m;
+__include helper;
+
+// helper.slang
+implementing m;
+// ...
+```
+It is only valid for the user code to `import m`. Attempting to `import helper` will result in a compile-time error. 
+
+Multiple `import`s of the same module from different input files will only cause the module to be loaded once (there is no need for "include guards" or `#pragma once`).
+Note that preprocessor definitions in the current file will not affect the compilation of `import`ed code, and the preprocessor definitions in the imported code are not visible to the current file.
+
+## Access Control
+
+Slang supports access control modifiers: `public`, `internal` and `private`. The module boundary plays an important role in access control.
+
+`public` symbols are accessible everywhere: from within the different types, different files or different modules.
+
+`private` symbols are only visible to other symbols in the same type. The following example shows the scope of `private` visibility:
+```csharp
+struct MyType
+{
+    private int member;
+
+    int f() { member = 5; } // OK.
+
+    struct ChildType
+    {
+        int g(MyType t)
+        {
+            return t.member; // OK.
+        }
+    }
+}
+
+void outerFunc(MyType t)
+{
+    t.member = 2; // Error, `member` is not visible here.
+}
+```
+
+`internal` symbols are visible throughout the same module, regardless if it is referenced from the same type or same file. But they are not visible to other modules. The following example shows the scope of `internal` visibility:
+
+```csharp
+// a.slang
+module a;
+__include b;
+public struct PS
+{
+    internal int internalMember;
+    public int publicMember;
+}
+internal void f() { f_b(); } // OK, f_b defined in the same module.
+
+// b.slang
+implementing a;
+internal void f_b(); // Defines f_b in module `a`.
+public void publicFunc();
+
+// m.slang
+module m;
+import a;
+void main()
+{
+    f(); // Error, f is not visible here.
+    publicFunc(); // OK.
+    PS p; // OK.
+    p.internalMember = 1; // Error, internalMember is not visible.
+    p.publicMember = 1; // OK.
+}
+```
+
+`internal` is the default visibility if no other access modifiers are specified. An exception is for `interface` members, where the default visibility is the visibility of the interface.
+
+### Additional Validation Rules
+
+The Slang compiler enforces the following rules regarding access control:
+- A more visible entity should not expose less visible entities through its signature. For example, a `public` function cannot have a return type that is `internal`.
+- A member of a `struct`, `interface` and other aggregate types cannot have a higher visibility than its parent.
+- If a `struct` type has visibility `Vs`, and one of its members has visibility `Vm`, and the member is used to satisfy an interface requirement that has visibility `Vr`, then `Vm` must not be lower (less visible) than `min(Vs, Vr)`.
+- Type definitions themselves cannot be `private`, for example, `private struct S {}` is not valid code.
+- `interface` requirements cannot be `private`.
+
+## Organizing File Structure of Modules
+
+Slang does not seek to impose any specific organization of modules. However, there are some conventions that have emerged as being useful.
+
+### Module Organization Suggestions
+
+- The top-level directory contains modules that would be `import`ed by user code.
+- The implementation details of the modules are placed in files at lower levels of the tree.
+
+This has the benefit that it is easy for a user to distinguish the public API from the implementation details.
+
+### Module Organization Example
+
+<img src="../assets/moduletree.png" width="300em" alt="Module organization tree diagram"/>
+
+### Module Organization Example
+
+The above diagram shows a module organization example.
+
+Top-level module files such as `utils.slang` are those that are directly `import`ed by user code. The implementation details of the module are placed in the lower levels of the tree, organized into similarly named subdirectories for clarity.
+
+Modules like `utils.slang` needn't contain anything more than a module declaration and a list of included files, with optional `import` statement(s) to pull in any external dependencies, e.g.
+
+```
+module utils;
+import slangpy;
+
+__include "utils/accumlator.slang";
+__include "utils/tonemap.slang";
+__include "utils/fill.slang";
+```
+
+Here, all the public symbols defined in `accumlator.slang`, `tonemap.slang`, and `fill.slang` are visible to the user of the `utils` module, and these constituent helper files do not need to clutter the top-level file hierarchy.
+
+## Legacy Modules
+
+Slang used to not have support for access control, and all symbols were treated as having `public` visibility. To provide compatibility with existing code, the Slang compiler will detect if the module is written in the legacy language, and treat all symbols as `public` if so.
+
+A module is determined to be written in legacy language if all the following conditions are met:
+- The module lacks a `module` declaration at the beginning.
+- There is no use of `__include`.
+- There is no use of any visibility modifiers -- `public`, `private` or `internal`.
+
+The user is advised that this legacy mode is for compatibility only. This mode may be deprecated in the future, and it is strongly recommended that new code should not rely on this compiler behavior.
diff --git a/external/slang/share/doc/slang/user-guide/05-capabilities.md b/external/slang/share/doc/slang/user-guide/05-capabilities.md
new file mode 100644
index 00000000..6426cb37
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/05-capabilities.md
@@ -0,0 +1,160 @@
+---
+layout: user-guide
+permalink: /user-guide/capabilities
+---
+
+# Capabilities
+
+One of the biggest challenges in maintaining cross-platform shader code is to manage the differences in hardware capabilities across different GPUs, graphics APIs, and shader stages.
+Each graphics API or shader stage may expose operations that are not available on other platforms. Instead of restricting Slang's features to the lowest common denominator of different platforms,
+Slang exposes operations from all target platforms to allow the user to take maximum advantage on a specific target.
+
+A consequence of this approach is that the user is now responsible for maintaining compatibility of their code. For example, if the user writes code that uses a Vulkan extension currently not
+available on D3D/HLSL, they will get an error when attempting to compile that code to D3D.
+
+To help the user maintain compatibility of their shader code on the platforms that matter to their applications, Slang's type system can now infer and enforce capability requirements
+to provide assurance that the shader code will be compatible with the specific set of platforms before compiling for that platform.
+
+For example, `Texture2D.SampleCmp` is available on D3D and Vulkan, but not available on CUDA. If the user intends to write cross-platform code that targets CUDA, they will
+receive a type-checking error when attempting to use `SampleCmp` before the code generation stage of compilation. When using Slang's intellisense plugin, the programmer should
+get a diagnostic message directly in their code editor.
+
+As another example, `discard` is a statement that is only meaningful when used in fragment shaders. If a vertex shader contains a `discard` statement or calls a function that contains
+a `discard` statement, it shall be a type-check error.
+
+## Capability Atoms and Capability Requirements
+
+Slang models code generation targets, shader stages, API extensions and hardware features as distinct capability atoms. For example, `GLSL_460` is a capability atom that stands for the GLSL 460 code generation target,
+`compute` is an atom that represents the compute shader stage, `_sm_6_7` is an atom representing the shader model 6.7 feature set in D3D, `SPV_KHR_ray_tracing` is an atom representing the `SPV_KHR_ray_tracing` SPIR-V extension, and `spvShaderClockKHR` is an atom for the `ShaderClockKHR` SPIRV capability. For a complete list of capabilities supported by the Slang compiler, check the [capability definition file](https://github.com/shader-slang/slang/blob/master/source/slang/slang-capabilities.capdef).
+
+A capability **requirement** can be a single capability atom, a conjunction of capability atoms, or a disjunction of conjunctions of capability atoms. A function can declare its
+capability requirement with the following syntax:
+
+```csharp
+[require(spvShaderClockKHR)]
+[require(glsl, GL_EXT_shader_realtime_clock)]
+[require(hlsl_nvapi)]
+uint2 getClock() {...}
+```
+
+Each `[require]` attribute declares a conjunction of capability atoms, and all `[require]` attributes form the final requirement of the `getClock()` function as a disjunction of capabilities:
+```
+(spvShaderClockKHR | glsl + GL_EXT_shader_realtime_clock | hlsl_nvapi)
+```
+
+A capability can __imply__ other capabilities. Here `spvShaderClockKHR` is a capability that implies `SPV_KHR_shader_clock`, which represents the SPIRV `SPV_KHR_shader_clock` extension, and the `SPV_KHR_shader_clock` capability implies `spirv_1_0`, which stands for the spirv code generation target.
+
+When evaluating capability requirements, Slang will expand all implications. Therefore the final capability requirement for `getClock` is:
+```
+  spirv_1_0 + SPV_KHR_shader_clock + spvShaderClockKHR
+| glsl + _GL_EXT_shader_realtime_clock
+| hlsl + hlsl_nvapi
+```
+Which means the function can be called from locations where the `spvShaderClockKHR` capability is available (when targeting SPIRV), or where the `GL_EXT_shader_realtime_clock` extension is available when targeting GLSL,
+or where `nvapi` is available when targeting HLSL.
+
+## Conflicting Capabilities
+
+Certain groups of capabilities are mutually exclusive such that only one capability in the group is allowed to exist. For example, all stage capabilities are mutually exclusive: a requirement for both `fragment` and `vertex` is impossible to satisfy. Currently, capabilities that model different code generation targets (e.g. `hlsl`, `glsl`) or different shader stages (`vertex`, `fragment`, etc.) are mutually exclusive within
+their corresponding group.
+
+If two capability requirements contain different atoms that are conflicting with each other, these two requirements are considered __incompatible__.
+For example, requirement `spvShaderClockKHR + fragment` and requirement `spvShaderClockKHR + vertex` are incompatible, because `fragment` conflicts with `vertex`.
+
+## Requirements in Parent Scope
+
+The capability requirement of a decl is always merged with the requirements declared in its parents. If the decl declares requirements for additional compilation targets, they are added
+to the requirement set as a separate disjunction.
+For example, given:
+```csharp
+[require(glsl)]
+[require(hlsl)]
+struct MyType
+{
+    [require(hlsl, hlsl_nvapi)]
+    [require(spirv)]
+    static void method() { ... }
+}
+```
+`MyType.method` will have requirement `glsl | hlsl + hlsl_nvapi | spirv`.
+
+The `[require]` attribute can also be used on module declarations, so that the requirement will
+apply to all decls within the module. For example:
+```csharp
+[require(glsl)]
+[require(hlsl)]
+[require(spirv)]
+module myModule;
+
+// myFunc has requirement glsl|hlsl|spirv
+public void myFunc()
+{
+}
+```
+
+## Inference of Capability Requirements
+
+By default, Slang will infer the capability requirements of a function given its definition, as long as the function has `internal` or `private` visibility. For example, given:
+```csharp
+void myFunc()
+{
+    if (getClock().x % 1000 == 0)
+        discard;
+}
+```
+Slang will automatically deduce that `myFunc` has capability
+```
+  spirv_1_0 + SPV_KHR_shader_clock + spvShaderClockKHR + fragment
+| glsl + _GL_EXT_shader_realtime_clock + fragment
+| hlsl + hlsl_nvapi + fragment
+```
+This is because the `discard` statement requires the `fragment` capability.
+
+## Inference on target_switch
+
+A `__target_switch` statement will introduce disjunctions in its inferred capability requirement. For example:
+```csharp
+void myFunc()
+{
+    __target_switch
+    {
+    case spirv: ...;
+    case hlsl: ...;
+    }
+}
+```
+The capability requirement of `myFunc` is `(spirv | hlsl)`, meaning that the function can be called from a context where either `spirv` or `hlsl` capability
+is available.
+
+## Capability Aliases
+
+To make it easy to specify capabilities on different platforms, Slang also defines many aliases that can be used in `[require]` attributes.
+For example, Slang declares:
+```
+alias sm_6_6 = _sm_6_6
+             | glsl_spirv_1_5 + sm_6_5
+                + GL_EXT_shader_atomic_int64 + atomicfloat2
+             | spirv_1_5 + sm_6_5
+                + GL_EXT_shader_atomic_int64 + atomicfloat2
+                + SPV_EXT_descriptor_indexing
+             | cuda
+             | cpp;
+```
+So user code can write `[require(sm_6_6)]` to mean that the function requires shader model 6.6 on D3D or equivalent set of GLSL/SPIRV extensions when targeting GLSL or SPIRV.
+Note that in the above definition, `GL_EXT_shader_atomic_int64` is also an alias that is defined as:
+```
+alias GL_EXT_shader_atomic_int64 = _GL_EXT_shader_atomic_int64 | spvInt64Atomics;
+```
+Where `_GL_EXT_shader_atomic_int64` is the atom that represents the true `GL_EXT_shader_atomic_int64` GLSL extension.
+The `GL_EXT_shader_atomic_int64` alias is defined as a disjunction of `_GL_EXT_shader_atomic_int64` and the `Int64Atomics` SPIRV capability so that
+it can be used in both the contexts of GLSL and SPIRV target.
+
+When aliases are used in a `[require]` attribute, the compiler will expand the alias to evaluate the capability set, and remove all incompatible conjunctions.
+For example, `[require(hlsl, sm_6_6)]` will be evaluated to `(hlsl+_sm_6_6)` because all other conjunctions in `sm_6_6` are incompatible with `hlsl`.
+
+## Validation of Capability Requirements
+
+Slang requires all public methods and interface methods to have explicit capability requirements declarations. Omitting capability declaration on a public method means that the method does not require any
+specific capability. Functions with explicit requirement declarations will be verified by the compiler to ensure that they do not use any capability beyond what is declared.
+
+Slang recommends but does not require explicit declaration of capability requirements for entrypoints. If explicit capability requirements are declared on an entrypoint, they will be used to validate the entrypoint the same way as other public methods, providing assurance that the function will work on all intended targets. If an entrypoint does not define explicit capability requirements, Slang will infer the requirements, and only issue a compiler error when the inferred capability is incompatible with the current code generation target.
diff --git a/external/slang/share/doc/slang/user-guide/06-interfaces-generics.md b/external/slang/share/doc/slang/user-guide/06-interfaces-generics.md
new file mode 100644
index 00000000..a07ccc6b
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/06-interfaces-generics.md
@@ -0,0 +1,1065 @@
+---
+layout: user-guide
+permalink: /user-guide/interfaces-generics
+---
+
+Interfaces and Generics
+===========================
+
+This chapter covers two interrelated Slang language features: interfaces and generics. We will talk about what they are, how they relate to similar features in other languages, how they are parsed and translated by the compiler, and show examples of how these features simplify and modularize shader code.
+
+Interfaces
+----------
+
+Interfaces are used to define the methods and services a type should provide. You can define an interface as in the following example:
+```csharp
+interface IFoo
+{
+    int myMethod(float arg);
+}
+```
+
+Slang's syntax for defining interfaces is similar to `interface`s in C# and `protocol`s in Swift. In this example, the `IFoo` interface establishes a contract that any type conforming to this interface must provide a method named `myMethod` that accepts a `float` argument and returns an `int` value.
+
+A `struct` type may declare its conformance to an `interface` via the following syntax:
+```csharp
+struct MyType : IFoo
+{
+    int myMethod(float arg)
+    {
+        return (int)arg + 1;
+    }
+}
+```
+By declaring the conformance to `IFoo`, the definition of `MyType` must include a method named `myMethod` with a matching signature to that defined in the `IFoo` interface to satisfy the declared conformance. If a type misses any methods required by the interface, the Slang compiler will generate an error message.
+
+A `struct` type may declare multiple interface conformances:
+```csharp
+interface IBar { uint myMethod2(uint2 x); }
+
+struct MyType : IFoo, IBar
+{
+    int myMethod(float arg) {...}
+    uint myMethod2(uint2 x) {...}
+}
+```
+In this case, the definition of `MyType` must satisfy the requirements from both the `IFoo` and `IBar` interfaces by providing both the `myMethod` and `myMethod2` methods.
+
+Generics
+---------------------
+
+Generics can be used to eliminate duplicate code for shared logic that operates on different types. The following example shows how to define a generic method in Slang.
+
+```csharp
+int myGenericMethod<T>(T arg) where T : IFoo
+{
+    return arg.myMethod(1.0);
+}
+```
+
+The above listing defines a generic method named `myGenericMethod`, which accepts an argument that can be of any type `T` as long as `T` conforms to the `IFoo` interface. The `T` here is called a _generic type parameter_, and it is associated with a _type constraint_ in the following `where` clause to indicate that any type represented by `T` must conform to the interface `IFoo`.
+
+The following listing shows how to invoke a generic method:
+```csharp
+MyType obj;
+int a = myGenericMethod<MyType>(obj); // OK, explicit type argument
+int b = myGenericMethod(obj); // OK, automatic type deduction
+```
+
+You may explicitly specify the concrete type to use for the generic type argument, by providing the types in angle brackets after the method name, or leave it to the compiler to automatically deduce the type from the argument list.
+
+Note that it is important to associate a generic type parameter with a type constraint. In the above example, although the definition of `myGenericMethod` is agnostic of the concrete type that `T` will stand for, knowing that `T` conforms to `IFoo` allows the compiler to type-check and pre-compile `myGenericMethod` without needing to substitute `T` with any concrete types first. Similar to languages like C#, Rust, Swift and Java, leaving out the type constraint declaration on type parameter `T` will result in a compile error at the line calling `arg.myMethod` since the compiler cannot verify that `arg` has a member named `myMethod` without any knowledge of `T`. This is a major difference of Slang's generics compared to _templates_ in C++.
+
+While C++ templates are a powerful language mechanism, Slang has followed the path of many other modern programming languages to adopt the more structural and restricted generics feature instead. This enables the Slang compiler to perform type checking early to give more readable error messages, and to speed-up compilation by reusing a lot of work for different instantiations of `myGenericMethod`.
+
+A generic parameter can also be a value. Currently, integer, bool and enum types are allowed as the type for a generic value parameter. Generic value parameters are declared with the `let` keyword. For example:
+
+```csharp
+void g1<let n : int>() { ... }
+
+enum MyEnum { A, B, C }
+void g2<let e : MyEnum>() { ... }
+
+void g3<let b : bool>() { ... }
+```
+
+### Alternative Syntax
+
+Alternatively, you can use `__generic` keyword to define generic parameters before the method:
+```csharp
+__generic<typename T> // `typename` is optional.
+int myGenericMethod(T arg) where T : IFoo
+{
+    return arg.myMethod(1.0);
+}
+```
+
+The same method can be defined in an alternative simplified syntax without the `where` clause:
+```csharp
+int myGenericMethod<T:IFoo>(T arg) { ... }
+```
+
+Generic value parameters can also be defined using the traditional C-style syntax:
+```csharp
+void g1<typename T, int n>() { ... }
+```
+
+Slang allows multiple `where` clauses, and multiple interface types in a single `where` clause:
+```csharp
+struct MyType<T, U>
+    where T: IFoo, IBar
+    where U : IBaz<T>
+{
+}
+// equivalent to:
+struct MyType<T, U>
+    where T: IFoo
+    where T : IBar
+    where U : IBaz<T>
+{
+}
+```
+
+Supported Constructs in Interface Definitions
+-----------------------------------------------------
+
+Slang supports many other constructs in addition to ordinary methods as a part of an interface definition.
+
+### Properties
+
+```csharp
+interface IFoo
+{
+    property int count {get; set;}
+}
+```
+The above listing declares that any conforming type must define a property named `count` with both a `getter` and a `setter` method.
+
+### Generic Methods
+
+```csharp
+interface IFoo
+{
+    int compute<T>(T val) where T : IBar;
+}
+```
+The above listing declares that any conforming type must define a generic method named `compute` that has one generic type parameter conforming to the `IBar` interface.
+
+### Static Methods
+
+```csharp
+interface IFoo
+{
+    static int compute(int val);
+};
+```
+
+The above listing declares that any conforming type must define a static method named `compute`. This allows the following generic method to pass type-checking:
+```csharp
+void f<T>() where T : IFoo
+{
+    T.compute(5); // OK, T has a static method `compute`.
+}
+```
+
+### Static Constants
+
+You can define static constant requirements in an interface. The constants can be accessed in places where a compile-time constant is needed.
+```csharp
+interface IMyValue
+{
+    static const int value;
+}
+struct MyObject2 : IMyValue
+{
+    static const int value = 2;
+}
+struct GetValuePlus1<T:IMyValue>
+{
+    static const int value = T.value + 1;
+}
+
+static const int result = GetValuePlus1<MyObject2>.value;  // result == 3
+```
+
+### `This` Type
+
+You may use a special keyword `This` in interface definitions to refer to the type that is conforming to the interface. The following examples demonstrate a use of `This` type:
+```csharp
+interface IComparable
+{
+    int comparesTo(This other);
+}
+struct MyObject : IComparable
+{
+    int val;
+    int comparesTo(MyObject other)
+    {
+        return val < other.val ? -1 : 1;
+    }
+}
+```
+In this example, the `IComparable` interface declares that any conforming type must provide a `comparesTo` method that performs a comparison between an object and another object of the same type. The `MyObject` type satisfies this requirement by providing a `comparesTo` method that accepts a `MyObject` typed argument, since in the scope of `MyObject`, `This` type is equivalent to `MyObject`.
+
+### Initializers
+
+Consider a generic method that wants to create and initialize a new instance of generic type `T`:
+```csharp
+void f<T:IFoo>()
+{
+    T obj = /*a newly initialized T*/
+}
+```
+One way to implement this is to introduce a static method requirement in `IFoo`:
+```csharp
+interface IFoo
+{
+    static This create();
+}
+```
+With this interface definition, we can define `f` as follows:
+```csharp
+void f<T:IFoo>()
+{
+    T obj = T.create();
+}
+```
+
+This solution works just fine, but it would be nicer if you can just write:
+```csharp
+T obj = T();
+```
+Or simply
+```csharp
+T obj;
+```
+And let the compiler invoke the default initializer defined in the type.
+To enable this, you can include an initializer requirement in the interface definition:
+```csharp
+interface IFoo
+{
+    __init();
+}
+```
+
+Initializers with parameters are supported as well. For example:
+```csharp
+interface IFoo
+{
+    __init(int a, int b);
+}
+void g<T:IFoo>()
+{
+    T obj = {1, 2}; // OK, invoking the initializer on T.
+}
+```
+
+Associated Types
+-------------------------
+
+When writing code using interfaces and generics, there are some situations where an interface method needs to return an object whose type is implementation-dependent. For example, consider the following `IFloatContainer` interface that represents a container of `float` values:
+```csharp
+// Represents a container of float values.
+interface IFloatContainer
+{
+    // Returns the number of elements in this container.
+    uint getCount();
+    // Returns an iterator representing the start of the container.
+    Iterator begin();
+    // Returns an iterator representing the end of the container.
+    Iterator end();
+    // Return the element at the location represented by `iter`.
+    float getElementAt(Iterator iter);
+}
+```
+An implementation of the `IFloatContainer` interface may use different types of iterators. For example, an implementation that is simply an array of `float`s can expose `Iterator` as a simple integer index:
+```csharp
+struct ArrayFloatContainer : IFloatContainer
+{
+    float content[10];
+    uint getCount() { return 10; }
+    uint begin() { return 0; }
+    uint end() { return 10; }
+    float getElementAt(uint iter) { return content[iter]; }
+}
+```
+On the other hand, an implementation that uses multiple buffers as the backing storage may use a more complex type to locate an element:
+```csharp
+// Exposes values in two `StructuredBuffer`s as a single container.
+struct MultiArrayFloatContainer : IFloatContainer
+{
+    StructuredBuffer<float> firstBuffer;
+    StructuredBuffer<float> secondBuffer;
+    uint getCount() { return getBufferSize(firstBuffer) + getBufferSize(secondBuffer); }
+
+    // `uint2.x` indicates which buffer, `uint2.y` indicates the index within the buffer.
+    uint2 begin() { return uint2(0,0); }
+    uint2 end() { return uint2 (1, getBufferSize(secondBuffer)); }
+    float getElementAt(uint2 iter)
+    {
+        if (iter.x == 0) return firstBuffer[iter.y];
+        else return secondBuffer[iter.y];
+    }
+}
+```
+
+Ideally, a generic function that wishes to enumerate values in a `IFloatContainer` shouldn't need to care about the implementation details on what the concrete type of `Iterator` is, and we would like to be able to write the following:
+```csharp
+float sum<T:IFloatContainer>(T container)
+{
+    float result = 0.0f;
+    for (T.Iterator iter = container.begin(); iter != container.end(); iter=iter.next())
+    {
+        float val = container.getElementAt(iter);
+        result += val;
+    }
+    return result;
+}
+```
+Here the `sum` function simply wants to access all the elements and sum them up. The details of what the `Iterator` type actually is does not matter to the definition of `sum`.
+
+The problem is that the `IFloatContainer` interface definition requires methods like `begin()`, `end()` and `getElementAt()` to refer to an iterator type that is implementation dependent. How should the signature of these methods be defined in the interface? The answer is to use _associated types_.
+
+In addition to constructs listed in the previous section, Slang also supports defining associated types in an `interface` definition. An associated type can be defined as follows:
+```csharp
+// The interface for an iterator type.
+interface IIterator
+{
+    // An iterator needs to know how to move to the next element.
+    This next();
+}
+
+interface IFloatContainer
+{
+    // Requires an implementation to define a type named `Iterator` that
+    // conforms to the `IIterator` interface.
+    associatedtype Iterator : IIterator;
+
+    // Returns the number of elements in this container.
+    uint getCount();
+    // Returns an iterator representing the start of the container.
+    Iterator begin();
+    // Returns an iterator representing the end of the container.
+    Iterator end();
+    // Return the element at the location represented by `iter`.
+    float getElementAt(Iterator iter);
+};
+```
+
+This `associatedtype` definition in `IFloatContainer` requires that all types conforming to this interface must also define a type in its scope named `Iterator`, and this iterator type must conform to the `IIterator` interface. An implementation of the `IFloatContainer` interface can use either a `typedef` declaration or a `struct` definition inside its scope to satisfy the associated type requirement. For example, the `ArrayFloatContainer` can be implemented as follows:
+```csharp
+struct ArrayIterator : IIterator
+{
+    uint index;
+    __init(int x) { index = x; }
+    ArrayIterator next()
+    {
+        return ArrayIterator(index + 1);
+    }
+}
+struct ArrayFloatContainer : IFloatContainer
+{
+    float content[10];
+
+    // Specify that the associated `Iterator` type is `ArrayIterator`.
+    typedef ArrayIterator Iterator;
+
+    uint getCount() { return 10; }
+    Iterator begin() { return ArrayIterator(0); }
+    Iterator end() { return ArrayIterator(10); }
+    float getElementAt(Iterator iter) { return content[iter.index]; }
+}
+```
+
+Alternatively, you may also define the `Iterator` type directly inside a `struct` implementation, as in the following definition for `MultiArrayFloatContainer`:
+```csharp
+// Exposes values in two `StructuredBuffer`s as a single container.
+struct MultiArrayFloatContainer : IFloatContainer
+{
+    // Represents an iterator of this container
+    struct Iterator : IIterator
+    {
+        // `index.x` indicates which buffer the element is located in.
+        // `index.y` indicates the index of the element inside the buffer.
+        uint2 index;
+
+        // We also need to keep a size of the first buffer so we know when to
+        // switch to the second buffer.
+        uint firstBufferSize;
+
+        // Implementation of IIterator.next()
+        Iterator next()
+        {
+            Iterator result;
+            result.index.x = index.x;
+            result.index.y = index.y + 1;
+            // If we are at the end of the first buffer,
+            // move to the head of the second buffer
+            if (result.index.x == 0 && result.index.y == firstBufferSize)
+            {
+                result.index = uint2(1, 0);
+            }
+            return result;
+        }
+    }
+
+    StructuredBuffer<float> firstBuffer;
+    StructuredBuffer<float> secondBuffer;
+    uint getCount() { return getBufferSize(firstBuffer) + getBufferSize(secondBuffer); }
+
+    Iterator begin()
+    {
+        Iterator iter;
+        iter.index = uint2(0, 0);
+        iter.firstBufferSize = getBufferSize(firstBuffer);
+        return iter;
+    }
+    Iterator end()
+    {
+        Iterator iter;
+        iter.index = uint2(1, getBufferSize(secondBuffer));
+        iter.firstBufferSize = 0;
+        return iter;
+    }
+    float getElementAt(Iterator iter)
+    {
+        if (iter.index.x == 0) return firstBuffer[iter.index.y];
+        else return secondBuffer[iter.index.y];
+    }
+}
+```
+
+In summary, an `associatedtype` requirement in an interface is similar to other types of requirements: a method requirement means that an implementation must provide a method matching the interface signature, while an `associatedtype` requirement means that an implementation must provide a type in its scope with the matching name and interface constraint. In general, when defining an interface that is producing and consuming an object whose actual type is implementation-dependent, the type of this object can often be modeled as an associated type in the interface.
+
+
+### Comparing Generics to C++ Templates
+Readers who are familiar with C++ could easily relate the `Iterator` example in previous subsection to the implementation of STL. In C++, the `sum` function can be easily written with templates:
+```C++
+template<typename TContainer>
+float sum(const TContainer& container)
+{
+    float result = 0.0f;
+    // Assumes `TContainer` has a type `Iterator` that supports `operator++`.
+    for (TContainer::Iterator iter = container.begin(); iter != container.end(); ++iter)
+    {
+        result += container.getElementAt(iter);
+    }
+    return result;
+}
+```
+
+A C++ programmer can implement `ArrayFloatContainer` as following:
+```C++
+struct ArrayFloatContainer
+{
+    float content[10];
+
+    typedef uint32_t Iterator;
+
+    Iterator getCount() { return 10; }
+    Iterator begin() { return 0; }
+    Iterator end() { return 10; }
+    float getElementAt(Iterator iter) { return content[iter]; }
+};
+```
+Because C++ does not require a template function to define _constraints_ on the templated type, there are no interfaces or inheritances involved in the definition of `ArrayFloatContainer`. However `ArrayFloatContainer` still needs to define what its `Iterator` type is, so the `sum` function can be successfully specialized with an `ArrayFloatContainer`.
+
+Note that the biggest difference between C++ templates and generics is that templates are not type-checked prior to specialization, and therefore the code that consumes a templated type (`TContainer` in this example) can simply assume `container` has a method named `getElementAt`, and the `TContainer` scope provides a type definition for `TContainer::Iterator`. Compiler error only arises when the programmer is attempting to specialize the `sum` function with a type that does not meet these assumptions. Contrarily, Slang requires all possible uses of a generic type be declared through an interface. By stating that `TContainer:IContainer` in the generics declaration, the Slang compiler can verify that `container.getElementAt` is calling a valid function. Similarly, the interface also tells the compiler that `TContainer.Iterator` is a valid type and enables the compiler to fully type check the `sum` function without specializing it first.
+
+### Similarity to Swift and Rust
+
+Slang's `associatedtype` shares the same semantic meaning with `associatedtype` in a Swift `protocol` or `type` in a Rust `trait`, except that Slang currently does not support the more general `where` clause in these languages. C# does not have an equivalent to `associatedtype`, and programmers need to resort to generic interfaces to achieve similar goals.
+
+Generic Value Parameters
+-------------------------------
+
+So far we have demonstrated generics with _type parameters_. Additionally, Slang also supports generic _value_ parameters.
+The following listing shows an example of generic value parameters.
+```csharp
+struct Array<T, let N : int>
+{
+    T arrayContent[N];
+}
+```
+In this example, the `Array` type has a generic type parameter, `T`, that is used as the element type of the `arrayContent` array, and a generic value parameter `N` of integer type.
+
+Note that the builtin `vector<float, N>` type also has a generic value parameter `N`.
+
+> #### Note ####
+> Generic value parameters can only have integer (e.g. `int`, `uint`), `bool`, or enum types; `float` and
+> other types cannot be used in a generic value parameter. Computations in a type
+> expression are supported as long as they can be evaluated at compile time. For example,
+> `vector<float, 1+1>` is allowed and considered equivalent to `vector<float, 2>`.
+
+
+Type Equality Constraints
+-------------------------
+
+In addition to type conformance constraints as in `where T : IFoo`, Slang also supports type equality constraints. This is mostly useful in specifying additional constraints for
+associated types. For example:
+```csharp
+interface IFoo { associatedtype A; }
+
+// Accepts all T that conform to IFoo, where T.A is `int`.
+void foo<T>(T v)
+    where T : IFoo
+    where T.A == int
+{
+}
+
+struct X : IFoo
+{
+    typealias A = int;
+}
+
+struct Y : IFoo
+{
+    typealias A = float;
+}
+
+void test()
+{
+    foo<X>(X()); // OK
+    foo<Y>(Y()); // Error, `Y` cannot be used for `T`.
+}
+```
+
+Interface-typed Values
+-------------------------------
+
+So far we have been using interfaces as constraints to generic type parameters. For example, the following listing defines a generic function with a type parameter `TTransform` constrained by interface `ITransform`:
+
+```csharp
+interface ITransform
+{
+    int compute(MyObject obj);
+}
+
+// Defining a generic method:
+int apply<TTransform : ITransform>(TTransform transform, MyObject object)
+{
+    return transform.compute(object);
+}
+```
+
+While Slang's syntax for defining generic methods bears similarity to generics in C#/Java and templates in C++ and should be easy to users who are familiar with these languages, codebases that make heavy use of generics can quickly become verbose and difficult to read. To reduce the amount of boilerplate, Slang supports an alternate way to define the `apply` method by using the interface type `ITransform` as parameter type directly:
+
+```csharp
+// A method that is equivalent to `apply` but uses simpler syntax:
+int apply_simple(ITransform transform, MyObject object)
+{
+    return transform.compute(object);
+}
+```
+
+Instead of defining a generic type parameter `TTransform` and a method parameter `transform` that has `TTransform` type, you can simply define the same `apply` function like a normal method, with a `transform` parameter whose type is an interface. From the Slang compiler's view, `apply` and `apply_simple` will be compiled to the same target code.
+
+In addition to parameters, Slang allows variables, and function return values to have an interface type as well:
+```csharp
+ITransform test(ITransform arg)
+{
+    ITransform v = arg;
+    return v;
+}
+```
+
+### Restrictions and Caveats
+
+The Slang compiler always attempts to determine the actual type of an interface-typed value at compile time and specialize the code with the actual type. As long as the compiler can successfully determine the actual type, code that uses interface-typed values is equivalent to code written in the generics syntax. However, when interface types are used in function return values, the compiler will not be able to trivially propagate type information. For example:
+```csharp
+ITransform getTransform(int x)
+{
+    if (x == 0)
+    {
+        Type1Transform rs = {};
+        return rs;
+    }
+    else
+    {
+        Type2Transform rs = {};
+        return rs;
+    }
+}
+```
+In this example, the actual type of the return value is dependent on the value of `x`, which may not be known at compile time. This means that the concrete type of the return value at invocation sites of `getTransform` may not be statically determinable. When the Slang compiler cannot infer the concrete type of an interface-type value, it will generate code that performs a dynamic dispatch based on the concrete type of the value at runtime, which may introduce performance overhead. Note that this behavior applies to function return values in the form of `out` parameters as well:
+
+```csharp
+void getTransform(int x, out ITransform transform)
+{
+    if (x == 0)
+    {
+        Type1Transform rs = {};
+        transform = rs;
+    }
+    else
+    {
+        Type2Transform rs = {};
+        transform = rs;
+    }
+}
+```
+This `getTransform` definition can also result in dynamic dispatch code since the type of `transform` may not be statically determinable.
+
+When the compiler is generating dynamic dispatch code for interface-typed values, it requires the concrete type of the interface-typed value to be free of any opaque-typed fields (e.g. resources and buffer types). A compiler error will be generated upon such attempts:
+```csharp
+struct MyTransform : ITransform
+{
+    StructuredBuffer<int> buffer;
+    int compute(MyObject obj)
+    {
+        return buffer[0];
+    }
+}
+
+ITransform getTransform(int x)
+{
+    MyTransform rs;
+    // Error: cannot use an opaque value as an interface-typed return value.
+    return rs;
+}
+```
+
+Assigning different values to a mutable interface-typed variable also undermines the compiler's ability to statically determine the type of the variable, and is not supported by the Slang compiler today:
+```csharp
+void test(int x)
+{
+    ITransform t = Type1Transform();
+    // Do something ...
+    // Assign a different type of transform to `t`:
+    // (Not supported by Slang today)
+    t = Type2Transform();
+    // Do something else...
+}
+```
+
+In general, if the use of interface-typed values is restricted to function parameters only, then all code that involves interface-typed values will be compiled the same way as if the code were written using standard generics syntax.
+
+
+Extending a Type with Additional Interface Conformances
+-----------------------------
+In the previous chapter, we introduced the `extension` feature that lets you define new members to an existing type in a separate location outside the original definition of the type. 
+
+`extensions` can be used to make an existing type conform to additional interfaces. Suppose we have an interface `IFoo` and a type `MyObject` that implements the interface:
+
+```csharp
+interface IFoo
+{
+    int foo();
+};
+
+struct MyObject : IFoo
+{
+    int foo() { return 0; }
+}
+```
+
+Now we introduce another interface, `IBar`:
+```csharp
+interface IBar
+{
+    float bar();
+}
+```
+
+We can define an `extension` to make `MyObject` conform to `IBar` as well:
+```csharp
+extension MyObject : IBar
+{
+    float bar() { return 1.0f; }
+}
+```
+
+With this extension, we can use `MyObject` in places that expect an `IBar` as well:
+```csharp
+void use(IBar b)
+{
+    b.bar();
+}
+
+void test()
+{
+    MyObject obj;
+    use(obj); // OK, `MyObject` is extended to conform to `IBar`.
+}
+```
+
+You may define more than one interface conformance in a single `extension`:
+```csharp
+interface IBar2
+{
+    float bar2();
+}
+extension MyObject : IBar, IBar2
+{
+    float bar() { return 1.0f; }
+    float bar2() { return 2.0f; }
+}
+```
+
+`is` and `as` Operator
+----------------------------
+
+You can use the `is` operator to test if an interface-typed value is of a specific concrete type, and use the `as` operator to downcast the value into a specific type.
+The `as` operator returns an `Optional<T>` that is not `none` if the downcast succeeds.
+
+```csharp
+interface IFoo
+{
+    int foo();
+}
+struct MyImpl : IFoo
+{
+    int foo() { return 0; }
+}
+void test(IFoo foo)
+{
+    bool t = foo is MyImpl; // true
+    Optional<MyImpl> optV = foo as MyImpl;
+    if (t == (optV != none))
+        printf("success");
+    else
+        printf("fail");
+}
+void main()
+{
+    MyImpl v;
+    test(v);
+}
+// Result:
+// "success"
+```
+
+In addition to casting from an interface type to a concrete type, the `as` and `is` operators can be used on generic types as well to cast a generic type into a concrete type. For example:
+```csharp
+T compute<T>(T a1, T a2)
+{
+    if (a1 is float)
+    {
+        return reinterpret<T>((a1 as float).value + (a2 as float).value);
+    }
+    else if (T is int)
+    {
+        return reinterpret<T>((a1 as int).value - (a2 as int).value);
+    }
+    return T();
+}
+// compute(1.0f, 2.0f) == 3.0f
+// compute(3, 1) == 2
+```
+
+Since the `as` operator returns an `Optional<T>` value, it can also be used in an `if` predicate to test whether an object can be
+cast to a specific type. Once the cast test succeeds, the object can be used in the `if` block as the cast type
+without the need to retrieve the `Optional<T>::value` property. For example:
+
+```csharp
+interface IFoo
+{
+    void foo();
+}
+
+struct MyImpl1 : IFoo
+{
+    void foo() { printf("MyImpl1");}
+}
+
+struct MyImpl2 : IFoo
+{
+    void foo() { printf("MyImpl2");}
+}
+
+struct MyImpl3 : IFoo
+{
+    void foo() { printf("MyImpl3");}
+}
+
+void test(IFoo foo)
+{
+    // This syntax will be desugared to the following:
+    // {
+    //      Optional<MyImpl1> optVar = foo as MyImpl1;
+    //      if (optVar.hasValue)
+    //      {
+    //          MyImpl1 t = optVar.value;
+    //          t.foo();
+    //      }
+    //      else if ...
+    // }
+    if (let t = foo as MyImpl1) // t is of type MyImpl1
+    {
+        t.foo();
+    }
+    else if (let t = foo as MyImpl2) // t is of type MyImpl2
+    {
+        t.foo();
+    }
+    else
+        printf("fail");
+}
+
+void main()
+{
+    MyImpl1 v1;
+    test(v1);
+
+    MyImpl2 v2;
+    test(v2);
+}
+
+```
+See  [if-let syntax](convenience-features.html#if_let-syntax) for more details.
+
+
+Generic Interfaces
+------------------
+
+Slang allows interfaces themselves to be generic. A common use of generic interfaces is to define the `IEnumerable` type:
+```csharp
+interface IEnumerator<T>
+{
+    This moveNext();
+    bool isEnd();
+    T getValue();
+}
+
+interface IEnumerable<T>
+{
+    associatedtype Enumerator : IEnumerator<T>;
+    Enumerator getEnumerator();
+}
+```
+
+You can constrain a generic type parameter to conform to a generic interface:
+```csharp
+void traverse<TElement, TCollection>(TCollection c)
+    where TCollection : IEnumerable<TElement>
+{
+    ...
+}
+```
+
+
+Generic Extensions
+----------------------
+You can use generic extensions to extend a generic type. For example,
+```csharp
+interface IFoo { void foo(); }
+interface IBar { void bar(); }
+
+struct MyType<T : IFoo>
+{
+    void foo() { ... }
+}
+
+// Extend `MyType<T>` so it conforms to `IBar`.
+extension<T:IFoo> MyType<T> : IBar
+{
+    void bar() { ... }
+}
+// Equivalent to:
+__generic<T:IFoo>
+extension MyType<T> : IBar
+{
+    void bar() { ... }
+}
+```
+
+
+Extensions to Interfaces
+-----------------------------
+
+In addition to extending ordinary types, you can define extensions on all types that conform to some interface:
+
+```csharp
+// An example interface.
+interface IFoo
+{
+    int foo();
+}
+
+// Extend any type `T` that conforms to `IFoo` with a `bar` method.
+extension<T:IFoo> T
+{
+    int bar() { return 0; }
+}
+
+int use(IFoo foo)
+{
+    // With the extension, all uses of `IFoo` typed values
+    // can assume there is a `bar` method.
+    return foo.bar();
+}
+```
+
+Note that `interface` types cannot be extended, because extending an `interface` with new requirements would make all existing types that conform
+to the interface no longer valid.
+
+In the presence of extensions, it is possible for a type to have multiple ways to 
+conform to an interface. In this case, Slang will always prefer the more specific conformance
+over the generic one. For example, the following code illustrates this behavior:
+
+```csharp
+interface IBase{}
+interface IFoo
+{
+    int foo();
+}
+
+// MyObject directly implements IBase:
+struct MyObject : IBase, IFoo
+{
+    int foo() { return 0; }
+}
+
+// Generic extension that applies to all types that conforms to `IBase`:
+extension<T:IBase> T : IFoo
+{
+    int foo() { return 1; }
+}
+
+int helper<T:IFoo>(T obj)
+{
+    return obj.foo();
+}
+
+int test()
+{
+    MyObject obj;
+
+    // Returns 0, the conformance defined directly by the type
+    // is preferred.
+    return helper(obj);
+}
+```
+
+This feature is similar to extension traits in Rust.
+
+
+Variadic Generics
+-------------------------
+
+Slang supports variadic generic type parameters:
+```csharp
+struct MyType<each T>
+{}
+```
+
+Here `each T` defines a generic type pack parameter that can be a list of zero or more types. Therefore, the following instantiation of `MyType` is valid:
+```
+MyType // OK
+MyType<int> // OK
+MyType<int, float, void> // OK
+```
+
+A common use of variadic generics is to define `printf`:
+```csharp
+void printf<each T>(String message, expand each T args) { ... }
+```
+
+The type syntax `expand each T` represents an expansion of the type pack `T`. Therefore, the type of the `args` parameter is an expanded type pack.
+The `expand` expression can be thought of as a map operation on a type pack. For example,
+given the type pack `T = int, float, bool`, `expand each T` evaluates to the type pack of the same types, i.e. `expand each T ==> int, float, bool`.
+As a more interesting example, `expand S<each T>` will evaluate to `S<int>, S<float>, S<bool>`.
+
+You can use `expand` expression on tuple or type-pack values to compute an expression for each element of the tuple or type pack.
+For example:
+
+```csharp
+void printNumbers<each T>(expand each T args) where T == int
+{
+    // A single expression statement whose type will be `(void, void, ...)`.
+    // where each `void` is the result of evaluating expression `printf(...)` with
+    // each corresponding element in `args` passed as print operand.
+    //
+    expand printf("%d\n", each args);
+
+    // The above statement is equivalent to:
+    // ```
+    // (printf("%d\n", args[0]), printf("%d\n", args[1]), ..., printf("%d\n", args[n-1]));
+    // ```
+}
+void compute<each T>(expand each T args) where T == int
+{
+    // Maps every element in `args` to `elementValue + 1`, and forwards the
+    // new values as arguments to `printNumbers`.
+    printNumbers(expand (each args) + 1);
+
+    // The above statement is equivalent to:
+    // ```
+    // printNumbers(args[0] + 1, args[1] + 1, ..., args[n-1] + 1);
+    // ```
+}
+void test()
+{
+    compute(1,2,3);
+    // Prints:
+    // 2
+    // 3
+    // 4
+}
+```
+
+As another example, you can use `expand` expression to sum up elements in a variadic argument pack:
+```csharp
+void accumulateHelper(inout int dest, int value) { dest += value; }
+
+int sum<each T>(expand each T args) where T == int
+{
+    int result = 0;
+    expand accumulateHelper(result, each args);
+
+    // The above statement is equivalent to:
+    // ```
+    // (accumulateHelper(result, args[0]), accumulateHelper(result, args[1]), ..., accumulateHelper(result, args[n-1]));
+    // ```
+
+    return result;
+}
+
+void test()
+{
+    int x = sum(1,2,3); // x == 6
+}
+```
+
+Note that a variadic type pack parameter must appear at the end of a parameter list. If a generic type contains more than one
+type pack parameter, then each type pack must contain the same number of arguments at instantiation sites.
+
+Builtin Interfaces
+-----------------------------
+
+Slang supports the following builtin interfaces:
+
+- `IComparable`, provides methods for comparing two values of the conforming type. Supported by all basic data types, vector types and matrix types.
+- `IRangedValue`, provides methods for retrieving the minimum and maximum value expressed by the range of the type. Supported by all integer and floating-point scalar types.
+- `IArithmetic`, provides methods for the `+`, `-`, `*`, `/`, `%` and negating operations. Also provides a method for explicit conversion from `int`. Implemented by all builtin integer and floating-point scalar, vector and matrix types.
+- `ILogical`, provides methods for all bit operations and logical `and`, `or`, `not` operations. Also provides a method for explicit conversion from `int`. Implemented by all builtin integer scalar, vector and matrix types.
+- `IInteger`, represents a logical integer that supports both `IArithmetic` and `ILogical` operations. Implemented by all builtin integer scalar types.
+- `IDifferentiable`, represents a value that is differentiable.
+- `IFloat`, represents a logical float that supports `IArithmetic`, `ILogical` and `IDifferentiable` operations. Also provides methods to convert to and from `float`. Implemented by all builtin floating-point scalar, vector and matrix types.
+- `IArray<T>`, represents a logical array that supports retrieving an element of type `T` from an index. Implemented by array types, vectors, matrices and `StructuredBuffer`.
+- `IRWArray<T>`, represents a logical array whose elements are mutable. Implemented by array types, vectors, matrices, `RWStructuredBuffer` and `RasterizerOrderedStructuredBuffer`.
+- `IFunc<TResult, TParams...>`, represents a callable object (with `operator()`) that returns `TResult` and takes `TParams...` as arguments.
+- `IMutatingFunc<TResult, TParams...>`, similar to `IFunc`, but the `operator()` method is `[mutating]`.
+- `IDifferentiableFunc<TResult, TParams...>`, similar to `IFunc`, but the `operator()` method is `[Differentiable]`.
+- `IDifferentiableMutatingFunc<TResult, TParams...>`, similar to `IFunc`, but the `operator()` method is `[Differentiable]` and `[mutating]`.
+- `__EnumType`, implemented by all enum types.
+- `__BuiltinIntegerType`, implemented by all integer scalar types.
+- `__BuiltinFloatingPointType`, implemented by all floating-point scalar types.
+- `__BuiltinArithmeticType`, implemented by all integer and floating-point scalar types.
+- `__BuiltinLogicalType`, implemented by all integer types and the `bool` type.
+
+Operator overloads are defined for `IArithmetic`, `ILogical`, `IInteger`, `IFloat`, `__BuiltinIntegerType`, `__BuiltinFloatingPointType`,  `__BuiltinArithmeticType` and `__BuiltinLogicalType` types, so the following code is valid:
+
+```csharp
+T f<T:IFloat>(T x, T y)
+{
+    if (x > T(0))
+        return x + y;
+    else
+        return x - y;
+}
+void test()
+{
+    let rs = f(float3(4), float3(5)); // rs = float3(9,9,9)
+}
+```
diff --git a/external/slang/share/doc/slang/user-guide/07-autodiff.html b/external/slang/share/doc/slang/user-guide/07-autodiff.html
new file mode 100644
index 00000000..b82e23b7
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/07-autodiff.html
@@ -0,0 +1,9 @@
+<html>
+
+<head>
+<meta http-equiv="refresh" content="0; url=https://shader-slang.com/slang/user-guide/autodiff" />
+</head>
+<body>
+<p>This page has been relocated. <a href="https://shader-slang.com/slang/user-guide/autodiff">Click here for the new page.</a></p>
+</body>
+</html>
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/user-guide/07-autodiff.md b/external/slang/share/doc/slang/user-guide/07-autodiff.md
new file mode 100644
index 00000000..443a6db5
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/07-autodiff.md
@@ -0,0 +1,866 @@
+---
+layout: user-guide
+permalink: /user-guide/autodiff
+---
+
+# Automatic Differentiation
+
+To support differentiable graphics systems such as Gaussian splatters, neural radiance fields, differentiable path tracers, and more,
+Slang provides first class support for differentiable programming. 
+An overview: 
+- Slang supports the `fwd_diff` and `bwd_diff` operators that can generate the forward and backward-mode derivative propagation functions for any valid Slang function annotated with the `[Differentiable]` attribute. 
+- The `DifferentialPair<T>` built-in generic type is used to pass derivatives associated with each function input. 
+- The `IDifferentiable`, and the experimental `IDifferentiablePtrType`, interfaces denote differentiable value and pointer types respectively, and allow finer control over how types behave under differentiation.
+- Further, Slang allows for user-defined derivative functions through the `[ForwardDerivative(custom_fn)]` and `[BackwardDerivative(custom_fn)]` attributes.
+- All Slang features, such as control-flow, generics, interfaces, extensions, and more are compatible with automatic differentiation, though the bottom of this chapter documents some sharp edges & known issues.
+
+## Auto-diff operations `fwd_diff` and `bwd_diff`
+
+In Slang, `fwd_diff` and `bwd_diff` are higher-order functions used to transform Slang functions into their forward or backward derivative methods. To better understand what these methods do, here is a small refresher on differential calculus:
+### Mathematical overview: Jacobian and its vector products
+Forward and backward derivative methods are two different ways of computing a dot product with the Jacobian of a given function.
+Parts of this overview are based on JAX's excellent auto-diff cookbook [here](https://jax.readthedocs.io/en/latest/notebooks/autodiff_cookbook.html#how-it-s-made-two-foundational-autodiff-functions). The relevant [wikipedia article](https://en.wikipedia.org/wiki/Automatic_differentiation) is also a great resource for understanding auto-diff.
+ 
+The [Jacobian](https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant) (also called the total derivative) of a function $$\mathbf{f}(\mathbf{x})$$ is represented by $$D\mathbf{f}(\mathbf{x})$$. 
+
+For a general function with multiple scalar inputs and multiple scalar outputs, the Jacobian is a _matrix_ where $$D\mathbf{f}_{ij}$$ represents the [partial derivative](https://en.wikipedia.org/wiki/Partial_derivative) of the $$i^{th}$$ output element w.r.t the $$j^{th}$$ input element $$\frac{\partial f_i}{\partial x_j}$$
+
+As an example, consider a polynomial function
+
+$$ f(x, y) = x^3 + x^2 - y $$
+
+Here, $$f$$ has 1 output and 2 inputs. $$Df$$ is therefore the row matrix:
+
+$$ Df(x, y) = [\frac{\partial f}{\partial x}, \frac{\partial f}{\partial y}] = [3x^2 + 2x, -1] $$
+
+Another, more complex example is a function that has multiple outputs (for clarity, denoted by $$f_0$$, $$f_1$$, etc.):
+
+$$ \mathbf{f}(x, y) = \begin{bmatrix} f_0(x, y) & f_1(x, y) & f_2(x, y) \end{bmatrix} = \begin{bmatrix} x^3 & y^2x & y^3 \end{bmatrix} $$
+
+Here, $$D\mathbf{f}$$ is a 3x2 matrix with each element containing a partial derivative:
+
+$$ D\mathbf{f}(x, y) = \begin{bmatrix} 
+\partial f_0 / \partial x & \partial f_0 / \partial y \\  
+\partial f_1 / \partial x & \partial f_1 / \partial y \\
+\partial f_2 / \partial x & \partial f_2 / \partial y
+\end{bmatrix} = 
+\begin{bmatrix} 
+3x^2  & 0   \\  
+y^2   & 2yx \\
+0     & 3y^2
+\end{bmatrix} $$
+
+Computing full Jacobians is often unnecessary and expensive. Instead, auto-diff offers ways to compute _products_ of the Jacobian with a vector, which is a much faster operation.
+There are two basic ways to compute this product: 
+ 1. the Jacobian-vector product $$ \langle D\mathbf{f}(\mathbf{x}), \mathbf{v} \rangle $$, also called forward-mode autodiff, and can be computed using `fwd_diff` operator in Slang, and
+ 2. the vector-Jacobian product $$ \langle \mathbf{v}^T, D\mathbf{f}(\mathbf{x}) \rangle $$, also called reverse-mode autodiff, and can be computed using `bwd_diff` operator in Slang. From a linear algebra perspective, this is the transpose of the forward-mode operator. 
+
+#### Propagating derivatives with forward-mode auto-diff
+The products described above allow the _propagation_ of derivatives forward and backward through the function $$f$$.
+
+The forward-mode derivative (Jacobian-vector product) can convert a derivative of the inputs to a derivative of the outputs. 
+For example, let's say inputs $$\mathbf{x}$$ depend on some scalar $$\theta$$, and $$\frac{\partial \mathbf{x}}{\partial \theta}$$ is a vector of partial derivatives describing that dependency.
+
+Invoking forward-mode auto-diff with $$\mathbf{v} = \frac{\partial \mathbf{x}}{\partial \theta}$$ converts this into a derivative of the outputs w.r.t the same scalar $$\theta$$.
+This can be verified by expanding the Jacobian and applying the [chain rule](https://en.wikipedia.org/wiki/Chain_rule) of derivatives:
+
+$$\langle D\mathbf{f}(\mathbf{x}), \frac{\partial \mathbf{x}}{\partial \theta} \rangle = \langle \begin{bmatrix} \frac{\partial f_0}{\partial x_0} & \frac{\partial f_0}{\partial x_1} & \cdots \\ \frac{\partial f_1}{\partial x_0} & \frac{\partial f_1}{\partial x_1} & \cdots \\ \cdots & \cdots & \cdots \end{bmatrix}, \begin{bmatrix} \frac{\partial x_0}{\partial \theta} \\ \frac{\partial x_1}{\partial \theta} \\ \cdots \end{bmatrix} \rangle = \begin{bmatrix} \frac{\partial f_0}{\partial \theta} \\ \frac{\partial f_1}{\partial \theta} \\ \cdots \end{bmatrix} = \frac{\partial \mathbf{f}}{\partial \theta}$$
+
+#### Propagating derivatives with reverse-mode auto-diff
+The reverse-mode derivative (vector-Jacobian product) can convert a derivative w.r.t outputs into a derivative w.r.t inputs.
+For example, let's say we have some scalar $$\mathcal{L}$$ that depends on the outputs $$\mathbf{f}$$, and $$\frac{\partial \mathcal{L}}{\partial \mathbf{f}}$$ is a vector of partial derivatives describing that dependency.
+
+Invoking reverse-mode auto-diff with $$\mathbf{v} = \frac{\partial \mathcal{L}}{\partial \mathbf{f}}$$ converts this into a derivative of the same scalar $$\mathcal{L}$$ w.r.t the inputs $$\mathbf{x}$$.
+To provide more intuition for this, we can expand the Jacobian in the same way we did above:
+
+$$\langle \frac{\partial \mathcal{L}}{\partial \mathbf{f}}^T, D\mathbf{f}(\mathbf{x}) \rangle = \langle \begin{bmatrix}\frac{\partial \mathcal{L}}{\partial f_0} & \frac{\partial \mathcal{L}}{\partial f_1} & \cdots \end{bmatrix}, \begin{bmatrix} \frac{\partial f_0}{\partial x_0} & \frac{\partial f_0}{\partial x_1} & \cdots \\ \frac{\partial f_1}{\partial x_0} & \frac{\partial f_1}{\partial x_1} & \cdots \\ \cdots & \cdots & \cdots \end{bmatrix} \rangle = \begin{bmatrix} \frac{\partial \mathcal{L}}{\partial x_0} & \frac{\partial \mathcal{L}}{\partial x_1} & \cdots \end{bmatrix} = \frac{\partial \mathcal{L}}{\partial \mathbf{x}}^T$$
+
+This mode is the most popular, since machine learning systems often construct their differentiable pipeline with multiple inputs (which can number in the millions or billions), and a single scalar output often referred to as the 'loss' denoted by $$\mathcal{L}$$. The desired derivative can be constructed with a single reverse-mode invocation.
+
+### Invoking auto-diff in Slang
+With the mathematical foundations established, we can describe concretely how to compute derivatives using Slang.
+
+In Slang, derivatives are computed using `fwd_diff` and `bwd_diff`, which correspond to the Jacobian-vector and vector-Jacobian products respectively.
+For forward-diff, to pass the vector $$\mathbf{v}$$ and receive the outputs, we use the `DifferentialPair<T>` type. We use pairs of inputs because every input element $$x_i$$ has a corresponding element $$v_i$$ in the vector, and each original output element has a corresponding output element in the product.
+
+Example of `fwd_diff`:
+```csharp
+[Differentiable] // Auto-diff requires that functions are marked differentiable
+float2 foo(float a, float b) 
+{ 
+    return float2(a * b * b, a * a);
+}
+
+void main()
+{
+    DifferentialPair<float> dp_a = diffPair(
+        1.0, // input 'a'
+        1.0  // vector 'v' for Jacobian-vector product input (for 'a')
+    );
+
+    DifferentialPair<float> dp_b = diffPair(2.4, 0.0);
+
+    // fwd_diff to compute output and d_output w.r.t 'a'.
+    // Our output is also a differential pair.
+    //
+    DifferentialPair<float2> dp_output = fwd_diff(foo)(dp_a, dp_b);
+
+    // Extract output's primal part, which is just the standard output when foo is called normally.
+    // Can also use `.getPrimal()`
+    //
+    float2 output_p = dp_output.p;
+
+    // Extract output's derivative part. Can also use `.getDifferential()`
+    float2 output_d = dp_output.d;
+
+    printf("foo(1.0, 2.4) = (%f %f)\n", output_p.x, output_p.y);
+    printf("d(foo)/d(a) at (1.0, 2.4) = (%f, %f)\n", output_d.x, output_d.y);
+}
+```
+
+Note that all the inputs and outputs to our function become 'paired'. This only applies to differentiable types, such as `float`, `float2`, etc. See the section on differentiable types for more info.
+
+`diffPair<T>(primal_val, diff_val)` is a built-in utility function that constructs the pair from the primal and differential values.  
+
+Additionally, invoking forward-mode also computes the regular (or 'primal') output value (can be obtained from `output.getPrimal()` or `output.p`). The same is _not_ true for reverse-mode.
+
+For reverse-mode, the example proceeds in a similar way, and we still use `DifferentialPair<T>` type. However, note that each input gets a corresponding _output_ and each output gets a corresponding _input_. Thus, all inputs become `inout` differential pairs, to allow the function to write into the derivative part (the primal part is still accepted as an input in the same pair data-structure).
+The one extra rule is that the derivative corresponding to the return value of the function is accepted as the last argument (an extra input). This value does not need to be a pair.
+
+Example:
+```csharp
+[Differentiable] // Auto-diff requires that functions are marked differentiable
+float2 foo(float a, float b) 
+{ 
+    return float2(a * b * b, a * a);
+}
+
+void main()
+{
+    DifferentialPair<float> dp_a = diffPair(
+        1.0 // input 'a'
+    ); // Calling diffPair without a derivative part initializes to 0.
+
+    DifferentialPair<float> dp_b = diffPair(2.4);
+
+    // Derivatives of scalar L w.r.t output.
+    float2 dL_doutput = float2(1.0, 0.0);
+
+    // bwd_diff to compute dL_da and dL_db
+    // The derivative of the output is provided as an additional _input_ to the call
+    // Derivatives w.r.t inputs are written into dp_a.d and dp_b.d
+    //
+    bwd_diff(foo)(dp_a, dp_b, dL_doutput);
+
+    // Extract the derivatives of L w.r.t input
+    float dL_da = dp_a.d;
+    float dL_db = dp_b.d;
+
+    printf("If dL/dOutput = (1.0, 0.0), then (dL/da, dL/db) at (1.0, 2.4) = (%f, %f)", dL_da, dL_db);
+}
+```
+
+## Differentiable Type System
+
+Slang will only generate differentiation code for values that have a *differentiable* type. 
+Differentiable types are defined through conformance to one of two built-in interfaces:
+1. `IDifferentiable`: For value types (e.g. `float`, structs of value types, etc..)
+2. `IDifferentiablePtrType`: For buffer, pointer & reference types that represent locations rather than values.
+
+### Differentiable Value Types
+All basic types (`float`, `int`, `double`, etc..) and all aggregate types (i.e. `struct`) that use any combination of these are considered value types in Slang.
+
+Slang uses the `IDifferentiable` interface to define differentiable types. Basic types that describe a continuous value (`float`, `double` and `half`) and their vector/matrix versions (`float3`, `half2x2`, etc..) are defined as differentiable by the standard library. For all basic types, the type used for the differential (can be obtained with `T.Differential`) is the same as the primal.
+
+#### Builtin Differentiable Value Types
+The following built-in types are differentiable: 
+- Scalars: `float`, `double` and `half`.
+- Vector/Matrix: `vector` and `matrix` of `float`, `double` and `half` types.
+- Arrays: `T[n]` is differentiable if `T` is differentiable.
+- Tuples: `Tuple<each T>` is differentiable if `T` is differentiable. 
+
+
+#### User-defined Differentiable Value Types
+
+However, it is easy to define your own differentiable types.
+Typically, all you need is to implement the `IDifferentiable` interface. 
+
+```csharp
+struct MyType : IDifferentiable
+{
+    float x;
+    float y;
+};
+```
+
+The main requirement of a type implementing `IDifferentiable` is the `Differential` associated type that the compiler uses to carry the corresponding derivative.
+In most cases the `Differential` of a type can be itself, though it can be different if necessary.
+You can access the differential of any differentiable type through `Type.Differential`
+
+Example:
+```csharp
+MyType obj;
+obj.x = 1.f;
+
+MyType.Differential d_obj;
+// Differentiable fields will have a corresponding field in the diff type
+d_obj.x = 1.f;
+```
+
+Slang can automatically derive the `Differential` type in the majority of cases.
+For instance, for `MyType`, Slang can infer the differential trivially:
+```csharp
+struct MyType : IDifferentiable
+{
+    // Automatically inserted by Slang from the fact that 
+    // MyType has 2 floats which are both differentiable
+    //
+    typealias Differential = MyType;
+    // ...
+}
+```
+
+For more complex types that aren't fully differentiable, a new type is synthesized automatically:
+
+```csharp
+struct MyPartialDiffType : IDifferentiable
+{
+    // Automatically inserted by Slang based on which fields are differentiable.
+    typealias Differential = syn_MyPartialDiffType_Differential;
+    
+    float x;
+    uint y;
+};
+
+// Synthesized
+struct syn_MyPartialDiffType_Differential
+{
+    // Only one field since 'y' does not conform to IDifferentiable
+    float x;
+};
+```
+
+You can make existing types differentiable through Slang's extension mechanism.
+For instance, `extension MyType : IDifferentiable { }` will make `MyType` differentiable retroactively.
+
+See the `IDifferentiable` [reference documentation](https://shader-slang.org/stdlib-reference/interfaces/idifferentiable-01/index) for more information on how to override the default behavior.
+
+#### DifferentialPair<T>: Pairs of differentiable value types
+
+The `DifferentialPair<T>` type is used to pass derivatives to a derivative call by representing a pair of values of type `T` and `T.Differential`. Note that `T` must conform to `IDifferentiable`.
+
+`DifferentialPair<T>` can either be created via constructor calls or the `diffPair` utility method.
+
+Example:
+
+```csharp
+MyType obj = {1.f, 2.f};
+
+MyType.Differential d_obj = {0.4f, 3.f};
+
+// The differential part of a differentiable-pair is of the diff type.
+DifferentialPair<MyType> dp_obj = diffPair(obj, d_obj);
+
+// Use .p to extract the primal part
+MyType new_p_obj = dp_obj.p;
+
+// Use .d to extract the differential part
+MyType.Differential new_d_obj = dp_obj.d;
+```
+
+### Differentiable Ptr types
+Pointer types are any type that represents a location or reference to a value rather than the value itself.
+Examples include resource types (`RWStructuredBuffer`, `Texture2D`), pointer types (`Ptr<float>`) and references.
+
+The `IDifferentiablePtrType` interface can be used to denote types that need to transform into pairs during auto-diff. However, unlike
+an `IDifferentiable` type whose derivative portion is an _output_ under `bwd_diff`, the derivative part of `IDifferentiablePtrType` remains an input. This is because only the value is returned as an output, while the location it needs to be written to is still effectively an input to the derivative methods.
+
+> #### Note ####
+> Support for `IDifferentiablePtrType` is still experimental. There are no built-in types conforming to this interface, though we plan to add stdlib support in the near future.
+
+`IDifferentiablePtrType` only requires a `Differential` associated type to be specified.
+
+#### DifferentialPtrPair<T>: Pairs of differentiable ptr types
+For types conforming to `IDifferentiablePtrType`, the corresponding pair to use for passing the derivative counterpart is `DifferentialPtrPair<T>`, which represents a pair of `T` and `T.Differential`. Objects of this type can be created using a constructor.
+
+#### Example of defining and using an `IDifferentiablePtrType` object.
+Here is an example of creating a differentiable buffer pointer type, and using it within a differentiable function.
+You can find an interactive sample on the Slang playground [here](https://shader-slang.org/slang-playground/?target=WGSL&code=eJy1VF1v2kAQfPevWEWKYhfkmFdMkBrRSpHKhyBSpdIIHfgcTjFn9z4gEeK_d-_ONsZp1L6UF8MxOzszuz62K3KhoMjI27PINU9iTyqhNwrGb_c6TamY5YwrKqAPDyNmDihXjKwzOlPi8a2g3tED_NzewuOWQnKGZFAQpGYSCM_VFikYl4rwDYU8bdOHlkQhH8kYkTBq8ty10bFn4fPvC6tVC5q4_wdplhM1hLVOYwvRiMd2qaQq9k5Yhzq_Mf4CBDZaqnwHCRVsTxTbU295TzYvByKSUX3mI1-yWh-S4Mmz3GAO_HY4Rdd1Yjyhr0EZiaCojEMRopplEToV0HGgJ5Rj1UxyRUFtkRkzgnWpoCELJHvmxJg0WUrFsgwThRvGb9pxM8xxn7MEKtV-M0cc2Awhg5b44aX6LjifyVSryknbrmm7KpTAyRRh4pKuzqzb-kfLNHTuLLE1v7zcpypgqXfTdPFLE0HlIMPiCe4e9h2-T73SVxbmEgVFYVqux3JMXh8QJ_0JTs_icgG-s2qQMT4GMMFHpxNYgKM7U-5JtjJQO3SMiQVxjTDt0I6DfHJP9-_Ja84fcc7u-SXr9-efJyO_F6Guj5eY8UIrrL2s_PFlPl38rdRujym121AItDwmjPsfDdTN8ug6diE6OSO20L_6yoRU0IuMR01lH37yqzKIPybaixqRNniukz5cp1iMQXbLTJUwqQblyErgQu_MHSHdEpivaVtCyXOxLL1oaAhrtndra1Ip9_boInJeqxtsRiTeVvZFMk0LV4dH0g3D3VL4Xq3Mgvvt5oFfO_6X986Zr0UF3bq6F0Zlvs1UzreSEYc9dabgEIrQXR3_ZT61unJKp98JDfhi).
+```csharp
+struct MyBufferPointer : IDifferentiablePtrType
+{
+    // The differential part is another instance of MyBufferPointer.
+    typealias Differential = MyBufferPointer;
+
+    RWStructuredBuffer<float> buf;
+    uint offset;
+};
+
+// Link a custom derivative
+[BackwardDerivative(load_bwd)]
+float load(MyBufferPointer p, uint index)
+{
+    return p.buf[p.offset + index];
+}
+
+// Note that the backward derivative signature is still an 'in' differential pair.
+void load_bwd(DifferentialPtrPair<MyBufferPointer> p, uint index, float dOut)
+{
+    MyBufferPointer diff_ptr = p.d;
+    diff_ptr.buf[diff_ptr.offset + index] += dOut;
+}
+
+[Differentiable]
+float sumOfSquares<let N : int>(MyBufferPointer p)
+{
+    float sos = 0.f;
+
+    [MaxIters(N)]
+    for (uint i = 0; i < N; i++)
+    {
+        float val_i = load(p, i);
+        sos += val_i * val_i;
+    }
+
+    return sos;
+}
+
+RWStructuredBuffer<float> inputs;
+RWStructuredBuffer<float> derivs;
+
+void main()
+{
+    MyBufferPointer ptr = {inputs, 0};
+    print("Sum of squares of first 10 values: ", sumOfSquares<10>(ptr));
+
+    MyBufferPointer deriv_ptr = {derivs, 0};
+
+    // Pass a pair of pointers as input.
+    bwd_diff(sumOfSquares<10>)(
+        DifferentialPtrPair<MyBufferPointer>(ptr, deriv_ptr),
+        1.0);
+    
+    print("Derivative of result w.r.t the 10 values: \n");
+    for (uint i = 0; i < 10; i++)
+        print("%d: %f\n", i, load(deriv_ptr, i));
+}
+```
+
+## User-Defined Derivative Functions
+
+As an alternative to compiler-generated derivatives, you can choose to provide an implementation for the derivative, which the compiler will use instead of attempting to generate one. 
+
+This can be performed on a per-function basis by using the decorators `[ForwardDerivative(fwd_deriv_func)]` and `[BackwardDerivative(bwd_deriv_func)]` to reference the derivative from the primal function.
+
+For instance, it often makes little sense to differentiate the body of a `sin(x)` implementation, when we know that the derivative is `cos(x) * dx`. In Slang, this can be represented in the following way:
+```csharp
+DifferentialPair<float> sin_fwd(DifferentialPair<float> dpx)
+{
+    float x = dpx.p;
+    float dx = dpx.d;
+    return DifferentialPair<float>(dpx.p, cos(x) * dx);
+}
+
+// sin() is now considered differentiable (at least for forward-mode) since it provides
+// a derivative implementation.
+//
+[ForwardDerivative(sin_fwd)]
+float sin(float x)
+{
+    // Calc sin(X) using Taylor series..
+}
+
+// Any uses of sin() in a `[Differentiable]` function will automatically use the sin_fwd implementation when differentiated.
+```
+
+A similar example for a backward derivative.
+```csharp
+void sin_bwd(inout DifferentialPair<float> dpx, float dresult)
+{
+    float x = dpx.p;
+
+    // Write-back the derivative to each input (the primal part must be copied over as-is)
+    dpx = DifferentialPair<float>(x, cos(x) * dresult);
+}
+
+[BackwardDerivative(sin_bwd)]
+float sin(float x)
+{
+    // Calc sin(X) using Taylor series..
+}
+```
+
+> Note that the signature of the provided forward or backward derivative function must match the expected signature from invoking `fwd_diff(fn)`/`bwd_diff(fn)`.
+> For a full list of signature rules, see the reference section for the [auto-diff operators](#fwd_difff--slang_function---slang_function).
+
+### Back-referencing User Derivative Attributes.
+Sometimes, the original function's definition might be inaccessible, so it can be tricky to add an attribute to create the association.
+
+For such cases, Slang provides the `[ForwardDerivativeOf(primal_fn)]` and `[BackwardDerivativeOf(primal_fn)]` attributes that can be used
+on the derivative function and contain a reference to the function for which they are providing a derivative implementation.
+As long as the derivative function is in scope, the primal function will be considered differentiable.
+
+Example:
+```csharp
+// Module A
+float sin(float x) { /* ... */ } 
+
+// Module B
+import A;
+[BackwardDerivativeOf(sin)] // Add a derivative implementation for sin() in module A.
+void sin_bwd(inout DifferentialPair<float> dpx, float dresult) { /* ... */ }
+```
+
+User-defined derivatives also work for generic functions, member functions, accessors, and more. 
+See the reference section for the [`[ForwardDerivative(fn)]`](https://shader-slang.org/stdlib-reference/attributes/forwardderivative-07.html) and [`[BackwardDerivative(fn)]`](https://shader-slang.org/stdlib-reference/attributes/backwardderivative-08) attributes for more. 
+
+## Using Auto-diff with Generics
+Automatic differentiation works seamlessly with generically-defined types and methods.
+For generic methods, differentiability of a type is defined either through an explicit `IDifferentiable` constraint or any other
+interface that extends `IDifferentiable`.
+
+Example for generic methods:
+```csharp
+[Differentiable]
+T calcFoo<T : IDifferentiable>(T x) { /* ... */ }
+
+[Differentiable]
+T calcBar<T : __BuiltinFloatingPointType>(T x) { /* ... */ }
+
+[Differentiable]
+void main()
+{
+    DifferentialPair<float4> dpa = /* ... */;
+
+    // Can call with any type that is IDifferentiable. Generic parameters
+    // are inferred like any other call.
+    //
+    bwd_diff(calcFoo)(dpa, float4(1.f));
+
+    // But you can also be explicit with < >
+    bwd_diff(calcFoo<float4>)(dpa, float4(1.f));
+
+    // x is differentiable for calcBar because 
+    // __BuiltinFloatingPointType : IDifferentiable
+    //
+    DifferentialPair<double> dpb = /* .. */;
+    bwd_diff(calcBar)(dpb, 1.0);
+}
+```
+
+You can implement `IDifferentiable` on a generic type. Automatic synthesis still applies and will use
+generic constraints to resolve whether a field is differentiable or not.
+```csharp
+struct Foo<T : IDifferentiable, U> : IDifferentiable
+{
+    T t;
+    U u;
+};
+
+// The synthesized Foo<T, U>.Differential will contain a field for
+// 't' but not 'u'
+//
+```
+
+## Using Auto-diff with Interface Requirements and Interface Types
+For interface requirements, using the `[Differentiable]` attribute enforces that any implementation of that method must also be
+differentiable. You can, of course, provide a manual derivative implementation to satisfy the requirement.
+
+The following is a sample snippet. You can run the full sample on the playground [here](https://shader-slang.org/slang-playground/?target=HLSL&code=eJyVVMtu2zAQvOsrFgEKy4Wq1C7QQ1330AYBcujjnhbBWiRjphQpUJQjI8i_d0mRquLYASLYtLwc7sySs5R1Y6yDRuH-1ppOs1UmteNWYMXh6tKY7CEDeq4vpBDccu0kbhT_E4JCGXRQoary4bWfr7LHLGud7SoHtPqqbhR8miYagJTeGbvKQuj8HDyO15QdnTQadhIBO2dq-lsB-0_tZ8tXCQrxgdo_lrvOaujhLXwoBY1RCQTE43P1y5fk-8gLJdSoO1RqD401O8mkvgXGrdwRYseh5m5rWBvLuTT2Hi27GOdzX8aNuGfzobbrr1j9PQbZjJBXlb-clh-rDz-TjVW_UNrPIdcXSHryU4BTbP78PC7vy2YgLiJ7X7JRw_yJiJ2RDFJ5udSmcyeF9UWsnFnedsodyuhhfWqtl1SkdQebMgoiSd4BcMvdz81d3lGDgNnc3Ug2j66QAvIhAus1LA4FpEaQfljDw6L8KB5Xh9vkZxOlH7lq-fFEyzET6X05EWl_1inRJqZuOseTUwo4UtfxZv1mOToOSEy6dajppjACuHRbbpPEBZjxfRkWhi0U9F2njYxcsfdS9ou9xjp0fdugq7bgDGBDHdRY6Wln3hWzMsLTqh-GptzWqyUK6VquBMgWtNHv2JP6CxLORrYtesz0ilHA0GEBG3LcrJ8F9GwwyCytupdKkTut3U8aui3bqagdWoi-WntRZehLf0Nmk7MaEOHWDJanIrX7jlLn6QxOuZ41SInH3lqW76NhqWFufDiPJzzPCVrAgj6lSPSBJz97I37rs8LnKpm_u_8BU5nW2Q). 
+```csharp
+interface IFoo
+{
+    [Differentiable]
+    float calc(float x);
+}
+
+struct FooImpl : IFoo
+{
+    // Implementation via automatic differentiation.
+    [Differentiable]
+    float calc(float x)
+    { /* ... */ }
+}
+
+struct FooImpl2 : IFoo
+{
+    // Implementation via manually providing derivative methods.
+    [ForwardDerivative(calc_fwd)]
+    [BackwardDerivative(calc_bwd)]
+    float calc(float x)
+    { /* ... */ }
+
+    DifferentialPair<float> calc_fwd(DifferentialPair<float> x)
+    { /* ... */ }
+
+    void calc_bwd(inout DifferentialPair<float> x, float dresult)
+    { /* ... */ }
+}
+
+[Differentiable]
+float compute(float x, uint obj_id)
+{
+    // Create an instance of either FooImpl or FooImpl2
+    IFoo foo = createDynamicObject<IFoo>(obj_id); 
+    
+    // Dynamic dispatch to appropriate 'calc'.
+    //
+    // Note that foo itself is non-differentiable, and 
+    // has no differential data, but 'x' and 'result'
+    // will carry derivatives.
+    //
+    var result = foo.calc(x);
+    return result;
+}
+```
+
+### Differentiable Interface (and Associated) Types
+> Note: This is an advanced use-case and support is currently experimental.
+
+You can have an interface or an interface associated type extend `IDifferentiable` and use that in differentiable interface requirement functions. This is often important in large code-bases with modular components that are all differentiable (one example is the material system in large production renderers).
+
+Here is a snippet of how to make an interface and associated type (and by consequence all its implementations) differentiable. 
+For a full working sample, check out the Slang playground [here](https://shader-slang.org/slang-playground/?target=WGSL&code=eJylVVFvmzAQfudXnCpVgoXRhK4vpdnLuodIq9pqe9umygGzuXPANaYjqvLfd8bgmIR0a-eHcHfcnT9_38WwlSilAsHJ-ocs6yJLPI8VisqcpBQWHwhPa04UKws4h8Uly3MqaaEYWXLqPXmA6-sw-r0N5rwkCogQfO0buw4SbzPs_kWSospLuTrYm1RVmTKiaKbWgsIOHMfFxgexuFUr8ov6HZJKyTpV8IkVlMibkq-LcsUI3-ncIekOlDjO0jgvIaEJ4AkkVbUsgMAbaGCCbWDjbSyc25pkEndOX4_IOOlznDwPzbfYgs5IhyANZwstpSi3glhBO4haNMIZqQYazPcod2ELNRu68cu0bcNme7321CUpAnjy1U9WRdgc3kJnzoLQmpvENujVSk1oVKpXEzEitjNUhwnZuiuW3Qn1XxSNTdxDy5JN0SvGUdCETeBda82QOm0ZBOEgdxvHpDObjuXDPAxbf5_zB5fzvbM514c-1vXy3q_xcgGWhR03j4TPHDsOOjVYDj7LYD6H6fi4DPXkTHNhmuk2-0A564HqX8oreojiYeeHnc4h-JplHUCaW8hwAqdRPsINe46b7gZA5Q0nev56JoTlRMS91fTUOKSWy3tE11NrOuhaEQfduA0-D3pgsCTqb1gHaxqZi6bRF6_nPZZIvpCI64qwwu-3boFeXV9-_Iyd-hkvJXSqYnCa4OPC5KA5meyqZw7TfmHEDU4clkRxcuB1jK9P3dctJP9oKNFVmVE4zsBv5tPoLDiH4_xbcRQCCw29-LT7bU0kVmf3ROnlSMRvCJMXLZr3kIkGgWT4Vkd9XZb8Q9HCOaQttkhe1CIeaxE7LZa_szud4OsTB_rIzv6uE2unCWEWTd2jd8RmvqRVzVVwkuEoWCaxIsqCPRncbH05O_l277_XxWN1sa3Df88fIn-viQ)
+
+```csharp
+interface IFoo : IDifferentiable
+{
+    associatedtype BaseType : IDifferentiable;
+
+    [Differentiable]
+    BaseType foo(BaseType x);
+};
+
+[Differentiable]
+float calc(float x)
+{
+    // Note that since IFoo is differentiable, 
+    // any data in the IFoo implementation is differentiable
+    // and will carry derivatives.
+    //
+    IFoo obj = makeObj(/* ... */);
+    return obj.foo(x);
+}
+```
+
+Under the hood, Slang will automatically construct an anonymous abstract type to represent the differentials. 
+However, on targets that don't support true dynamic dispatch, these are lowered into tagged unions. 
+While we are working to improve the implementation, this union can currently include all active differential 
+types, rather than just the relevant ones. This can lead to increased memory use.
+
+## Primal Substitute Functions
+
+Sometimes it is desirable to replace a function with another when generating derivative code. 
+Most often, this is because a lot of shader operations may just not have a function body, such as hardware intrinsics for
+texture sampling. In such cases, Slang provides a `[PrimalSubstitute(fn)]` attribute that can be used to provide
+a reference implementation that Slang can differentiate to generate the derivative function.
+
+The following is a small snippet with bilinear texture sampling. For a full example application that uses this concept, see the [texture differentiation sample](https://github.com/shader-slang/slang/tree/master/examples/autodiff-texture) in the Slang repository.
+
+```csharp
+[PrimalSubstitute(sampleTextureBilinear_reference)]
+float4 sampleTextureBilinear(Texture2D<float4> x, float2 loc) 
+{ 
+    // HW-accelerated sampling intrinsics. 
+    // Slang does not have access to body, so cannot differentiate.
+    //
+    x.Sample(/*...*/)
+}
+
+// Since the substitute is differentiable, so is `sampleTextureBilinear`.
+[Differentiable]
+float4 sampleTextureBilinear_reference(Texture2D<float4> x, float2 loc)
+{
+    // Reference SW interpolation, that is differentiable.
+}
+
+[Differentiable]
+float computePixel(Texture2D<float> x, float a, float b)
+{
+    // Slang will use HW-accelerated sampleTextureBilinear for standard function
+    // call, but differentiate the SW reference interpolation during backprop.
+    // 
+    float4 sample1 = sampleTextureBilinear(x, float2(a, 1));
+}
+```
+
+Similar to `[ForwardDerivativeOf(fn)]` and `[BackwardDerivativeOf(fn)]` attributes, Slang provides a `[PrimalSubstituteOf(fn)]` attribute that can be used on the substitute function to reference the primal one.
+
+## Working with Mixed Differentiable and Non-Differentiable Code
+
+Introducing differentiability to an existing system often involves dealing with code that mixes differentiable and non-differentiable logic.
+Slang provides type checking and code analysis features to allow users to clarify the intention and guard against unexpected behaviors involving when to propagate derivatives through operations.
+
+### Excluding Parameters from Differentiation
+
+Sometimes we do not wish a parameter to be considered differentiable even though it has a differentiable type. We can use the `no_diff` modifier on the parameter to inform the compiler to treat the parameter as non-differentiable and skip generating differentiation code for the parameter. The syntax is:
+
+```csharp
+// Only differentiate this function with regard to `x`.
+float myFunc(no_diff float a, float x);
+```
+
+The forward derivative and backward propagation functions of `myFunc` should have the following signature:
+```csharp
+DifferentialPair<float> fwd_derivative(float a, DifferentialPair<float> x);
+void back_prop(float a, inout DifferentialPair<float> x, float dResult);
+```
+
+In addition, the `no_diff` modifier can also be used on the return type to indicate the return value should be considered non-differentiable. For example, the function
+```csharp
+no_diff float myFunc(no_diff float a, float x, out float y);
+```
+will have the following forward derivative and backward propagation function signatures:
+
+```csharp
+float fwd_derivative(float a, DifferentialPair<float> x);
+void back_prop(float a, inout DifferentialPair<float> x, float d_y);
+```
+
+By default, the implicit `this` parameter will be treated as differentiable if the enclosing type of the member method is differentiable. If you wish to exclude the `this` parameter from differentiation, use the `[NoDiffThis]` attribute on the method:
+```csharp
+struct MyDifferentiableType : IDifferentiable
+{
+    [NoDiffThis]   // Make `this` parameter `no_diff`.
+    float compute(float x) { ... }
+}
+```
+
+### Excluding Struct Members from Differentiation
+
+When using automatic `IDifferentiable` conformance synthesis for a `struct` type, Slang will by default treat all struct members that have a differentiable type as differentiable, and thus include a corresponding field in the generated `Differential` type for the struct.
+For example, given the following definition
+```csharp
+struct MyType : IDifferentiable
+{
+    float member1;
+    float2 member2;
+}
+```
+Slang will generate:
+```csharp
+struct MyType.Differential : IDifferentiable
+{
+    float member1;  // derivative for MyType.member1
+    float2 member2; // derivative for MyType.member2
+}
+```
+If the user does not want a certain member to be treated as differentiable even though it has a differentiable type, a `no_diff` modifier can be used on the struct member to exclude it from differentiation.
+For example, the following code excludes `member1` from differentiation:
+```csharp
+struct MyType : IDifferentiable
+{
+    no_diff float member1;  // excluded from differentiation
+    float2 member2;
+}
+```
+The generated `Differential` in this case will be:
+```csharp
+struct MyType.Differential : IDifferentiable
+{
+    float2 member2;
+}
+```
+
+### Assigning Differentiable Values into a Non-Differentiable Location
+
+When a value with derivatives is being assigned to a location that is not differentiable, such as a struct member that is marked as `no_diff`, the derivative info is discarded and any derivative propagation is stopped at the assignment site.
+This may lead to unexpected results. For example:
+```csharp
+struct MyType : IDifferentiable
+{
+    no_diff float member;
+    float someOtherMember;
+}
+[Differentiable]
+float f(float x)
+{
+    MyType t;
+    t.member = x * x; // Error: assigning value with derivative into a non-differentiable location.
+    return t.member;
+}
+...
+let result = fwd_diff(f)(diffPair(3.0, 1.0)).d; // result == 0.0
+```
+In this case, we are assigning the value `x*x`, which carries a derivative, into a non-differentiable location `MyType.member`, thus throwing away any derivative info. When `f` returns `t.member`, there will be no derivative associated with it, so the function will not propagate the derivative through. This code is most likely not intending to discard the derivative through the assignment. To help avoid this kind of unintentional behavior, Slang will treat any assignments of a value with derivative info into a non-differentiable location as a compile-time error. To eliminate this error, the user should either make `t.member` differentiable, or force the assignment by clarifying the intention to discard any derivatives using the built-in `detach` method.
+The following code will compile, and the derivatives will be discarded:
+```csharp
+[Differentiable]
+float f(float x)
+{
+    MyType t;
+    // OK: the code has expressed clearly the intention to discard the derivative and perform the assignment.
+    t.member = detach(x * x);
+    return t.member;
+}
+```
+
+### Calling Non-Differentiable Functions from a Differentiable Function
+Calling a non-differentiable function from a differentiable function is allowed. However, derivatives will not be propagated through the call. The user is required to clarify the intention by prefixing the call with the `no_diff` keyword. An un-clarified call to a non-differentiable function will result in a compile-time error.
+
+For example, consider the following code:
+```csharp
+float g(float x)
+{
+    return 2*x;
+}
+
+[Differentiable]
+float f(float x)
+{
+    // Error: implicit call to non-differentiable function g.
+    return g(x) + x * x;
+}
+```
+The derivative will not propagate through the call to `g` in `f`. As a result, `fwd_diff(f)(diffPair(1.0, 1.0))` will return
+`{3.0, 2.0}` instead of `{3.0, 4.0}` as the derivative from `2*x` is lost through the non-differentiable call. To prevent unintended error, it is treated as a compile-time error to call `g` from `f`. If such a non-differentiable call is intended, a `no_diff` prefix is required in the call:
+```csharp
+[Differentiable]
+float f(float x)
+{
+    // OK. The intention to call a non-differentiable function is clarified.
+    return no_diff g(x) + x * x;
+}
+```
+
+However, the `no_diff` keyword is not required in a call if a non-differentiable function does not take any differentiable parameters, or if the result of the differentiable function is not dependent on the derivative being propagated through the call.
+
+### Treat Non-Differentiable Functions as Differentiable
+Slang allows functions to be marked with a `[TreatAsDifferentiable]` attribute for them to be considered as differentiable functions by the type-system. When a function is marked as `[TreatAsDifferentiable]`, the compiler will not generate derivative propagation code from the original function body or perform any additional checking on the function definition. Instead, it will generate trivial forward and backward propagation functions that return 0.
+
+This feature can be useful if the user marked an `interface` method as forward or backward differentiable, but only wishes to provide non-trivial derivative propagation functions for a subset of types that implement the interface. For other types that do not actually need differentiation, the user can simply put `[TreatAsDifferentiable]` on the method implementations for them to satisfy the interface requirement.
+
+See the following code for an example of `[TreatAsDifferentiable]`:
+```csharp
+interface IFoo
+{
+    [Differentiable]
+    float f(float v);
+}
+
+struct B : IFoo
+{
+    [TreatAsDifferentiable]
+    float f(float v)
+    {
+        return v * v;
+    }
+}
+
+[Differentiable]
+float use(IFoo o, float x)
+{
+    return o.f(x);
+}
+
+// Test:
+B obj;
+float result = fwd_diff(use)(obj, diffPair(2.0, 1.0)).d;
+// result == 0.0, since `[TreatAsDifferentiable]` causes a trivial derivative implementation
+// to be generated regardless of the original code.
+```
+
+## Higher-Order Differentiation
+
+Slang supports generating higher order forward and backward derivative propagation functions. It is allowed to use `fwd_diff` and `bwd_diff` operators inside a forward or backward differentiable function, or to nest `fwd_diff` and `bwd_diff` operators. For example, `fwd_diff(fwd_diff(sin))` will have the following signature:
+
+```csharp
+DifferentialPair<DifferentialPair<float>> sin_diff2(DifferentialPair<DifferentialPair<float>> x);
+```
+
+The input parameter `x` contains four fields: `x.p.p`, `x.p.d`, `x.d.p`, `x.d.d`, where `x.p.p` specifies the original input value, both `x.p.d` and `x.d.p` store the first order derivative of `x`, and `x.d.d` stores the second order derivative of `x`. Calling `fwd_diff(fwd_diff(sin))` with `diffPair(diffPair(pi/2, 1.0), diffPair(1.0, 0.0))` will result in `{ { 1.0, 0.0 }, { 0.0, -1.0 } }`.
+
+User defined higher-order derivative functions can be specified by using `[ForwardDerivative]` or `[BackwardDerivative]` attribute on the derivative function, or by using `[ForwardDerivativeOf]` or `[BackwardDerivativeOf]` attribute on the higher-order derivative function.
+
+## Restrictions and Known Issues
+
+The compiler can generate forward derivative and backward propagation implementations for most uses of array and struct types, including arbitrary read and write access at dynamic array indices, and supports uses of all types of control flows, mutable parameters, generics and interfaces. This covers the set of operations that is sufficient for a lot of functions. However, the user needs to be aware of the following restrictions when using automatic differentiation:
+
+- All operations to global resources, global variables and shader parameters, including texture reads or atomic writes, are treated as a non-differentiable operation. Slang provides support for special data-structures (such as `Tensor`) through libraries such as `SlangPy`, which come with custom derivative implementations.
+- If a differentiable function contains calls that cause side-effects such as updates to global memory, there is currently no guarantee on how many times side-effects will occur during the resulting derivative function or back-propagation function.
+- Loops: Loops must have a bounded number of iterations. If this cannot be inferred statically from the loop structure, the attribute `[MaxIters(<count>)]` can be used to specify a maximum number of iterations. This will be used by the compiler to allocate space to store intermediate data. If the actual number of iterations exceeds the provided maximum, the behavior is undefined. You can always mark a loop with the `[ForceUnroll]` attribute to instruct the Slang compiler to unroll the loop before generating derivative propagation functions. Unrolled loops will be treated the same way as ordinary code and are not subject to any additional restrictions.
+- Double backward derivatives (higher-order differentiation): The compiler does not currently support multiple backward derivative calls such as `bwd_diff(bwd_diff(fn))`. The vast majority of higher-order derivative applications can be achieved more efficiently via multiple forward-derivative calls or a single layer of `bwd_diff` on functions that use one or more `fwd_diff` passes.
+
+The above restrictions do not apply if a user-defined derivative or backward propagation function is provided.
+
+## Reference
+
+This section contains some additional information for operators that are not currently included in the [standard library reference](https://shader-slang.org/stdlib-reference/)
+
+### `fwd_diff(f : slang_function) -> slang_function`
+The `fwd_diff` operator can be used on a differentiable function to obtain the forward derivative propagation function.
+
+A forward derivative propagation function computes the derivative of the result value with regard to a specific set of input parameters. 
+Given an original function, the signature of its forward propagation function is determined using the following rules:
+- If the return type `R` implements `IDifferentiable` the forward propagation function will return a corresponding `DifferentialPair<R>` that consists of both the computed original result value and the (partial) derivative of the result value. Otherwise, the return type is kept unmodified as `R`.
+- If a parameter has type `T` that implements `IDifferentiable`, it will be translated into a `DifferentialPair<T>` parameter in the derivative function, where the differential component of the `DifferentialPair` holds the initial derivatives of each parameter with regard to their upstream parameters.
+- If a parameter has type `T` that implements `IDifferentiablePtrType`, it will be translated into a `DifferentialPtrPair<T>` parameter where the differential component references the location that holds the corresponding derivative.
+- All parameter directions are unchanged. For example, an `out` parameter in the original function will remain an `out` parameter in the derivative function.
+- Differentiable methods cannot have a type implementing `IDifferentiablePtrType` as an `out` or `inout` parameter, or a return type. Types implementing `IDifferentiablePtrType` can only be used for input parameters to a differentiable method. Marking such a method as `[Differentiable]` will result in a compile-time diagnostic error.
+
+For example, given original function:
+```csharp
+[Differentiable]
+R original(T0 p0, inout T1 p1, T2 p2, T3 p3);
+```
+Where `R`, `T0`, `T1 : IDifferentiable`, `T2` is non-differentiable, and `T3 : IDifferentiablePtrType`, the forward derivative function will have the following signature:
+```csharp
+DifferentialPair<R> derivative(DifferentialPair<T0> p0, inout DifferentialPair<T1> p1, T2 p2, DifferentialPtrPair<T3> p3);
+```
+
+This forward propagation function takes the initial primal value of `p0` in `p0.p`, and the partial derivative of `p0` with regard to some upstream parameter in `p0.d`. It takes the initial primal and derivative values of `p1` and updates `p1` to hold the newly computed value and propagated derivative. Since `p2` is not differentiable, it remains unchanged.
+
+### `bwd_diff(f : slang_function) -> slang_function`
+
+A backward derivative propagation function propagates the derivative of the function output to all the input parameters simultaneously.
+
+Given an original function `f`, the general rule for determining the signature of its backward propagation function is that a differentiable output `o` becomes an input parameter holding the partial derivative of a downstream output with regard to the differentiable output, i.e. $$\partial y/\partial o$$; an input differentiable parameter `i` in the original function will become an output in the backward propagation function, holding the propagated partial derivative $$\partial y/\partial i$$; and any non-differentiable outputs are dropped from the backward propagation function. This means that the backward propagation function never returns any values computed in the original function.
+
+More specifically, the signature of its backward propagation function is determined using the following rules:
+- A backward propagation function always returns `void`.
+- A differentiable `in` parameter of type `T : IDifferentiable` will become an `inout DifferentialPair<T>` parameter, where the original value part of the differential pair contains the original value of the parameter to pass into the back-prop function. The original value will not be overwritten by the backward propagation function. The propagated derivative will be written to the derivative part of the differential pair after the backward propagation function returns. The initial derivative value of the pair is ignored as input.
+- A differentiable `out` parameter of type `T : IDifferentiable` will become an `in T.Differential` parameter, carrying the partial derivative of some downstream term with regard to the value of the `out` parameter.
+- A differentiable `inout` parameter of type `T : IDifferentiable` will become an `inout DifferentialPair<T>` parameter, where the original value of the argument, along with the downstream partial derivative with regard to the argument is passed as input to the backward propagation function as the original and derivative part of the pair. The propagated derivative with regard to this input parameter will be written back and replace the derivative part of the pair. The primal value part of the parameter will *not* be updated.
+- A differentiable return value of type `R` will become an additional `in R.Differential` parameter at the end of the backward propagation function parameter list, carrying the result derivative of a downstream term with regard to the return value of the original function.
+- A non-differentiable return value of type `NDR` will be dropped.
+- A non-differentiable `in` parameter of type `ND` will remain unchanged in the backward propagation function.
+- A non-differentiable `out` parameter of type `ND` will be removed from the parameter list of the backward propagation function.
+- A non-differentiable `inout` parameter of type `ND` will become an `in ND` parameter.
+- Types implementing `IDifferentiablePtrType` work the same way as in the forward-mode case. They can only be used with `in` parameters, and are converted into `DifferentialPtrPair` types. Their directions are **not** affected.
+
+For example, consider the following original function:
+```csharp
+struct T : IDifferentiable {...}
+struct R : IDifferentiable {...}
+struct P : IDifferentiablePtrType {...}
+struct ND {} // Non differentiable
+
+[Differentiable]
+R original(T p0, out T p1, inout T p2, ND p3, out ND p4, inout ND p5, P p6);
+```
+The signature of its backward propagation function is:
+```csharp
+void back_prop(
+    inout DifferentialPair<T> p0,
+    T.Differential p1,
+    inout DifferentialPair<T> p2,
+    ND p3,
+    ND p5,
+    DifferentialPtrPair<P> p6,
+    R.Differential dResult);
+```
+Note that although `p2` is still `inout` in the backward propagation function, the backward propagation function will only write propagated derivative to `p2.d` and will not modify `p2.p`.
+
+### Built-in Differentiable Functions
+
+The following built-in functions are differentiable and both their forward and backward derivative functions are already defined in the standard library's core module:
+
+- Arithmetic functions: `abs`, `max`, `min`, `sqrt`, `rcp`, `rsqrt`, `fma`, `mad`, `fmod`, `frac`, `radians`, `degrees`
+- Interpolation and clamping functions: `lerp`, `smoothstep`, `clamp`, `saturate`
+- Trigonometric functions: `sin`, `cos`, `sincos`, `tan`, `asin`, `acos`, `atan`, `atan2`
+- Hyperbolic functions: `sinh`, `cosh`, `tanh`
+- Exponential and logarithmic functions: `exp`, `exp2`, `pow`, `log`, `log2`, `log10`
+- Vector functions: `dot`, `cross`, `length`, `distance`, `normalize`, `reflect`, `refract`
+- Matrix transforms: `mul(matrix, vector)`, `mul(vector, matrix)`, `mul(matrix, matrix)`
+- Matrix operations: `transpose`, `determinant`
+- Legacy blending and lighting intrinsics: `dst`, `lit`
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/user-guide/08-compiling.md b/external/slang/share/doc/slang/user-guide/08-compiling.md
new file mode 100644
index 00000000..1a730e26
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/08-compiling.md
@@ -0,0 +1,986 @@
+---
+layout: user-guide
+permalink: /user-guide/compiling
+---
+
+Compiling Code with Slang
+=========================
+
+This chapter presents the ways that the Slang system supports compiling and composing shader code.
+We will start with a discussion of the mental model that Slang uses for compilation.
+Next we will cover the command-line Slang compiler, `slangc`, and how to use it to perform offline compilation.
+Finally we will discuss the Slang compilation API, which can be used to integrate Slang compilation into an application at runtime, or to build custom tools that implement application-specific compilation policy.
+
+## Concepts
+
+For simple scenarios it may be enough to think of a shader compiler as a box where source code goes in and compiled kernels come out.
+Most real-time graphics applications end up needing more control over shader compilation, and/or more information about the results of compilation.
+In order to make use of the services provided by the Slang compilation system, it is useful to start with a clear model of the concepts that are involved in compilation.
+
+### Source Units
+
+At the finest granularity, code is fed to the compiler in _source units_ which are most often stored as files on disk or strings of text in memory.
+The compilation model largely does not care whether source units have been authored by human programmers or automatically assembled by other tools.
+
+If multiple source units are specified as part of the same compile, they will be preprocessed and parsed independently.
+However, a source unit might contain `#include` directives, so that the preprocessed text of that source unit includes the content of other files.
+Note that the `#include`d files do not become additional source units; they are just part of the text of a source unit that was fed to the compiler.
+
+### Translation Units and Modules
+
+Source units (such as files) are grouped into _translation units_, and each translation unit will produce a single _module_ when compiled.
+
+While the source units are all preprocessed and parsed independently, semantic checking is applied to a translation unit as a whole.
+One source file in a translation unit may freely refer to declarations in another source file from the same translation unit without any need for forward declarations. For example:
+
+```hlsl
+// A.slang
+
+float getFactor() { return 10.0; }
+```
+
+```hlsl
+// B.slang
+
+float scaleValue(float value)
+{
+    return value * getFactor();
+}
+```
+
+In this example, the `scaleValue()` function in `B.slang` can freely refer to the `getFactor()` function in `A.slang` because they are part of the same translation unit.
+
+It is allowed, and indeed common, for a translation unit to contain only a single source unit.
+For example, when adapting an existing codebase with many `.hlsl` files, it is appropriate to compile each `.hlsl` file as its own translation unit.
+A modernized codebase that uses the modular `include` feature as documented in [Modules and Access Control](modules) might decide to compile multiple `.slang` files in a single directory as a single translation unit.
+
+The result of compiling a translation unit is a module in Slang's internal intermediate representation (IR). The compiled module can then be serialized to a `.slang-module` binary file. The binary file can then be loaded via the
+`ISession::loadModuleFromIRBlob` function or `import`ed in slang code the same way as modules written in `.slang` files.
+
+### Entry Points
+
+A translation unit / module may contain zero or more entry points.
+Slang supports two models for identifying entry points when compiling.
+
+#### Entry Point Attributes
+
+By default, the compiler will scan a translation unit for function declarations marked with the `[shader(...)]` attribute; each such function will be identified as an entry point in the module.
+Developers are encouraged to use this model because it directly documents intention and makes source code less dependent on external compiler configuration options.
+
+#### Explicit Entry Point Options
+
+For compatibility with existing code, the Slang compiler also supports explicit specification of entry point functions using configuration options external to shader source code.
+When these options are used the compiler will *ignore* all `[shader(...)]` attributes and only use the explicitly-specified entry points instead.
+
+### Shader Parameters
+
+A translation unit / module may contain zero or more global shader parameters.
+Similarly, each entry point may define zero or more entry-point `uniform` shader parameters.
+
+The shader parameters of a module or entry point are significant because they describe the interface between host application code and GPU code.
+It is important that both the application and generated GPU kernel code agree on how parameters are laid out in memory and/or how they are assigned to particular API-defined registers, locations, or other "slots."
+
+### Targets
+
+Within the Slang system a _target_ represents a particular platform and set of capabilities that output code can be generated for.
+A target includes information such as:
+
+* The _format_ that code should be generated in: SPIR-V, DXIL, etc.
+
+* A _profile_ that specifies a general feature/capability level for the target: D3D Shader Model 5.1, GLSL version 4.60, etc.
+
+* Optional _capabilities_ that should be assumed available on the target: for example, specific Vulkan GLSL extensions
+
+* Options that impact code generation: floating-point strictness, level of debug information to generate, etc.
+
+Slang supports compiling for multiple targets in the same compilation session.
+When using multiple targets at a time, it is important to understand the distinction between the _front-end_ of the compiler, and the _back-end_:
+
+* The compiler front-end comprises preprocessing, parsing, and semantic checking. The front-end runs once for each translation unit and its results are shared across all targets.
+
+* The compiler back-end generates output code, and thus runs once per target.
+
+> #### Note ####
+> Because front-end actions, including preprocessing, only run once, across all targets, the Slang compiler does not automatically provide any target-specific preprocessor `#define`s that can be used for preprocessor conditionals.
+> Applications that need target-specific `#define`s should always compile for one target at a time, and set up their per-target preprocessor state manually.
+
+### Layout
+
+While the front-end of the compiler determines what the shader parameters of a module or entry point are, the _layout_ for those parameters is dependent on a particular compilation target.
+A `Texture2D` might consume a `t` register for Direct3D, a `binding` for Vulkan, or just plain bytes for CUDA.
+
+The details of layout in Slang will come in a later chapter.
+For the purposes of the compilation model it is important to note that the layout computed for shader parameters depends on:
+
+* What modules and entry points are being used together; these define which parameters are relevant.
+
+* Some well-defined ordering of those parameters; this defines which parameters should be laid out before which others.
+
+* The rules and constraints that the target imposes on layout.
+
+An important design choice in Slang is to give the user of the compiler control over these choices.
+
+### Composition
+
+The user of the Slang compiler communicates the modules and entry points that will be used together, as well as their relative order, using a system for _composition_.
+
+A _component type_ is a unit of shader code composition; both modules and entry points are examples of component types.
+A _composite_ component type is formed from a list of other component types (for example, one module and two entry points) and can be used to define a unit of shader code that is meant to be used together.
+
+Once a programmer has formed a composite of all the code they intend to use together, they can query the layout of the shader parameters in that composite, or invoke the linking step to
+resolve all cross module references.
+
+### Linking
+
+A user-composed program may have transitive module dependencies and cross references between module boundaries. The linking step in Slang is to resolve all the cross references in the IR and produce a
+new self-contained IR module that has everything needed for target code generation. The user will have an opportunity to specialize precompiled modules or provide additional compiler backend options
+at the linking step.
+
+### Kernels
+
+Once a program is linked, the user can request generation of the _kernel_ code for an entry point.
+The same entry point can be used to generate many different kernels.
+First, an entry point can be compiled for different targets, resulting in different kernels in the appropriate format for each target.
+Second, different compositions of shader code can result in different layouts, which leads to different kernels being required.
+
+## Command-Line Compilation with `slangc`
+
+The `slangc` tool, included in binary distributions of Slang, is a command-line compiler that can handle most simple compilation tasks.
+`slangc` is intended to be usable as a replacement for tools like `fxc` and `dxc`, and covers most of the same use cases.
+
+### All Available Options
+
+See [slangc command line reference](https://github.com/shader-slang/slang/blob/master/docs/command-line-slangc-reference.md) for a complete list of compiler options supported by the `slangc` tool.
+
+
+### A Simple `slangc` Example
+
+Here we will repeat the example used in the [Getting Started](01-get-started.md) chapter.
+Given the following Slang code:
+
+```hlsl
+// hello-world.slang
+StructuredBuffer<float> buffer0;
+StructuredBuffer<float> buffer1;
+RWStructuredBuffer<float> result;
+
+[shader("compute")]
+[numthreads(1,1,1)]
+void computeMain(uint3 threadId : SV_DispatchThreadID)
+{
+    uint index = threadId.x;
+    result[index] = buffer0[index] + buffer1[index];
+}
+```
+
+we can compile the `computeMain()` entry point to SPIR-V using the following command line:
+
+```bat
+slangc hello-world.slang -target spirv -o hello-world.spv
+```
+
+### Source Files and Translation Units
+
+The `hello-world.slang` argument here is specifying an input file.
+Each input file specified on the command line will be a distinct source unit during compilation.
+Slang supports multiple file-name extensions for input files, but the most common ones will be `.hlsl` for existing HLSL code, and `.slang` for files written specifically for Slang.
+
+If multiple source files are passed to `slangc`, they will be grouped into translation units using the following rules:
+
+* If there are any `.slang` files, then all of them will be grouped into a single translation unit
+
+* Each `.hlsl` file will be grouped into a distinct translation unit of its own.
+
+* Each `.slang-module` file forms its own translation unit.
+
+### Entry Points
+
+When using `slangc`, you will typically want to identify which entry point(s) you intend to compile.
+The `-entry computeMain` option selects an entry point to be compiled to output code in this invocation of `slangc`.
+
+Because the `computeMain()` entry point in this example has a `[shader(...)]` attribute, the compiler is able to deduce that it should be compiled for the `compute` stage.
+
+```bat
+slangc hello-world.slang -target spirv -o hello-world.spv
+```
+
+In code that does not use `[shader(...)]` attributes, a `-entry` option should be followed by a `-stage` option to specify the stage of the entry point:
+
+```bat
+slangc hello-world.slang -entry computeMain -stage compute -target spirv -o hello-world.spv
+```
+
+### Targets
+
+Our example uses the option `-target spirv` to introduce a compilation target; in this case, code will be generated as SPIR-V.
+The argument of a `-target` option specifies the format to use for the target; common values are `dxbc`, `dxil`, and `spirv`.
+
+Additional options for a target can be specified after the `-target` option.
+For example, a `-profile` option can be used to specify a profile that should be used.
+Slang provides two main kinds of profiles for use with `slangc`:
+
+* Direct3D "Shader Model" profiles have names like `sm_5_1` and `sm_6_3`
+
+* GLSL versions can be used as profiles with names like `glsl_430` and `glsl_460`
+
+### Kernels
+
+A `-o` option indicates that kernel code should be written to a file on disk.
+In our example, the SPIR-V kernel code for the `computeMain()` entry point will be written to the file `hello-world.spv`.
+
+### Working with Multiples
+
+It is possible to use `slangc` with multiple input files, entry points, or targets.
+In these cases, the ordering of arguments on the command line becomes significant.
+
+When an option modifies or relates to another command-line argument, it implicitly applies to the most recent relevant argument.
+For example:
+
+* If there are multiple input files, then an `-entry` option applies to the preceding input file
+
+* If there are multiple entry points, then a `-stage` option applies to the preceding `-entry` option
+
+* If there are multiple targets, then a `-profile` option applies to the preceding `-target` option
+
+Kernel `-o` options are the most complicated case, because they depend on both a target and entry point.
+A `-o` option applies to the preceding entry point, and the compiler will try to apply it to a matching target based on its file extension.
+For example, a `.spv` output file will be matched to a `-target spirv`.
+
+The compiler makes a best effort to support complicated cases with multiple files, entry points, and targets.
+Users with very complicated compilation requirements will probably be better off using multiple `slangc` invocations or migrating to the compilation API.
+
+### Additional Options
+
+The main other options are:
+
+* `-D<name>` or `-D<name>=<value>` can be used to introduce preprocessor macros.
+
+* `-I<path>` or `-I <path>` can be used to introduce a _search path_ to be used when resolving `#include` directives and `import` declarations.
+
+* `-g` can be used to enable inclusion of debug information in output files (where possible and implemented)
+
+* `-O<level>` can be used to control optimization levels when the Slang compiler invokes a downstream code generator
+
+See [slangc command line reference](https://github.com/shader-slang/slang/blob/master/docs/command-line-slangc-reference.md) for a complete list of compiler options supported by the `slangc` tool.
+
+### Downstream Arguments
+
+`slangc` may leverage a 'downstream' tool like 'dxc', 'fxc', 'glslang', or 'gcc' for some target compilations. Rather than replicate every possible downstream option, arguments can be passed directly to the downstream tool using the "-X" option in `slangc`.
+
+The mechanism used here is based on the `-X` mechanism used in GCC, to specify arguments to the linker.
+
+```
+-Xlinker option
+```
+
+When used, `option` is not interpreted by GCC, but is passed to the linker once compilation is complete. Slang extends this idea in several ways. First there are many more 'downstream' stages available to Slang than just `linker`. These different stages are known as `SlangPassThrough` types in the API and have the following names
+
+* `fxc` - FXC HLSL compiler
+* `dxc` - DXC HLSL compiler
+* `glslang` - GLSLANG GLSL compiler
+* `visualstudio` - Visual Studio C/C++ compiler
+* `clang` - Clang C/C++ compiler
+* `gcc` - GCC C/C++ compiler
+* `genericcpp` - A generic C++ compiler (can be any one of visual studio, clang or gcc depending on system and availability)
+* `nvrtc` - NVRTC CUDA compiler
+
+The Slang command line allows you to specify an argument to these downstream compilers, by using their name after the `-X`. So for example to send an option `-Gfa` through to DXC you can use 
+
+```
+-Xdxc -Gfa
+```
+
+Note that if an option is available via normal Slang command line options then these should be used. This will generally work across multiple targets, but also avoids options clashing which is undefined behavior currently. The `-X` mechanism is best used for options that are unavailable through normal Slang mechanisms. 
+
+If you want to pass multiple options using this mechanism, the `-Xdxc` needs to be in front of every option. For example
+
+```
+-Xdxc -Gfa -Xdxc -Vd
+```
+
+Would reach `dxc` as 
+
+```
+-Gfa -Vd
+```
+
+This can get a little repetitive especially if there are many parameters, so Slang adds a mechanism to have multiple options passed by using an ellipsis `...`. The syntax is as follows
+
+```
+-Xdxc... -Gfa -Vd -X.
+```
+
+The `...` at the end indicates all the following parameters should be sent to `dxc` until it reaches the matching terminating `-X.` or the end of the command line. 
+
+It is also worth noting that `-X...` options can be nested. This would allow a GCC downstream compilation to control linking, for example with
+
+```
+-Xgcc... -Xlinker --split -X.
+```
+
+In this example gcc would see
+
+```
+-Xlinker --split
+```
+
+And the linker would see (as passed through by gcc) 
+
+```
+--split
+```
+
+Setting options for tools that aren't used in a Slang compilation has no effect. This allows for setting `-X` options specific for all downstream tools on a command line, and they are only used as part of a compilation that needs them.
+
+NOTE! Not all tools that Slang uses downstream make command line argument parsing available. `FXC` and `GLSLANG` currently do not have any command line argument passing as part of their integration, although this could change in the future.
+
+The `-X` mechanism is also supported by the render-test tool. In this usage `slang` becomes a downstream tool. Thus you can use the `dxc` option `-Gfa` in a render-test via
+
+```
+-Xslang... -Xdxc -Gfa -X.
+```
+
+This means that the dxc compilation in the render test (assuming dxc is invoked) will receive
+
+```
+-Gfa
+```
+
+Some options are made available via the same mechanism for all downstream compilers. 
+
+* Use `-I` to specify include path for downstream compilers
+
+For example to specify an include path "somePath" to DXC you can use...
+
+```
+-Xdxc -IsomePath
+```
+
+
+### Convenience Features
+
+The `slangc` compiler provides a few conveniences for command-line compilation:
+
+* Most options can appear out of order when they are unambiguous. For example, if there is only a single translation unit a `-entry` option can appear before or after any file.
+
+* A `-target` option can be left out if it can be inferred from the only `-o` option present. For example, `-o hello-world.spv` already implies `-target spirv`.
+
+* If a `-o` option is left out then kernel code will be written to the standard output. This output can be piped to a file, or can be printed to a console. In the latter case, the compiler will automatically disassemble binary formats for printing.
+
+### Precompiled Modules
+
+You can compile a `.slang` file into a binary IR module. For example, given the following source:
+
+```hlsl
+// my_library.slang
+float myLibFunc() { return 5.0; }
+```
+
+You can compile it into `my_library.slang-module` with the following slangc command line:
+
+```bat
+slangc my_library.slang -o my_library.slang-module
+```
+
+This allows you to deploy just the `my_library.slang-module` file to users of the module, and it can be consumed in the user code with the same `import` syntax:
+```hlsl
+import my_library;
+```
+
+### Limitations
+
+The `slangc` tool is meant to serve the needs of many developers, including those who are currently using `fxc`, `dxc`, or similar tools.
+However, some applications will benefit from deeper integration of the Slang compiler into application-specific code and workflows.
+Notable features that Slang supports which cannot be accessed from `slangc` include:
+
+* Slang can provide _reflection_ information about shader parameters and their layouts for particular targets; this information is not currently output by `slangc`.
+
+* Slang allows applications to control the way that shader modules and entry points are composed (which in turn influences their layout); `slangc` currently implements a single default policy for how to generate a composition of shader code.
+
+Applications that need more control over compilation are encouraged to use the C++ compilation API described in the next section.
+
+### Examples of `slangc` usage
+
+#### Multiple targets and multiple entrypoints
+
+In this example, there are two shader entrypoints defined in one source file.
+
+```hlsl
+// targets.slang
+
+struct VertexOutput
+{
+    nointerpolation int a : SOME_VALUE;
+    float3              b : SV_Position;
+};
+
+[shader("pixel")]
+float4 psMain() : SV_Target
+{
+    return float4(1, 0, 0, 1);
+}
+
+[shader("vertex")]
+VertexOutput vsMain()
+{
+    VertexOutput out;
+    out.a = 0;
+    out.b = float4(0, 1, 0, 1);
+    return out;
+}
+```
+
+A single entrypoint from the preceding shader can be compiled to both SPIR-V Assembly and HLSL targets in one command:
+```bat
+slangc targets.slang -entry psMain -target spirv-asm -o targets.spv-asm -target hlsl -o targets.hlsl
+```
+
+The following command compiles both entrypoints to SPIR-V:
+
+```bat
+slangc targets.slang -entry vsMain -entry psMain -target spirv -o targets.spv
+```
+
+#### Creating a standalone executable example
+
+This example compiles and runs a CPU host-callable style Slang unit.
+
+```hlsl
+// cpu.slang
+
+class MyClass
+{
+    int intMember;
+    __init()
+    {
+        intMember = 0;
+    }
+    int method()
+    {
+        printf("method\n");
+        return intMember;
+    }
+}
+
+export __extern_cpp int main()
+{
+    MyClass obj = new MyClass();
+    return obj.method();
+}
+
+```
+
+Compile the above code as a standalone executable, using the `-I` option to find dependent header files:
+```bat
+slangc cpu.slang -target executable -o cpu.exe -Xgenericcpp -I./include -Xgenericcpp -I./external/unordered_dense/include/
+```
+
+Execute the resulting executable:
+```bat
+C:\slang> cpu
+method
+
+```
+
+#### Compiling and linking slang-modules
+
+This example demonstrates the compilation of a slang-module, and linking to a shader which uses that module.
+Two scenarios are provided, one in which the entry-point is compiled in the same `slangc` invocation that links in the dependent slang-module, and another scenario where linking is a separate invocation.
+
+```hlsl
+// lib.slang
+public int foo(int a) 
+{ 
+    return a + 1;
+}
+```
+
+```hlsl
+// entry.slang
+import "lib";
+
+RWStructuredBuffer<int> outputBuffer;
+
+[shader("compute")]
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    int index = (int)dispatchThreadID.x;
+    outputBuffer[index] = foo(index);
+}
+```
+
+Compile lib.slang to lib.slang-module:
+```bat
+slangc lib.slang -o lib.slang-module
+```
+
+Scenario 1: Compile entry.slang and link lib and entry together in one step:
+```bat
+slangc entry.slang -target spirv -o program.spv # Compile and link
+```
+
+Scenario 2: Compile entry.slang to entry.slang-module and then link together lib and entry in a second invocation:
+```bat
+slangc entry.slang -o entry.slang-module # Compile
+slangc lib.slang-module entry.slang-module -target spirv -o program.spv # Link
+```
+
+#### Compiling with debug symbols
+
+Debug symbols can be added with the "-g<debug-level>" option.
+
+Adding '-g1' (or higher) to a SPIR-V compilation will emit extended 'DebugInfo' instructions.
+```bat
+slangc vertex.slang -target spirv-asm -o v.spv-asm -g0 # Omit debug symbols
+slangc vertex.slang -target spirv-asm -o v.spv-asm -g1 # Add debug symbols
+```
+
+
+#### Compiling with additional preprocessor macros
+
+User-defined macros can be set on the command-line with the "-D<macro>" or "-D<macro>=<value>" option.
+
+```hlsl
+// macrodefine.slang
+
+[shader("pixel")]
+float4 psMain() : SV_Target
+{
+#if defined(mymacro)
+    return float4(1, 0, 0, 1);
+#else
+    return float4(0, 1, 0, 1);
+#endif
+}
+```
+
+* Setting a user-defined macro "mymacro"
+```bat
+slangc macrodefine.slang -entry psMain -target spirv-asm -o targets.spvasm -Dmymacro
+```
+
+## Using the Compilation API
+
+The C++ API provided by Slang is meant to provide more complete control over compilation for applications that need it.
+The additional level of control means that some tasks require more individual steps than they would when using a one-size-fits-all tool like `slangc`.
+
+### "COM-lite" Components
+
+Many parts of the Slang C++ API use interfaces that follow the design of COM (the Component Object Model).
+Some key Slang interfaces are binary-compatible with existing COM interfaces.
+However, the Slang API does not depend on any runtime aspects of the COM system, even on Windows; the Slang system can be seen as a "COM-lite" API.
+
+The `ISlangUnknown` interface is equivalent to (and binary-compatible with) the standard COM `IUnknown`.
+Application code is expected to correctly maintain the reference counts of `ISlangUnknown` objects returned from API calls; the `Slang::ComPtr<T>` "smart pointer" type is provided as an optional convenience for applications that want to use it.
+
+Many Slang API calls return `SlangResult` values; this type is equivalent to (and binary-compatible with) the standard COM `HRESULT` type.
+As a matter of convention, Slang API calls return a zero value (`SLANG_OK`) on success, and a negative value on errors.
+
+> #### Note ####
+> Slang API interfaces may be named with the suffix "_Experimental", indicating that the interface is not complete, may have known bugs, and may change or be removed between Slang API releases.
+
+### Creating a Global Session
+
+A Slang _global session_ uses the interface `slang::IGlobalSession` and it represents a connection from an application to a particular implementation of the Slang API.
+A global session is created using the function `slang::createGlobalSession()`:
+
+```c++
+using namespace slang;
+
+Slang::ComPtr<IGlobalSession> globalSession;
+SlangGlobalSessionDesc desc = {};
+createGlobalSession(&desc, globalSession.writeRef());
+```
+
+When a global session is created, the Slang system will load its internal representation of the _core module_ that the compiler provides to user code.
+The core module can take a significant amount of time to load, so applications are advised to use a single global session if possible, rather than creating and then disposing of one for each compile.
+
+If you want to enable GLSL compatibility mode, you need to set `SlangGlobalSessionDesc::enableGLSL` to `true` when calling `createGlobalSession()`. This will load the necessary GLSL intrinsic module
+for compiling GLSL code. Without this setting, compiling GLSL code will result in an error.
+
+> #### Note ####
+> Currently, the global session type is *not* thread-safe.
+> Applications that wish to compile on multiple threads will need to ensure that each concurrent thread compiles with a distinct global session.
+
+> #### Note ####
+> Currently, the global session should be freed after any objects created from it.
+> See [issue 6344](https://github.com/shader-slang/slang/issues/6344).
+
+### Creating a Session
+
+A _session_ uses the interface `slang::ISession`, and represents a scope for compilation with a consistent set of compiler options.
+In particular, all compilation with a single session will share:
+
+* A list of enabled compilation targets (with their options)
+
+* A list of search paths (for `#include` and `import`)
+
+* A list of pre-defined macros
+
+In addition, a session provides a scope for the loading and re-use of modules.
+If two pieces of code compiled in a session both `import`  the same module, then that module will only be loaded and compiled once.
+
+To create a session, use the `IGlobalSession::createSession()` method:
+
+```c++
+SessionDesc sessionDesc;
+/* ... fill in `sessionDesc` ... */
+Slang::ComPtr<ISession> session;
+globalSession->createSession(sessionDesc, session.writeRef());
+```
+
+The definition of `SessionDesc` structure is:
+```C++
+struct SessionDesc
+{
+    /** The size of this structure, in bytes.
+     */
+    size_t structureSize = sizeof(SessionDesc);
+
+    /** Code generation targets to include in the session.
+    */
+    TargetDesc const*   targets = nullptr;
+    SlangInt            targetCount = 0;
+
+    /** Flags to configure the session.
+    */
+    SessionFlags flags = kSessionFlags_None;
+
+    /** Default layout to assume for variables with matrix types.
+    */
+    SlangMatrixLayoutMode defaultMatrixLayoutMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR;
+
+    /** Paths to use when searching for `#include`d or `import`ed files.
+    */
+    char const* const*  searchPaths = nullptr;
+    SlangInt            searchPathCount = 0;
+
+    PreprocessorMacroDesc const*    preprocessorMacros = nullptr;
+    SlangInt                        preprocessorMacroCount = 0;
+
+    ISlangFileSystem* fileSystem = nullptr;
+
+    bool enableEffectAnnotations = false;
+    bool allowGLSLSyntax = false;
+
+    /** Pointer to an array of compiler option entries, whose size is compilerOptionEntryCount.
+    */
+    CompilerOptionEntry* compilerOptionEntries = nullptr;
+
+    /** Number of additional compiler option entries.
+    */
+    uint32_t compilerOptionEntryCount = 0;
+};
+```
+The user can specify a set of commonly used compiler options directly in the `SessionDesc` struct, such as `searchPaths` and `preprocessorMacros`.
+Additional compiler options can be specified via the `compilerOptionEntries` field, which is an array of `CompilerOptionEntry` that defines a key-value
+pair of a compiler option setting, see the [Compiler Options](#compiler-options) section.
+
+#### Targets
+
+The `SessionDesc::targets` array can be used to describe the list of targets that the application wants to support in a session.
+Often, this will consist of a single target.
+
+Each target is described with a `TargetDesc` which includes options to control code generation for the target.
+The most important fields of the `TargetDesc` are the `format` and `profile`; most others can be left at their default values.
+
+The `format` field should be set to one of the values from the `SlangCompileTarget` enumeration.
+For example:
+
+```c++
+TargetDesc targetDesc;
+targetDesc.format = SLANG_SPIRV;
+```
+
+The `profile` field must be set with the ID of one of the profiles supported by the Slang compiler.
+The exact numeric value of the different profiles is not currently stable across compiler versions, so applications should look up a chosen profile using `IGlobalSession::findProfile`.
+For example:
+
+```c++
+targetDesc.profile = globalSession->findProfile("glsl_450");
+```
+
+Once the chosen `TargetDesc`s have been initialized, they can be attached to the `SessionDesc`:
+
+```c++
+sessionDesc.targets = &targetDesc;
+sessionDesc.targetCount = 1;
+```
+
+#### Search Paths
+
+The search paths on a session provide the paths where the compiler will look when trying to resolve a `#include` directive or `import` declaration.
+The search paths can be set in the `SessionDesc` as an array of `const char*`:
+
+```c++
+const char* searchPaths[] = { "myapp/shaders/" };
+sessionDesc.searchPaths = searchPaths;
+sessionDesc.searchPathCount = 1;
+```
+
+#### Pre-Defined Macros
+
+The pre-defined macros in a session will be visible at the start of each source unit that is compiled, including source units loaded via `import`.
+Each pre-defined macro is described with a `PreprocessorMacroDesc`, which has `name` and `value` fields:
+
+```c++
+PreprocessorMacroDesc fancyFlag = { "ENABLE_FANCY_FEATURE", "1" };
+sessionDesc.preprocessorMacros = &fancyFlag;
+sessionDesc.preprocessorMacroCount = 1;
+```
+
+#### More Options
+
+You can specify other compiler options for the session or for a specific target through the `compilerOptionEntries` and `compilerOptionEntryCount` fields
+of the `SessionDesc` or `TargetDesc` structures. See the [Compiler Options](#compiler-options) section for more details on how to encode such an array.
+
+### Loading a Module
+
+The simplest way to load code into a session is with `ISession::loadModule()`:
+
+```c++
+IModule* module = session->loadModule("MyShaders");
+```
+
+Executing `loadModule("MyShaders")` in host C++ code is similar to using `import MyShaders` in Slang code.
+The session will search for a matching module (usually in a file called `MyShaders.slang`) and will load and compile it (if it hasn't been done already).
+
+Note that `loadModule()` does not provide any ways to customize the compiler configuration for that specific module.
+The preprocessor environment, search paths, and targets will always be those specified for the session.
+
+### Capturing Diagnostic Output
+
+Compilers produce various kinds of _diagnostic_ output when compiling code.
+This includes not only error messages when compilation fails, but also warnings and other helpful messages that may be produced even for successful compiles.
+
+Many operations in Slang, such as `ISession::loadModule()`, can optionally produce a _blob_ of diagnostic output.
+For example:
+
+```c++
+Slang::ComPtr<IBlob> diagnostics;
+Slang::ComPtr<IModule> module = session->loadModule("MyShaders", diagnostics.writeRef());
+```
+
+In this example, if any diagnostic messages were produced when loading `MyShaders`, then the `diagnostics` pointer will be set to a blob that contains the textual content of those diagnostics.
+
+The content of a blob can be accessed with `getBufferPointer()`, and the size of the content can be accessed with `getBufferSize()`.
+Diagnostic blobs produced by the Slang compiler are always null-terminated, so that they can be used with C-style string APIs:
+
+```c++
+if(diagnostics)
+{
+    fprintf(stderr, "%s\n", (const char*) diagnostics->getBufferPointer());
+}
+```
+
+> #### Note ####
+> The `slang::IBlob` interface is binary-compatible with the `ID3D10Blob` and `ID3DBlob` interfaces used by some Direct3D compilation APIs.
+
+### Entry Points
+
+When using `loadModule()` applications should ensure that entry points in their shader code are always marked with appropriate `[shader(...)]` attributes.
+For example, if `MyShaders.slang` contained:
+
+```hlsl
+[shader("compute")]
+void myComputeMain(...) { ... }
+```
+
+then the Slang system will automatically detect and validate this entry point as part of a `loadModule("MyShaders")` call.
+
+After a module has been loaded, the application can look up entry points in that module using `IModule::findEntryPointByName()`:
+
+```c++
+Slang::ComPtr<IEntryPoint> computeEntryPoint;
+module->findEntryPointByName("myComputeMain", computeEntryPoint.writeRef());
+```
+
+### Composition
+
+An application might load any number of modules with `loadModule()`, and those modules might contain any number of entry points.
+Before GPU kernel code can be generated it is first necessary to decide which pieces of GPU code will be used together.
+
+Both `slang::IModule` and `slang::IEntryPoint` inherit from `slang::IComponentType`, because both can be used as components when composing a shader program.
+A composition can be created with `ISession::createCompositeComponentType()`:
+
+```c++
+IComponentType* components[] = { module, entryPoint };
+Slang::ComPtr<IComponentType> program;
+session->createCompositeComponentType(components, 2, program.writeRef());
+```
+
+As discussed earlier in this chapter, the composition operation serves two important purposes.
+First, it establishes which code is part of a compiled shader program and which is not.
+Second, it establishes an ordering for the code in a program, which can be used for layout.
+
+### Layout and Reflection
+
+Some applications need to perform reflection on shader parameters and their layout, whether at runtime or as part of an offline compilation tool.
+The Slang API allows layout to be queried on any `IComponentType` using `getLayout()`:
+
+```c++
+slang::ProgramLayout* layout = program->getLayout();
+```
+
+> #### Note ####
+> In the current Slang API, the `ProgramLayout` type is not reference-counted.
+> Currently, the lifetime of a `ProgramLayout` is tied to the `IComponentType` that returned it.
+> An application must ensure that it retains the given `IComponentType` for as long as it uses the `ProgramLayout`.
+
+Note that because both `IModule` and `IEntryPoint` inherit from `IComponentType`, they can also be queried for their layouts individually.
+The layout for a module comprises just its global-scope parameters.
+The layout for an entry point comprises just its entry-point parameters (both `uniform` and varying).
+
+The details of how Slang computes layout, what guarantees it makes, and how to inspect the reflection information will be discussed in a later chapter.
+
+Because the layout computed for shader parameters may depend on the compilation target, the `getLayout()` method actually takes a `targetIndex` parameter that is the zero-based index of the target for which layout information is being queried.
+This parameter defaults to zero as a convenience for the common case where applications use only a single compilation target at runtime.
+
+See the [Using the Reflection API](reflection) chapter for more details on the reflection API.
+
+### Linking
+
+Before generating code, you must link the program to resolve all cross-module references. This can be done by calling
+`IComponentType::link` or `IComponentType::linkWithOptions` if you wish to specify additional compiler options for the program.
+For example:
+```c++
+Slang::ComPtr<IComponentType> linkedProgram;
+Slang::ComPtr<ISlangBlob> diagnosticBlob;
+program->link(linkedProgram.writeRef(), diagnosticBlob.writeRef());
+```
+
+The linking step is also used to perform link-time specialization, which is a recommended approach for shader specialization
+compared to preprocessor based specialization. Please see [Link-time Specialization and Precompiled Modules](link-time-specialization) for more details.
+
+Any diagnostic messages related to linking (for example, if an external symbol cannot be resolved) will be written to `diagnosticBlob`.
+
+### Kernel Code
+
+Given a linked `IComponentType`, an application can extract kernel code for one of its entry points using `IComponentType::getEntryPointCode()`:
+
+```c++
+int entryPointIndex = 0; // only one entry point
+int targetIndex = 0; // only one target
+Slang::ComPtr<IBlob> kernelBlob;
+linkedProgram->getEntryPointCode(
+    entryPointIndex,
+    targetIndex,
+    kernelBlob.writeRef(),
+    diagnostics.writeRef());
+```
+
+Any diagnostic messages related to back-end code generation (for example, if the chosen entry point requires features not available on the chosen target) will be written to `diagnostics`.
+The `kernelBlob` output is a `slang::IBlob` that can be used to access the generated code (whether binary or textual).
+In many cases `kernelBlob->getBufferPointer()` can be passed directly to the appropriate graphics API to load kernel code onto a GPU.
+
+
+## Multithreading
+
+The only functions which are currently thread safe are:
+
+```C++
+SlangSession* spCreateSession(const char* deprecated);
+SlangResult slang_createGlobalSession(SlangInt apiVersion, slang::IGlobalSession** outGlobalSession);
+SlangResult slang_createGlobalSession2(const SlangGlobalSessionDesc* desc, slang::IGlobalSession** outGlobalSession);
+SlangResult slang_createGlobalSessionWithoutCoreModule(SlangInt apiVersion, slang::IGlobalSession** outGlobalSession);
+ISlangBlob* slang_getEmbeddedCoreModule();
+SlangResult slang::createGlobalSession(slang::IGlobalSession** outGlobalSession);
+const char* spGetBuildTagString();
+```
+
+This assumes Slang has been built with the C++ multithreaded runtime, as is the default.
+
+All other functions and methods are not [reentrant](https://en.wikipedia.org/wiki/Reentrancy_(computing)) and can only execute on a single thread. More precisely, functions and methods can only be called on a *single* thread at *any one time*. This means, for example, that a global session can be used across multiple threads, as long as some synchronisation enforces that only one thread can be in a Slang call at any one time.
+
+Much of the Slang API is available through [COM interfaces](https://en.wikipedia.org/wiki/Component_Object_Model). In strict COM, interfaces should be atomically reference counted. Currently *MOST* Slang API COM interfaces are *NOT* atomically reference counted. One exception is the `ISlangSharedLibrary` interface when produced from [host-callable](cpu-target.md#host-callable). It is atomically reference counted, allowing it to persist and be used beyond the original compilation and be freed on a different thread.
+
+
+## Compiler Options
+
+Both the `SessionDesc` and `TargetDesc` structures contain fields that encode a `CompilerOptionEntry` array for additional compiler options to apply on the session or the target. In addition,
+the `IComponentType::linkWithOptions()` method allows you to specify additional compiler options when linking a program. All these places accept the same encoding of compiler options, which is
+documented in this section.
+
+The `CompilerOptionEntry` structure is defined as follows:
+```c++
+struct CompilerOptionEntry
+{
+    CompilerOptionName name;
+    CompilerOptionValue value;
+};
+```
+Where `CompilerOptionName` is an `enum` specifying the compiler option to set, and `value` encodes the value of the option.
+`CompilerOptionValue` is a structure that allows you to encode up to two integer or string values for a compiler option:
+```c++
+enum class CompilerOptionValueKind
+{
+    Int,
+    String
+};
+
+struct CompilerOptionValue
+{
+    CompilerOptionValueKind kind = CompilerOptionValueKind::Int;
+    int32_t intValue0 = 0;
+    int32_t intValue1 = 0;
+    const char* stringValue0 = nullptr;
+    const char* stringValue1 = nullptr;
+};
+```
+The meaning of each integer or string value is dependent on the compiler option. The following table lists all available compiler options that can be set and
+meanings of their `CompilerOptionValue` encodings.
+
+|CompilerOptionName | Description |
+|:------------------ |:----------- |
+| MacroDefine        | Specifies a preprocessor macro define entry. `stringValue0` encodes macro name, `stringValue1` encodes the macro value. |
+| Include            | Specifies an additional search path. `stringValue0` encodes the additional path. |
+| Language           | Specifies the input language. `intValue0` encodes a value defined in `SlangSourceLanguage`. |
+| MatrixLayoutColumn | Use column major matrix layout as default. `intValue0` encodes a bool value for the setting. |
+| MatrixLayoutRow    | Use row major matrix layout as default. `intValue0` encodes a bool value for the setting. |
+| Profile            | Specifies the target profile. `intValue0` encodes the raw profile representation returned by `IGlobalSession::findProfile()`. |
+| Stage              | Specifies the target entry point stage. `intValue0` encodes the stage defined in `SlangStage` enum. |
+| Target             | Specifies the target format. Has the same effect as setting `TargetDesc::format`. |
+| WarningsAsErrors   | Specifies a list of warnings to be treated as errors. `stringValue0` encodes a comma separated list of warning codes or names, or can be "all" to indicate all warnings. |
+| DisableWarnings    | Specifies a list of warnings to disable. `stringValue0` encodes a comma separated list of warning codes or names. |
+| EnableWarning      | Specifies a list of warnings to enable. `stringValue0` encodes a comma separated list of warning codes or names. |
+| DisableWarning     | Specify a warning to disable. `stringValue0` encodes the warning code or name. |
+| ReportDownstreamTime | Turn on/off downstream compilation time report. `intValue0` encodes a bool value for the setting. |
+| ReportPerfBenchmark | Turn on/off reporting of time spent in different parts of the compiler. `intValue0` encodes a bool value for the setting. |
+| SkipSPIRVValidation | Specifies whether or not to skip the validation step after emitting SPIRV. `intValue0` encodes a bool value for the setting. |
+| Capability | Specify an additional capability available in the compilation target. `intValue0` encodes a capability defined in the `CapabilityName` enum. |
+| DefaultImageFormatUnknown | Whether or not to use `unknown` as the image format when emitting SPIRV for a texture/image resource parameter without a format specifier. `intValue0` encodes a bool value for the setting. |
+| DisableDynamicDispatch | (Internal use only) Disables generation of dynamic dispatch code. `intValue0` encodes a bool value for the setting. |
+| DisableSpecialization | (Internal use only) Disables specialization pass.  `intValue0` encodes a bool value for the setting. |
+| FloatingPointMode | Specifies the floating point mode. `intValue0` encodes the floating point mode defined in the `SlangFloatingPointMode` enum. |
+| DebugInformation | Specifies the level of debug information to include in the generated code. `intValue0` encodes a value defined in the `SlangDebugInfoLevel` enum. |
+| LineDirectiveMode | Specifies the line directive mode to use for generated textual code such as HLSL or CUDA. `intValue0` encodes a value defined in the `SlangLineDirectiveMode` enum. |
+| Optimization | Specifies the optimization level. `intValue0` encodes the value for the setting defined in the `SlangOptimizationLevel` enum. |
+| Obfuscate | Specifies whether or not to turn on obfuscation. When obfuscation is on, Slang will strip variable and function names from the target code and replace them with hash values. `intValue0` encodes a bool value for the setting. |
+| VulkanBindShift | Specifies the `-fvk-bind-shift` option. `intValue0` (higher 8 bits): kind, `intValue0` (lower bits): set; `intValue1`: shift. |
+| VulkanBindGlobals | Specifies the `-fvk-bind-globals` option. `intValue0`: index, `intValue1`: set. |
+| VulkanInvertY | Specifies the `-fvk-invert-y` option. `intValue0` specifies a bool value for the setting. |
+| VulkanUseDxPositionW | Specifies the `-fvk-use-dx-position-w` option. `intValue0` specifies a bool value for the setting. |
+| VulkanUseEntryPointName | When set, will keep the original name of entrypoints as they are defined in the source instead of renaming them to `main`. `intValue0` specifies a bool value for the setting. |
+| VulkanUseGLLayout | When set, will use std430 layout instead of D3D buffer layout for raw buffer load/stores. `intValue0` specifies a bool value for the setting. |
+| VulkanEmitReflection | Specifies the `-fspv-reflect` option. When set will include additional reflection instructions in the output SPIRV. `intValue0` specifies a bool value for the setting. |
+| GLSLForceScalarLayout | Specifies the `-force-glsl-scalar-layout` option. When set will use `scalar` layout for all buffers when generating SPIRV. `intValue0` specifies a bool value for the setting. |
+| EnableEffectAnnotations | When set will turn on compatibility mode to parse legacy HLSL effect annotation syntax. `intValue0` specifies a bool value for the setting. |
+| EmitSpirvViaGLSL | When set will emit SPIRV by emitting GLSL first and then use glslang to produce the final SPIRV code. `intValue0` specifies a bool value for the setting. |
+| EmitSpirvDirectly | When set will use Slang's direct-to-SPIRV backend to generate SPIRV directly from Slang IR. `intValue0` specifies a bool value for the setting. |
+| SPIRVCoreGrammarJSON | When set will use the provided SPIRV grammar file to parse SPIRV assembly blocks. `stringValue0` specifies a path to the spirv core grammar json file. |
+| IncompleteLibrary | When set will not issue an error when the linked program has unresolved extern function symbols. `intValue0` specifies a bool value for the setting. |
+| DownstreamArgs | Provide additional arguments to the downstream compiler. `stringValue0` encodes the downstream compiler name, `stringValue1` encodes the argument list, one argument per line. |
+| DumpIntermediates | When set will dump the intermediate source output. `intValue0` specifies a bool value for the setting. |
+| DumpIntermediatePrefix | The file name prefix for the intermediate source output. `stringValue0` specifies a string value for the setting. |
+| DebugInformationFormat | Specifies the format of debug info. `intValue0` encodes a value defined in the `SlangDebugInfoFormat` enum. |
+| VulkanBindShiftAll | Specifies the `-fvk-bind-shift` option for all spaces. `intValue0`: kind, `intValue1`: shift. |
+| GenerateWholeProgram | When set will emit target code for the entire program instead of for a specific entrypoint. `intValue0` specifies a bool value for the setting. |
+| UseUpToDateBinaryModule | When set will only load a precompiled module if it is up-to-date with its source. `intValue0` specifies a bool value for the setting. |
+| ValidateUniformity | When set will perform [uniformity analysis](a1-05-uniformity.md).|
+
+## Debugging
+
+Slang's SPIRV backend supports generating debug information using the [NonSemantic Shader DebugInfo Instructions](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/nonsemantic/NonSemantic.Shader.DebugInfo.100.asciidoc).
+To enable debugging information when targeting SPIRV, specify the `-emit-spirv-directly` and `-g2` arguments when using the `slangc` tool, or set `EmitSpirvDirectly` to `1` and `DebugInformation` to `SLANG_DEBUG_INFO_LEVEL_STANDARD` when using the API.
+Debugging support has been tested with RenderDoc.
diff --git a/external/slang/share/doc/slang/user-guide/09-reflection.md b/external/slang/share/doc/slang/user-guide/09-reflection.md
new file mode 100644
index 00000000..973019b1
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/09-reflection.md
@@ -0,0 +1,1633 @@
+---
+layout: user-guide
+permalink: /user-guide/reflection
+---
+
+Using the Reflection API
+=========================
+
+This chapter provides an introduction to the Slang reflection API.
+Our goals in this chapter are to:
+
+* Demonstrate the recommended types and operations to use for the most common reflection scenarios
+
+* Provide an underlying mental model for how Slang's reflection information represents the structure of a program
+
+We will describe the structure of a program that traverses all of the parameters of a shader program and prints information (including binding locations) for them.
+The code shown here is derived from the [reflection-api](https://github.com/shader-slang/slang/tree/master/examples/reflection-api) example that is included in the Slang repository.
+Readers may find it helpful to follow along with that code, to see a more complete picture of what is presented here.
+
+Compiling a Program
+-------------------
+
+The first step in reflecting a shader program is, unsurprisingly, to compile it.
+Currently reflection information cannot be queried from code compiled via the command-line `slangc` tool, so applications that want to perform reflection on Slang shader code should use the [compilation API](./compiling#using-the-compilation-api) to compile a program, and then use `getLayout()` to extract reflection information:
+
+```c++
+slang::IComponentType* program = ...;
+slang::ProgramLayout* programLayout = program->getLayout(targetIndex);
+```
+
+For more information, see the [relevant section](./compiling#layout-and-reflection) of the chapter on compilation.
+
+Types and Variables
+-------------------
+
+We start our discussion of the reflection API with two of the fundamental building blocks used to represent the structure of a program: types and variables.
+
+A key property of GPU shader programming is that the same type may be laid out differently, depending on how it is used.
+For example, a user-defined `struct` type `Stuff` will often be laid out differently if it is used in a `ConstantBuffer<Stuff>` than in a `StructuredBuffer<Stuff>`.
+
+Because the same thing can be laid out in multiple ways (even within the same program), the Slang reflection API represents types and variables as distinct things from the *layouts* applied to them.
+This section focuses only on the underlying types/variables, while later sections will build on these concepts to show how layouts can be reflected.
+
+### Variables
+
+A `VariableReflection` represents a variable declaration in the input program.
+Variables include global shader parameters, fields of `struct` types, and entry-point parameters.
+
+Because a `VariableReflection` does not include layout information, the main things that can be queried on it are just its name and type:
+
+```c++
+void printVariable(
+    slang::VariableReflection* variable)
+{
+    const char* name = variable->getName();
+    slang::TypeReflection* type = variable->getType();
+
+    print("name: ");    printQuotedString(name);
+    print("type: ");    printType(type);
+}
+```
+
+### Types
+
+A `TypeReflection` represents some type in the input program.
+There are various different *kinds* of types, such as arrays, user-defined `struct` types, and built-in types like `int`.
+The reflection API represents these different cases with the `TypeReflection::Kind` enumeration.
+
+On its own, a `TypeReflection` does not include layout information.
+
+We will now start building a function for printing information about types:
+
+```c++
+void printType(slang::TypeReflection* type)
+{
+    const char* name = type->getName();
+    slang::TypeReflection::Kind kind = type->getKind();
+
+    print("name: ");    printQuotedString(name);
+    print("kind: ");    printTypeKind(kind);
+
+    // ...
+}
+```
+
+Given what has been presented so far, if we have a Slang variable declaration like the following:
+
+```hlsl
+float x;
+```
+
+then applying `printVariable()` to a `VariableReflection` for `x` would yield:
+
+```
+name: "x"
+type:
+  name: "float"
+  kind: Scalar
+```
+
+Additional information can be queried from a `TypeReflection`, depending on its kind:
+
+```c++
+void printType(slang::TypeReflection* type)
+{
+    // ...
+
+    switch(type->getKind())
+    {
+    default:
+        break;
+
+    // ...
+    }
+}
+```
+
+The following subsections will show examples of what can be queried for various kinds of types.
+
+#### Scalar Types
+
+Scalar types store an additional enumerant to indicate which of the built-in scalar types is being represented:
+
+```c++
+case slang::TypeReflection::Kind::Scalar:
+    {
+        print("scalar type: ");
+        printScalarType(type->getScalarType());
+    }
+    break;
+```
+
+The `slang::ScalarType` enumeration includes cases for the built-in integer and floating-point types (for example, `slang::ScalarType::UInt64` and `slang::ScalarType::Float16`), as well as the basic `bool` type (`slang::ScalarType::Bool`).
+The `void` type is also considered a scalar type (`slang::ScalarType::Void`).
+
+#### Structure Types
+
+A structure type may have zero or more *fields*.
+Each field is represented as a `VariableReflection`.
+A `TypeReflection` allows the fields to be enumerated using `getFieldCount()` and `getFieldByIndex()`.
+
+```c++
+case slang::TypeReflection::Kind::Struct:
+    {
+        print("fields:");
+        int fieldCount = type->getFieldCount();
+        for (int f = 0; f < fieldCount; f++)
+        {
+            print("- ");
+            slang::VariableReflection* field =
+                type->getFieldByIndex(f);
+            printVariable(field);
+        }
+    }
+    break;
+```
+
+For the purposes of the reflection API, the fields of a `struct` type are its non-static members (both `public` and non-`public`).
+
+Given Slang code like the following:
+
+```hlsl
+struct S
+{
+    int a;
+    float b;
+}
+```
+
+Reflection on type `S` would yield:
+
+```
+name: "S"
+kind: Struct
+fields:
+  - name: "a"
+    type:
+      name: "int"
+      kind: Scalar
+  - name: "b"
+    type:
+      name: "float"
+      kind: Scalar
+```
+
+#### Arrays
+
+An array type like `int[3]` is defined by the number and type of elements in the array, which can be queried with `getElementCount()` and `getElementType()`, respectively:
+
+```c++
+case slang::TypeReflection::Kind::Array:
+    {
+        print("element count: ");
+        printPossiblyUnbounded(type->getElementCount());
+
+        print("element type: ");
+        printType(type->getElementType());
+    }
+    break;
+```
+
+Some array types, like `Stuff[]`, have *unbounded* size.
+The Slang reflection API represents this case using the maximum value possible for the `size_t` result from `getElementCount()`:
+
+```c++
+void printPossiblyUnbounded(size_t value)
+{
+    if (value == ~size_t(0))
+    {
+        printf("unbounded");
+    }
+    else
+    {
+        printf("%u", unsigned(value));
+    }
+}
+```
+
+#### Vectors
+
+Vector types like `int3` are similar to arrays, in that they are defined by their element type and number of elements:
+
+```c++
+case slang::TypeReflection::Kind::Vector:
+    {
+        print("element count: ");
+        printCount(type->getElementCount());
+
+        print("element type: ");
+        printType(type->getElementType());
+    }
+    break;
+```
+
+#### Matrices
+
+Matrix types like `float3x4` are defined by the number of rows, the number of columns, and the element type:
+
+```c++
+case slang::TypeReflection::Kind::Matrix:
+    {
+        print("row count: ");
+        printCount(type->getRowCount());
+
+        print("column count: ");
+        printCount(type->getColumnCount());
+
+        print("element type: ");
+        printType(type->getElementType());
+    }
+    break;
+```
+
+#### Resources
+
+There are a wide range of resource types, including simple cases like `TextureCube` and `StructuredBuffer<int>`, as well as quite complicated ones like `RasterizerOrderedTexture2DArray<int4>` and `AppendStructuredBuffer<Stuff>`.
+
+The Slang reflection API breaks down the properties of a resource type into its shape, access, and result type:
+
+```c++
+case slang::TypeReflection::Kind::Resource:
+    {
+        key("shape");
+        printResourceShape(type->getResourceShape());
+
+        key("access");
+        printResourceAccess(type->getResourceAccess());
+
+        key("result type");
+        printType(type->getResourceResultType());
+    }
+    break;
+```
+
+The *result type* of a resource is simply whatever would be returned by a basic read operation on that resource.
+For resource types in Slang code, the result type is typically written as a generic type parameter after the type name.
+For a `StructuredBuffer<Thing>` the result type is `Thing`, while for a `Texture2D<int3>` it is `int3`.
+A texture type like `Texture2D` that does not give an explicit result type has a default result type of `float4`.
+
+The *access* of a resource (`SlangResourceAccess`) represents how the elements of the resource may be accessed by shader code.
+For Slang resource types, access is typically encoded as a prefix on the type name.
+For example, an unprefixed `Texture2D` has read-only access (`SLANG_RESOURCE_ACCESS_READ`), while a `RWTexture2D` has read-write access (`SLANG_RESOURCE_ACCESS_READ_WRITE`).
+
+The *shape* of a resource (`SlangResourceShape`) represents the conceptual rank/dimensionality of the resource and how it is indexed.
+For Slang resource type names, everything after the access prefix is typically part of the shape.
+
+A resource shape breaks down into a *base shape* along with a few possible suffixes like array-ness:
+
+```c++
+void printResourceShape(SlangResourceShape shape)
+{
+    print("base shape:");
+    switch(shape & SLANG_BASE_SHAPE_MASK)
+    {
+    case SLANG_TEXTURE1D: printf("TEXTURE1D"); break;
+    case SLANG_TEXTURE2D: printf("TEXTURE2D"); break;
+    // ...
+    }
+
+    if(shape & SLANG_TEXTURE_ARRAY_FLAG) printf("ARRAY");
+    if(shape & SLANG_TEXTURE_MULTISAMPLE_FLAG) printf("MULTISAMPLE");
+    // ...
+}
+```
+
+#### Single-Element Containers
+
+Types like `ConstantBuffer<T>` and `ParameterBlock<T>` represent a grouping of parameter data, and behave like an array or structured buffer with only a single element:
+
+```c++
+case slang::TypeReflection::Kind::ConstantBuffer:
+case slang::TypeReflection::Kind::ParameterBlock:
+case slang::TypeReflection::Kind::TextureBuffer:
+case slang::TypeReflection::Kind::ShaderStorageBuffer:
+    {
+        key("element type");
+        printType(type->getElementType());
+    }
+    break;
+```
+
+Layout for Types and Variables
+------------------------------
+
+The Slang reflection API provides `VariableLayoutReflection` and `TypeLayoutReflection` to represent a *layout* of a given variable or type.
+As discussed earlier, the same type might have multiple different layouts used for it in the same program.
+
+### Layout Units
+
+A key challenge that the Slang reflection API has to address is how to represent the offset of a variable (or struct field, etc.) or the size of a type when `struct` types are allowed to mix various kinds of data together.
+
+For example, consider the following Slang code:
+
+```hlsl
+struct Material
+{
+    Texture2D albedoMap;
+    SamplerState sampler;
+    float2 uvScale;
+    float2 uvBias;
+}
+struct Uniforms
+{
+    TextureCube environmentMap;
+    SamplerState environmentSampler;
+    float3 sunLightDirection;
+    float3 sunLightIntensity;
+    Material material;
+    // ...
+}
+ParameterBlock<Uniforms> uniforms;
+```
+
+When laid out in the given parameter block, what is the offset of the field `Uniforms::material`? What is the size of the `Material` type?
+
+The key insight is that layout is multi-dimensional: the same type can have a size in multiple distinct units.
+For example, when compiling the above code for D3D12/DXIL, the answer is that `Uniforms::material` has an offset of one `t` register, one `s` register, and 32 bytes.
+Similarly, the size of the `Material` type is one `t` register, one `s` register, and 16 bytes.
+
+We refer to these distinct units of measure used in layouts (including bytes, `t` registers, and `s` registers) as *layout units*.
+Layout units are represented in the Slang reflection API with the `slang::ParameterCategory` enumeration.
+(We will avoid the term "parameter category," despite that being the name currently exposed in the public API; that name has turned out to be a less-than-ideal choice).
+
+### Variable Layouts
+
+A `VariableLayoutReflection` represents a layout computed for a given variable (itself a `VariableReflection`).
+The underlying variable can be accessed with `getVariable()`, but the variable layout also provides accessors for the most important properties.
+
+A variable layout stores the offsets of that variable (possibly in multiple layout units), and also a type layout for the data stored in the variable.
+
+```c++
+void printVarLayout(slang::VariableLayoutReflection* varLayout)
+{
+    print("name"); printQuotedString(varLayout->getName());
+
+    printRelativeOffsets(varLayout);
+
+    key("type layout");
+    printTypeLayout(varLayout->getTypeLayout());
+}
+```
+
+#### Offsets
+
+The offsets stored by a `VariableLayoutReflection` are always *relative* to the enclosing `struct` type, scope, or other context that surrounds the variable.
+
+The `VariableLayoutReflection::getOffset` method can be used to query the relative offset of a variable for any given layout unit:
+
+```c++
+void printOffset(
+    slang::VariableLayoutReflection* varLayout,
+    slang::ParameterCategory layoutUnit)
+{
+    size_t offset = varLayout->getOffset(layoutUnit);
+
+    print("value: "); print(offset);
+    print("unit: "); printLayoutUnit(layoutUnit);
+
+    // ...
+}
+```
+
+If an application knows what unit(s) it expects a variable to be laid out in, it can directly query those.
+However, in a case like our systematic traversal of all shader parameters, it is not always possible to know what units a given variable uses.
+
+The Slang reflection API can be used to query layout units used by a given variable layout with `getCategoryCount()` and `getCategoryByIndex()`:
+
+```c++
+void printRelativeOffsets(
+    slang::VariableLayoutReflection* varLayout)
+{
+    print("relative offset: ");
+    int usedLayoutUnitCount = varLayout->getCategoryCount();
+    for (int i = 0; i < usedLayoutUnitCount; ++i)
+    {
+        auto layoutUnit = varLayout->getCategoryByIndex(i);
+        printOffset(varLayout, layoutUnit);
+    }
+}
+```
+
+#### Spaces / Sets
+
+For certain target platforms and layout units, the offset of a variable for that unit might include an additional dimension that represents a Vulkan/SPIR-V descriptor set, D3D12/DXIL register space, or a WebGPU/WGSL binding group.
+In this chapter, we will uniformly refer to all of these concepts as *spaces*.
+
+The relative space offset of a variable layout for a given layout unit can be queried with `getBindingSpace()`:
+
+```c++
+void printOffset(
+    slang::VariableLayoutReflection* varLayout,
+    slang::ParameterCategory layoutUnit)
+{
+    // ...
+
+    size_t spaceOffset = varLayout->getBindingSpace(layoutUnit);
+
+    switch(layoutUnit)
+    {
+    default:
+        break;
+
+    case slang::ParameterCategory::ConstantBuffer:
+    case slang::ParameterCategory::ShaderResource:
+    case slang::ParameterCategory::UnorderedAccess:
+    case slang::ParameterCategory::SamplerState:
+    case slang::ParameterCategory::DescriptorTableSlot:
+        print("space: "); print(spaceOffset);    
+    }
+}
+```
+
+The code above only prints the space offset for the layout units where a space is semantically possible and meaningful.
+
+### Type Layouts
+
+A `TypeLayoutReflection` represents a layout computed for a type.
+The underlying type that the layout was computed for can be accessed using `TypeLayoutReflection::getType()`, but accessors are provided so that the most common properties of types can be queried on type layouts.
+
+The main thing that a type layout stores is the size of the type:
+
+```c++
+void printTypeLayout(slang::TypeLayoutReflection* typeLayout)
+{
+    print("name: "); printQuotedString(typeLayout->getName());
+    print("kind: "); printTypeKind(typeLayout->getKind());
+
+    printSizes(typeLayout);
+
+    // ...
+}
+```
+
+#### Size
+
+Similarly to variable layouts, the size of a type layout can be queried given a chosen layout unit:
+
+```c++
+void printSize(
+    slang::TypeLayoutReflection* typeLayout,
+    slang::ParameterCategory layoutUnit)
+{
+    size_t size = typeLayout->getSize(layoutUnit);
+
+    key("value"); printPossiblyUnbounded(size);
+    key("unit"); writeLayoutUnit(layoutUnit);
+}
+```
+
+Note that the size of a type may be *unbounded* for a particular layout unit; this case is encoded just like the unbounded case for the element count of an array type (`~size_t(0)`).
+
+The layout units used by a particular type layout can be iterated over using `getCategoryCount()` and `getCategoryByIndex()`:
+
+```c++
+void printSizes(slang::TypeLayoutReflection* typeLayout)
+{
+    print("size: ");
+    int usedLayoutUnitCount = typeLayout->getCategoryCount();
+    for (int i = 0; i < usedLayoutUnitCount; ++i)
+    {
+        auto layoutUnit = typeLayout->getCategoryByIndex(i);
+        print("- "); printSize(typeLayout, layoutUnit);
+    }
+
+    // ...
+}
+```
+
+#### Alignment and Stride
+
+For any given layout unit, a type layout can also reflect the alignment of the type for that unit with `TypeLayoutReflection::getAlignment()`.
+Alignment is typically only interesting when the layout unit is bytes (`slang::ParameterCategory::Uniform`).
+
+Note that, unlike in C/C++, a type layout in Slang may have a size that is not a multiple of its alignment.
+The *stride* of a type layout (for a given layout unit) is its size rounded up to its alignment, and is used as the distance between consecutive elements in arrays.
+The stride of a type layout can be queried for any chosen layout unit with `TypeLayoutReflection::getStride()`.
+
+Note that all of the `TypeLayoutReflection` methods `getSize()`, `getAlignment()`, and `getStride()` default to returning information in bytes, if a layout unit is not specified.
+The same is true of the `VariableLayoutReflection::getOffset()` method.
+
+The alignment and stride of a type layout can be reflected when it is relevant with code like:
+
+```c++
+void printTypeLayout(slang::TypeLayoutReflection* typeLayout)
+{
+    // ...
+
+    if(typeLayout->getSize() != 0)
+    {
+        print("alignment in bytes: ");
+        print(typeLayout->getAlignment());
+
+        print("stride in bytes: ");
+        print(typeLayout->getStride());
+    }
+
+    // ...
+}
+```
+
+#### Kind-Specific Information
+
+Just as with the underlying types, a type layout may store additional information depending on the kind of type:
+
+```c++
+void printTypeLayout(slang::TypeLayoutReflection* typeLayout)
+{
+    // ...
+
+    switch(typeLayout->getKind())
+    {
+    default:
+        break;
+    
+        // ...
+    }
+}
+```
+
+The following subsections will cover the important kinds to handle when reflecting type layouts.
+
+#### Structure Type Layouts
+
+A type layout for a `struct` type provides access to the fields of the `struct`, with each field represented as a variable layout:
+
+```c++
+case slang::TypeReflection::Kind::Struct:
+    {
+        print("fields: ");
+
+        int fieldCount = typeLayout->getFieldCount();
+        for (int f = 0; f < fieldCount; f++)
+        {
+            auto field = typeLayout->getFieldByIndex(f);
+            printVarLayout(field);
+        }
+    }
+    break;
+```
+
+The offset information stored on the type layout for each field will always be relative to the start of the `struct` type.
+
+#### Array Type Layouts
+
+Array type layouts store a layout for the element type of the array, which can be accessed with `getElementTypeLayout()`:
+
+```c++
+case slang::TypeReflection::Kind::Array:
+    {
+        print("element count: ");
+        printPossiblyUnbounded(typeLayout->getElementCount());
+
+        print("element type layout: ");
+        printTypeLayout(typeLayout->getElementTypeLayout());
+    }
+    break;
+```
+
+#### Matrix Type Layouts
+
+A layout for a matrix type stores a matrix layout *mode* (`SlangMatrixLayoutMode`) to record whether the type was laid out in row-major or column-major layout:
+
+```c++
+case slang::TypeReflection::Kind::Matrix:
+    {
+        // ...
+
+        print("matrix layout mode: ");
+        printMatrixLayoutMode(typeLayout->getMatrixLayoutMode());
+    }
+    break;
+```
+
+Note that the concepts of "row" and "column" as employed by Slang are the opposite of how Vulkan, SPIR-V, GLSL, and OpenGL use those terms.
+When Slang reflects a matrix as using row-major layout, the corresponding matrix in generated SPIR-V will have a `ColMajor` decoration.
+For an explanation of why these conventions differ, please see the relevant [appendix](./a1-01-matrix-layout.md).
+
+#### Single-Element Containers
+
+Constant buffers, parameter blocks, and other types representing groupings of parameters are the most subtle cases to handle for reflection.
+The Slang reflection API aspires to provide complete and accurate information for these cases, but understanding *why* the provided data is what it is requires an appropriate mental model.
+
+##### Simple Cases
+
+In simple cases, a constant buffer has only ordinary data in it (things where the only used layout unit is bytes):
+
+```
+struct DirectionalLight
+{
+    float3 direction;
+    float3 intensity;
+}
+ConstantBuffer<DirectionalLight> light;
+```
+
+When this case is laid out for D3D12, the `DirectionalLight` type will consume 28 bytes, but the `light` parameter will instead consume one `b` register.
+We thus see that the `ConstantBuffer<>` type effectively "hides" the number of bytes used by its element.
+
+Similarly, when a parameter block only has opaque types in it:
+
+```
+struct Material
+{
+    Texture2D albedoMap;
+    Texture2D glossMap;
+    SamplerState sampler;
+}
+ParameterBlock<Material> material;
+```
+
+When this is laid out for Vulkan, the `Material` type will consume 3 bindings, but the `material` parameter will instead consume one space.
+A `ParameterBlock<>` type hides the bindings/registers/slots used by its element.
+
+##### When Things Leak
+
+If the element type of a constant buffer includes any data that isn't just measured in bytes, that usage will "leak" into the size of the constant buffer.
+For example:
+
+```
+struct ViewParams
+{
+    float3 cameraPos;
+    float3 cameraDir;
+    TextureCube envMap;
+}
+ConstantBuffer<ViewParams> view;
+```
+
+If this example is laid out for D3D12, the `ViewParams` type will have a size of 28 bytes (according to D3D constant buffer layout rules) and one `t` register.
+The size of the `view` parameter will be one `b` register and one `t` register.
+The `ConstantBuffer<>` type can hide the bytes used by `ViewParams`, but the used `t` register leaks out and becomes part of the size of `view`.
+
+If the same example is laid out for Vulkan, the `ViewParams` type will have a size of 28 bytes (according to `std140` layout rules) and one `binding`.
+The size of the `view` parameter will be two `binding`s.
+
+An important question a user might have in the Vulkan case is whether the `binding` for `view` comes before that for `view.envMap`, or the other way around.
+The answer is that the Slang compiler always lays out the "container" part of a parameter like `view` (the constant buffer) before the element, but a client of the reflection API shouldn't have to know such things to understand the information that gets reflected.
+
+Note that in the Vulkan case, the offset of the `envMap` field within `ViewParams` is zero `binding`s, but the offset of the `view.envMap` field relative to `view` is one `binding`.
+Computing the cumulative offset of `view.envMap` requires more information than just that available on the variable layouts for `view` and `view.envMap`.
+
+Similar cases of usage leaking can occur for parameter blocks, when one parameter block is nested within another.
+
+##### A `ConstantBuffer<>` Without a Constant Buffer
+
+While it is an uncommon case, it is possible to use a `ConstantBuffer<>` with an element type that contains no ordinary data (nothing with a layout unit of bytes):
+
+```
+struct Material
+{
+    Texture2D albedoMap;
+    Texture2D glossMap;
+    SamplerState sampler;
+}
+ConstantBuffer<Material> material;
+```
+
+If this case is compiled for Vulkan, the `material` parameter will consume 3 `binding`s, but none of those will be for a constant buffer.
+In this case, unlike in the preceding example with `view.envMap`, the offset of `material.albedoMap` relative to `material` will be zero `binding`s.
+
+##### Implicitly-Allocated Constant Buffers
+
+A common use case for parameter blocks is to wrap up all of the parameters of a shader, or of some subsystem.
+In such cases, there are likely to be both ordinary-type and opaque-type fields:
+
+```
+struct PointLight
+{
+    float3 position;
+    float3 intensity;
+}
+struct LightingEnvironment
+{
+    TextureCube envMap;
+    PointLight pointLights[10];
+}
+ParameterBlock<LightingEnvironment> lightEnv;
+```
+
+If this example is compiled for Vulkan, the `LightingEnvironment` type uses 316 bytes and one `binding` (`ParameterCategory::DescriptorTableSlot`), while `lightEnv` uses one descriptor `set` (`ParameterCategory::SubElementRegisterSpace`).
+
+What is not clear in the above description, however, is that because `LightingEnvironment` uses ordinary bytes, the Slang compiler will have to implicitly allocate a `binding` for a constant buffer to hold those bytes.
+Conceptually, the layout is similar to what would be produced for `ParameterBlock<ConstantBuffer<LightingEnvironment>>`.
+
+Furthermore, that constant buffer `binding` will be the first binding within the descriptor `set` for `lightEnv`, so that the cumulative `binding` offset for `lightEnv.envMap` will be one `binding` (even though `LightingEnvironment::envMap` has a relative offset of zero `binding`s).
+
+##### Container and Element
+
+In order to properly handle all of the nuances described here, the layout for a type like `ConstantBuffer<Thing>` or `ParameterBlock<Thing>` includes both layout information for the element of the container (a `Thing`) as well as layout information for the *container* itself.
+Furthermore, the layout information for both the element and container need to support storing offset information (not just size), relative to the overall `ConstantBuffer<>` or `ParameterBlock<>`.
+
+The breakdown is thus:
+
+* The size information for the complete container type layout reflects whatever usage "leaks" out, such that it would need to be accounted for when further aggregating the overall type.
+
+* Information about the allocated container is stored as a variable layout, queried with `getContainerVarLayout()`
+
+  * The type layout for that variable layout shows what was allocated to represent the container itself, including any implicitly-allocated constant buffer
+
+  * The offsets of that variable layout show where the container is situated relative to the overall type.
+  With the current layout strategies used by the Slang compiler, all of these offsets will be zero.
+
+* Information about the element is stored as a variable layout, queried with `getElementVarLayout()`
+
+  * The type layout of that variable layout shows how the element type is laid out inside the container.
+
+  * The offsets on that variable layout show where the element is situated relative to the overall type.
+  These offsets will be non-zero in cases where there is some layout unit used by both the element type and the container itself.
+
+Given this understanding, we can now look at the logic to reflect a type layout for a constant buffer, parameter block, or similar type.
+
+```c++
+case slang::TypeReflection::Kind::ConstantBuffer:
+case slang::TypeReflection::Kind::ParameterBlock:
+case slang::TypeReflection::Kind::TextureBuffer:
+case slang::TypeReflection::Kind::ShaderStorageBuffer:
+    {
+        print("container: ");
+        printOffsets(typeLayout->getContainerVarLayout());
+    
+        auto elementVarLayout = typeLayout->getElementVarLayout();
+        print("element: ");
+        printOffsets(elementVarLayout);
+
+        print("type layout: ");
+        printTypeLayout(
+            elementVarLayout->getTypeLayout());
+    }
+    break;
+```
+
+Note that the application logic here does not simply make use of `printVarLayout()` on the results of both `getContainerVarLayout()` and `getElementVarLayout()`, even though it technically could.
+While these sub-parts of the overall type layout are each represented as a `VariableLayoutReflection`, many of the properties of those variable layouts are uninteresting or null; they primarily exist to convey offset information.
+
+##### Example
+
+Given input code like the following:
+
+```hlsl
+struct Material
+{
+    Texture2D albedoMap;
+    SamplerState sampler;
+    float2 uvScale;
+    float2 uvBias;
+}
+
+struct FrameParams
+{
+    ConstantBuffer<Material> material;
+
+    float3 cameraPos;
+    float3 cameraDir;
+
+    TextureCube envMap;
+    float3 sunLightDir;
+    float3 sunLightIntensity;
+
+    Texture2D shadowMap;
+    SamplerComparisonState shadowMapSampler;
+}
+
+ParameterBlock<FrameParams> params;
+```
+
+We will look at the kind of output our example application prints for `params` when compiling for Vulkan.
+The basic information for the variable and its type layout looks like:
+
+```
+- name: "params"
+  offset:
+    relative:
+    - value: 1
+      unit: SubElementRegisterSpace # register spaces / descriptor sets
+  type layout:
+    name: "ParameterBlock"
+    kind: ParameterBlock
+    size:
+      - value: 1
+        unit: SubElementRegisterSpace # register spaces / descriptor sets
+```
+
+As we would expect, the size of the parameter block is one register space (aka Vulkan descriptor `set`).
+In this case, the Slang compiler has assigned `params` to have a space offset of 1 (`set=1` in GLSL terms).
+
+The offset information for the container part of `params` is the following:
+
+```
+container:
+  offset:
+    relative:
+      - value: 0
+        unit: DescriptorTableSlot # bindings
+        space: 0
+      - value: 0
+        unit: SubElementRegisterSpace # register spaces / descriptor sets
+
+We can see from this information that the `ParameterBlock<>` container had two things allocated to it: a descriptor set (`ParameterCategory::SubElementRegisterSpace`), and a binding within that descriptor set (`ParameterCategory::DescriptorTableSlot`) for the automatically-introduced constant buffer.
+That automatically-introduced buffer has an offset of 0 bindings from the start of the descriptor set.
+
+The layout for the element part of the parameter block is as follows:
+
+```
+element:
+  offset:
+    relative:
+      - value: 1
+        unit: DescriptorTableSlot # bindings
+        space: 0
+      - value: 0
+        unit: Uniform # bytes
+  type layout:
+    name: "FrameParams"
+    kind: Struct
+    size:
+      - value: 6
+        unit: DescriptorTableSlot # bindings
+      - value: 64
+        unit: Uniform # bytes
+    alignment in bytes: 16
+    stride in bytes: 64
+    fields:                  
+      - name: "material"
+        offset:
+          relative:
+            - value: 0
+              unit: DescriptorTableSlot # bindings
+              space: 0
+      ...
+```
+
+We see here that the type layout for the element is as expected of a layout for the `FrameParams` type.
+In particular, note how the `material` field has a relative offset of zero bindings from the start of the `struct`, as is expected for the first field.
+In order to account for the automatically-introduced constant buffer that is used by the container part of the layout, the element variable layout includes a relative offset of one binding (`ParameterCategory::DescriptorTableSlot`).
+
+In a later section we will discuss how to easily sum up the various relative offsets shown in an example like this, when an application wants to compute a *cumulative* offset for a field like `params.material.sampler`.
+
+
+##### Pitfalls to Avoid
+
+It is a common mistake for users to apply `getElementTypeLayout()` on a single-element container, instead of using `getElementVarLayout()` as we advise here.
+The implementation of the reflection API makes an effort to ensure that the type layout returned by `getElementTypeLayout()` automatically bakes in the additional offsets that are needed, but the results can still be unintuitive.
+
+Programs and Scopes
+-------------------
+
+So far, our presentation has largely been bottom-up: we have shown how to recursively perform reflection on types, variables, and their layouts, but we have not yet shown how to get this recursive traversal started.
+We will now proceed top-down for a bit, and look at how to reflect the top-level parameters of a program.
+
+A `ProgramLayout` is typically obtained using `IComponentType::getLayout()` after compiling and linking a Slang program.
+A program layout primarily comprises the global scope, and zero or more entry points:
+
+```c++
+void printProgramLayout(
+    slang::ProgramLayout* programLayout)
+{
+    print("global scope: ");
+    printScope(programLayout->getGlobalParamsVarLayout());
+
+    print("entry points: ");
+    int entryPointCount = programLayout->getEntryPointCount();
+    for (int i = 0; i < entryPointCount; ++i)
+    {
+        print("- ");
+        printEntryPointLayout(
+            programLayout->getEntryPointByIndex(i));
+    }
+}
+```
+
+The global scope and entry points are each an example of a *scope* where top-level shader parameters can be declared.
+Scopes are represented in the reflection API using `VariableLayoutReflection`s.
+We will now discuss the details of reflection for scopes, starting with the global scope as an example.
+
+### Global Scope
+
+In order to understand how the Slang reflection API exposes the global scope, it is valuable to think of the steps (some of them optional) that the Slang compiler applies to global-scope shader parameter declarations as part of compilation.
+
+#### Parameters are Grouped Into a Structure
+
+If a shader program declares global-scope parameters like the following:
+
+```hlsl
+Texture2D diffuseMap;
+TextureCube envMap;
+SamplerState sampler;
+```
+
+The Slang compiler will conceptually group all of those distinct global-scope parameter declarations into a `struct` type and then have only a single global-scope parameter of that type:
+
+```hlsl
+struct Globals
+{
+    Texture2D diffuseMap;
+    TextureCube envMap;
+    SamplerState sampler;
+}
+uniform Globals globals;
+```
+
+In this simple kind of case, the scope will be reflected as a variable layout with a `struct` type layout, with one field for each parameter declared in that scope:
+
+```c++
+void printScope(
+    slang::VariableLayoutReflection*    scopeVarLayout)
+{
+    auto scopeTypeLayout = scopeVarLayout->getTypeLayout();
+    switch (scopeTypeLayout->getKind())
+    {
+    case slang::TypeReflection::Kind::Struct:
+        {
+            print("parameters: ");
+
+            int paramCount = scopeTypeLayout->getFieldCount();
+            for (int i = 0; i < paramCount; i++)
+            {
+                print("- ");
+
+                auto param = scopeTypeLayout->getFieldByIndex(i);
+                printVarLayout(param, &scopeOffsets);
+            }
+        }
+        break;
+
+        // ...
+    }
+}
+```
+
+#### Wrapped in a Constant Buffer If Needed
+
+In existing shader code that was originally authored for older APIs (such as D3D9) it is common to find a mixture of opaque and ordinary types appearing as global-scope shader parameters:
+
+```hlsl
+Texture2D diffuseMap;
+TextureCube envMap;
+SamplerState sampler;
+
+uniform float3 cameraPos;
+uniform float3 cameraDir;
+```
+
+In these cases, when the Slang compiler groups the parameters into a single `struct`:
+
+```hlsl
+struct Globals
+{
+    Texture2D diffuseMap;
+    TextureCube envMap;
+    SamplerState sampler;
+
+    float3 cameraPos;
+    float3 cameraDir;
+}
+```
+
+it finds that the resulting `struct` consumes a non-zero number of bytes and, for most compilation targets, it will automatically wrap that structure in a `ConstantBuffer<>` before declaring the single shader parameter that represents the global scope:
+
+```hlsl
+ConstantBuffer<Globals> globals;
+```
+
+This case shows up in the Slang reflection API as the scope having a type layout with the constant-buffer kind:
+
+```c++
+case slang::TypeReflection::Kind::ConstantBuffer:
+    print("automatically-introduced constant buffer: ");
+
+    printOffsets(scopeTypeLayout->getContainerVarLayout());
+
+    printScope(scopeTypeLayout->getElementVarLayout());
+    break;
+```
+
+In this case, the container variable layout reflects the relative offsets for where the automatically-introduced constant buffer is bound, and the element variable layout reflects the global scope parameters that were wrapped in this way.
+
+#### Wrapped in a Parameter Block If Needed
+
+For targets like D3D12/DXIL, Vulkan/SPIR-V, and WebGPU/WGSL, most shader parameters must be bound via the target-specific grouping mechanism (descriptor tables, descriptor sets, or binding groups, respectively).
+If the Slang compiler is compiling for such a target and detects that there are global-scope parameters that do not specify an explicit space, then it will conceptually wrap the global-scope declarations in a `ParameterBlock<>` that provides a default space.
+
+For example, if compiling this code to Vulkan:
+
+```hlsl
+Texture2D diffuseMap;
+[[vk::binding(1,0)]] TextureCube envMap;
+SamplerState sampler;
+```
+
+the Slang compiler will detect that `envMap` is explicitly bound to `binding` 1 in space (aka descriptor `set`) 0, and that neither `diffuseMap` nor `sampler` has been explicitly bound.
+Both of the unbound parameters need to be passed inside of some space, so the compiler will allocate space 1 for that purpose (as space 0 was already claimed by explicit bindings).
+In simplistic terms, the compiler will behave *as if* the global-scope parameters are wrapped up in a `struct` and then further wrapped up into a `ParameterBlock<>`.
+
+This case shows up in the Slang reflection API as the scope having a type layout with the parameter-block kind:
+
+```c++
+case slang::TypeReflection::Kind::ParameterBlock:
+    print("automatically-introduced parameter block: ");
+
+    printOffsets(scopeTypeLayout->getContainerVarLayout());
+
+    printScope(scopeTypeLayout->getElementVarLayout());
+    break;
+```
+
+In cases where the parameters in a scope require *both* a constant buffer and a parameter block to be automatically introduced, the scope is reflected as if things were wrapped with `ParameterBlock<...>` and not `ParameterBlock<ConstantBuffer<...>>`.
+That is, the binding information for the implicit constant buffer will be found as part of the container variable layout for the parameter block.
+
+#### Pitfalls to Avoid
+
+The `ProgramLayout` type has the appealingly-named `getParameterCount()` and `getParameterByIndex()` methods, which seem to be the obvious way to navigate the global-scope parameters of a shader.
+However, we recommend *against* using these functions in applications that want to be able to systematically and robustly reflect any possible input shader code.
+
+While the reflection API implementation makes an effort to ensure that the information returned by `getParameterByIndex()` is not incorrect, it is very difficult when using those functions to account for how global-scope parameters might have been grouped into an automatically-introduced constant buffer or parameter block.
+The `getGlobalConstantBufferBinding()` and `getGlobalConstantBufferSize()` methods can be used in some scenarios, but aren't the best way to get the relevant information.
+
+While it would only matter in corner cases, we still recommend that applications use `getGlobalParamsVarLayout()` instead of `getGlobalParamsTypeLayout()`, to account for cases where the global-scope might have offsets applied to it (and also to handle the global scope and entry-point scopes more uniformly).
+
+### Entry Points
+
+An `EntryPointReflection` provides information on an entry point.
+This includes the stage that the entry point was compiled for:
+
+```c++
+void printEntryPointLayout(slang::EntryPointReflection* entryPointLayout)
+{
+    print("stage: "); printStage(entryPointLayout->getStage());
+
+    // ...
+}
+```
+
+#### Entry Point Parameters
+
+An entry point acts as a scope for top-level shader parameters, much like the global scope.
+Entry-point parameters are grouped into a `struct`, and then automatically wrapped in a constant buffer or parameter block if needed.
+The main additional consideration, compared to the global scope, is that an entry-point function may also declare a result type.
+When present, the function result acts more or less as an additional `out` parameter.
+
+The parameter scope and result of an entry point can be reflected with logic like:
+
+```c++
+void printEntryPointLayout(slang::EntryPointReflection* entryPointLayout)
+{
+    // ...
+    printScope(entryPointLayout->getVarLayout());
+
+    auto resultVarLayout = entryPointLayout->getResultVarLayout();
+    if (resultVarLayout->getTypeLayout()->getKind() != slang::TypeReflection::Kind::None)
+    {
+        key("result"); printVarLayout(resultVarLayout);
+    }
+}
+```
+
+##### Pitfalls to Avoid
+
+Similarly to the case for the global scope, we recommend against using the `getParameterCount()` and `getParameterByIndex()` methods on `EntryPointReflection`, since they make it harder to handle cases where the entry-point scope might have been allocated as a constant buffer (although the `hasDefaultConstantBuffer()` method is provided to try to support older applications that still use `getParameterByIndex()`).
+Applications are also recommended to use `EntryPointReflection::getVarLayout()` instead of `::getTypeLayout()`, to more properly reflect the way that offsets are computed and applied to the parameters of an entry point.
+
+#### Stage-Specific Information
+
+Depending on the stage that an entry point was compiled for, it may provide additional information that an application can query:
+
+```c++
+void printEntryPointLayout(slang::EntryPointReflection* entryPointLayout)
+{
+    // ...
+    switch (entryPointLayout->getStage())
+    {
+    default:
+        break;
+
+        // ...
+    }
+    // ...
+}
+```
+
+For example, compute entry points store the thread-group dimensions:
+
+```c++
+case SLANG_STAGE_COMPUTE:
+    {
+        SlangUInt sizes[3];
+        entryPointLayout->getComputeThreadGroupSize(3, sizes);
+
+        print("thread group size: ");
+        print("x: "); print(sizes[0]);
+        print("y: "); print(sizes[1]);
+        print("z: "); print(sizes[2]);
+    }
+    break;
+```
+
+#### Varying Parameters
+
+So far we have primarily been talking about the *uniform* shader parameters of a program: those that can be passed in from application code to shader code.
+Slang's reflection API also reflects the *varying* shader parameters that are passed between stages of a pipeline.
+
+Variable and type layouts for varying shader parameters will typically show usage of:
+
+* Varying input slots (`slang::ParameterCategory::VaryingInput`) for stage inputs
+* Varying output slots (`slang::ParameterCategory::VaryingOutput`) for `out` parameters and the entry-point result
+* Both (`slang::ParameterCategory::VaryingInput` *and* `::VaryingOutput`) for `inout` parameters
+* Nothing (no usage for any unit) for *system value* parameters (typically using an `SV_*` semantic)
+
+For user-defined varying parameters, some GPU APIs care about the *semantic* that has been applied to the parameter.
+For example, given this shader code:
+
+```hlsl
+[shader("vertex")]
+float4 vertexMain(
+    float3 position : POSITION,
+    float3 normal : NORMAL,
+    float3 uv : TEXCOORD,
+    // ...
+    )
+    : SV_Position
+{
+    // ...
+}
+```
+
+the shader parameter `normal` of `vertexMain` has a semantic of `NORMAL`.
+
+Semantics are only relevant for shader parameters that became part of the varying input/output interface of an entry point for some stage, in which case the `VariableLayoutReflection::getStage()` method will return that stage.
+A semantic is decomposed into both a name and an index (e.g., `TEXCOORD5` has a name of `"TEXCOORD"` and an index of `5`).
+This information can be reflected with `getSemanticName()` and `getSemanticIndex()`:
+
+```c++
+void printVarLayout(slang::VariableLayoutReflection* varLayout)
+{
+    // ...
+    if (varLayout->getStage() != SLANG_STAGE_NONE)
+    {
+        print("semantic: ");
+        print("name: "); printQuotedString(varLayout->getSemanticName());
+        print("index: "); print(varLayout->getSemanticIndex());
+    }
+    // ...
+}
+```
+
+Calculating Cumulative Offsets
+------------------------------
+
+All of the code so far has only extracted the *relative* offsets of variable layouts.
+Offsets for fields have been relative to the `struct` that contains them.
+Offsets for top-level parameters have been relative to the scope that contains them, or even to a constant buffer or parameter block that was introduced for that scope.
+
+There are many cases where an application needs to calculate a *cumulative* offset (or even an absolute offset) for some parameter, even down to the granularity of individual `struct` fields.
+As a notable example, allocation of D3D root signatures and Vulkan pipeline layouts for a program requires being able to enumerate the absolute offsets of all bindings in all descriptor tables/sets.
+
+Because offsets for certain layout units include an additional dimension for a space, our example application will define a simple `struct` to represent a cumulative offset:
+
+```c++
+struct CumulativeOffset
+{
+    int value; // the actual offset
+    int space; // the associated space
+};
+```
+
+### Access Paths
+
+There are multiple ways to track and calculate cumulative offsets.
+Here we will present a solution that is both simple and reasonably efficient, while still yielding correct results even in complicated scenarios.
+
+If all we had to do was calculate the byte offsets of things, a single `size_t` would be enough to represent a cumulative offset.
+However, we have already seen that in the context of a GPU language like Slang, we can have offsets measured in multiple different layout units.
+A naive implementation might try to represent a cumulative offset as a vector or dictionary of scalar offsets, with (up to) one for each layout unit.
+The sheer number of layout units (the cases of the `slang::ParameterCategory` enumeration) makes such an approach unwieldy.
+
+Instead we focus on the intuition that the cumulative offset of a variable layout, for any given layout unit, can be computed by summing up all the relative offsets along the *access path* to that variable.
+For example, given code like:
+
+```hlsl
+struct Material
+{
+    Texture2D albedoMap;
+    Texture2D glossMap;
+    SamplerState sampler;
+}
+struct LightingEnvironment
+{
+    TextureCube environmentMap;
+    float3 sunLightDir;
+    float3 sunLightIntensity;
+}
+struct Params
+{
+    LightingEnvironment lights;
+    Material material;
+}
+uniform Params params;
+```
+
+we expect that the cumulative offset of `params.material.glossMap` in units of Vulkan `binding`s can be computed by summing up the offsets in that unit of `params` (0), `material` (1), and `glossMap` (1).
+
+When recursively traversing the parameters of a shader, our example application will track an access path as a singly-linked list of variable layouts that points up the stack, from the deepest variable to the shallowest:
+
+```c++
+struct AccessPathNode
+{
+    slang::VariableLayoutReflection* varLayout;
+    AccessPathNode* outer;
+};
+
+struct AccessPath
+{
+    AccessPathNode* leafNode = nullptr;
+};
+```
+
+For the example code above, if our recursive traversal is at `params.material.glossMap`, then the access path will start with a node for `glossMap` which points to a node for `material`, which points to a node for `params`.
+
+For many layout units, we can calculate a cumulative offset simply by summing up contributions along the entire access path, with logic like the following:
+
+```c++
+CumulativeOffset calculateCumulativeOffset(slang::ParameterCategory layoutUnit, AccessPath accessPath)
+{
+    // ...
+    for(auto node = accessPath.leafNode; node != nullptr; node = node->outer)
+    {
+        result.value += node->varLayout->getOffset(layoutUnit);
+        result.space += node->varLayout->getBindingSpace(layoutUnit);
+    }
+    // ...
+}
+```
+
+Once our example application is properly tracking access paths, we will be able to use them to calculate and print the cumulative offsets of variable layouts:
+
+```c++
+void printOffsets(
+    slang::VariableLayoutReflection* varLayout,
+    AccessPath accessPath)
+{
+    // ...
+
+    print("cumulative:");
+    for (int i = 0; i < usedLayoutUnitCount; ++i)
+    {
+        print("- ");
+        auto layoutUnit = varLayout->getCategoryByIndex(i);
+        printCumulativeOffset(varLayout, layoutUnit, accessPath);
+    }
+}
+```
+
+Printing the cumulative offset of a variable layout requires adding the offset information for the variable itself to the offset calculated from its access path:
+
+```c++
+void printCumulativeOffset(
+    slang::VariableLayoutReflection* varLayout,
+    slang::ParameterCategory layoutUnit,
+    AccessPath accessPath)
+{
+    CumulativeOffset cumulativeOffset = calculateCumulativeOffset(layoutUnit, accessPath);
+
+    cumulativeOffset.value += varLayout->getOffset(layoutUnit);
+    cumulativeOffset.space += varLayout->getBindingSpace(layoutUnit);
+
+    printOffset(layoutUnit, cumulativeOffset.value, cumulativeOffset.space);
+}
+```
+
+### Tracking Access Paths
+
+In order to support calculation of cumulative offsets, the various functions we've presented so far like `printVarLayout()` and `printTypeLayout()` need to be extended with an additional parameter for an `AccessPath`.
+For example, the signature of `printTypeLayout()` becomes:
+
+```c++
+void printTypeLayout(slang::TypeLayoutReflection* typeLayout, AccessPath accessPath)
+{
+    // ...
+}
+```
+
+#### Variable Layouts
+
+When traversing a variable layout, we then need to extend the access path to include the additional variable layout, before traversing down into its type layout:
+
+```c++
+void printVarLayout(slang::VariableLayoutReflection* varLayout, AccessPath accessPath)
+{
+    // ...
+
+    ExtendedAccessPath varAccessPath(accessPath, varLayout);
+
+    print("type layout: ");
+    printTypeLayout(varLayout->getTypeLayout(), varAccessPath);
+}
+```
+
+#### Scopes
+
+Similar logic is needed within `printScope()` in our example program:
+
+```c++
+void printScope(
+    slang::VariableLayoutReflection* scopeVarLayout,
+    AccessPath                       accessPath)
+{
+    ExtendedAccessPath scopeAccessPath(accessPath, scopeVarLayout);
+
+    // ...
+}
+```
+
+The calls to `printOffsets()`, `printTypeLayout()`, etc. inside of `printScope()` will then pass along the extended access path.
+
+#### Array-Like Types
+
+When traversing an array, matrix, or vector type, it is impossible to compute a single cumulative offset that is applicable to all elements of the type.
+The recursive calls to `printTypeLayout()` in these cases will simply pass in an empty `AccessPath`.
+For example:
+
+```c++
+case slang::TypeReflection::Kind::Array:
+    {
+        // ...
+
+        print("element type layout: ");
+        printTypeLayout(
+            typeLayout->getElementTypeLayout(),
+            AccessPath());
+    }
+    break;
+```
+
+### Handling Single-Element Containers
+
+Types like constant buffers and parameter blocks add complexity that requires additions to our representation and handling of access paths.
+
+First, when calculating the cumulative byte offset of variables inside a constant buffer (or any of these single-element container types), it is important not to sum contributions too far up the access path.
+Consider this example:
+
+```c++
+struct A
+{
+    float4 x;
+    Texture2D t;
+}
+struct B
+{
+    float4 y;
+    ConstantBuffer<A> a;
+}
+struct C
+{
+    float4 z;
+    Texture2D t;
+    B b;
+}
+uniform C c;
+```
+
+When compiling for D3D12, the cumulative byte offset of `c.b` is 16, but the cumulative byte offset of `c.b.a.x` needs to be zero, because its byte offset should be measured relative to the enclosing constant buffer `c.b.a`.
+In contrast, the cumulative offset of `c.b` in `t` registers is one, and the cumulative offset of `c.b.a.t` needs to be two.
+
+Similarly, when calculating the cumulative offsets of variables inside a parameter block (for targets that can allocate each parameter block its own space), it is important not to sum contributions past an enclosing parameter block.
+
+We can account for these subtleties by extending the representation of access paths in our example application to record the node corresponding to the deepest constant buffer or parameter block along the path:
+
+```c++
+struct AccessPath
+{
+    AccessPathNode* leaf = nullptr;
+    AccessPathNode* deepestConstantBuffer = nullptr;
+    AccessPathNode* deepestParameterBlock = nullptr;
+};
+```
+
+Now when traversing a single-element container type in `printTypeLayout`, we can make a copy of the current access path and modify its `deepestConstantBuffer` to account for the container:
+
+```c++
+case slang::TypeReflection::Kind::ConstantBuffer:
+case slang::TypeReflection::Kind::ParameterBlock:
+case slang::TypeReflection::Kind::TextureBuffer:
+case slang::TypeReflection::Kind::ShaderStorageBuffer:
+    {
+        // ...
+
+        AccessPath innerAccessPath = accessPath;
+        innerAccessPath.deepestConstantBuffer = innerAccessPath.leaf;
+
+        // ...
+    }
+    break;
+```
+
+Further, if the container had a full space allocated to it, then we also update the `deepestParameterBlock`:
+
+```c++
+// ...
+if (containerVarLayout->getTypeLayout()->getSize(
+    slang::ParameterCategory::SubElementRegisterSpace) != 0)
+{
+    innerAccessPath.deepestParameterBlock = innerAccessPath.leaf;
+}
+// ...
+```
+
+Finally, when traversing the element of the container, we need to use this new `innerAccessPath`, and also extend the access path when traversing into the type layout of the element:
+
+```c++
+print("element: ");
+printOffsets(elementVarLayout, innerAccessPath);
+
+ExtendedAccessPath elementAccessPath(innerAccessPath, elementVarLayout);
+
+print("type layout: ");
+printTypeLayout(
+    elementVarLayout->getTypeLayout(),
+    elementAccessPath);
+```
+
+### Accumulating Offsets Along An Access Path
+
+We now understand that the proper way to calculate a cumulative offset depends on the layout unit:
+
+```c++
+CumulativeOffset calculateCumulativeOffset(
+    slang::ParameterCategory layoutUnit,
+    AccessPath               accessPath)
+{
+    switch(layoutUnit)
+    {
+    // ...
+    }
+}
+```
+
+#### Layout Units That Don't Require Special Handling
+
+By default, relative offsets will be summed for all nodes along the access path:
+
+```c++
+default:
+    for (auto node = accessPath.leaf; node != nullptr; node = node->outer)
+    {
+        result.value += node->varLayout->getOffset(layoutUnit);
+    }
+    break;
+```
+
+#### Bytes
+
+When a byte offset is being computed, relative offsets will only be summed up to the deepest enclosing constant buffer, if any:
+
+```c++
+case slang::ParameterCategory::Uniform:
+    for (auto node = accessPath.leaf; node != accessPath.deepestConstantBuffer; node = node->outer)
+    {
+        result.value += node->varLayout->getOffset(layoutUnit);
+    }
+    break;
+```
+
+#### Layout Units That Care About Spaces
+
+Finally, we need to handle the layout units that care about spaces:
+
+```c++
+case slang::ParameterCategory::ConstantBuffer:
+case slang::ParameterCategory::ShaderResource:
+case slang::ParameterCategory::UnorderedAccess:
+case slang::ParameterCategory::SamplerState:
+case slang::ParameterCategory::DescriptorTableSlot:
+    // ...
+    break;
+```
+
+Relative offsets, including space offsets, need to be summed along the access path up to the deepest enclosing parameter block, if any:
+
+```c++
+for (auto node = accessPath.leaf; node != accessPath.deepestParameterBlock; node = node->outer)
+{
+    result.value += node->varLayout->getOffset(layoutUnit);
+    result.space += node->varLayout->getBindingSpace(layoutUnit);
+}
+```
+
+Additionally, the offset of the enclosing parameter block in spaces needs to be added to the space of the cumulative offset:
+
+```c++
+for (auto node = accessPath.deepestParameterBlock; node != nullptr; node = node->outer)
+{
+    result.space += node->varLayout->getOffset(slang::ParameterCategory::SubElementRegisterSpace);
+}
+```
+
+Determining Whether Parameters Are Used
+---------------------------------------
+
+Some application architectures make use of shader code that declares a large number of shader parameters at global scope, but only uses a small fraction of those parameters at runtime.
+Similarly, shader parameters may be declared at global scope even if they are only used by a single entry point in a pipeline.
+These kinds of architectures are not ideal, but they are pervasive.
+
+Slang's base reflection API *intentionally* does not provide information about which shader parameters are or are not used by a program, or specific entry points.
+This choice ensures that applications using the reflection API can robustly re-use data structures built from reflection data across hot reloads of shaders, or switches between variants of a program.
+
+Applications that need to know which parameters are used (and by which entry points or stages) need to query for additional metadata connected to the entry points of their compiled program using `IComponentType::getEntryPointMetadata()`:
+
+```c++
+slang::IComponentType* program = ...;
+slang::IMetadata* entryPointMetadata;
+program->getEntryPointMetadata(
+        entryPointIndex,
+        0, // target index
+        &entryPointMetadata);
+```
+
+When traversal of reflection data reaches a leaf parameter, the application can use `IMetadata::isParameterLocationUsed()` with the absolute location of that parameter for a given layout unit:
+
+```c++
+unsigned calculateParameterStageMask(
+    slang::ParameterCategory layoutUnit,
+    CumulativeOffset offset)
+{
+    unsigned mask = 0;
+    for(int i = 0; i < entryPointCount; ++i)
+    {
+        bool isUsed = false;
+        entryPoints[i].metadata->isParameterLocationUsed(
+            layoutUnit, offset.space, offset.value, isUsed);
+        if(isUsed)
+        {
+            mask |= 1 << unsigned(entryPoints[i].stage);
+        }
+    }
+    return mask;
+}
+```
+
+The application can then incorporate this logic into a loop over the layout units consumed by a parameter:
+
+```c++
+unsigned calculateParameterStageMask(
+    slang::VariableLayoutReflection* varLayout,
+    AccessPath accessPath)
+{
+    unsigned mask = 0;
+
+    int usedLayoutUnitCount = varLayout->getCategoryCount();
+    for (int i = 0; i < usedLayoutUnitCount; ++i)
+    {
+        auto layoutUnit = varLayout->getCategoryByIndex(i);
+        auto offset = calculateCumulativeOffset(
+            varLayout, layoutUnit, accessPath);
+        
+        mask |= calculateParameterStageMask(
+            layoutUnit, offset);
+    }
+
+    return mask;
+}
+```
+
+Finally, we can wrap all this up into logic to print which stage(s) use a given parameter, based on the information in the per-entry-point metadata:
+
+```c++
+void printVarLayout(
+    slang::VariableLayoutReflection* varLayout,
+    AccessPath accessPath)
+{
+    //...
+    unsigned stageMask = calculateParameterStageMask(
+        varLayout, accessPath);
+
+    print("used by stages: ");
+    for(int i = 0; i < SLANG_STAGE_COUNT; i++)
+    {
+        if(stageMask & (1 << i))
+        {
+            print("- ");
+            printStage(SlangStage(i));
+        }
+    }
+    // ...
+}
+```
+
+Conclusion
+----------
+
+At this point we have provided a comprehensive example of how to robustly traverse the information provided by the Slang reflection API to get a complete picture of the shader parameters of a program, and what target-specific locations they were bound to.
+We hope that along the way we have also imparted some key parts of the mental model that exists behind the reflection API and its representations.
diff --git a/external/slang/share/doc/slang/user-guide/09-targets.md b/external/slang/share/doc/slang/user-guide/09-targets.md
new file mode 100644
index 00000000..711d3fa7
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/09-targets.md
@@ -0,0 +1,460 @@
+---
+layout: user-guide
+permalink: /user-guide/targets
+---
+
+# Supported Compilation Targets
+
+This chapter provides a brief overview of the compilation targets supported by Slang, and their different capabilities.
+
+## Background and Terminology
+
+### Code Formats
+
+When Slang compiles for a target platform one of the most important distinctions is the _format_ of code for that platform.
+For a native CPU target, the format is typically the executable machine-code format for the processor family (for example, x86-64).
+In contrast, GPUs are typically programmed through APIs that abstract over multiple GPU processor families and versions.
+GPU APIs usually define an _intermediate language_ that sits between a high-level-language compiler like Slang and GPU-specific compilers that live in drivers for the API.
+
+### Pipelines and Stages
+
+GPU code execution occurs in the context of a _pipeline_.
+A pipeline comprises one or more _stages_ and dataflow connections between them.
+Some stages are _programmable_ and run a user-defined _kernel_ that has been compiled from a language like Slang, while others are _fixed-function_ and can only be configured, rather than programmed, by the user.
+Slang supports three different pipelines.
+
+#### Rasterization
+
+The _rasterization_ pipeline is the original GPU rendering pipeline.
+On current GPUs, the simplest rasterization pipelines have two programmable stages: a `vertex` stage and a `fragment` stage.
+The rasterization pipeline is named after its most important fixed-function stage: the rasterizer, which determines the pixels covered by a geometric primitive, and emits _fragments_ covering those pixels, to be shaded.
+
+#### Compute
+
+The _compute_ pipeline is a simple pipeline with only one stage: a programmable `compute` stage.
+As a result of being a single-stage pipeline the compute pipeline doesn't need to deal with many issues around inter-stage dataflow that other pipelines do.
+
+#### Ray Tracing
+
+A _ray tracing_ pipeline has multiple stages pertaining to the life cycle of a ray being traced through a scene of geometric primitives.
+These can include an `intersection` stage to compute whether a ray intersects a geometry primitive, a `miss` stage that runs when a ray does not intersect any geometric object in a scene, etc.
+
+Note that some platforms support types and operations related to ray tracing that can run outside of the context of a dedicated ray tracing pipeline.
+Just as applications can do computation outside of the dedicated compute pipeline, the use of ray tracing does not necessarily mean that a ray tracing pipeline is being used.
+
+### Shader Parameter Bindings
+
+The kernels that execute within a pipeline typically have access to four different kinds of data:
+
+- _Varying inputs_ coming from the system or from a preceding pipeline stage
+
+- _Varying outputs_ which will be passed along to the system or to a following pipeline stage
+
+- _Temporaries_ which are scratch memory or registers used by each invocation of the kernel and then dismissed on exit.
+
+- _Shader parameters_ (sometimes also called _uniform parameters_), which provide access to data from outside the pipeline dataflow
+
+The first three of these kinds of data are largely handled by the implementation of a pipeline.
+In contrast, an application programmer typically needs to manually prepare shader parameters, using the appropriate mechanisms and rules for each target platform.
+
+On platforms that provide a CPU-like "flat" memory model with a single virtual address space, and where any kind of data can be stored at any address, passing shader parameters can be almost trivial.
+Current graphics APIs provide far more complicated and less uniform mechanisms for passing shader parameters.
+
+A high-level language compiler like Slang handles the task of _binding_ each user-defined shader parameter to one or more of the parameter-passing resources defined by a target platform.
+For example, the Slang compiler might bind a global `Texture2D` parameter called `gDiffuse` to the `t1` register defined by the Direct3D 11 API.
+
+An application is responsible for passing the argument data for a parameter using the corresponding platform-specific resource it was bound to.
+For example, an application should set the texture they want to use for `gDiffuse` to the `t1` register using Direct3D 11 API calls.
+
+#### Slots
+
+Historically, most graphics APIs have used a model where shader parameters are passed using a number of API-defined _slots_.
+Each slot can store a single argument value of an allowed type.
+Depending on the platform slots might be called "registers," "locations," "bindings," "texture units," or other similar names.
+
+Slots almost exclusively use opaque types: textures, buffers, etc.
+On platforms that use slots for passing shader parameters, values of ordinary types like `float` or `int` need to be stored into a buffer, and then that buffer is passed via an appropriate slot.
+
+Although many graphics APIs use slots as an abstraction, the details vary greatly across APIs.
+Different APIs define different kinds of slots, and the types of arguments that may be stored in those slots vary.
+For example, one API might use two different kinds of slots for textures and buffers, while another uses a single kind of slot for both.
+On some APIs each pipeline stage gets its own dedicated slots, while on others slots are shared across all stages in a pipeline.
+
+#### Blocks
+
+Newer graphics APIs typically provide a system for grouping related shader parameters into re-usable _blocks_.
+Blocks might be referred to as "descriptor tables," "descriptor sets," or "argument buffers."
+Each block comprises one or more slots (often called "descriptors") that can be used to bind textures, buffers, etc.
+
+Blocks are in turn set into appropriate slots provided by a pipeline.
+Because a block can contain many different slots for textures or buffers, switching a pipeline argument from one block to another can effectively swap out a large number of shader parameters in one operation.
+Thus, while blocks introduce a level of indirection to parameter setting, they can also enable greater efficiency when parameters are grouped into blocks according to frequency of change.
+
+#### Root Constants
+
+Most recent graphics APIs also allow for a small amount of ordinary data (meaning types like `float` and `int` but not opaque types like buffers or textures) to be passed to the pipeline as _root constants_ (also called "push constants").
+
+Using root constants can eliminate some overheads from passing parameters of ordinary types via buffers.
+Passing a single `float` using a root constant rather than a buffer obviously eliminates a level of indirection.
+More importantly, though, using a root constant can avoid application code having to allocate and manage the lifetime of a buffer in a concurrent CPU/GPU program.
+
+## Direct3D 11
+
+Direct3D 11 (D3D11) is an older graphics API, but remains popular because it is much simpler to learn and use than some more recent APIs.
+In this section we will give an overview of the relevant features of D3D11 when used as a target platform for Slang.
+Subsequent sections about other APIs may describe them by comparison to D3D11.
+
+D3D11 kernels must be compiled to the DirectX Bytecode (DXBC) intermediate language.
+A DXBC binary includes a hash/checksum computed using an undocumented algorithm, and the runtime API rejects kernels without a valid checksum.
+The only supported way to generate DXBC is by compiling HLSL using the fxc compiler.
+
+### Pipelines
+
+D3D11 exposes two pipelines: rasterization and compute.
+
+The D3D11 rasterization pipeline can include up to five programmable stages, although most of them are optional:
+
+- The `vertex` stage (VS) transforms vertex data loaded from memory
+
+- The optional `hull` stage (HS) typically sets up and computes desired tessellation levels for a higher-order primitive
+
+- The optional `domain` stage (DS) evaluates a higher-order surface at domain locations chosen by a fixed-function tessellator
+
+- The optional `geometry` stage (GS) receives as input a primitive and can produce zero or more new primitives as output
+
+- The optional `fragment` stage transforms fragments produced by the fixed-function rasterizer, determining the values for those fragments that will be merged with values in zero or more render targets. The fragment stage is sometimes called a "pixel" stage (PS), even when it does not process pixels.
+
+### Parameter Passing
+
+Shader parameters are passed to each D3D11 stage via slots.
+Each stage has its own slots of the following types:
+
+- **Constant buffers** are used for passing relatively small (4KB or less) amounts of data that will be read by GPU code. Constant buffers are passed via `b` registers.
+
+- **Shader resource views** (SRVs) include most textures, buffers, and other opaque resource types that are read or sampled by GPU code. SRVs use `t` registers.
+
+- **Unordered access views** (UAVs) include textures, buffers, and other opaque resource types used for write or read-write operations in GPU code. UAVs use `u` registers.
+
+- **Samplers** are used to pass opaque texture-sampling state, and use `s` registers.
+
+In addition, the D3D11 pipeline provides _vertex buffer_ slots and a single _index buffer_ slot to be used as the source vertex and index data that defines primitives.
+User-defined varying vertex shader inputs are bound to _vertex attribute_ slots (referred to as "input elements" in D3D11) which define how data from vertex buffers should be fetched to provide values for vertex attributes.
+
+The D3D11 rasterization pipeline also provides a mechanism for specifying _render target views_ (RTVs) and _depth-stencil views_ (DSVs) that provide the backing storage for the pixels in a framebuffer.
+User-defined fragment shader varying outputs (with `SV_Target` binding semantics) are bound to RTV slots.
+
+One notable detail of the D3D11 API is that the slots for fragment-stage UAVs and RTVs overlap.
+For example, a fragment kernel cannot use both `u0` and `SV_Target0` at once.
+
+## Direct3D 12
+
+Direct3D 12 (D3D12) is the current major version of the Direct3D API.
+
+D3D12 kernels must be compiled to the DirectX Intermediate Language (DXIL).
+DXIL is a layered encoding based off of LLVM bitcode; it introduces additional formatting rules and constraints which are loosely documented.
+A DXIL binary may be signed, and the runtime API only accepts appropriately signed binaries (unless a developer mode is enabled on the host machine).
+A DXIL validator `dxil.dll` is included in SDK releases, and this validator can sign binaries that pass validation.
+While DXIL can in principle be generated from multiple compiler front-ends, support for other compilers is not prioritized.
+
+### Pipelines
+
+D3D12 includes rasterization and compute pipelines similar to those in D3D11.
+Revisions to D3D12 have added additional stages to the rasterization pipeline, as well as a ray-tracing pipeline.
+
+#### Mesh Shaders
+
+> #### Note
+>
+> The Slang system does not currently support mesh shaders.
+
+The D3D12 rasterization pipeline provides alternative geometry processing stages that may be used as an alternative to the `vertex`, `hull`, `domain`, and `geometry` stages:
+
+- The `mesh` stage runs groups of threads which are responsible for cooperating to produce both the vertex and index data for a _meshlet_, a bounded-size chunk of geometry.
+
+- The optional `amplification` stage precedes the mesh stage and is responsible for determining how many mesh shader invocations should be run.
+
+Compared to the D3D11 pipeline without tessellation (hull and domain shaders), a mesh shader is kind of like a combined/generalized vertex and geometry shader.
+
+Compared to the D3D11 pipeline with tessellation, an amplification shader is kind of like a combined/generalized vertex and hull shader, while a mesh shader is kind of like a combined/generalized domain and geometry shader.
+
+#### Ray Tracing
+
+The DirectX Ray Tracing (DXR) feature added a ray tracing pipeline to D3D12.
+The D3D12 ray tracing pipeline exposes the following programmable stages:
+
+- The ray generation (`raygeneration`) stage is similar to a compute stage, but can trace zero or more rays and make use of the results of those traces.
+
+- The `intersection` stage runs kernels to compute whether a ray intersects a user-defined primitive type. The system also includes a default intersector that handles triangle meshes.
+
+- The so-called any-hit (`anyhit`) stage runs on _candidate_ hits where a ray has intersected some geometry, but the hit must be either accepted or rejected by application logic. Note that the any-hit stage does not necessarily run on _all_ hits, because configuration options on both scene geometry and rays can lead to these checks being bypassed.
+
+- The closest-hit (`closesthit`) stage runs on a single _accepted_ hit for a ray; under typical circumstances this will be the closest hit to the origin of the ray. A typical closest-hit shader might compute the apparent color of a surface, similar to a typical fragment shader.
+
+- The `miss` stage runs for rays that do not find or accept any hits in a scene. A typical miss shader might return a background color or sample an environment map.
+
+- The `callable` stage allows user-defined kernels to be invoked like subroutines in the context of the ray tracing pipeline.
+
+Compared to existing rasterization and compute pipelines, an important difference in the design of the D3D12 ray tracing pipeline is that multiple kernels can be loaded into the pipeline for each of the programmable stages.
+The specific closest-hit, miss, or other kernel that runs for a given hit or ray is determined by indexing into an appropriate _shader table_, which is effectively an array of kernels.
+The indexing into a shader table can depend on many factors including the type of ray, the type of geometry hit, etc.
+
+Note that DXR version 1.1 adds ray tracing types and operations that can be used outside of the dedicated ray tracing pipeline.
+These new mechanisms have less visible impact for a programmer using or integrating Slang.
+
+### Parameter Passing
+
+The mechanisms for parameter passing in D3D12 differ greatly from D3D11.
+Most opaque types (texture, resources, samplers) must be set into blocks (D3D12 calls blocks "descriptor tables").
+Each pipeline supports a fixed amount of storage for "root parameters," and allows those root parameters to be configured as root constants, slots for blocks, or slots for a limited number of opaque types (primarily just flat buffers).
+
+Shader parameters are still grouped and bound to registers as in D3D11; for example, a `Texture2D` parameter is considered as an SRV and uses a `t` register.
+D3D12 additionally binds shader parameters to "spaces" which are expressed similarly to registers (e.g., `space2`), but represent an orthogonal "axis" of binding.
+
+While shader parameters are bound to registers and spaces, those registers and spaces do not directly correspond to slots provided by the D3D12 API the way registers do in D3D11.
+Instead, the configuration of the root parameters and the correspondence of registers/spaces to root parameters, blocks, and/or slots are defined by a _pipeline layout_ that D3D12 calls a "root signature."
+
+Unlike D3D11, all of the stages in a D3D12 pipeline share the same root parameters.
+A D3D12 pipeline layout can specify that certain root parameters or certain slots within blocks will only be accessed by a subset of stages, and can map the _same_ register/space pair to different parameters/blocks/slots as long as this is done for disjoint subsets of stages.
+
+#### Ray Tracing Specifics
+
+The D3D12 ray tracing pipeline adds a new mechanism for passing shader parameters.
+In addition to allowing shader parameters to be passed to the entire pipeline via root parameters, each shader table entry provides storage space for passing argument data specific to that entry.
+
+Similar to the use of a pipeline layout (root signature) to configure the use of root parameters, each kernel used within shader entries must be configured with a "local root signature" that defines how the storage space in the shader table entry is to be used.
+Shader parameters are still bound to registers and spaces as for non-ray-tracing code, and the local root signature simply allows those same registers/spaces to be associated with locations in a shader table entry.
+
+One important detail is that some shader table entries are associated with a kernel for a single stage (e.g., a single miss shader), while other shader table entries are associated with a "hit group" consisting of up to one each of an intersection, any-hit, and closest-hit kernel.
+Because multiple kernels in a hit group share the same shader table entry, they also share the configured slots in that entry for binding root constants, blocks, etc.
+
+## Vulkan
+
+Vulkan is a cross-platform GPU API for graphics and compute with a detailed specification produced by a multi-vendor standards body.
+In contrast with OpenGL, Vulkan focuses on providing explicit control over as many aspects of GPU work as possible.
+In contrast with OpenCL, Vulkan focuses first and foremost on the needs of real-time graphics developers.
+
+Vulkan requires kernels to be compiled to the SPIR-V intermediate language.
+SPIR-V is a simple and extensible binary program format with a detailed specification; it is largely unrelated to earlier "SPIR" formats that were LLVM-based and loosely specified.
+The SPIR-V format does not require signing or hashing, and is explicitly designed to allow many different tools to produce and manipulate the format.
+Drivers that consume SPIR-V are expected to perform validation at load time.
+Some choices in the SPIR-V encoding are heavily influenced by specific design choices in the GLSL language, and may require non-GLSL compilers to transform code to match GLSL idioms.
+
+### Pipelines
+
+Vulkan includes rasterization, compute, and ray tracing pipelines with the same set of stages as described for D3D12 above.
+
+### Parameter Passing
+
+Like D3D12, Vulkan uses blocks (called "descriptor sets") to organize groups of bindings for opaque types (textures, buffers, samplers).
+Similar to D3D12, a Vulkan pipeline supports a limited number of slots for passing blocks to the pipeline, and these slots are shared across all stages.
+Vulkan also supports a limited number of bytes reserved for passing root constants (called "push constants").
+Vulkan uses pipeline layouts to describe configurations of usage for blocks and root constants.
+
+High-level-language shader parameters are bound to a combination of a "binding" and a "set" for Vulkan, which are superficially similar to the registers and spaces of D3D12.
+Unlike D3D12, however, bindings and sets in Vulkan directly correspond to the API-provided parameter-passing mechanism.
+The set index of a parameter indicates the zero-based index of a slot where a block must be passed, and the binding index is the zero-based index of a particular opaque value set into the block.
+A shader parameter that will be passed using root constants (rather than via blocks) must be bound to a root-constant offset as part of compilation.
+
+Unlike D3D12, where SRVs, UAVs, etc. use distinct classes of registers, all opaque-type shader parameters use the same index space of bindings.
+That is, a buffer and a texture both using `binding=2` in `set=3` for Vulkan will alias the same slot in the same block.
+
+The Vulkan ray tracing pipeline also uses a shader table, and also forms hit groups similar to D3D12.
+Unlike D3D12, each shader table entry in Vulkan can only be used to pass ordinary values (akin to root constants), and cannot be configured for binding of opaque types or blocks.
+
+## OpenGL
+
+> #### Note
+>
+> Slang has only limited support for compiling code for OpenGL.
+
+OpenGL has existed for many years, and predates programmable GPU pipelines of the kind this chapter discusses; we will focus solely on use of OpenGL as an API for programmable GPU pipelines.
+
+OpenGL is a cross-platform GPU API for graphics and compute with a detailed specification produced by a multi-vendor standards body.
+In contrast with Vulkan, OpenGL provides many convenience and safety features that can simplify GPU programming.
+
+OpenGL allows kernels to be loaded as SPIR-V binaries, vendor-specific binaries, or using GLSL source code.
+Loading shaders as GLSL source code is the most widely supported of these options, such that GLSL is the _de facto_ intermediate language of OpenGL.
+
+### Pipelines
+
+OpenGL supports rasterization and compute pipelines with the same stages as described for D3D11.
+The OpenGL rasterization pipeline also supports the same mesh shader stages that are supported by D3D12.
+
+### Parameter Passing
+
+OpenGL uses slots for binding.
+There are distinct kinds of slots for buffers and textures/images, and each set of slots is shared by all pipeline stages.
+
+High-level-language shader parameters are bound to a "binding" index for OpenGL.
+The binding index of a parameter is the zero-based index of the slot (of the appropriate kind) that must be used to pass an argument value.
+
+Note that while OpenGL and Vulkan both use binding indices for shader parameters like textures, the semantics of those are different because OpenGL uses distinct slots for passing buffers and textures.
+For OpenGL it is legal to have a texture that uses `binding=2` and a buffer that uses `binding=2` in the same kernel, because those are indices of distinct kinds of slots, while this scenario would typically be invalid for Vulkan.
+
+## Metal
+
+> #### Note
+>
+> Slang support for Metal is a work in progress.
+
+Metal is Apple's proprietary graphics and compute API for iOS and macOS
+platforms. It provides a modern, low-overhead architecture similar to Direct3D
+12 and Vulkan.
+
+Metal kernels must be compiled to the Metal Shading Language (MSL), which is
+based on C++14 with additional GPU-specific features and constraints. Unlike
+some other APIs, Metal does not use an intermediate representation - MSL source
+code is compiled directly to platform-specific binaries by Apple's compiler.
+
+### Pipelines
+
+Metal supports rasterization, compute, and ray tracing pipelines.
+
+> #### Note
+>
+> Ray-tracing support for Metal is a work in progress.
+
+The Metal rasterization pipeline includes the following programmable stages:
+
+- The vertex stage outputs vertex data
+
+- The optional mesh stage allows groups of threads to cooperatively generate geometry
+
+- The optional task stage can be used to control mesh shader invocations
+
+- The optional tessellation stages (kernel, post-tessellation vertex) enable hardware tessellation
+
+- The fragment stage processes fragments produced by the rasterizer
+
+### Parameter Passing
+
+Metal uses a combination of slots and blocks for parameter passing:
+
+- Resources (buffers, textures, samplers) are bound to slots using explicit
+  binding indices
+
+- Argument buffers (similar to descriptor tables/sets in other APIs) can group
+  multiple resources together
+
+- Each resource type (buffer, texture, sampler) has its own independent binding
+  space
+
+- Arguments within argument buffers are referenced by offset rather than
+  explicit bindings
+
+Unlike some other APIs, Metal:
+
+- Does not support arrays of buffers as of version 3.1
+- Shares binding slots across all pipeline stages
+- Uses argument buffers that can contain nested resources without consuming additional binding slots
+
+The Metal ray tracing pipeline follows similar parameter passing conventions to
+the rasterization and compute pipelines, while adding intersection,
+closest-hit, and miss stages comparable to those in Direct3D 12 and Vulkan.
+
+## CUDA and OptiX
+
+> #### Note
+>
+> Slang support for OptiX is a work in progress.
+
+CUDA C/C++ is a language for expressing heterogeneous CPU and GPU code with a simple interface to invoking GPU compute work.
+OptiX is a ray tracing API that uses CUDA C++ as the language for expressing shader code.
+We focus here on OptiX version 7 and up.
+
+CUDA and OptiX allow kernels to be loaded as GPU-specific binaries, or using the PTX intermediate language.
+
+### Pipelines
+
+CUDA supports a compute pipeline that is similar to D3D12 or Vulkan, with additional features.
+
+OptiX introduced the style of ray tracing pipeline adopted by D3D12 and Vulkan, and thus uses the same basic stages.
+
+The CUDA system does not currently expose a rasterization pipeline.
+
+### Parameter Passing
+
+Unlike most of the GPU APIs discussed so far, CUDA supports a "flat" memory model with a single virtual address space for all GPU data.
+Textures, buffers, etc. are not opaque types, but can instead sit in the same memory as ordinary data like `float`s or `int`s.
+
+With a flat memory model, a distinct notion of a slot or block is not needed.
+A slot is just an ordinary memory location that happens to be used to store a value of texture, buffer, or other resource type.
+A block is just an ordinary memory buffer that happens to be filled with values of texture/buffer/etc. type.
+
+CUDA provides two parameter-passing mechanisms for the compute pipeline.
+First, when invoking a compute kernel, the application passes a limited number of bytes of parameter data that act as root constants.
+Second, each loaded module of GPU code may contain pre-allocated "constant memory" storage which can be initialized from the host and then read by GPU code.
+Because types like blocks or textures are not special in CUDA, either of these mechanisms can be utilized to pass any kind of data including references to pointer-based data structures stored in the GPU virtual address space.
+The use of "slots" or "blocks" or "root constants" is a matter of application policy instead of API mechanism.
+
+OptiX supports use of constant memory storage for ray tracing pipelines, where all the stages in a ray tracing pipeline share that storage.
+OptiX uses a shader table for managing kernels and hit groups, and allows kernels to access the bytes of their shader table entry via a pointer.
+Similar to the compute pipeline, application code can layer many different policies on top of these mechanisms.
+
+## CPU Compute
+
+> #### Note
+>
+> Slang's support for CPU compute is functional, but not feature- or performance-complete.
+> Backwards-incompatible changes to this target may come in future versions of Slang.
+
+For the purposes of Slang, different CPU-based host platforms are largely the same.
+All support binary code in a native machine-code format.
+All CPU platforms Slang supports use a flat memory model with a single virtual address space, where any data type can be stored at any virtual address.
+
+Note that this section considers CPU-based platforms only as targets for kernel compilation; using a CPU as a target for scalar "host" code is an advanced topic beyond the scope of this document.
+
+### Pipelines
+
+Slang's CPU compute target supports only a compute pipeline.
+
+### Parameter Passing
+
+Because CPU targets support flexible pointer-based addressing and large low-latency caches, a compute kernel can simply be passed a small fixed number of pointers and be relied upon to load parameter values of any types via indirection through those pointers.
+
+## WebGPU
+
+> #### Note
+>
+> Slang support for WebGPU is a work in progress.
+
+WebGPU is a graphics and compute API.
+It is similar in spirit to modern APIs, like Metal, Direct3D 12 and Vulkan, but with concessions to portability and privacy.
+
+WebGPU is available both in browsers as a JavaScript API, and natively as a C/C++ API.
+[Dawn](https://github.com/google/dawn) is a native WebGPU implementation used by the Chrome browser.
+
+By combining Slang, [Dawn](https://github.com/google/dawn) and [Emscripten](https://emscripten.org/),
+an application can easily target any native API, and the web, with a single codebase consisting of C++ and Slang code.
+
+WebGPU shader modules are created from WGSL (WebGPU Shading Language) source files.
+WebGPU does not use an intermediate representation - WGSL code is compiled to backend-specific code by
+compilers provided by the WebGPU implementation.
+
+### Pipelines
+
+WebGPU supports render and compute pipelines.
+
+The WebGPU render pipeline includes the following programmable stages:
+
+- The vertex stage outputs vertex data
+
+- The fragment stage outputs fragments
+
+### Parameter Passing
+
+WebGPU uses groups of bindings called bind groups to bind things like textures, buffers and samplers.
+Bind group objects are passed as arguments when encoding bind group setting commands.
+
+There is a notion of equivalence for bind groups, and a notion of equivalence for pipelines defined in
+terms of bind group equivalence.
+This equivalence allows an application to save some bind group setting commands, when switching between
+pipelines, if bindings are grouped together appropriately.
+
+Which bindings are grouped together can be controlled using Slang's `ParameterBlock` generic type.
+
+## Summary
+
+This chapter has reviewed the main target platforms supported by the Slang compiler and runtime system.
+A key point to take away is that there is great variation in the capabilities of these systems.
+Even superficially similar graphics APIs have complicated differences in their parameter-passing mechanisms that must be accounted for by application programmers and GPU compilers.
diff --git a/external/slang/share/doc/slang/user-guide/10-link-time-specialization.md b/external/slang/share/doc/slang/user-guide/10-link-time-specialization.md
new file mode 100644
index 00000000..4c8a110b
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/10-link-time-specialization.md
@@ -0,0 +1,258 @@
+---
+layout: user-guide
+permalink: /user-guide/link-time-specialization
+---
+
+# Link-time Specialization and Module Precompilation
+
+Traditionally, graphics developers have been relying on preprocessor defines to specialize their shader code for high-performance GPU execution.
+While functioning systems can be built around preprocessor macros, overusing them leads to many problems:
+- Long compilation time. With preprocessor defines, specialization happens before parsing, which is a very early stage in the compilation flow.
+  This means that the compiler must redo almost all work from scratch with every specialized variant, including parsing, type checking, IR generation
+  and optimization, even when two specialized variants only differ in one constant value. The lack of reuse of compiler front-end work between
+  different shader specializations contributes a significant portion to long shader compile times.
+- Reduced code readability and maintainability. The compiler cannot enforce any structures on preprocessor macros and cannot offer static checks to
+  guarantee that the preprocessor macros are used in an intended way. Macros don't blend well with the native language syntax, which leads to less
+  readable code, cryptic diagnostic messages when things go wrong, and a suboptimal intellisense experience.
+- Locked in with early specialization. Once the code is written using preprocessor macros for specialization, the application that uses the shader
+  code has no choice but to provide the macro values during shader compilation and always opt-in to static specialization. If the developer changes
+  their mind to move away from specialization, a lot of code needs to be rewritten. As a result, the application is locked out of opportunities to
+  take advantage of different design decisions or future hardware features that allow more efficient execution of non-specialized code.
+
+Slang approaches the problem of shader specialization by supporting generics as a first-class feature that allows most specializable code to be
+written in strongly typed code, and by allowing specialization to be triggered through link-time constants or types.
+
+As discussed in the [Compiling code with Slang](compiling) chapter, Slang provides a three-step compilation model: precompiling, linking and target code generation.
+Assuming the user shader is implemented as three Slang modules: `a.slang`, `b.slang`, and `c.slang`, the user can precompile all three modules to binary IR and store
+them as `a.slang-module`, `b.slang-module`, and `c.slang-module` in a complete offline process that is independent of any specialization arguments.
+Next, these three IR modules are linked together to form a self-contained program that will then go through a set of compiler optimizations for target code generation.
+Slang's compilation model allows specialization arguments, in the form of constants or types, to be provided during linking. This means that specialization happens at
+a much later stage of compilation, reusing all the work done during module precompilation.
+
+## Link-time Constants
+
+The simplest form of link time specialization is done through link-time constants. See the following code for an example.
+```c++
+// main.slang
+
+// Define a constant whose value will be provided in another module at link time.
+extern static const int kSampleCount;
+
+float sample(int index) {...}
+
+RWStructuredBuffer<float> output;
+void main(uint tid : SV_DispatchThreadID)
+{
+    [ForceUnroll]
+    for (int i = 0; i < kSampleCount; i++)
+        output[tid] += sample(i);
+}
+```
+This code defines a compute shader that can be specialized with different constant values of `kSampleCount`. The `extern` modifier means that
+`kSampleCount` is a constant whose value is not provided within the current module, but will be resolved during the linking step.
+The `main.slang` file can be compiled offline into a binary IR module with the `slangc` tool:
+```
+slangc main.slang -o main.slang-module
+```
+
+To specialize the code with a value of `kSampleCount`, the user can create another module that defines it:
+
+```c++
+// sample-count.slang
+export static const int kSampleCount = 2;
+```
+
+This file can also be compiled separately:
+```
+slangc sample-count.slang -o sample-count.slang-module
+```
+
+With these two modules precompiled, we can link them together to get our specialized code:
+```
+slangc sample-count.slang-module main.slang-module -target hlsl -entry main -profile cs_6_0 -o main.hlsl
+```
+
+This process can also be done with Slang's compilation API as in the following code snippet:
+
+```c++
+
+ComPtr<slang::ISession> slangSession = ...;
+ComPtr<slang::IBlob> diagnosticsBlob;
+
+// Load the main module from file.
+slang::IModule* mainModule = slangSession->loadModule("main.slang", diagnosticsBlob.writeRef());
+
+// Load the specialization constant module from string.
+const char* sampleCountSrc = R"(export static const int kSampleCount = 2;)";
+auto sampleCountModuleSrcBlob = UnownedRawBlob::create(sampleCountSrc, strlen(sampleCountSrc));
+slang::IModule* sampleCountModule = slangSession->loadModuleFromSource(
+    "sample-count",  // module name
+    "sample-count.slang", // synthetic module path
+    sampleCountModuleSrcBlob);  // module source content
+
+// Compose the modules and entry points.
+ComPtr<slang::IEntryPoint> computeEntryPoint;
+SLANG_RETURN_ON_FAIL(
+    mainModule->findEntryPointByName(entryPointName, computeEntryPoint.writeRef()));
+
+std::vector<slang::IComponentType*> componentTypes;
+componentTypes.push_back(mainModule);
+componentTypes.push_back(computeEntryPoint);
+componentTypes.push_back(sampleCountModule);
+
+ComPtr<slang::IComponentType> composedProgram;
+SlangResult result = slangSession->createCompositeComponentType(
+    componentTypes.data(),
+    componentTypes.size(),
+    composedProgram.writeRef(),
+    diagnosticsBlob.writeRef());
+
+// Link.
+ComPtr<slang::IComponentType> linkedProgram;
+composedProgram->link(linkedProgram.writeRef(), diagnosticsBlob.writeRef());
+
+// Get compiled code.
+ComPtr<slang::IBlob> compiledCode;
+linkedProgram->getEntryPointCode(0, 0, compiledCode.writeRef(), diagnosticsBlob.writeRef());
+
+```
+
+## Link-time Types
+
+In addition to constants, you can also define types that are specified at link-time. For example, given the following modules:
+
+```csharp
+// common.slang
+interface ISampler
+{
+    int getSampleCount();
+    float sample(int index);
+}
+struct FooSampler : ISampler
+{
+    int getSampleCount() { return 1; }
+    float sample(int index) { return 0.0; }
+}
+struct BarSampler : ISampler
+{
+    int getSampleCount() { return 2; }
+    float sample(int index) { return index * 0.5; }
+}
+```
+
+```csharp
+// main.slang
+import common;
+extern struct Sampler : ISampler;
+
+RWStructuredBuffer<float> output;
+void main(uint tid : SV_DispatchThreadID)
+{
+    Sampler sampler;
+    [ForceUnroll]
+    for (int i = 0; i < sampler.getSampleCount(); i++)
+        output[tid] += sampler.sample(i);
+}
+```
+
+Again, we can separately compile these modules into binary forms independently from how they will be specialized.
+To specialize the shader, we can author a third module that provides a definition for the `extern Sampler` type:
+
+```csharp
+// sampler.slang
+import common;
+export struct Sampler : ISampler = FooSampler;
+```
+
+The `=` syntax is syntactic sugar that expands to the following code:
+
+```csharp
+export struct Sampler : ISampler
+{
+    FooSampler inner;
+    int getSampleCount() { return inner.getSampleCount(); }
+    float sample(int index) { return inner.sample(index); }
+}
+```
+
+When all these three modules are linked, we will produce a specialized shader that uses the `FooSampler`.
+
+## Providing Default Settings
+
+When defining an `extern` symbol as a link-time constant or type, it is allowed to provide a default value for that constant or type.
+When no other module exists to `export` the same-named symbol, the default value will be used in the linked program.
+
+For example, the following code is considered complete at linking and can proceed to code generation without any issues:
+```c++
+// main.slang
+
+// Provide a default value when no other modules are exporting the symbol.
+extern static const int kSampleCount = 2;
+// ... 
+void main(uint tid : SV_DispatchThreadID)
+{
+    [ForceUnroll]
+    for (int i = 0; i < kSampleCount; i++)
+        output[tid] += sample(i);
+}
+```
+
+## Restrictions
+
+Unlike preprocessors, link-time constants and types can only be used in places where shader parameter layout cannot be
+affected. This means that link-time constants and types are subject to the following restrictions:
+- Link-time constants cannot be used to define array sizes.
+- Link-time types are considered "incomplete" types. A struct or array type that has an element of incomplete type is also an incomplete type.
+  Incomplete types cannot be used as `ConstantBuffer` or `ParameterBlock` element type, and cannot be used directly as the type of
+  a uniform variable.
+
+However it is allowed to use incomplete types as the element type of `StructuredBuffer` or `GLSLStorageBuffer`.
+
+## Using Precompiling Modules with the API
+
+In addition to using `slangc` for precompiling Slang modules, the `IModule` class provides a method to serialize itself to disk:
+
+```C++
+/// Get a serialized representation of the checked module.
+SlangResult IModule::serialize(ISlangBlob** outSerializedBlob);
+
+/// Write the serialized representation of this module to a file.
+SlangResult IModule::writeToFile(char const* fileName);
+```
+
+These functions will write only the module itself to a file, which excludes the modules that it imports. To write all imported
+modules, you can use methods from the `ISession` class to enumerate all currently loaded modules (including transitively imported modules)
+in the session:
+
+```c++
+SlangInt ISession::getLoadedModuleCount();
+IModule* ISession::getLoadedModule(SlangInt index);
+```
+
+Additionally, the `ISession` class also provides a function to query if a previously compiled module is still up-to-date with the current
+Slang version, the compiler options in the session and the current content of the source files used to compile the module:
+
+```c++
+bool ISession::isBinaryModuleUpToDate(
+    const char* modulePath,
+    slang::IBlob* binaryModuleBlob);
+```
+
+If the compiler options or source files have been changed since the module was last compiled, `isBinaryModuleUpToDate` will return false.
+
+The compiler can be set up to automatically use the precompiled modules when they exist and are up-to-date. When loading a module,
+either triggered via the `ISession::loadModule` call or via transitive `import`s in the modules being loaded, the compiler will look in the
+search paths for a `.slang-module` file first. If it exists, it will load the precompiled module instead of compiling from the source.
+If you wish the compiler to verify whether the `.slang-module` file is up-to-date before loading it, you can set `CompilerOptionName::UseUpToDateBinaryModule` to `1`
+when creating the session. When this option is set, the compiler will verify the precompiled module is still up-to-date, and will recompile the module
+from source if it is not up-to-date.
+
+
+## Additional Remarks
+
+Link-time specialization is Slang's answer to compile-time performance and modularity issues associated with preprocessor
+based shader specialization. By representing specializable settings as link-time constants or link-time types, we are able
+to defer shader specialization to link time, allowing reuse of all the front-end compilation work that includes tokenization,
+parsing, type checking, IR generation and validation. As Slang evolves to support more language features and as the user code
+is growing to be more complex, the cost of front-end compilation will only increase over time. By using link-time specialization
+on precompiled modules, an application can be completely isolated from any front-end compilation cost.
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/user-guide/a1-01-matrix-layout.md b/external/slang/share/doc/slang/user-guide/a1-01-matrix-layout.md
new file mode 100644
index 00000000..cb301ce8
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a1-01-matrix-layout.md
@@ -0,0 +1,158 @@
+---
+layout: user-guide
+---
+
+Handling Matrix Layout Differences on Different Platforms
+=========================================================
+
+The differences in default matrix layout or storage conventions between GLSL (OpenGL/Vulkan) and HLSL have been an issue that frequently causes confusion among developers. When writing applications that work on different targets, one important goal that developers frequently seek is to make it possible to pass the same matrix generated by host code to the same shader code, regardless of what graphics API is being used (e.g. Vulkan, OpenGL or Direct3D). As a solution to shader cross-compilation, Slang provides the necessary tools for developers to navigate around the differences between GLSL and HLSL targets.
+
+A high level summary:
+
+* Default matrix **layout** in memory for Slang is `row-major`. 
+  * Except when running the compiler through the `slangc` tool, in which case the default is `col-major`. This default is for *legacy* reasons and may change in the future.
+* Row-major layout is the only *portable* layout to use across targets (with significant caveats for non 4x4 matrices)
+* Use `setMatrixLayoutMode`/`spSetMatrixLayoutMode`/`createSession` to set the default  
+* Use `-matrix-layout-row-major` or `-matrix-layout-column-major` for the command line 
+  * or via `spProcessCommandLineArguments`/`processCommandLineArguments`
+* Depending on your host maths library, matrix sizes and targets, it may be necessary to convert matrices at host/kernel boundary  
+  
+On the portability issue, some targets *ignore* the matrix layout mode, notably CUDA and CPU/C++. For this reason for the widest breadth of targets it is recommended to use *row-major* matrix layout.
+
+Two conventions of matrix transform math
+----------------------------------------
+
+Depending on the platform a developer is used to, a matrix-vector transform can be expressed as either `v*m` (`mul(v, m)` in HLSL), or `m*v` (`mul(m,v)` in HLSL). This convention, together with the matrix layout (column-major or row-major), determines how a matrix should be filled out in host code. 
+
+In HLSL/Slang the order of vector and matrix parameters to `mul` determines how the *vector* is interpreted. This interpretation is required because a vector does not in and of itself differentiate between being a row or a column.
+
+* `mul(v, m)` - v is interpreted as a row vector.
+* `mul(m, v)` - v is interpreted as a column vector.
+
+Through this mechanism a developer is able to write transforms in their preferred style. 
+
+These two styles are not directly interchangeable - for a given `v` and `m`, generally `mul(v, m) != mul(m, v)`. To get the same result the matrix needs to be transposed, so
+
+* `mul(v, m) == mul(transpose(m), v)`
+* `mul(m, v) == mul(v, transpose(m))`
+
+This behavior is *independent* of how a matrix is laid out in memory. Host code needs to be aware of how shader code will interpret a matrix stored in memory, its layout, as well as the vector interpretation convention used in shader code (i.e. `mul(v,m)` or `mul(m, v)`).
+
+[Matrix layout](https://en.wikipedia.org/wiki/Row-_and_column-major_order) can be either `row-major` or `column-major`. The difference just determines which elements are contiguous in memory. `Row-major` means the elements of each row are contiguous. `Column-major` means the elements of each column are contiguous.
+
+Another way to think about this difference is in terms of where translation terms should be placed in memory when filling a typical 4x4 transform matrix. When transforming a row vector (ie `mul(v, m)`) with a `row-major` matrix layout, translation will be at `m + 12, 13, 14`. For a `column-major` matrix layout, translation will be at `m + 3, 7, 11`.
+
+Note it is a *HLSL*/*Slang* convention that the parameter ordering of `mul(v, m)` means v is a *row* vector. A host maths library *could* have a transform function `SomeLib::transform(v, m)` such that `v` is interpreted as a *column* vector. For simplicity's sake the remainder of this discussion assumes that the equivalent of `mul(v, m)` in host code follows the interpretation that `v` is a *row* vector.
+
+Discussion
+----------
+
+There are four variables in play here:
+
+* Host vector interpretation (row or column) - and therefore effective transform order (column) `m * v` or (row) `v * m`
+* Host matrix memory layout
+* Shader vector interpretation (as determined via `mul(v, m)` or `mul(m, v)` )
+* Shader matrix memory layout 
+
+Since each item can be either `row` or `column` there are 16 possible combinations. For simplicity let's reduce the variable space by making some assumptions.
+
+1) The same vector convention will be used in host code as in shader code. 
+2) The host maths matrix layout is the same as the kernel.
+
+If we accept 1, then we can ignore the vector interpretation because as long as they are consistent then only matrix layout is significant.
+If we accept 2, then there are only two possible combinations - either both host and shader are using `row-major` matrix layout or `column-major` layout.
+
+This is simple, but is perhaps not the end of the story. First, let's assume that we want our Slang code to be as portable as possible. As previously discussed, for CUDA and C++/CPU targets Slang ignores the matrix layout settings - the matrix layout is *always* `row-major`.
+
+Second, let's consider performance. The matrix layout in a host maths library is not arbitrary from a performance point of view. A performant host maths library will want to use SIMD instructions. With both x86/x64 SSE and ARM NEON SIMD it makes a performance difference which layout is used, depending on whether `column` or `row` is the *preferred* vector interpretation. If the `row` vector interpretation is preferred, it is most performant to have `row-major` matrix layout. Conversely, if `column` vector interpretation is preferred, `column-major` matrix layout will be the most performant.
+
+The performance difference comes down to a SIMD implementation having to do a transpose if the layout doesn't match the preferred vector interpretation. 
+
+If we put this all together - best performance, consistency between vector interpretation and platform independence we get:
+
+1) Consistency : Same vector interpretation in shader and host code
+2) Platform independence: Kernel uses `row-major` matrix layout
+3) Performance: Host vector interpretation should match host matrix layout
+
+The only combination that fulfills all aspects is `row-major` matrix layout and `row` vector interpretation for both host and kernel.
+
+It's worth noting that for targets that honor the default matrix layout - that setting can act like a toggle transposing a matrix layout. If for some reason the combination of choices leads to inconsistent vector transforms, an implementation can perform this transform in *host* code at the boundary between host and the kernel. This is not the most performant or convenient scenario, but if supported in an implementation it could be used for targets that do not support kernel matrix layout settings. 
+
+If only targeting platforms that honor matrix layout, there is more flexibility, our constraints are
+
+1) Consistency : Same vector interpretation in shader and host code
+2) Performance: Host vector interpretation should match host matrix layout
+
+Then there are two combinations that work
+
+1) `row-major` matrix layout for host and kernel, and `row` vector interpretation.
+2) `column-major` matrix layout for host and kernel, and `column` vector interpretation.
+
+If the host maths library is not performance orientated, it may be arbitrary from a performance point of view if a `row` or `column` vector interpretation is used. In that case assuming shader and host vector interpretation is the same it is only important that the kernel and maths library matrix layout match.
+
+Another way of thinking about these combinations is to think of each change in `row-major`/`column-major` matrix layout and `row`/`column` vector interpretation as a transpose. If there are an *even* number of flips then all the transposes cancel out. Therefore the following combinations work
+
+| Host Vector | Kernel Vector | Host Mat Layout | Kernel Mat Layout 
+|-------------|---------------|-----------------|------------------
+|   Row       |    Row        |    Row          |    Row
+|   Row       |    Row        |    Column       |    Column
+|   Column    |    Column     |    Row          |    Row
+|   Column    |    Column     |    Column       |    Column
+|   Row       |    Column     |    Row          |    Column
+|   Row       |    Column     |    Column       |    Row
+|   Column    |    Row        |    Row          |    Column
+|   Column    |    Row        |    Column       |    Row
+
+To be clear 'Kernel Mat Layout' is the shader matrix layout setting. As previously touched upon, if it is not possible to use the setting (say because it is not supported on a target), then doing a transpose at the host/kernel boundary can fix the issue. 
+
+Matrix Layout
+-------------
+
+The above discussion is largely around 4x4 32-bit element matrices. For graphics APIs such as Vulkan, GL, and D3D there are typically additional restrictions for matrix layout. One restriction is for 16 byte alignment between rows (for `row-major` layout) and columns (for `column-major` layout). 
+
+More CPU-like targets such as CUDA and C++/CPU do not have this restriction, and all elements are consecutive. 
+
+This being the case only the following matrix types/matrix layouts will work across all targets. (Listed in the HLSL convention of RxC). 
+ 
+* 1x4 `row-major` matrix layout
+* 2x4 `row-major` matrix layout
+* 3x4 `row-major` matrix layout
+* 4x4 `row-major` matrix layout
+
+These are all 'row-major' because, as previously discussed, only `row-major` matrix layout currently works across all targets.
+
+NOTE! This only applies to matrices that are transferred between host and kernel - any matrix size will work appropriately for variables in shader/kernel code for example.
+
+The host maths library also plays a part here. The library may hold all elements consecutively in memory. If that's the case it will match the CPU/CUDA kernels, but will only work on 'graphics'-like targets that match that layout for the size. 
+
+For SIMD based host maths libraries it can be even more convoluted. If a SIMD library is being used that prefers `row` vector interpretation and therefore will have `row-major` layout it may for many sizes *not* match the CPU-like consecutive layout. For example a 4x3 - it will likely be packed with 16 byte row alignment. Additionally even if a matrix is packed in the same way it may not be the same size. For example a 3x2 matrix *may* hold the rows consecutively *but* be 16 bytes in size, as opposed to the 12 bytes that a CPU-like kernel will expect. 
+
+If a SIMD based host maths library with graphics-like APIs is being used, there is a good chance (but certainly *not* guaranteed) that layout across non 4x4 sizes will match because SIMD typically implies 16 byte alignment. 
+
+If your application uses matrix sizes that are not 4x4 across the host/kernel boundary and it wants to work across all targets, it is *likely* that *some* matrices will have to be converted at the boundary. This being the case, having to handle transposing matrices at the boundary is a less significant issue. 
+
+In conclusion, if your application has to perform matrix conversion work at the host/kernel boundary, the previous observation that "best performance" implies `row-major` layout and `row` vector interpretation becomes somewhat moot.
+
+Overriding default matrix layout
+--------------------------------
+
+Slang allows users to override default matrix layout with a compiler flag. This compiler flag can be specified during the creation of a `Session`:
+
+```
+slang::IGlobalSession* globalSession;
+...
+slang::SessionDesc slangSessionDesc = {};
+slangSessionDesc.defaultMatrixLayoutMode = SLANG_MATRIX_LAYOUT_COLUMN_MAJOR;
+...
+slang::ISession* session;
+globalSession->createSession(slangSessionDesc, &session);
+```
+
+This makes Slang treat all matrices as being in `column-major` layout and, for example, emit the `column_major` qualifier in the resulting HLSL code.
+
+Alternatively the default layout can be set by
+
+* Including a `CompilerOptionName::MatrixLayoutColumn` or `CompilerOptionName::MatrixLayoutRow` entry in `SessionDesc::compilerOptionEntries`.
+* Setting `-matrix-layout-row-major` or `-matrix-layout-column-major` command line options to `slangc`.
+
+
diff --git a/external/slang/share/doc/slang/user-guide/a1-03-obfuscation.md b/external/slang/share/doc/slang/user-guide/a1-03-obfuscation.md
new file mode 100644
index 00000000..d0f10791
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a1-03-obfuscation.md
@@ -0,0 +1,278 @@
+---
+layout: user-guide
+---
+
+Obfuscation
+===========
+
+The Slang obfuscation feature allows developers to distribute shader code in a way where the implementation details are kept secret. For example let's say a developer has produced a novel way to render and wants to protect that intellectual property. If it is possible to compile all possible uses of the shader code into SPIR-V/DXIL the developer can ship their product with those binaries without debug information. This is similar to the protection achieved by shipping an executable - a determined person may with a good deal of effort work out how some algorithm in the executable works, but doing so requires a considerable amount of work, and certainly more work than reading the original source code.
+
+If a developer is not able to ship all shader binaries then there is a problem. The developer doesn't want to ship the source code as in doing so it is relatively straightforward to see how it works or even copy the implementation. A developer could provide some level of protection by encrypting the source, but when compilation occurs it will still be necessary to decrypt and so make it available to read. A developer could obfuscate their source before shipping it, but this scenario raises its own issues:
+
+* Requires tooling to do the obfuscation of the source
+* Any source on the client that isn't obfuscated, needs to be able to call to the obfuscated code
+  * Depending on how the obfuscation takes place this could be hard - remapping symbols or obfuscating on the fly on the client
+  * If "public" symbols keep their original names they leak information about the implementation
+* Obfuscated source, provides some protection but not typically as much as a binary format (like an object file without debug information)
+* How can you debug, or determine where a crash occurred without the original source? 
+* If a failure occurs - how is it possible to report meaningful errors?
+
+Some of these issues are similar to the problems of distributing JavaScript libraries that run on client machines, but whose implementation the original authors do not want to make directly available. Some of the obfuscation solutions used in the JavaScript world are partially applicable to Slang's obfuscation solution, including [source maps](https://github.com/source-map/source-map-spec).
+
+## Obfuscation in Slang
+
+Slang provides an obfuscation feature that addresses these issues. The major parts being
+
+* The ability to compile a module with obfuscation enabled
+  * The module is a binary format, that doesn't contain the original names or locations
+* The ability to compile regular slang code that can *link* against an obfuscated module
+* Code emitted to downstream compilers contains none of the symbols or locations from the original source
+* Source map(s) to provide mappings between originating source and obfuscated source produced on the client
+
+Enabling obfuscation can be achieved via the `-obfuscate` option. When using the Slang API the `-obfuscate` option can be passed via `spProcessCommandLineArguments` function or `processCommandLineArguments` method. 
+
+When enabled a few things will happen
+
+* Source locations are scrambled to (blank) lines in an "empty" obfuscation source file.
+* A source map is produced mapping from the (blank) lines, to the originating source locations 
+* Name hints are stripped.
+* If a `slang-module` is being produced, AST information will be stripped.
+* The names of symbols are scrambled into hashes
+
+The source Slang emits which is passed down to downstream compilers is obfuscated, and only contains the sections of code necessary for the kernel to compile and function. 
+
+Currently all source that is going to be compiled and linked must all have the `-obfuscate` option enabled to be able to link correctly.
+
+When obfuscation is enabled source locations are scrambled, but Slang will also create a [source map](https://github.com/source-map/source-map-spec), which provides the mapping from the obfuscated locations to the original source. This so called "obfuscated source map" is stored with the module. If compilation produces an error, Slang will automatically use the obfuscated source map to display the error location in the originating source.
+
+If the obfuscated source map isn't available, it will still display a source location if available, but the location will be to the "empty" obfuscated source file. This will appear in diagnostics as "(hex-digits)-obfuscated(line)". With this information and the source map it is possible to output the original source location. Importantly, without the obfuscated source map, information leakage about the original source is very limited.
+
+It should be noted that the obfuscated source map is of key importance in hiding the information. In the example scenario of protecting intellectual property, a developer should compile the code they wish to protect with `-obfuscate` and distribute *just* the `.slang-module` file to link on the client machine. The source map file should not be distributed onto client machines. 
+
+A developer could use the source map 
+
+* To determine where a problem is occurring by getting the obfuscated error, or crash information. 
+* Provide a web service that could provide more meaningful information keyed on the obfuscated location.
+  * Such a service could limit what information is returned, but still be meaningful
+* A web service could additionally log errors for later analysis with the source map to determine the actual origin.
+
+## Using An Obfuscated Module
+
+To use a `slang-module` with obfuscation requires
+
+* Specifying one or more obfuscated modules via `-r` option
+  * Currently there is only support for referencing modules stored in files
+* Specifying the `-obfuscate` option
+
+In a non-obfuscated module, parts of the AST are serialized. This AST information could be thought of as broadly analogous to a header in C++. It is enough such that functionality in the module can be semantically checked, and linked with, however it does not, for example, contain the implementations of functions. This means doing a `-r` is roughly equivalent to doing an `import` of the source, without having the source. Any of the types, functions and so forth are available.
+
+With the `-obfuscate` option we strip the AST, in an abundance of caution to try and limit leaking information about the module.
+
+This means that `-r` is *NOT* enough to be able to access the functionality of the module. It is necessary to declare the functions and types you wish to use. If a type is used only opaquely - i.e. not accessing its members directly - it is only necessary to declare that the type exists. If fields are accessed directly it is undefined behavior for a definition in one module to be incompatible with the definition in the obfuscated module.
+
+For example, in "module.slang"
+
+```slang
+struct Thing
+{
+    int a; 
+    int b;
+};
+
+int foo(Thing thing) 
+{ 
+    return (thing.a + thing.b) - thing.b; 
+}
+```
+
+In the source that uses this module
+
+```slang
+// This is fragile - needs to match the definition in "module.slang"
+struct Thing
+{
+    int a;
+    int b;
+};
+
+int foo(Thing thing);
+
+RWStructuredBuffer<int> outputBuffer;
+
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    Thing thing;
+
+    int index = (int)dispatchThreadID.x;
+        
+    thing.a = index;
+    thing.b = -index;
+
+    outputBuffer[index] = foo(thing);
+}
+```
+
+If the type `Thing` is only used opaquely then it would only be necessary to declare that it exists. For example in "module-opaque.slang"
+
+```slang
+struct Thing
+{
+    int a; 
+    int b;
+};
+
+Thing makeThing(int a, int b)
+{
+    return {a, b};
+}
+
+int foo(Thing thing) 
+{ 
+    return (thing.a + thing.b) - thing.b; 
+}
+```
+
+In the source that uses this module
+
+```slang
+// We can just declare Thing exists, as its usage is opaque.
+struct Thing;
+int foo(Thing thing);
+Thing makeThing(int a, int b);
+
+RWStructuredBuffer<int> outputBuffer;
+
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+    int index = (int)dispatchThreadID.x;
+    Thing thing = makeThing(index, -index);
+    outputBuffer[index] = foo(thing);
+}
+```
+
+That this works might seem surprising to users of languages such as C/C++, because in these languages it is necessary to know the layout of `Thing` to be able to create the `thing` variable.  This isn't necessary here though, and this can be very useful for some scenarios.
+
+A future iteration of the feature may include parts of the AST such that an obfuscated slang-module can be used like a regular module. It would be important that what is exposed is clear and under programmer control. By default most of the definitions within a module would typically not be exposed. 
+## Accessing Source Maps
+
+During a compilation Slang can produce many different "artifacts". When using the obfuscated source map option to produce a `slang-module` Slang will associate an obfuscated source map providing the mapping to the original source. 
+
+With typical Slang API usage, a compilation takes place and the output is a "blob" that is the output kernel. It is also possible to compile to a container, such as a zip file or a directory. The zip file can contain the kernel as well as source map(s).
+
+For example
+
+```
+slangc module-source.slang -o module.zip -g -obfuscate 
+```
+
+This will compile "module-source.slang" into a SlangIR module (aka `slang-module`) and place the `.slang-module` inside of the zip. As obfuscation is enabled the .zip will also contain the obfuscated source map for the module. 
+
+The `.zip` file can now be used and referenced as a module 
+
+```
+slangc source.slang -target dxil -stage compute -entry computeMain -obfuscate -r module.zip
+```
+
+Notice here that the `-r` module reference is to the `.zip` file rather than the more usual `.slang-module` that is contained in the zip file. By referencing the module in this way Slang will automatically associate the contained obfuscated source map with the module. It will use that mapping for outputting diagnostics.
+
+It is also worth noticing that in this second compilation, using `module.zip`, we need the `-obfuscate` flag set. If this isn't set linking will not work correctly.
+
+NOTE! As previously discussed though you should *not* ship the .zip file with the obfuscated source map such that it's available on client machines, as doing so does leak some information about the original source. Not the original source itself, but the names of files and the locations in files. You could ship a .zip to client machines, but make sure the `.map` obfuscated source maps are stripped. Alternatively, and perhaps less riskily you could ship `.slang-module` files taken from the `.zip` file and then it is clear there is no source map information available.
+
+## Accessing Source Maps without Files
+
+When using the Slang API typically things work through memory, such as accessing a compilation result via a blob. It is possible to access source maps via memory also, but doing so currently requires accessing the result of a compilation as if it's a file system. The current API to do this is 
+
+```
+ISlangMutableFileSystem* getCompileRequestResultAsFileSystem();
+```
+
+This method is currently only available on the `ICompileRequest` and not on the component (aka `IComponentType`) API.
+
+The file system returned is held in memory, and the blob data held in the file system is typically shared, so accessing items this way is typically very low overhead. 
+
+The conventions used for the file system representation could best be described as a work in progress, and may change in the future. Internally Slang stores compilation results as a hierarchy of "artifacts". An artifact consists of the main result, plus associated artifacts. An artifact can also be a container which can additionally hold children artifacts. In the current directory structure each artifact is a directory, with the root directory of the `ISlangMutableFileSystem` being the root artifact. 
+
+Given a directory representing an artifact, it can contain 2 special directories: `children` and `associated`. The `children` directory contains the artifacts that are children of the current directory's artifact. Similarly `associated` contains directories for artifacts that are associated with the current artifact.
+
+To give an example, if we compiled a module with obfuscation we might end up with a directory structure like....
+
+```
+obfuscated-loc-module.slang-module
+associated/
+associated/bc65f637-obfuscated/
+associated/bc65f637-obfuscated/bc65f637-obfuscated.map
+```
+
+The root contains the root artifact `obfuscated-loc-module.slang-module` and the associated directory holds anything associated with that module, in this case there is just one thing associated which is the obfuscated source map. Note all obfuscated source maps have a name ending in `-obfuscated`.
+
+The directory `associated/bc65f637-obfuscated/` is the directory that represents the `bc65f637-obfuscated` artifact, and that just consists of the contained map file.
+
+At the moment the types of files need to be determined by their extensions. A future version will hold a manifest that describes in more detail the content.
+
+## Emit Source Maps
+
+So far we have been mainly discussing "obfuscation" source maps. These maps provide a mapping from output locations to hidden original locations.
+
+It is also possible to generate a source map as part of emitting source to be passed to downstream compilers such as DXC, FXC, GLSLANG, NVRTC or C++ compilers. This can be achieved via the `-line-directive-mode source-map` option. The line directive mode controls how information about the original source is handled when emitting the source. The default mechanism will add `#line` directives into the emitted source. 
+
+Via the API there are a few options to enable emit source maps
+
+```
+const char* args[2] = {"-line-directive-mode", "source-map" };
+request->processCommandLineArguments(args, 2);
+
+// Or
+spProcessCommandLineArguments(request, args, 2);
+
+// Or just setting directly
+request->setLineDirectiveMode(SLANG_LINE_DIRECTIVE_MODE_SOURCE_MAP);
+ 
+// Or 
+spSetLineDirectiveMode(request, SLANG_LINE_DIRECTIVE_MODE_SOURCE_MAP);
+```
+
+The `#line` mechanism is fairly straightforward in that all of the information, including the mapping information, is in a single file. A downstream compiler will then embed that information into its debug information. If obfuscation is being used, this will work and the `#line` will actually reference the "made up" "xxx-obfuscated" files.
+
+With the `-line-directive-mode source-map` option no line directives are emitted, but a source map is produced that can map from a location in the emitted source back to its origin. If one of the origins is an obfuscated module this will reference "xxx-obfuscated" files. So in this scenario if you want to do a lookup to a location in the original source you *potentially* have to do two source map lookups.
+
+The first lookup will take you from the emitted source location, as will likely be specified by a debugger, to their origin. Some of the origins might be source that was compiled directly (i.e. not part of an obfuscated module); these files will be named directly. If this leads to a location inside an obfuscated source map, another lookup is needed to get back to the original source location.
+
+Why might you want to use an emit source map rather than use the `#line` mechanism?
+
+* Less source will need to be consumed by the downstream compiler - it can just be emitted as is
+* The debugging source locations will be directly the locations within the emitted source
+* Source map mapping is accurate from any point in the generated source to any point in the original source
+  * The `#line` mechanism is only accurate to a line
+* It allows a separation of this information, such that it can be consumed and disposed of as the application requires
+* Source maps are a standard, and so can be used in tooling
+* Source maps allow for name mapping, mapping a symbol name to the symbol name in the original source
+  * This is currently not enabled in Slang, but may be a future addition
+
+Why you might not want to use an emit source map
+
+* The `#line` mechanism doesn't require any special handling, and the mapping back is embedded directly into the emitted source/output binary
+* There is more housekeeping in getting, keeping and using source maps
+* Currently Slang doesn't directly expose a source map processing API  
+  * We do support source maps in module files, or produced as part of a compilation
+  * A developer could use the slang `compiler-core` implementation
+  * In the future the project could provide some API support 
+
+## Issues/Future Work
+
+* Support AST emitting in obfuscated modules
+* Potentially add API support for source maps
+* Add manifest support for artifacts
+* Potentially provide a way to interact with artifacts more directly 
+* Potentially support for name mapping
+* May want to improve the file hierarchy representation
+* Provide other ways to ingest modules, such as through memory (currently -r just supports files)
+* Provide more support for other kinds of artifacts
+  * Diagnostics
+  * Meta data (such as bindings used)
+  * Reflection
+* We use -g to indicate debug information
+  * On DXC the debug information is embedded in the DXIL, we allow for pdb to separate, but we currently *don't* strip the PDB from the DXIL
+  * If we do strip the PDB, we may need to re-sign the DXIL
diff --git a/external/slang/share/doc/slang/user-guide/a1-04-interop.md b/external/slang/share/doc/slang/user-guide/a1-04-interop.md
new file mode 100644
index 00000000..21ed4027
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a1-04-interop.md
@@ -0,0 +1,218 @@
+---
+layout: user-guide
+---
+
+Interoperation with Target-Specific Code
+===========
+
+Slang provides low-level interoperation mechanisms to allow developers to use target-specific features or invoke code written in the target language from Slang code. These mechanisms are:
+- `__intrinsic_asm` construct to map a function invocation to specific textual target code.
+- `__requirePrelude` construct to inject arbitrary text to the generated textual target code.
+- `__target_switch` construct to use different implementations for different targets.
+- `spirv_asm` construct to define inline SPIRV assembly blocks.
+
+> #### Note
+> The language mechanisms described in this chapter are considered internal compiler features.
+> The compiler does not provide comprehensive checks around their uses. These mechanisms are also subject
+> to breaking changes in future releases.
+
+## Defining Intrinsic Functions for Textual Targets
+
+When using Slang to generate code for a textual target, e.g. HLSL, GLSL, CUDA or C++, you can use `__intrinsic_asm` to define what code to generate for an invocation to an intrinsic function. For example, the following Slang code defines an intrinsic function `myPrint`, that when called, will produce a call to `printf` in the target code:
+```cpp
+void myPrint(float v)
+{
+    __intrinsic_asm R"(printf("v is %f", $0))";
+}
+
+void test()
+{
+    myPrint(1.0f);
+}
+```
+Compiling the above code to CUDA or C++ will yield the following output:
+
+```cpp
+// ...
+void test_0()
+{
+    printf("v is %f", 1.0f);
+}
+```
+
+The `__intrinsic_asm` statement in `myPrint` serves as the definition for the function. When a function body contains `__intrinsic_asm`, the function is treated by the compiler as an intrinsic and it must not contain other ordinary statements. Calls to an intrinsic function will be translated using the definition string of the intrinsic. In this example, the intrinsic is defined by the string literal `R"(printf("v is %f", $0))"`, which is used to translate the call from `test()`. The `"$0"` in the literal is replaced with the first argument. Besides `"$<index>"`, you may also use the following macros in an intrinsic definition:
+
+| Macro     |  Expands to |
+|-----------|-------------|
+| `$<index>`  |  Argument `<index>`, starting from 0 |
+| `$T<index>` |  Type of argument `<index>` |
+| `$TR`       |  The return type. |
+| `$N<index>` |  The element count of argument `<index>`, if the argument is a vector. |
+| `$S<index>` |  The scalar type of argument `<index>`, if the argument is a matrix or vector. |
+| `$*<index>` |  Emit all arguments starting from `<index>` as comma separated list |
+
+## Defining Intrinsic Types
+
+You can use the `__target_intrinsic` modifier on a `struct` type to cause the type to be emitted as a specific string for a given target. For example:
+```
+__target_intrinsic(cpp, "std::string")
+struct CppString
+{
+    uint size()
+    {
+        __intrinsic_asm "static_cast<uint32_t>(($0).size())";
+    }
+}
+```
+When compiling the above code to C++, the `CppString` struct will not be emitted as a C++ struct. Instead, all uses of `CppString` will be emitted as `std::string`.
+
+## Injecting Preludes
+
+If you have code written in the target language that you want to include in the generated code, you can use `__requirePrelude`.
+For example:
+```cpp
+int getMyEnvVariable()
+{
+    __requirePrelude(R"(#include <stdlib.h>)");
+    __requirePrelude(R"(#include <string>)");
+    __requirePrelude(R"(
+            int getEnvVarImpl()
+            {
+                char* var = getenv("MY_ENVIRONMENT_VAR");
+                return std::stoi(var);
+            }
+        )");
+    __intrinsic_asm "getEnvVarImpl()";
+}
+void test()
+{
+    if (getMyEnvVariable() == 0)
+        return;
+}
+```
+In this code, `getMyEnvVariable()` is defined as an intrinsic Slang function that will translate to a call to `getEnvVarImpl()` in the target code. The first two `__requirePrelude` calls cause include directives to be emitted in the resulting code, and the third `__requirePrelude` call causes a definition of `getEnvVarImpl()`, written in C++, to be emitted before other Slang functions are emitted. The above code will translate to the following output:
+```cpp
+// ...
+#include <stdlib.h>
+#include <string>
+int getEnvVarImpl()
+{
+    char* var = getenv("MY_ENVIRONMENT_VAR");
+    return std::stoi(var);
+}
+void test_0()
+{
+    if (getEnvVarImpl() == 0)
+        return;
+}
+```
+
+The strings in `__requirePrelude` are deduplicated: the same prelude string will only be emitted once no matter how many times an intrinsic function is invoked. Therefore, it is good practice to put `#include` lines as separate `__requirePrelude` statements to prevent duplicate `#include`s being generated in the output code.
+
+## Managing Cross-Platform Code
+If you are defining an intrinsic function that maps to multiple targets in different ways, you can use `__target_switch` construct to manage the target-specific definitions. For example, here is a snippet from the Slang core module that defines `getRealtimeClock`:
+```hlsl
+[__requiresNVAPI]
+__glsl_extension(GL_EXT_shader_realtime_clock)
+uint2 getRealtimeClock()
+{
+    __target_switch
+    {
+    case hlsl:
+        __intrinsic_asm "uint2(NvGetSpecial(NV_SPECIALOP_GLOBAL_TIMER_LO), NvGetSpecial( NV_SPECIALOP_GLOBAL_TIMER_HI))";
+    case glsl:
+        __intrinsic_asm "clockRealtime2x32EXT()";
+    case spirv:
+        return spirv_asm
+        {
+            OpCapability ShaderClockKHR;
+            OpExtension "SPV_KHR_shader_clock";
+            result : $$uint2 = OpReadClockKHR Device
+        };
+    default:
+        return uint2(0, 0);
+    }
+}
+```
+This definition causes `getRealtimeClock()` to translate to a call to NVAPI when targeting HLSL, to `clockRealtime2x32EXT()` when targeting
+GLSL, and to the `OpReadClockKHR` instruction when compiling directly to SPIRV through the inline SPIRV assembly block. The `default` case is
+used for targets not specified in the `__target_switch` statement.
+
+Currently, the following target names are supported in a `case` statement: `cpp`, `cuda`, `glsl`, `hlsl`, and `spirv`.
+
+## Inline SPIRV Assembly
+
+When targeting SPIRV, Slang allows you to directly write an SPIRV assembly block and use it as a part of an expression. For example:
+```cpp
+int test()
+{
+    int localVar = 5;
+    return 1 + spirv_asm {
+            %temp: $$int = OpIMul $localVar $(2);
+            result: $$int = OpIAdd %temp %temp
+        };
+    // returns 21
+}
+```
+A SPIRV assembly block contains one or more SPIRV instructions, separated by semicolons. Each SPIRV instruction has the form:
+```
+%identifier : <type> = <opcode> <operand> ... ;
+```
+where `<opcode>` defines a value named `identifier` of `<type>`, or simply:
+```
+<opcode> <operand> ... ;
+```
+when `<opcode>` does not define a return value.
+
+When used as part of an expression, the Slang type of the `spirv_asm` construct is defined by the last instruction, which must be in the form of
+```
+result: <type> = ...
+```
+
+You can use the `$` prefix to begin an anti-quote of a Slang expression inside a `spirv_asm` block. This is commonly used to refer to a Slang variable, such as `localVar` in the example, as an operand. Additionally, the `$$` prefix is used to reference a Slang type, such as the `$$int` references in the example.
+
+You can also use the `&` prefix to refer to an l-value as a pointer-typed value in SPIRV, for example:
+```cpp
+float modf(float x, out float ip)
+{
+    return spirv_asm
+    {
+        result:$$float = OpExtInst glsl450 Modf $x &ip
+    };
+}
+```
+
+Opcodes such as `OpCapability`, `OpExtension` and type definitions are allowed inside a `spirv_asm` block. These instructions will be deduplicated and inserted into the correct sections defined by the SPIRV specification, for example:
+```cpp
+uint4 WaveMatch(T value)
+{
+    return spirv_asm
+    {
+        OpCapability GroupNonUniformPartitionedNV;
+        OpExtension "SPV_NV_shader_subgroup_partitioned";
+        OpGroupNonUniformPartitionNV $$uint4 result $value
+    };
+}
+```
+
+You may use SPIRV enum values directly as operands, for example:
+```cpp
+void memoryBarrierImage()
+{
+    spirv_asm
+    {
+        OpMemoryBarrier Device AcquireRelease|ImageMemory
+    };
+}
+```
+
+To access SPIRV builtin variables, you can use the `builtin(VarName:type)` syntax as an operand:
+```cpp
+uint InstanceIndex()
+{
+    return spirv_asm {
+        result:$$uint = OpLoad builtin(InstanceId:uint);
+    };
+}
+```
+
diff --git a/external/slang/share/doc/slang/user-guide/a1-05-uniformity.md b/external/slang/share/doc/slang/user-guide/a1-05-uniformity.md
new file mode 100644
index 00000000..be07f89c
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a1-05-uniformity.md
@@ -0,0 +1,104 @@
+---
+layout: user-guide
+---
+
+Uniformity Analysis
+===========
+
+On certain hardware, accessing resources with a non-uniform index may lead to significant performance degradation. Developers can often benefit from a compiler warning for unintentional non-uniform resource access.
+
+Starting from v2024.1.0, Slang provides uniformity analysis that can warn users if a non-dynamically-uniform value is being used unintentionally. This feature is not enabled by default but can be turned on with the `-validate-uniformity` commandline option when using `slangc`, or the `CompilerOptionName::ValidateUniformity` compiler option when using the API.
+
+In addition to specifying the compiler option, the source code must be augmented with the `dynamic_uniform` modifier to mark function parameters, struct fields or local variables as expecting a dynamic uniform value.
+
+For example, the following code will trigger a warning:
+```csharp
+// Indicate that the `v` parameter needs to be dynamic uniform.
+float f(dynamic_uniform float v)
+{
+    return v + 1.0;
+}
+
+[numthreads(1,1,1)]
+[shader("compute")]
+void main(int tid : SV_DispatchThreadID)
+{
+    f(tid); // warning: tid is not dynamically uniform.
+}
+```
+
+Currently, the analysis is being conservative for `struct` typed values, in that if any member of the `struct` is known to be non-uniform, the entire composite is
+treated as non-uniform:
+```csharp
+struct MyType
+{
+    int a;
+    int b;
+}
+
+void expectUniform(dynamic_uniform int a){}
+
+void main(int tid : SV_DispatchThreadID)
+{
+    MyType t;
+    t.a = tid;
+    t.b = 0;
+
+    // Generates a warning here even though t.b is uniform, because
+    // t.a is non-uniform and that assignment makes `t` non-uniform.
+    expectUniform(t.b);
+}
+```
+
+To allow the compiler to provide more accurate analysis, you can mark struct fields as
+`dynamic_uniform`:
+
+```csharp
+struct MyType
+{
+    int a;
+    dynamic_uniform int b;
+}
+
+void expectUniform(dynamic_uniform int a){}
+
+void main(int tid : SV_DispatchThreadID)
+{
+    MyType t;
+    t.a = tid;
+    t.b = 0;
+
+    // OK, because MyType::b is marked as dynamic_uniform.
+    expectUniform(t.b);
+
+    // Warning: trying to assign non-uniform value to dynamic_uniform location.
+    t.b = tid;
+}
+```
+
+## Treat Values as Uniform
+
+In some cases, the compiler might not be able to deduce that a value is uniform. If you are certain that a value can
+be treated as dynamic uniform, you can call `asDynamicUniform()` function to force the compiler to treat the value as
+dynamic uniform. For example:
+```csharp
+void main(int tid: SV_DispatchThreadID)
+{
+    expectUniform(asDynamicUniform(tid)); // OK.
+}
+```
+
+## Treat Function Return Values as Non-uniform
+
+The uniformity analysis will automatically propagate uniformity to function return values. However if you have
+an intrinsic function that does not have a body, or you simply wish the return value of a function to be always
+treated as non-uniform, you can mark the function with the `[NonUniformReturn]` attribute:
+```csharp
+[NonUniformReturn]
+int f() { return 0; }
+void expectUniform(dynamic_uniform int x) {}
+void main()
+{
+    expectUniform(f()); // Warning.
+}
+```
diff --git a/external/slang/share/doc/slang/user-guide/a1-special-topics.md b/external/slang/share/doc/slang/user-guide/a1-special-topics.md
new file mode 100644
index 00000000..ef218b8f
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a1-special-topics.md
@@ -0,0 +1,15 @@
+---
+layout: user-guide
+---
+
+Special Topics
+============================
+
+This chapter covers several additional topics on using Slang. These topics do not belong to any categories covered in previous chapters, but they address specific issues that developers may frequently encounter.
+
+In this chapter:
+1. [Handling matrix layout differences on different platforms](a1-01-matrix-layout.md)
+2. [Using Slang to write PyTorch kernels](a1-02-slangpy.md)
+3. [Obfuscation](a1-03-obfuscation.md)
+4. [Interoperation with target-specific code](a1-04-interop.md)
+5. [Uniformity Analysis](a1-05-uniformity.md)
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/user-guide/a2-01-spirv-target-specific.md b/external/slang/share/doc/slang/user-guide/a2-01-spirv-target-specific.md
new file mode 100644
index 00000000..5090caac
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a2-01-spirv-target-specific.md
@@ -0,0 +1,421 @@
+---
+layout: user-guide
+permalink: /user-guide/spirv-target-specific
+---
+
+SPIR-V specific functionalities
+===============================
+
+This chapter provides information for SPIR-V specific functionalities and behaviors.
+
+Experimental support for the older versions of SPIR-V
+-----------------------------------------------------
+
+Slang's SPIRV backend is stable when emitting SPIRV 1.3 and later, however, support for SPIR-V 1.0, 1.1 and 1.2 is still experimental.
+When targeting the older SPIR-V profiles, Slang may produce SPIR-V that uses the instructions and keywords that were introduced in the later versions of SPIR-V.
+
+
+Combined texture sampler
+------------------------
+Slang supports combined texture samplers such as `Sampler2D`.
+Slang emits SPIR-V code with `OpTypeSampledImage` instruction.
+
+For SPIR-V targets, explicit bindings may be provided through a single `vk::binding` decoration.
+```
+[[vk::binding(1,2)]]
+Sampler2D explicitBindingSampler;
+```
+
+For other targets (HLSL or others) where combined texture samplers are _not_ supported intrinsically, they are emulated by Slang using separate objects for Texture and Sampler.
+For explicit binding on such targets, you can specify two different register numbers for each: one for the texture register and another for the sampler register.
+```
+Sampler2D explicitBindingSampler : register(t4): register(s3);
+```
+
+
+System-Value semantics
+----------------
+
+The system-value semantics are translated to the following SPIR-V code.
+
+| SV semantic name              | SPIR-V code                       |
+|-------------------------------|-----------------------------------|
+| `SV_Barycentrics`             | `BuiltIn BaryCoordKHR`            |
+| `SV_ClipDistance<N>`          | `BuiltIn ClipDistance`            |
+| `SV_CullDistance<N>`          | `BuiltIn CullDistance`            |
+| `SV_Coverage`                 | `BuiltIn SampleMask`              |
+| `SV_CullPrimitive`            | `BuiltIn CullPrimitiveEXT`        |
+| `SV_Depth`                    | `BuiltIn FragDepth`               |
+| `SV_DepthGreaterEqual`        | `BuiltIn FragDepth`               |
+| `SV_DepthLessEqual`           | `BuiltIn FragDepth`               |
+| `SV_DispatchThreadID`         | `BuiltIn GlobalInvocationId`      |
+| `SV_DomainLocation`           | `BuiltIn TessCoord`               |
+| `SV_DrawIndex`                | `BuiltIn DrawIndex`               |
+| `SV_GSInstanceID`             | `BuiltIn InvocationId`            |
+| `SV_GroupID`                  | `BuiltIn WorkgroupId`             |
+| `SV_GroupIndex`               | `BuiltIn LocalInvocationIndex`    |
+| `SV_GroupThreadID`            | `BuiltIn LocalInvocationId`       |
+| `SV_InnerCoverage`            | `BuiltIn FullyCoveredEXT`         |
+| `SV_InsideTessFactor`         | `BuiltIn TessLevelInner`          |
+| `SV_InstanceID`               | `BuiltIn InstanceIndex`           |
+| `SV_IntersectionAttributes`   | *Not supported*                   |
+| `SV_IsFrontFace`              | `BuiltIn FrontFacing`             |
+| `SV_OutputControlPointID`     | `BuiltIn InvocationId`            |
+| `SV_PointSize`<sup>note</sup> | `BuiltIn PointSize`               |
+| `SV_Position`                 | `BuiltIn Position/FragCoord`      |
+| `SV_PrimitiveID`              | `BuiltIn PrimitiveId`             |
+| `SV_RenderTargetArrayIndex`   | `BuiltIn Layer`                   |
+| `SV_SampleIndex`              | `BuiltIn SampleId`                |
+| `SV_ShadingRate`              | `BuiltIn PrimitiveShadingRateKHR` |
+| `SV_StartVertexLocation`      | `BuiltIn BaseVertex`              |
+| `SV_StartInstanceLocation`    | `BuiltIn BaseInstance`            |
+| `SV_StencilRef`               | `BuiltIn FragStencilRefEXT`       |
+| `SV_Target<N>`                | `Location`                        |
+| `SV_TessFactor`               | `BuiltIn TessLevelOuter`          |
+| `SV_VertexID`                 | `BuiltIn VertexIndex`             |
+| `SV_ViewID`                   | `BuiltIn ViewIndex`               |
+| `SV_ViewportArrayIndex`       | `BuiltIn ViewportIndex`           |
+
+*Note* that `SV_DrawIndex` and `SV_PointSize` are Slang-specific semantics that are not defined in HLSL.
+
+
+Behavior of `discard` after SPIR-V 1.6
+--------------------------------------
+
+`discard` is translated to OpKill in SPIR-V 1.5 and earlier. But it is translated to OpDemoteToHelperInvocation in SPIR-V 1.6.
+You can use OpDemoteToHelperInvocation by explicitly specifying the capability, "SPV_EXT_demote_to_helper_invocation".
+
+As an example, the following command-line arguments can control the behavior of `discard` when targeting SPIR-V.
+```
+slangc.exe test.slang -target spirv -profile spirv_1_5 # emits OpKill 
+slangc.exe test.slang -target spirv -profile spirv_1_6 # emits OpDemoteToHelperInvocation 
+slangc.exe test.slang -target spirv -capability SPV_EXT_demote_to_helper_invocation -profile spirv_1_5 # emits OpDemoteToHelperInvocation 
+```
+
+
+Supported HLSL features when targeting SPIR-V
+---------------------------------------------
+
+Slang supports the following HLSL feature sets when targeting SPIR-V.
+ - ray tracing,
+ - inline ray tracing,
+ - mesh shader,
+ - tessellation shader,
+ - geometry shader,
+ - wave intrinsics,
+ - barriers,
+ - atomics,
+ - and more
+
+
+Unsupported GLSL keywords when targeting SPIR-V
+-----------------------------------------------
+
+Slang doesn't support the following Precision qualifiers in Vulkan.
+ - lowp : RelaxedPrecision, on storage variable and operation
+ - mediump : RelaxedPrecision, on storage variable and operation
+ - highp : 32-bit, same as int or float
+
+Slang ignores the keywords above and all of them are treated as `highp`.
+
+
+Supported atomic types for each target
+--------------------------------------
+Shader Model 6.2 introduced [16-bit scalar types](https://github.com/microsoft/DirectXShaderCompiler/wiki/16-Bit-Scalar-Types) such as `float16_t` and `int16_t`, but they didn't come with any atomic operations.
+Shader Model 6.6 introduced [atomic operations for 64-bit integer types and bitwise atomic operations for 32-bit float type](https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_Int64_and_Float_Atomics.html), but 16-bit integer types and 16-bit float types are not a part of it.
+
+[GLSL 4.3](https://registry.khronos.org/OpenGL/specs/gl/GLSLangSpec.4.30.pdf) introduced atomic operations for 32-bit integer types.
+GLSL 4.4 with [GL_EXT_shader_atomic_int64](https://github.com/KhronosGroup/GLSL/blob/main/extensions/ext/GL_EXT_shader_atomic_int64.txt) can use atomic operations for 64-bit integer types.
+GLSL 4.6 with [GL_EXT_shader_atomic_float](https://github.com/KhronosGroup/GLSL/blob/main/extensions/ext/GLSL_EXT_shader_atomic_float.txt) can use atomic operations for 32-bit float type.
+GLSL 4.6 with [GL_EXT_shader_atomic_float2](https://github.com/KhronosGroup/GLSL/blob/main/extensions/ext/GLSL_EXT_shader_atomic_float2.txt) can use atomic operations for 16-bit float type.
+
+SPIR-V 1.5 with [SPV_EXT_shader_atomic_float_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_add.asciidoc) and [SPV_EXT_shader_atomic_float_min_max](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_min_max.asciidoc) can use atomic operations for 32-bit float type and 64-bit float type.
+SPIR-V 1.5 with [SPV_EXT_shader_atomic_float16_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float16_add.asciidoc) can use atomic operations for 16-bit float type.
+
+|        |  32-bit integer | 64-bit integer  |      32-bit float     |  64-bit float    |   16-bit float   |
+|--------|-----------------|-----------------|-----------------------|------------------|------------------|
+| HLSL   |   Yes (SM5.0)   |   Yes (SM6.6)   | Only bit-wise (SM6.6) |       No         |      No          |
+| GLSL   |   Yes (GL4.3)   | Yes (GL4.4+ext) |    Yes (GL4.6+ext)    | Yes (GL4.6+ext)  | Yes (GL4.6+ext)  |
+| SPIR-V |   Yes           |     Yes         |    Yes (SPV1.5+ext)   | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) |
+
+
+ConstantBuffer, StructuredBuffer and ByteAddressBuffer
+-----------------------------------------------------------------------------------------------
+
+Each member in a `ConstantBuffer` will be emitted as `uniform` parameter in a uniform block.
+StructuredBuffer and ByteAddressBuffer are translated to a shader storage buffer with `readonly` access.
+RWStructuredBuffer and RWByteAddressBuffer are translated to a shader storage buffer with `read-write` access.
+RasterizerOrderedStructuredBuffer and RasterizerOrderedByteAddressBuffer will use an extension, `SPV_EXT_fragment_shader_interlock`.
+
+If you need to apply a different buffer layout for individual `ConstantBuffer` or `StructuredBuffer`, you can specify the layout as a second generic argument. E.g., `ConstantBuffer<T, Std430DataLayout>`, `StructuredBuffer<T, Std140DataLayout>`, `StructuredBuffer<T, Std430DataLayout>` or `StructuredBuffer<T, ScalarDataLayout>`.
+
+Note that there are compiler options, "-fvk-use-scalar-layout" / "-force-glsl-scalar-layout" and "-fvk-use-dx-layout".
+These options do the same but they are applied globally.
+
+
+ParameterBlock for SPIR-V target
+--------------------------------
+
+`ParameterBlock` is a Slang generic type for binding uniform parameters.
+In contrast to `ConstantBuffer`, a `ParameterBlock<T>` introduces a new descriptor set ID for resource/sampler handles defined in the element type `T`.
+
+`ParameterBlock` is designed specifically for D3D12/Vulkan/Metal/WebGPU, so that parameters defined in `T` can be placed into an independent descriptor table/descriptor set/argument buffer/binding group.
+
+For example, when targeting Vulkan, when a ParameterBlock doesn't contain nested parameter block fields, it will always map to a single descriptor set, with a dedicated set number and every resource is placed into the set with binding index starting from 0. This allows the user application to create and pre-populate the descriptor set and reuse it during command encoding, without explicitly specifying the binding index for each individual parameter.
+
+When both ordinary data fields and resource typed fields exist in a parameter block, all ordinary data fields will be grouped together into a uniform buffer and appear as a binding 0 of the resulting descriptor set.
+
+
+Push Constants
+---------------------
+
+By default, a `uniform` parameter defined in the parameter list of an entrypoint function is translated to a push constant in SPIRV, if the type of the parameter is ordinary data type (no resources/textures).
+All `uniform` parameters defined in global scope are grouped together and placed in a default constant buffer. You can make a global uniform parameter laid out as a push constant by using the `[vk::push_constant]` attribute
+on the uniform parameter. All push constants follow the std430 layout by default.
+
+Specialization Constants
+------------------------
+
+You can specify a global constant to translate into a SPIRV specialization constant with the `[SpecializationConstant]` attribute.
+For example:
+```csharp
+[SpecializationConstant]
+const int myConst = 1; // Maps to a SPIRV specialization constant
+```
+
+By default, Slang will automatically assign `constant_id` number for specialization constants. If you wish to explicitly specify them, use `[vk::constant_id]` attribute:
+```csharp
+[vk::constant_id(1)]
+const int myConst = 1;
+```
+
+Alternatively, the GLSL `layout` syntax is also supported by Slang:
+```glsl
+layout(constant_id = 1) const int MyConst = 1;
+```
+
+SPIR-V specific Attributes 
+--------------------------
+
+DXC supports a few attributes and command-line arguments for targeting SPIR-V. Similar to DXC, Slang supports a few of the attributes as follows:
+
+### [[vk::binding(binding: int, set: int = 0)]]
+Similar to `binding` layout qualifier in Vulkan. It specifies the uniform buffer binding point, and the descriptor set for Vulkan.
+
+### [[vk::location(X)]]
+Same as `location` layout qualifier in Vulkan. For vertex shader inputs, it specifies the number of the vertex attribute from which input values are taken. For inputs of all other shader types, the location specifies a vector number that can be used to match against outputs from a previous shader stage.
+
+### [[vk::index(Y)]]
+Same as `index` layout qualifier in Vulkan. It is valid only when used with `[[vk::location(X)]]`. For fragment shader outputs, the location and index specify the color output number and index receiving the values of the output. For outputs of all other shader stages, the location specifies a vector number that can be used to match against inputs in a subsequent shader stage.
+
+### [[vk::input_attachment_index(i)]]
+Same as `input_attachment_index` layout qualifier in Vulkan. It selects which subpass input is being read from. It is valid only when used on subpassInput type uniform variables.
+
+### [[vk::push_constant]]
+Same as `push_constant` layout qualifier in Vulkan. It is applicable only to a uniform block and it will be copied to a special memory location to which the GPU may have more direct access.
+
+### [vk::image_format(format : String)]
+Same as `[[vk::image_format("XX")]]` layout qualifier in DXC. Vulkan/GLSL allows the format string to be specified without the keyword, `image_format`.  Consider the following Slang code, as an example,
+```csharp
+[vk::image_format("r32f")] RWTexture2D<float> typicalTexture;
+```
+It will generate the following GLSL,
+> layout(r32f) uniform image2D typicalTexture_0;
+
+Or it will generate the following SPIR-V code,
+> %18 = OpTypeImage %float 2D 2 0 0 2 R32f
+
+### [vk::shader_record]
+Same as `shaderRecordEXT` layout qualifier in [GL_EXT_ray_tracing extension](https://github.com/KhronosGroup/GLSL/blob/main/extensions/ext/GLSL_EXT_ray_tracing.txt).
+It can be used on a buffer block that represents a buffer within a shader record as defined in the Ray Tracing API.
+
+
+Multiple entry points support
+-----------------------------
+
+To use multiple entry points, you will need to use a compiler option, `-fvk-use-entrypoint-name`.
+
+Because GLSL requires the entry point to be named, "main", a GLSL shader can have only one entry point.
+The default behavior of Slang is to rename all entry points to "main" when targeting SPIR-V.
+
+When there is more than one entry point, the default behavior will prevent a shader from having more than one entry point.
+To generate a valid SPIR-V with multiple entry points, use `-fvk-use-entrypoint-name` compiler option to disable the renaming behavior and preserve the entry point names.
+
+
+Global memory pointers
+------------------------------
+
+Slang supports global memory pointers when targeting SPIRV. See [an example and explanation](convenience-features.html#pointers-limited).
+
+`float4*` in user code will be translated to a pointer in PhysicalStorageBuffer storage class in SPIRV.
+When a slang module uses a pointer type, the resulting SPIRV will be using the SpvAddressingModelPhysicalStorageBuffer64 addressing mode. Modules without use of pointers will use SpvAddressingModelLogical addressing mode.
+
+
+Matrix type translation
+-----------------------
+
+An m-row-by-n-column matrix in Slang, represented as float`m`x`n` or matrix<T, m, n>, is translated to OpTypeMatrix (OpTypeVector(T, n), m) in SPIRV. Note that in SPIR-V terminology, this type is referred to as an m-column-by-n-row matrix.
+
+The swap of row and column terminology may seem to be confusing at first, but this is the only translation without needing extra operations that may have negative performance consequences. For example, consider the following Slang code:
+```
+float3x4 v;
+for (int i = 0; i < 3; ++i)
+{
+  for (int j = 0; j < 4; ++j)
+  {
+    v[i][j] = i * 4 + j;
+  }
+}
+```
+The Slang shader above can iterate each element of a `float3x4` matrix. This is similar to how a multi-dimensional array is handled in C and HLSL. When a matrix type is `float3x4`, the first dimension indexing, `i`, corresponds to the first value specified in the matrix type `3`. And the second dimension indexing, `j`, corresponds to the second value specified in the matrix type `4`.
+
+A matrix in Slang can also be seen as an array of a vector type, so the following code is the same as above.
+```
+float3x4 v;
+for (int i = 0; i < 3; ++i)
+{
+  v[i] = float4(0, 1, 2, 3);
+  v[i] += i * 4;
+}
+```
+
+For the given example above, when targeting SPIR-V, Slang emits a matrix that consists of three vectors each of which has four elements,
+```
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4 ; <= float4 type
+%mat3v4float = OpTypeMatrix %v4float 3 ; <= three of float4
+```
+
+An alternative way to emit SPIR-V code is to emit four vectors and each vector has three elements. Slang doesn't do this but this is a more direct translation because SPIR-V spec defines OpTypeMatrix to take "Column Count" not row.
+```
+; NOT SLANG EMITTED CODE
+%v3float = OpTypeVector %float 3 ; <= float3 type
+%mat4v3float = OpTypeMatrix %v3float 4 ; <= four of float3
+```
+However, this results in a more complicated access pattern to the elements in a matrix, because `v[i]` will no longer correspond to a vector natively when emitted to SPIR-V.
+
+Put another way, Slang treats columns as rows and rows as columns when targeting GLSL or SPIR-V. This is the same as [how DXC handles a matrix when emitting SPIR-V](https://github.com/Microsoft/DirectXShaderCompiler/blob/main/docs/SPIR-V.rst#appendix-a-matrix-representation).
+
+Due to the swap of row and column in terminology, the matrix multiplication needs to be performed a little differently. Slang translates a matrix multiplication, `mul(mat1, mat2)`, to `transpose(mul(transpose(mat2), transpose(mat1)))` when targeting SPIR-V.
+
+Note that the matrix translation explained above is orthogonal to the memory layout of a matrix. The memory layout is related to how CPU places matrix values in the memory and how GPU reads them. It is like how `std140` or `std430` works. DXC by default uses `column_major` memory layout and Slang uses row-major memory layout. For more information about the matrix memory layout, please see [a1-01-matrix-layout](a1-01-matrix-layout.md).
+
+
+Legalization
+------------
+
+Legalization is a process where Slang applies a slightly different approach to translate the input Slang shader to the target.
+This process allows Slang shaders to be written in a syntax that SPIR-V may not be able to achieve natively.
+
+Slang allows opaque resource types to be used as members of a struct. These members will be hoisted out of struct types and become global variables.
+
+Slang allows functions that return any resource types as return type or `out` parameter as long as things are statically resolvable.
+
+Slang allows functions that return arrays. These functions will be converted to return the array via an out parameter in SPIRV.
+
+Slang allows putting scalar/vector/matrix/array types directly as element type of a constant buffer or structured buffers. Such element types will be wrapped in a struct type when emitting to SPIRV.
+
+When RasterizerOrder resources are used, the order of the rasterization is guaranteed by the instructions from `SPV_EXT_fragment_shader_interlock` extension.
+
+A `StructuredBuffer` with a primitive type such as `StructuredBuffer<int> v` is translated to a buffer with a struct that has the primitive type, which is more like `struct Temp { int v; }; StructuredBuffer<Temp> v;`. This is because SPIR-V requires buffer variables to be declared within a named buffer block.
+
+When `pervertex` keyword is used, the given type for the varying input will be translated into an array of the given type whose element size is 3. It is because each triangle consists of three vertices.
+
+
+Tessellation
+------------
+
+In HLSL and Slang, a Hull shader requires two functions: the Hull shader itself and a patch function.
+A typical example of a Hull shader will look like the following.
+```
+// Hull Shader (HS)
+[domain("quad")]
+[patchconstantfunc("constants")]
+HS_OUT main(InputPatch<VS_OUT, 4> patch, uint i : SV_OutputControlPointID)
+{
+  ...
+}
+HSC_OUT constants(InputPatch<VS_OUT, 4> patch)
+{
+  ...
+}
+```
+
+When targeting SPIR-V, the patch function is merged as a part of the Hull shader, because SPIR-V doesn't have a concept equivalent to `patchconstantfunc`.
+The function used for `patchconstantfunc` should be called only once for each patch.
+
+As an example, the Hull shader above will be emitted as follows:
+```
+void main() {
+    ...
+    main(patch, gl_InvocationID);
+    barrier(); // OpControlBarrier
+    if (gl_InvocationID == 0)
+    {
+        constants(patch);
+    }
+}
+```
+
+This behavior is the same as [how DXC translates Hull shader from HLSL to SPIR-V](https://github.com/Microsoft/DirectXShaderCompiler/blob/main/docs/SPIR-V.rst#patch-constant-function).
+
+
+SPIR-V specific Compiler options
+--------------------------------
+
+The following compiler options are specific to SPIR-V.
+
+### -emit-spirv-directly
+Generate SPIR-V output directly (default)
+It cannot be used with -emit-spirv-via-glsl
+
+### -emit-spirv-via-glsl
+Generate SPIR-V output by compiling to glsl source first, then use glslang compiler to produce SPIRV from the glsl.
+It cannot be used with -emit-spirv-directly
+
+### -g
+Include debug information in the generated code, where possible.
+When targeting SPIR-V, this option emits [SPIR-V NonSemantic Shader DebugInfo Instructions](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/nonsemantic/NonSemantic.Shader.DebugInfo.100.asciidoc).
+
+### -O<optimization-level>
+Set the optimization level.
+Under the `-O0` option, Slang will not perform extensive inlining for all function calls; instead, it will preserve the call graph as much as possible to help with understanding the SPIRV structure and diagnosing any downstream toolchain issues.
+
+### -fvk-{b|s|t|u}-shift <N> <space>
+For example '-fvk-b-shift <N> <space>' shifts by N the inferred binding
+numbers for all resources in 'b' registers of space <space>. For a resource attached with :register(bX, <space>)
+but not [vk::binding(...)], sets its Vulkan descriptor set to <space> and binding number to X + N. If you need to
+shift the inferred binding numbers for more than one space, provide more than one such option. If more than one
+such option is provided for the same space, the last one takes effect. If you need to shift the inferred binding
+numbers for all sets, use 'all' as <space>.
+
+For more information, see the following pages:
+ - [DXC description](https://github.com/Microsoft/DirectXShaderCompiler/blob/main/docs/SPIR-V.rst#implicit-binding-number-assignment)
+ - [GLSL wiki](https://github.com/KhronosGroup/glslang/wiki/HLSL-FAQ#auto-mapped-binding-numbers)
+
+### -fvk-bind-globals <N> <descriptor-set>
+Places the $Globals cbuffer at descriptor set <descriptor-set> and binding <N>.
+It lets you specify the descriptor for the source at a certain register.
+
+For more information, see the following pages:
+ - [DXC description](https://github.com/Microsoft/DirectXShaderCompiler/blob/main/docs/SPIR-V.rst#hlsl-global-variables-and-vulkan-binding)
+
+### -fvk-use-scalar-layout, -force-glsl-scalar-layout
+Make data accessed through ConstantBuffer, ParameterBlock, StructuredBuffer, ByteAddressBuffer and general pointers follow the 'scalar' layout when targeting GLSL or SPIRV.
+
+### -fvk-use-gl-layout
+Use std430 layout instead of D3D buffer layout for raw buffer load/stores.
+
+### -fvk-use-dx-layout
+Pack members using FXC's member packing rules when targeting GLSL or SPIRV.
+
+### -fvk-use-entrypoint-name
+Uses the entrypoint name from the source instead of 'main' in the spirv output.
+
+### -fspv-reflect
+Include reflection decorations in the resulting SPIRV for shader parameters.
+
+### -spirv-core-grammar
+A path to a specific spirv.core.grammar.json to use when generating SPIR-V output
+
+
diff --git a/external/slang/share/doc/slang/user-guide/a2-02-metal-target-specific.md b/external/slang/share/doc/slang/user-guide/a2-02-metal-target-specific.md
new file mode 100644
index 00000000..48c6b458
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a2-02-metal-target-specific.md
@@ -0,0 +1,298 @@
+---
+layout: user-guide
+permalink: /user-guide/metal-target-specific
+---
+
+# Metal-specific functionalities
+
+This chapter provides information for Metal-specific functionalities and
+behaviors in Slang.
+
+## Entry Point Parameter Handling
+
+Slang performs several transformations on entry point parameters when targeting Metal:
+
+- Struct parameters are flattened to eliminate nested structures
+- Input parameters with varying inputs are packed into a single struct
+- System value semantics are translated to Metal attributes
+- Parameters without semantics are given automatic attribute indices
+
+## System-Value semantics
+
+The system-value semantics are translated to the following Metal attributes:
+
+| SV semantic name            | Metal attribute                                      |
+| --------------------------- | ---------------------------------------------------- |
+| `SV_Position`               | `[[position]]`                                       |
+| `SV_Coverage`               | `[[sample_mask]]`                                    |
+| `SV_Depth`                  | `[[depth(any)]]`                                     |
+| `SV_DepthGreaterEqual`      | `[[depth(greater)]]`                                 |
+| `SV_DepthLessEqual`         | `[[depth(less)]]`                                    |
+| `SV_DispatchThreadID`       | `[[thread_position_in_grid]]`                        |
+| `SV_GroupID`                | `[[threadgroup_position_in_grid]]`                   |
+| `SV_GroupThreadID`          | `[[thread_position_in_threadgroup]]`                 |
+| `SV_GroupIndex`             | Calculated from `SV_GroupThreadID` and group extents |
+| `SV_InstanceID`             | `[[instance_id]]`                                    |
+| `SV_IsFrontFace`            | `[[front_facing]]`                                   |
+| `SV_PrimitiveID`            | `[[primitive_id]]`                                   |
+| `SV_RenderTargetArrayIndex` | `[[render_target_array_index]]`                      |
+| `SV_SampleIndex`            | `[[sample_id]]`                                      |
+| `SV_Target<N>`              | `[[color(N)]]`                                       |
+| `SV_VertexID`               | `[[vertex_id]]`                                      |
+| `SV_ViewportArrayIndex`     | `[[viewport_array_index]]`                           |
+| `SV_StartVertexLocation`    | `[[base_vertex]]`                                    |
+| `SV_StartInstanceLocation`  | `[[base_instance]]`                                  |
+
+Custom semantics are mapped to user attributes:
+
+- `[[user(SEMANTIC_NAME)]]` For non-system value semantics
+- `[[user(SEMANTIC_NAME_INDEX)]]` When semantic has an index
+
+## Interpolation Modifiers
+
+Slang maps interpolation modifiers to Metal's interpolation attributes:
+
+| Slang Interpolation | Metal Attribute             |
+| ------------------- | --------------------------- |
+| `nointerpolation`   | `[[flat]]`                  |
+| `noperspective`     | `[[center_no_perspective]]` |
+| `linear`            | `[[sample_no_perspective]]` |
+| `sample`            | `[[sample_perspective]]`    |
+| `centroid`          | `[[center_perspective]]`    |
+
+## Resource Types
+
+Resource types are translated with appropriate Metal qualifiers:
+
+| Slang Type            | Metal Translation  |
+| --------------------- | ------------------ |
+| `Texture2D`           | `texture2d`        |
+| `RWTexture2D`         | `texture2d`        |
+| `ByteAddressBuffer`   | `uint32_t device*` |
+| `StructuredBuffer<T>` | `device* T`        |
+| `ConstantBuffer<T>`   | `constant* T`      |
+
+| Slang Type                        | Metal Translation                     |
+| --------------------------------- | ------------------------------------- |
+| `Texture1D`                       | `texture1d`                           |
+| `Texture1DArray`                  | `texture1d_array`                     |
+| `RWTexture1D`                     | `texture1d`                           |
+| `RWTexture1DArray`                | `texture1d_array`                     |
+| `Texture2D`                       | `texture2d`                           |
+| `Texture2DArray`                  | `texture2d_array`                     |
+| `RWTexture2D`                     | `texture2d`                           |
+| `RWTexture2DArray`                | `texture2d_array`                     |
+| `Texture3D`                       | `texture3d`                           |
+| `RWTexture3D`                     | `texture3d`                           |
+| `TextureCube`                     | `texturecube`                         |
+| `TextureCubeArray`                | `texturecube_array`                   |
+| `Buffer<T>`                       | `device* T`                           |
+| `RWBuffer<T>`                     | `device* T`                           |
+| `ByteAddressBuffer`               | `device* uint32_t`                    |
+| `RWByteAddressBuffer`             | `device* uint32_t`                    |
+| `StructuredBuffer<T>`             | `device* T`                           |
+| `RWStructuredBuffer<T>`           | `device* T`                           |
+| `AppendStructuredBuffer<T>`       | `device* T`                           |
+| `ConsumeStructuredBuffer<T>`      | `device* T`                           |
+| `ConstantBuffer<T>`               | `constant* T`                         |
+| `SamplerState`                    | `sampler`                             |
+| `SamplerComparisonState`          | `sampler`                             |
+| `RaytracingAccelerationStructure` | `(Not supported)`                     |
+| `RasterizerOrderedTexture2D`      | `texture2d [[raster_order_group(0)]]` |
+| `RasterizerOrderedBuffer<T>`      | `device* T [[raster_order_group(0)]]` |
+
+Raster-ordered access resources receive the `[[raster_order_group(0)]]`
+attribute, for example `texture2d<float, access::read_write> tex
+[[raster_order_group(0)]]`.
+
+# Array Types
+
+Array types in Metal are declared using the array template:
+
+| Slang Type          | Metal Translation          |
+| ------------------- | -------------------------- |
+| `ElementType[Size]` | `array<ElementType, Size>` |
+
+# Matrix Layout
+
+Metal exclusively uses column-major matrix layout. Slang automatically handles
+the translation of matrix operations to maintain correct semantics:
+
+- Matrix multiplication is transformed to account for layout differences
+- Matrix types are declared as `matrix<T, Columns, Rows>`, for example
+  `float3x4` is represented as `matrix<float, 3, 4>`
+
+# Mesh Shader Support
+
+Mesh shaders can be targeted using the following types and syntax, which are the same as for task/mesh shaders generally in Slang.
+
+```slang
+[outputtopology("triangle")]
+[numthreads(12, 1, 1)]
+void meshMain(
+    in uint tig: SV_GroupIndex,
+    in payload MeshPayload meshPayload,
+    OutputVertices<Vertex, MAX_VERTS> verts,
+    OutputIndices<uint3, MAX_PRIMS> triangles,
+    OutputPrimitives<Primitive, MAX_PRIMS> primitives
+    )
+```
+
+## Header Inclusions and Namespace
+
+When targeting Metal, Slang automatically includes the following headers, which
+are available to any intrinsic code.
+
+```cpp
+#include <metal_stdlib>
+#include <metal_math>
+#include <metal_texture>
+using namespace metal;
+```
+
+## Parameter blocks and Argument Buffers
+
+`ParameterBlock` values are translated into _Argument Buffers_ potentially
+containing nested resources. For example this Slang code...
+
+```slang
+struct MyParameters
+{
+    int x;
+    int y;
+    StructuredBuffer<float> buffer1;
+    RWStructuredBuffer<uint3> buffer2;
+}
+
+ParameterBlock<MyParameters> gObj;
+
+void main(){ ... gObj ... }
+```
+
+... results in this Metal output:
+
+```cpp
+struct MyParameters
+{
+    int x;
+    int y;
+    float device* buffer1;
+    uint3 device* buffer2;
+};
+
+[[kernel]] void main(MyParameters constant* gObj [[buffer(1)]])
+```
+
+## Struct Parameter Flattening
+
+When targeting Metal, top-level nested struct parameters are automatically
+flattened. For example:
+
+```slang
+struct NestedStruct
+{
+    float2 uv;
+};
+struct InputStruct
+{
+    float4 position;
+    float3 normal;
+    NestedStruct nested;
+};
+```
+
+Will be flattened to:
+
+```cpp
+struct InputStruct
+{
+    float4 position;
+    float3 normal;
+    float2 uv;
+};
+```
+
+## Return Value Handling
+
+Non-struct return values from entry points are automatically wrapped in a
+struct with appropriate semantics. For example:
+
+```slang
+float4 main() : SV_Target
+{
+    return float4(1,2,3,4);
+}
+```
+
+becomes:
+
+```c++
+struct FragmentOutput
+{
+    float4 value : SV_Target;
+};
+FragmentOutput main()
+{
+    return { float4(1,2,3,4) };
+}
+```
+
+## Value Type Conversion
+
+Metal enforces strict type requirements for certain operations. Slang
+automatically performs the following conversions:
+
+- Vector size expansion (e.g., float2 to float4), for example when the user
+  specified `float2` but the semantic type in Metal is float4.
+- Image store value expansion to 4-components
+
+For example:
+
+```slang
+RWTexture2D<float2> tex;
+tex[coord] = float2(1,2);  // Automatically expanded to float4(1,2,0,0)
+```
+
+## Conservative Rasterization
+
+Since Metal doesn't support conservative rasterization, SV_InnerCoverage is always false.
+
+## Address Space Assignment
+
+Metal requires explicit address space qualifiers. Slang automatically assigns appropriate address spaces:
+
+| Variable Type         | Metal Address Space |
+| --------------------- | ------------------- |
+| Local Variables       | `thread`            |
+| Global Variables      | `device`            |
+| Uniform Buffers       | `constant`          |
+| RW/Structured Buffers | `device`            |
+| Group Shared          | `threadgroup`       |
+| Parameter Blocks      | `constant`          |
+
+## Explicit Parameter Binding
+
+The HLSL `:register()` semantic is respected when emitting Metal code.
+
+Since Metal does not differentiate a constant buffer, a shader resource (read-only) buffer and an unordered access buffer, Slang will map `register(tN)`, `register(uN)` and `register(bN)` to `[[buffer(N)]]` when such `register` semantic is declared on a buffer typed parameter.
+
+`spaceN` specifiers inside `register` semantics are ignored.
+
+The `[vk::location(N)]` attributes on stage input/output parameters are respected.
+
+## Specialization Constants
+
+Specialization constants declared with the `[SpecializationConstant]` or `[vk::constant_id]` attribute will be translated into a `function_constant` when generating Metal source.
+For example:
+
+```csharp
+[vk::constant_id(7)]
+const int a = 2;
+```
+
+Translates to:
+
+```metal
+constant int fc_a_0 [[function_constant(7)]];
+constant int a_0 = is_function_constant_defined(fc_a_0) ? fc_a_0 : 2;
+```
diff --git a/external/slang/share/doc/slang/user-guide/a2-03-wgsl-target-specific.md b/external/slang/share/doc/slang/user-guide/a2-03-wgsl-target-specific.md
new file mode 100644
index 00000000..b88c2864
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a2-03-wgsl-target-specific.md
@@ -0,0 +1,182 @@
+---
+layout: user-guide
+permalink: /user-guide/wgsl-target-specific
+---
+
+WGSL specific functionalities
+=============================
+
+This chapter provides information for WGSL (WebGPU Shading Language) -specific functionalities and behaviors.
+
+
+System-Value semantics
+----------------------
+
+The system-value semantics are translated to the following WGSL code.
+
+| SV semantic name | WGSL code |
+|--|--|
+| SV_Barycentrics | *Not supported* |
+| SV_ClipDistance<N> | *Not supported* |
+| SV_CullDistance<N> | *Not supported* |
+| SV_Coverage | `@builtin(sample_mask)` |
+| SV_CullPrimitive | *Not supported* |
+| SV_Depth | `@builtin(frag_depth)` |
+| SV_DepthGreaterEqual | *Not supported* |
+| SV_DepthLessEqual | *Not supported* |
+| SV_DispatchThreadID | `@builtin(global_invocation_id)` |
+| SV_DomainLocation | *Not supported* |
+| SV_GSInstanceID | *Not supported* |
+| SV_GroupID | `@builtin(workgroup_id)` |
+| SV_GroupIndex | `@builtin(local_invocation_index)` |
+| SV_GroupThreadID | `@builtin(local_invocation_id)` |
+| SV_InnerCoverage | *Not supported* |
+| SV_InsideTessFactor | *Not supported* |
+| SV_InstanceID | `@builtin(instance_index)` |
+| SV_IntersectionAttributes | *Not supported* |
+| SV_IsFrontFace | `@builtin(front_facing)` |
+| SV_OutputControlPointID | *Not supported* |
+| SV_PointSize | *Not supported* |
+| SV_Position | `@builtin(position)` |
+| SV_PrimitiveID | *Not supported* |
+| SV_RenderTargetArrayIndex | *Not supported* |
+| SV_SampleIndex | `@builtin(sample_index)` |
+| SV_ShadingRate | *Not supported* |
+| SV_StartVertexLocation | *Not supported* |
+| SV_StartInstanceLocation | *Not supported* |
+| SV_StencilRef | *Not supported* |
+| SV_Target<N> | *Not supported* |
+| SV_TessFactor | *Not supported* |
+| SV_VertexID | `@builtin(vertex_index)` |
+| SV_ViewID | *Not supported* |
+| SV_ViewportArrayIndex | *Not supported* |
+
+
+Supported HLSL features when targeting WGSL
+-------------------------------------------
+
+The following table lists Slang's support for various HLSL feature sets, when targeting WGSL.
+
+| Feature set | Supported |
+| -- | -- |
+| ray tracing | No |
+| inline ray tracing | No |
+| mesh shader | No |
+| tessellation shader | No |
+| geometry shader | No |
+| wave intrinsics | No |
+| barriers | Yes |
+| atomics | Yes |
+
+
+Supported atomic types
+----------------------
+
+The following table shows what is supported when targeting WGSL:
+
+|              |  32-bit integer | 64-bit integer  |      32-bit float     |  64-bit float    |   16-bit float   |
+|--------------|-----------------|-----------------|-----------------------|------------------|------------------|
+| Supported?   |   Yes           |     No          |    No                 |       No         |      No          |
+
+
+ConstantBuffer, (RW/RasterizerOrdered)StructuredBuffer, (RW/RasterizerOrdered)ByteAddressBuffer
+-----------------------------------------------------------------------------------------------
+
+ConstantBuffer translates to the `uniform` address space with `read` access mode in WGSL.
+ByteAddressBuffer and RWByteAddressBuffer translate to `array<u32>` in the `storage` address space, with the `read` and `read_write` access modes in WGSL, respectively.
+StructuredBuffer and RWStructuredBuffer with struct type T translate to `array<T>` in the `storage` address space, with the `read` and `read_write` access modes in WGSL, respectively.
+
+
+Specialization Constants
+------------------------
+
+Specialization constants are not supported when targeting WGSL, at the moment.
+They should map to 'override declarations' in WGSL, however this is not yet implemented.
+
+
+Interlocked operations
+----------------------
+
+The InterlockedAdd, InterlockedAnd, etc... functions are not supported when targeting WGSL.
+Instead, operations on [`Atomic<T>`](https://shader-slang.com/stdlib-reference/types/atomic-0/index) types should be used.
+
+
+Entry Point Parameter Handling
+------------------------------
+
+Slang performs several transformations on entry point parameters when targeting WGSL:
+
+- Struct parameters and returned structs are flattened to eliminate nested structures.
+- System value semantics are translated to WGSL built-ins. (See the `@builtin` attribute, and the table above.)
+- Parameters without semantics are given automatic location indices. (See the `@location` attribute.)
+
+
+Parameter blocks
+----------------
+
+Each `ParameterBlock` is assigned its own bind group in WGSL.
+
+
+Write-only Textures
+---------------
+
+Many image formats supported by WebGPU can only be accessed in compute shader as a write-only image.
+Use `WTexture2D` type (similar to `RWTexture2D`) to write to an image when possible.
+The write-only texture types are also supported when targeting HLSL/GLSL/SPIR-V/Metal and CUDA.
+
+
+Pointers
+--------
+
+`out` and `inout` parameters in Slang are translated to pointer-typed parameters in WGSL.
+At callsites, a pointer value is formed and passed as argument using the `&` operator in WGSL.
+
+Since WGSL cannot form pointers to fields of structs (or fields of fields of structs, etc...), the described transformation cannot be done in a direct way when a function argument expression is an "access chain" like `myStruct.myField` or `myStruct.myStructField.someField`.
+In those cases, the argument is copied to a local variable, the address of the local variable is passed to the function, and then the local
+variable is written back to the struct field after the function call.
+
+Address Space Assignment
+------------------------
+
+WGSL requires explicit address space qualifiers. Slang automatically assigns appropriate address spaces:
+
+| Variable Type         | WGSL Address Space  |
+| --------------------- | ------------------- |
+| Local Variables       | `function`          |
+| Global Variables      | `private`           |
+| Uniform Buffers       | `uniform`           |
+| RW/Structured Buffers | `storage`           |
+| Group Shared          | `workgroup`         |
+| Parameter Blocks      | `uniform`           |
+
+
+Matrix type translation
+-----------------------
+
+An m-row-by-n-column matrix in Slang, represented as float`m`x`n` or matrix<T, m, n>, is translated to `mat[n]x[m]` in WGSL, i.e. a matrix with `n` columns and `m` rows.
+The rationale for this inversion of terminology is the same as [the rationale for SPIR-V](a2-01-spirv-target-specific.md#matrix-type-translation).
+Since the WGSL matrix multiplication convention is the normal one, where inner products of rows of the matrix on the left are taken with columns of the matrix on the right, the order of matrix products is also reversed in WGSL. This is relying on the fact that the transpose of a matrix product equals the product of the transposed matrix operands in reverse order.
+
+## Explicit Parameter Binding
+
+The `[vk::binding(index,set)]` attribute is respected when emitting WGSL code, and will translate to `@binding(index) @group(set)` in WGSL.
+
+If the `[vk::binding()]` attribute is not specified but a `:register()` semantic is present, Slang will derive the binding from the `register` semantic the same way as the SPIRV and GLSL backends.
+
+The `[vk::location(N)]` attributes on stage input/output parameters are respected.
+
+## Specialization Constants
+
+Specialization constants declared with the `[SpecializationConstant]` or `[vk::constant_id]` attribute will be translated into a global `override` declaration when generating WGSL source.
+For example:
+
+```csharp
+[vk::constant_id(7)]
+const int a = 2;
+```
+
+Translates to:
+
+```wgsl
+@id(7) override a : i32 = 2;
+```
\ No newline at end of file
diff --git a/external/slang/share/doc/slang/user-guide/a2-target-specific-features.md b/external/slang/share/doc/slang/user-guide/a2-target-specific-features.md
new file mode 100644
index 00000000..32be22f0
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a2-target-specific-features.md
@@ -0,0 +1,13 @@
+---
+layout: user-guide
+---
+
+# Target-specific features
+
+Slang can produce code for a variety of targets. When producing code for a target, Slang attempts to translate HLSL intrinsics to the closest functionality provided by the target. In addition, Slang also supports target-specific intrinsics and language extensions that allow users to make best use of the target. This chapter documents all the important target-specific behaviors.
+
+In this chapter:
+
+1. [SPIR-V target specific](./a2-01-spirv-target-specific.md)
+2. [Metal target specific](./a2-02-metal-target-specific.md)
+3. [WGSL target specific](./a2-03-wgsl-target-specific.md)
diff --git a/external/slang/share/doc/slang/user-guide/a3-01-reference-capability-profiles.md b/external/slang/share/doc/slang/user-guide/a3-01-reference-capability-profiles.md
new file mode 100644
index 00000000..43fe8eed
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a3-01-reference-capability-profiles.md
@@ -0,0 +1,49 @@
+---
+layout: user-guide
+---
+
+Capability Profiles
+============================
+
+### Accepted values of `-profile`:
+
+> Note: To 'make' your own 'profile's, try mixing capabilities with `-capability`.
+
+`sm_{4_0,4_1,5_0,5_1,6_0,6_1,6_2,6_3,6_4,6_5,6_6,6_7}`
+* HLSL shader model
+
+`vs_{4_0,4_1,5_0,5_1,6_0,6_1,6_2,6_3,6_4,6_5,6_6,6_7}`
+* HLSL shader model + vertex shader
+
+`ps_{4_0,4_1,5_0,5_1,6_0,6_1,6_2,6_3,6_4,6_5,6_6,6_7}`
+* HLSL shader model + pixel shader
+
+`hs_{4_0,4_1,5_0,5_1,6_0,6_1,6_2,6_3,6_4,6_5,6_6,6_7}`
+* HLSL shader model + hull shader
+
+`gs_{4_0,4_1,5_0,5_1,6_0,6_1,6_2,6_3,6_4,6_5,6_6,6_7}`
+* HLSL shader model + geometry shader
+
+`ds_{4_0,4_1,5_0,5_1,6_0,6_1,6_2,6_3,6_4,6_5,6_6,6_7}`
+* HLSL shader model + domain shader
+
+`cs_{4_0,4_1,5_0,5_1,6_0,6_1,6_2,6_3,6_4,6_5,6_6,6_7}`
+* HLSL shader model + compute shader
+
+`ms_6_{5,6,7}`
+* HLSL shader model + mesh shader
+
+`as_6_{5,6,7}`
+* HLSL shader model + amplification shader
+
+`lib_6_{1,2,3,4,5,6,7}`
+* HLSL shader model for libraries
+
+`glsl_{110,120,130,140,150,330,400,410,420,430,440,450,460}`
+* GLSL versions
+
+`spirv_1_{1,2,3,4,5,6}`
+* SPIRV versions
+
+`metallib_2_{3,4}`
+* Metal versions
diff --git a/external/slang/share/doc/slang/user-guide/a3-02-reference-capability-atoms.md b/external/slang/share/doc/slang/user-guide/a3-02-reference-capability-atoms.md
new file mode 100644
index 00000000..9a6eead6
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a3-02-reference-capability-atoms.md
@@ -0,0 +1,1294 @@
+---
+layout: user-guide
+---
+
+Capability Atoms
+============================
+
+### Sections:
+
+1. [Targets](#Targets)
+2. [Stages](#Stages)
+3. [Versions](#Versions)
+4. [Extensions](#Extensions)
+5. [Compound Capabilities](#Compound-Capabilities)
+6. [Other](#Other)
+
+Targets
+----------------------
+*Capabilities to specify code generation targets (`glsl`, `spirv`...)*
+
+`textualTarget`
+> Represents a non-assembly code generation target.
+
+`hlsl`
+> Represents the HLSL code generation target.
+
+`glsl`
+> Represents the GLSL code generation target.
+
+`c`
+> Represents the C programming language code generation target.
+
+`cpp`
+> Represents the C++ programming language code generation target.
+
+`cuda`
+> Represents the CUDA code generation target.
+
+`metal`
+> Represents the Metal programming language code generation target.
+
+`spirv`
+> Represents the SPIR-V code generation target.
+
+`wgsl`
+> Represents the WebGPU shading language code generation target.
+
+Stages
+----------------------
+*Capabilities to specify code generation stages (`vertex`, `fragment`...)*
+
+`vertex`
+> Vertex shader stage
+
+`fragment`
+> Fragment shader stage
+
+`compute`
+> Compute shader stage
+
+`hull`
+> Hull shader stage
+
+`domain`
+> Domain shader stage
+
+`geometry`
+> Geometry shader stage
+
+`pixel`
+> Pixel shader stage
+
+`tesscontrol`
+> Tessellation Control shader stage
+
+`tesseval`
+> Tessellation Evaluation shader stage
+
+`raygen`
+> Ray-Generation shader stage & ray-tracing capabilities
+
+`raygeneration`
+> Ray-Generation shader stage & ray-tracing capabilities
+
+`intersection`
+> Intersection shader stage & ray-tracing capabilities
+
+`anyhit`
+> Any-Hit shader stage & ray-tracing capabilities
+
+`closesthit`
+> Closest-Hit shader stage & ray-tracing capabilities
+
+`callable`
+> Callable shader stage & ray-tracing capabilities
+
+`miss`
+> Ray-Miss shader stage & ray-tracing capabilities
+
+`mesh`
+> Mesh shader stage & mesh shader capabilities
+
+`amplification`
+> Amplification shader stage & mesh shader capabilities
+
+Versions
+----------------------
+*Capabilities to specify versions of a code generation target (`sm_5_0`, `GLSL_400`...)*
+
+`glsl_spirv_1_0`
+> Represents SPIR-V 1.0 through glslang.
+
+`glsl_spirv_1_1`
+> Represents SPIR-V 1.1 through glslang.
+
+`glsl_spirv_1_2`
+> Represents SPIR-V 1.2 through glslang.
+
+`glsl_spirv_1_3`
+> Represents SPIR-V 1.3 through glslang.
+
+`glsl_spirv_1_4`
+> Represents SPIR-V 1.4 through glslang.
+
+`glsl_spirv_1_5`
+> Represents SPIR-V 1.5 through glslang.
+
+`glsl_spirv_1_6`
+> Represents SPIR-V 1.6 through glslang.
+
+`metallib_2_3`
+> Represents MetalLib 2.3.
+
+`metallib_2_4`
+> Represents MetalLib 2.4.
+
+`metallib_3_0`
+> Represents MetalLib 3.0.
+
+`metallib_3_1`
+> Represents MetalLib 3.1.
+
+`metallib_latest`
+> Represents the latest MetalLib version.
+
+`hlsl_nvapi`
+> Represents HLSL NVAPI support.
+
+`dxil_lib`
+> Represents capabilities required for DXIL Library compilation.
+
+`spirv_1_0`
+> Represents SPIR-V 1.0 version.
+
+`spirv_1_1`
+> Represents SPIR-V 1.1 version, which includes SPIR-V 1.0.
+
+`spirv_1_2`
+> Represents SPIR-V 1.2 version, which includes SPIR-V 1.1.
+
+`spirv_1_3`
+> Represents SPIR-V 1.3 version, which includes SPIR-V 1.2.
+
+`spirv_1_4`
+> Represents SPIR-V 1.4 version, which includes SPIR-V 1.3.
+
+`spirv_1_5`
+> Represents SPIR-V 1.5 version, which includes SPIR-V 1.4 and additional extensions.
+
+`spirv_1_6`
+> Represents SPIR-V 1.6 version, which includes SPIR-V 1.5 and additional extensions.
+
+`spirv_latest`
+> Represents the latest SPIR-V version.
+
+`sm_4_0_version`
+> HLSL shader model 4.0 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_4_0`
+> HLSL shader model 4.0 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_4_1_version`
+> HLSL shader model 4.1 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_4_1`
+> HLSL shader model 4.1 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_5_0_version`
+> HLSL shader model 5.0 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_5_0`
+> HLSL shader model 5.0 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_5_1_version`
+> HLSL shader model 5.1 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_5_1`
+> HLSL shader model 5.1 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_6_0_version`
+> HLSL shader model 6.0 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_6_0`
+> HLSL shader model 6.0 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_6_1_version`
+> HLSL shader model 6.1 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_6_1`
+> HLSL shader model 6.1 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_6_2_version`
+> HLSL shader model 6.2 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_6_2`
+> HLSL shader model 6.2 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_6_3_version`
+> HLSL shader model 6.3 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_6_3`
+> HLSL shader model 6.3 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_6_4_version`
+> HLSL shader model 6.4 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_6_4`
+> HLSL shader model 6.4 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_6_5_version`
+> HLSL shader model 6.5 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_6_5`
+> HLSL shader model 6.5 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_6_6_version`
+> HLSL shader model 6.6 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_6_6`
+> HLSL shader model 6.6 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_6_7_version`
+> HLSL shader model 6.7 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_6_7`
+> HLSL shader model 6.7 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`sm_6_8_version`
+> HLSL shader model 6.8 and related capabilities of other targets.
+> Does not include related GLSL/SPIRV extensions.
+
+`sm_6_8`
+> HLSL shader model 6.8 and related capabilities of other targets.
+> Includes related GLSL/SPIRV extensions.
+
+`GLSL_130`
+> GLSL 130 and related capabilities of other targets.
+
+`GLSL_140`
+> GLSL 140 and related capabilities of other targets.
+
+`GLSL_150`
+> GLSL 150 and related capabilities of other targets.
+
+`GLSL_330`
+> GLSL 330 and related capabilities of other targets.
+
+`GLSL_400`
+> GLSL 400 and related capabilities of other targets.
+
+`GLSL_410`
+> GLSL 410 and related capabilities of other targets.
+
+`GLSL_420`
+> GLSL 420 and related capabilities of other targets.
+
+`GLSL_430`
+> GLSL 430 and related capabilities of other targets.
+
+`GLSL_440`
+> GLSL 440 and related capabilities of other targets.
+
+`GLSL_450`
+> GLSL 450 and related capabilities of other targets.
+
+`GLSL_460`
+> GLSL 460 and related capabilities of other targets.
+
+`cuda_sm_1_0`
+> cuda 1.0 and related capabilities of other targets.
+
+`cuda_sm_2_0`
+> cuda 2.0 and related capabilities of other targets.
+
+`cuda_sm_3_0`
+> cuda 3.0 and related capabilities of other targets.
+
+`cuda_sm_3_5`
+> cuda 3.5 and related capabilities of other targets.
+
+`cuda_sm_4_0`
+> cuda 4.0 and related capabilities of other targets.
+
+`cuda_sm_5_0`
+> cuda 5.0 and related capabilities of other targets.
+
+`cuda_sm_6_0`
+> cuda 6.0 and related capabilities of other targets.
+
+`cuda_sm_7_0`
+> cuda 7.0 and related capabilities of other targets.
+
+`cuda_sm_8_0`
+> cuda 8.0 and related capabilities of other targets.
+
+`cuda_sm_9_0`
+> cuda 9.0 and related capabilities of other targets.
+
+Extensions
+----------------------
+*Capabilities to specify extensions (`GL_EXT`, `SPV_EXT`...)*
+
+`SPV_EXT_fragment_shader_interlock`
+> Represents the SPIR-V extension for fragment shader interlock operations.
+
+`SPV_EXT_physical_storage_buffer`
+> Represents the SPIR-V extension for physical storage buffer.
+
+`SPV_EXT_fragment_fully_covered`
+> Represents the SPIR-V extension for SPV_EXT_fragment_fully_covered.
+
+`SPV_EXT_descriptor_indexing`
+> Represents the SPIR-V extension for descriptor indexing.
+
+`SPV_EXT_shader_atomic_float_add`
+> Represents the SPIR-V extension for atomic float add operations.
+
+`SPV_EXT_shader_atomic_float16_add`
+> Represents the SPIR-V extension for atomic float16 add operations.
+
+`SPV_EXT_shader_atomic_float_min_max`
+> Represents the SPIR-V extension for atomic float min/max operations.
+
+`SPV_EXT_mesh_shader`
+> Represents the SPIR-V extension for mesh shaders.
+
+`SPV_EXT_demote_to_helper_invocation`
+> Represents the SPIR-V extension for demoting to helper invocation.
+
+`SPV_KHR_maximal_reconvergence`
+> Represents the SPIR-V extension for maximal reconvergence.
+
+`SPV_KHR_quad_control`
+> Represents the SPIR-V extension for quad group control.
+
+`SPV_KHR_fragment_shader_barycentric`
+> Represents the SPIR-V extension for fragment shader barycentric.
+
+`SPV_KHR_non_semantic_info`
+> Represents the SPIR-V extension for non-semantic information.
+
+`SPV_KHR_ray_tracing`
+> Represents the SPIR-V extension for ray tracing.
+
+`SPV_KHR_ray_query`
+> Represents the SPIR-V extension for ray queries.
+
+`SPV_KHR_ray_tracing_position_fetch`
+> Represents the SPIR-V extension for ray tracing position fetch.
+> Should be used with either SPV_KHR_ray_query or SPV_KHR_ray_tracing.
+
+`SPV_KHR_shader_clock`
+> Represents the SPIR-V extension for shader clock.
+
+`SPV_NV_shader_subgroup_partitioned`
+> Represents the SPIR-V extension for shader subgroup partitioned.
+
+`SPV_NV_ray_tracing_motion_blur`
+> Represents the SPIR-V extension for ray tracing motion blur.
+
+`SPV_NV_shader_invocation_reorder`
+> Represents the SPIR-V extension for shader invocation reorder.
+> Requires SPV_KHR_ray_tracing.
+
+`SPV_NV_shader_image_footprint`
+> Represents the SPIR-V extension for shader image footprint.
+
+`SPV_KHR_compute_shader_derivatives`
+> Represents the SPIR-V extension for compute shader derivatives.
+
+`SPV_NV_compute_shader_derivatives`
+> Represents the SPIR-V extension for compute shader derivatives.
+
+`SPV_GOOGLE_user_type`
+> Represents the SPIR-V extension for SPV_GOOGLE_user_type.
+
+`SPV_EXT_replicated_composites`
+> Represents the SPIR-V extension for SPV_EXT_replicated_composites.
+
+`SPV_NV_cooperative_vector`
+> Represents the SPIR-V extension for SPV_NV_cooperative_vector.
+
+`spvAtomicFloat32AddEXT`
+> Represents the SPIR-V capability for atomic float 32 add operations.
+
+`spvAtomicFloat16AddEXT`
+> Represents the SPIR-V capability for atomic float 16 add operations.
+
+`spvAtomicFloat64AddEXT`
+> Represents the SPIR-V capability for atomic float 64 add operations.
+
+`spvInt64Atomics`
+> Represents the SPIR-V capability for 64-bit integer atomics.
+
+`spvAtomicFloat32MinMaxEXT`
+> Represents the SPIR-V capability for atomic float 32 min/max operations.
+
+`spvAtomicFloat16MinMaxEXT`
+> Represents the SPIR-V capability for atomic float 16 min/max operations.
+
+`spvAtomicFloat64MinMaxEXT`
+> Represents the SPIR-V capability for atomic float 64 min/max operations.
+
+`spvDerivativeControl`
+> Represents the SPIR-V capability for 'derivative control' operations.
+
+`spvImageQuery`
+> Represents the SPIR-V capability for image query operations.
+
+`spvImageGatherExtended`
+> Represents the SPIR-V capability for extended image gather operations.
+
+`spvSparseResidency`
+> Represents the SPIR-V capability for sparse residency.
+
+`spvImageFootprintNV`
+> Represents the SPIR-V capability for image footprint.
+
+`spvMinLod`
+> Represents the SPIR-V capability for using minimum LOD operations.
+
+`spvFragmentShaderPixelInterlockEXT`
+> Represents the SPIR-V capability for using SPV_EXT_fragment_shader_interlock.
+
+`spvFragmentBarycentricKHR`
+> Represents the SPIR-V capability for using SPV_KHR_fragment_shader_barycentric.
+
+`spvFragmentFullyCoveredEXT`
+> Represents the SPIR-V capability for using SPV_EXT_fragment_fully_covered functionality.
+
+`spvGroupNonUniformBallot`
+> Represents the SPIR-V capability for group non-uniform ballot operations.
+
+`spvGroupNonUniformShuffle`
+> Represents the SPIR-V capability for group non-uniform shuffle operations.
+
+`spvGroupNonUniformArithmetic`
+> Represents the SPIR-V capability for group non-uniform arithmetic operations.
+
+`spvGroupNonUniformQuad`
+> Represents the SPIR-V capability for group non-uniform quad operations.
+
+`spvGroupNonUniformVote`
+> Represents the SPIR-V capability for group non-uniform vote operations.
+
+`spvGroupNonUniformPartitionedNV`
+> Represents the SPIR-V capability for group non-uniform partitioned operations.
+
+`spvRayTracingMotionBlurNV`
+> Represents the SPIR-V capability for ray tracing motion blur.
+
+`spvMeshShadingEXT`
+> Represents the SPIR-V capability for mesh shading.
+
+`spvRayTracingKHR`
+> Represents the SPIR-V capability for ray tracing.
+
+`spvRayTracingPositionFetchKHR`
+> Represents the SPIR-V capability for ray tracing position fetch.
+
+`spvRayQueryKHR`
+> Represents the SPIR-V capability for ray query.
+
+`spvRayQueryPositionFetchKHR`
+> Represents the SPIR-V capability for ray query position fetch.
+
+`spvShaderInvocationReorderNV`
+> Represents the SPIR-V capability for shader invocation reorder.
+
+`spvShaderClockKHR`
+> Represents the SPIR-V capability for shader clock.
+
+`spvShaderNonUniformEXT`
+> Represents the SPIR-V capability for non-uniform resource indexing.
+
+`spvShaderNonUniform`
+> Represents the SPIR-V capability for non-uniform resource indexing.
+
+`spvDemoteToHelperInvocationEXT`
+> Represents the SPIR-V capability for demoting to helper invocation.
+
+`spvDemoteToHelperInvocation`
+> Represents the SPIR-V capability for demoting to helper invocation.
+
+`spvReplicatedCompositesEXT`
+> Represents the SPIR-V capability for replicated composites.
+
+`spvCooperativeVectorNV`
+> Represents the SPIR-V capability for cooperative vectors.
+
+`spvCooperativeVectorTrainingNV`
+> Represents the SPIR-V capability for cooperative vector training.
+
+`spvMaximalReconvergenceKHR`
+> Represents the SPIR-V capability for maximal reconvergence.
+
+`spvQuadControlKHR`
+> Represents the SPIR-V capability for quad group control.
+
+`GL_EXT_buffer_reference`
+> Represents the GL_EXT_buffer_reference extension.
+
+`GL_EXT_buffer_reference_uvec2`
+> Represents the GL_EXT_buffer_reference_uvec2 extension.
+
+`GL_EXT_debug_printf`
+> Represents the GL_EXT_debug_printf extension.
+
+`GL_EXT_demote_to_helper_invocation`
+> Represents the GL_EXT_demote_to_helper_invocation extension.
+
+`GL_EXT_maximal_reconvergence`
+> Represents the GL_EXT_maximal_reconvergence extension.
+
+`GL_EXT_shader_quad_control`
+> Represents the GL_EXT_shader_quad_control extension.
+
+`GL_EXT_fragment_shader_barycentric`
+> Represents the GL_EXT_fragment_shader_barycentric extension.
+
+`GL_EXT_mesh_shader`
+> Represents the GL_EXT_mesh_shader extension.
+
+`GL_EXT_nonuniform_qualifier`
+> Represents the GL_EXT_nonuniform_qualifier extension.
+
+`GL_EXT_ray_query`
+> Represents the GL_EXT_ray_query extension.
+
+`GL_EXT_ray_tracing`
+> Represents the GL_EXT_ray_tracing extension.
+
+`GL_EXT_ray_tracing_position_fetch_ray_tracing`
+> Represents the GL_EXT_ray_tracing_position_fetch_ray_tracing extension.
+
+`GL_EXT_ray_tracing_position_fetch_ray_query`
+> Represents the GL_EXT_ray_tracing_position_fetch_ray_query extension.
+
+`GL_EXT_ray_tracing_position_fetch`
+> Represents the GL_EXT_ray_tracing_position_fetch extension.
+
+`GL_EXT_samplerless_texture_functions`
+> Represents the GL_EXT_samplerless_texture_functions extension.
+
+`GL_EXT_shader_atomic_float`
+> Represents the GL_EXT_shader_atomic_float extension.
+
+`GL_EXT_shader_atomic_float_min_max`
+> Represents the GL_EXT_shader_atomic_float_min_max extension.
+
+`GL_EXT_shader_atomic_float2`
+> Represents the GL_EXT_shader_atomic_float2 extension.
+
+`GL_EXT_shader_atomic_int64`
+> Represents the GL_EXT_shader_atomic_int64 extension.
+
+`GL_EXT_shader_explicit_arithmetic_types_int64`
+> Represents the GL_EXT_shader_explicit_arithmetic_types_int64 extension.
+
+`GL_EXT_shader_image_load_store`
+> Represents the GL_EXT_shader_image_load_store extension.
+
+`GL_EXT_shader_realtime_clock`
+> Represents the GL_EXT_shader_realtime_clock extension.
+
+`GL_EXT_texture_query_lod`
+> Represents the GL_EXT_texture_query_lod extension.
+
+`GL_EXT_texture_shadow_lod`
+> Represents the GL_EXT_texture_shadow_lod extension.
+
+`GL_ARB_derivative_control`
+> Represents the GL_ARB_derivative_control extension.
+
+`GL_ARB_fragment_shader_interlock`
+> Represents the GL_ARB_fragment_shader_interlock extension.
+
+`GL_ARB_gpu_shader5`
+> Represents the GL_ARB_gpu_shader5 extension.
+
+`GL_ARB_shader_image_load_store`
+> Represents the GL_ARB_shader_image_load_store extension.
+
+`GL_ARB_shader_image_size`
+> Represents the GL_ARB_shader_image_size extension.
+
+`GL_ARB_texture_multisample`
+> Represents the GL_ARB_texture_multisample extension.
+
+`GL_ARB_shader_texture_image_samples`
+> Represents the GL_ARB_shader_texture_image_samples extension.
+
+`GL_ARB_sparse_texture`
+> Represents the GL_ARB_sparse_texture extension.
+
+`GL_ARB_sparse_texture2`
+> Represents the GL_ARB_sparse_texture2 extension.
+
+`GL_ARB_sparse_texture_clamp`
+> Represents the GL_ARB_sparse_texture_clamp extension.
+
+`GL_ARB_texture_gather`
+> Represents the GL_ARB_texture_gather extension.
+
+`GL_ARB_texture_query_levels`
+> Represents the GL_ARB_texture_query_levels extension.
+
+`GL_ARB_shader_clock`
+> Represents the GL_ARB_shader_clock extension.
+
+`GL_ARB_shader_clock64`
+> Represents the GL_ARB_shader_clock64 extension.
+
+`GL_ARB_gpu_shader_int64`
+> Represents the GL_ARB_gpu_shader_int64 extension.
+
+`GL_KHR_memory_scope_semantics`
+> Represents the GL_KHR_memory_scope_semantics extension.
+
+`GL_KHR_shader_subgroup_arithmetic`
+> Represents the GL_KHR_shader_subgroup_arithmetic extension.
+
+`GL_KHR_shader_subgroup_ballot`
+> Represents the GL_KHR_shader_subgroup_ballot extension.
+
+`GL_KHR_shader_subgroup_basic`
+> Represents the GL_KHR_shader_subgroup_basic extension.
+
+`GL_KHR_shader_subgroup_clustered`
+> Represents the GL_KHR_shader_subgroup_clustered extension.
+
+`GL_KHR_shader_subgroup_quad`
+> Represents the GL_KHR_shader_subgroup_quad extension.
+
+`GL_KHR_shader_subgroup_shuffle`
+> Represents the GL_KHR_shader_subgroup_shuffle extension.
+
+`GL_KHR_shader_subgroup_shuffle_relative`
+> Represents the GL_KHR_shader_subgroup_shuffle_relative extension.
+
+`GL_KHR_shader_subgroup_vote`
+> Represents the GL_KHR_shader_subgroup_vote extension.
+
+`GL_NV_compute_shader_derivatives`
+> Represents the GL_NV_compute_shader_derivatives extension.
+
+`GL_NV_fragment_shader_barycentric`
+> Represents the GL_NV_fragment_shader_barycentric extension.
+
+`GL_NV_gpu_shader5`
+> Represents the GL_NV_gpu_shader5 extension.
+
+`GL_NV_ray_tracing`
+> Represents the GL_NV_ray_tracing extension.
+
+`GL_NV_ray_tracing_motion_blur`
+> Represents the GL_NV_ray_tracing_motion_blur extension.
+
+`GL_NV_shader_atomic_fp16_vector`
+> Represents the GL_NV_shader_atomic_fp16_vector extension.
+
+`GL_NV_shader_invocation_reorder`
+> Represents the GL_NV_shader_invocation_reorder extension.
+
+`GL_NV_shader_subgroup_partitioned`
+> Represents the GL_NV_shader_subgroup_partitioned extension.
+
+`GL_NV_shader_texture_footprint`
+> Represents the GL_NV_shader_texture_footprint extension.
+
+Compound Capabilities
+----------------------
+*Capabilities to specify capabilities created by other capabilities (`raytracing`, `meshshading`...)*
+
+`any_target`
+> All code-gen targets
+
+`any_textual_target`
+> All non-asm code-gen targets
+
+`any_gfx_target`
+> All slang-gfx compatible code-gen targets
+
+`any_cpp_target`
+> All "cpp syntax" code-gen targets
+
+`cpp_cuda`
+> CPP and CUDA code-gen targets
+
+`cpp_cuda_spirv`
+> CPP, CUDA and SPIRV code-gen targets
+
+`cuda_spirv`
+> CUDA and SPIRV code-gen targets
+
+`cpp_cuda_glsl_spirv`
+> CPP, CUDA, GLSL and SPIRV code-gen targets
+
+`cpp_cuda_glsl_hlsl`
+> CPP, CUDA, GLSL, and HLSL code-gen targets
+
+`cpp_cuda_glsl_hlsl_spirv`
+> CPP, CUDA, GLSL, HLSL, and SPIRV code-gen targets
+
+`cpp_cuda_glsl_hlsl_spirv_wgsl`
+> CPP, CUDA, GLSL, HLSL, SPIRV and WGSL code-gen targets
+
+`cpp_cuda_glsl_hlsl_metal_spirv`
+> CPP, CUDA, GLSL, HLSL, Metal and SPIRV code-gen targets
+
+`cpp_cuda_glsl_hlsl_metal_spirv_wgsl`
+> CPP, CUDA, GLSL, HLSL, Metal, SPIRV and WGSL code-gen targets
+
+`cpp_cuda_hlsl`
+> CPP, CUDA, and HLSL code-gen targets
+
+`cpp_cuda_hlsl_spirv`
+> CPP, CUDA, HLSL, and SPIRV code-gen targets
+
+`cpp_cuda_hlsl_metal_spirv`
+> CPP, CUDA, HLSL, Metal, and SPIRV code-gen targets
+
+`cpp_glsl`
+> CPP, and GLSL code-gen targets
+
+`cpp_glsl_hlsl_spirv`
+> CPP, GLSL, HLSL, and SPIRV code-gen targets
+
+`cpp_glsl_hlsl_spirv_wgsl`
+> CPP, GLSL, HLSL, SPIRV and WGSL code-gen targets
+
+`cpp_glsl_hlsl_metal_spirv`
+> CPP, GLSL, HLSL, Metal, and SPIRV code-gen targets
+
+`cpp_glsl_hlsl_metal_spirv_wgsl`
+> CPP, GLSL, HLSL, Metal, SPIRV and WGSL code-gen targets
+
+`cpp_hlsl`
+> CPP, and HLSL code-gen targets
+
+`cuda_glsl_hlsl`
+> CUDA, GLSL, and HLSL code-gen targets
+
+`cuda_hlsl_metal_spirv`
+> CUDA, HLSL, Metal, and SPIRV code-gen targets
+
+`cuda_glsl_hlsl_spirv`
+> CUDA, GLSL, HLSL, and SPIRV code-gen targets
+
+`cuda_glsl_hlsl_spirv_wgsl`
+> CUDA, GLSL, HLSL, SPIRV, and WGSL code-gen targets
+
+`cuda_glsl_hlsl_metal_spirv`
+> CUDA, GLSL, HLSL, Metal, and SPIRV code-gen targets
+
+`cuda_glsl_hlsl_metal_spirv_wgsl`
+> CUDA, GLSL, HLSL, Metal, SPIRV and WGSL code-gen targets
+
+`cuda_glsl_spirv`
+> CUDA, GLSL, and SPIRV code-gen targets
+
+`cuda_glsl_metal_spirv`
+> CUDA, GLSL, Metal, and SPIRV code-gen targets
+
+`cuda_glsl_metal_spirv_wgsl`
+> CUDA, GLSL, Metal, SPIRV and WGSL code-gen targets
+
+`cuda_hlsl`
+> CUDA, and HLSL code-gen targets
+
+`cuda_hlsl_spirv`
+> CUDA, HLSL, and SPIRV code-gen targets
+
+`glsl_hlsl_spirv`
+> GLSL, HLSL, and SPIRV code-gen targets
+
+`glsl_hlsl_spirv_wgsl`
+> GLSL, HLSL, SPIRV and WGSL code-gen targets
+
+`glsl_hlsl_metal_spirv`
+> GLSL, HLSL, Metal, and SPIRV code-gen targets
+
+`glsl_hlsl_metal_spirv_wgsl`
+> GLSL, HLSL, Metal, SPIRV and WGSL code-gen targets
+
+`glsl_metal_spirv`
+> GLSL, Metal, and SPIRV code-gen targets
+
+`glsl_metal_spirv_wgsl`
+> GLSL, Metal, SPIRV and WGSL code-gen targets
+
+`glsl_spirv`
+> GLSL, and SPIRV code-gen targets
+
+`glsl_spirv_wgsl`
+> GLSL, SPIRV, and WGSL code-gen targets
+
+`hlsl_spirv`
+> HLSL, and SPIRV code-gen targets
+
+`nvapi`
+> NVAPI capability for HLSL
+
+`raytracing`
+> Capabilities needed for minimal raytracing support
+
+`ser`
+> Capabilities needed for shader-execution-reordering
+
+`motionblur`
+> Capabilities needed for raytracing-motionblur
+
+`rayquery`
+> Capabilities needed for compute-shader rayquery
+
+`raytracing_motionblur`
+> Capabilities needed for compute-shader rayquery and motion-blur
+
+`ser_motion`
+> Capabilities needed for shader-execution-reordering and motion-blur
+
+`shaderclock`
+> Capabilities needed for realtime clocks
+
+`fragmentshaderinterlock`
+> Capabilities needed for interlocked-fragment operations
+
+`atomic64`
+> Capabilities needed for int64/uint64 atomic operations
+
+`atomicfloat`
+> Capabilities needed to use GLSL-tier-1 float-atomic operations
+
+`atomicfloat2`
+> Capabilities needed to use GLSL-tier-2 float-atomic operations
+
+`fragmentshaderbarycentric`
+> Capabilities needed to use fragment-shader barycentrics
+
+`shadermemorycontrol`
+> (gfx targets) Capabilities needed to use memory barriers
+
+`wave_multi_prefix`
+> Capabilities needed to use HLSL tier wave operations
+
+`bufferreference`
+> Capabilities needed to use GLSL buffer references
+
+`bufferreference_int64`
+> Capabilities needed to use GLSL buffer references with int64
+
+`cooperative_vector`
+> Capabilities needed to use cooperative vectors
+> Note that cpp and cuda are supported via a fallback non-cooperative implementation
+> No HLSL shader model bound yet
+
+`cooperative_vector_training`
+> Capabilities needed to train cooperative vectors
+
+`any_stage`
+> Collection of all shader stages
+
+`amplification_mesh`
+> Collection of shader stages
+
+`raytracing_stages`
+> Collection of shader stages
+
+`anyhit_closesthit`
+> Collection of shader stages
+
+`raygen_closesthit_miss`
+> Collection of shader stages
+
+`anyhit_closesthit_intersection`
+> Collection of shader stages
+
+`anyhit_closesthit_intersection_miss`
+> Collection of shader stages
+
+`raygen_closesthit_miss_callable`
+> Collection of shader stages
+
+`compute_tesscontrol_tesseval`
+> Collection of shader stages
+
+`compute_fragment`
+> Collection of shader stages
+
+`compute_fragment_geometry_vertex`
+> Collection of shader stages
+
+`domain_hull`
+> Collection of shader stages
+
+`raytracingstages_fragment`
+> Collection of shader stages
+
+`raytracingstages_compute`
+> Collection of shader stages
+
+`raytracingstages_compute_amplification_mesh`
+> Collection of shader stages
+
+`raytracingstages_compute_fragment`
+> Collection of shader stages
+
+`raytracingstages_compute_fragment_geometry_vertex`
+> Collection of shader stages
+
+`meshshading`
+> Capabilities required to use mesh shading features
+
+`shadermemorycontrol_compute`
+> (gfx targets) Capabilities required to use memory barriers that only work for raytracing & compute shader stages
+
+`subpass`
+> Capabilities required to use subpass inputs
+
+`appendstructuredbuffer`
+> Capabilities required to use AppendStructuredBuffer
+
+`atomic_hlsl`
+> (hlsl only) Capabilities required to use hlsl atomic operations
+
+`atomic_hlsl_nvapi`
+> (hlsl only) Capabilities required to use hlsl NVAPI atomics
+
+`atomic_hlsl_sm_6_6`
+> (hlsl only) Capabilities required to use hlsl sm_6_6 atomics
+
+`byteaddressbuffer`
+> Capabilities required to use ByteAddressBuffer
+
+`byteaddressbuffer_rw`
+> Capabilities required to use RWByteAddressBuffer
+
+`consumestructuredbuffer`
+> Capabilities required to use ConsumeStructuredBuffer
+
+`structuredbuffer`
+> Capabilities required to use StructuredBuffer
+
+`structuredbuffer_rw`
+> Capabilities required to use RWStructuredBuffer
+
+`fragmentprocessing`
+> Capabilities required to use fragment derivative operations (without GLSL derivativecontrol)
+
+`fragmentprocessing_derivativecontrol`
+> Capabilities required to use fragment derivative operations (with GLSL derivativecontrol)
+
+`getattributeatvertex`
+> Capabilities required to use 'getAttributeAtVertex'
+
+`memorybarrier`
+> Capabilities required to use sm_5_0 style memory barriers
+
+`texture_sm_4_0`
+> Capabilities required to use sm_4_0 texture operations
+
+`texture_sm_4_1`
+> Capabilities required to use sm_4_1 texture operations
+
+`texture_sm_4_1_samplerless`
+> Capabilities required to use sm_4_1 samplerless texture operations
+
+`texture_sm_4_1_compute_fragment`
+> Capabilities required to use 'compute/fragment shader only' texture operations.
+> We do not require 'compute'/'fragment' shader capabilities since this seems to be incorrect behavior despite what official documentation says.
+
+`texture_sm_4_0_fragment`
+> Capabilities required to use 'fragment shader only' texture operations
+
+`texture_sm_4_1_clamp_fragment`
+> Capabilities required to use 'fragment shader only' texture clamp operations
+
+`texture_sm_4_1_vertex_fragment_geometry`
+> Capabilities required to use 'fragment/geometry shader only' texture clamp operations
+
+`texture_gather`
+> Capabilities required to use 'vertex/fragment/geometry shader only' texture gather operations
+
+`image_samples`
+> Capabilities required to query image (RWTexture) sample info
+
+`image_size`
+> Capabilities required to query image (RWTexture) size info
+
+`texture_size`
+> Capabilities required to query texture sample info
+
+`texture_querylod`
+> Capabilities required to query texture LOD info
+
+`texture_querylevels`
+> Capabilities required to query texture level info
+
+`texture_shadowlod`
+> Capabilities required to query shadow texture lod info
+
+`atomic_glsl_float1`
+> (GLSL/SPIRV) Capabilities required to use GLSL-tier-1 float-atomic operations
+
+`atomic_glsl_float2`
+> (GLSL/SPIRV) Capabilities required to use GLSL-tier-2 float-atomic operations
+
+`atomic_glsl_halfvec`
+> (GLSL/SPIRV) Capabilities required to use NVAPI GLSL-fp16 float-atomic operations
+
+`atomic_glsl`
+> (GLSL/SPIRV) Capabilities required to use GLSL-400 atomic operations
+
+`atomic_glsl_int64`
+> (GLSL/SPIRV) Capabilities required to use int64/uint64 atomic operations
+
+`image_loadstore`
+> (GLSL/SPIRV) Capabilities required to use image load/image store operations
+
+`nonuniformqualifier`
+> Capabilities required to use NonUniform qualifier
+
+`printf`
+> Capabilities required to use 'printf'
+
+`texturefootprint`
+> Capabilities required to use basic TextureFootprint operations
+
+`texturefootprintclamp`
+> Capabilities required to use TextureFootprint clamp operations
+
+`shader5_sm_4_0`
+> Capabilities required to use sm_4_0 features that are part of GL_ARB_gpu_shader5
+
+`shader5_sm_5_0`
+> Capabilities required to use sm_5_0 features that are part of GL_ARB_gpu_shader5
+
+`pack_vector`
+> Capabilities required to use pack/unpack intrinsics on packed vector data
+
+`subgroup_basic`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_basic'
+
+`subgroup_ballot`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_ballot'
+
+`subgroup_ballot_activemask`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_ballot_activemask'
+
+`subgroup_basic_ballot`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_basic_ballot'
+
+`subgroup_vote`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_vote'
+
+`shaderinvocationgroup`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_vote'
+
+`subgroup_arithmetic`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_arithmetic'
+
+`subgroup_shuffle`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_shuffle'
+
+`subgroup_shufflerelative`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_shuffle_relative'
+
+`subgroup_clustered`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_clustered'
+
+`subgroup_quad`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_quad'
+
+`subgroup_partitioned`
+> Capabilities required to use GLSL-style subgroup operations 'subgroup_partitioned'
+
+`atomic_glsl_hlsl_nvapi_cuda_metal_float1`
+> (All implemented targets) Capabilities required to use atomic operations of GLSL tier-1 float atomics
+
+`atomic_glsl_hlsl_nvapi_cuda5_int64`
+> (All implemented targets) Capabilities required to use atomic operations of int64 (cuda_sm_5 tier atomics)
+
+`atomic_glsl_hlsl_nvapi_cuda6_int64`
+> (All implemented targets) Capabilities required to use atomic operations of int64 (cuda_sm_6 tier atomics)
+
+`atomic_glsl_hlsl_nvapi_cuda9_int64`
+> (All implemented targets) Capabilities required to use atomic operations of int64 (cuda_sm_9 tier atomics)
+
+`atomic_glsl_hlsl_cuda_metal`
+> (All implemented targets) Capabilities required to use atomic operations
+
+`atomic_glsl_hlsl_cuda9_int64`
+> (All implemented targets) Capabilities required to use atomic operations (cuda_sm_9 tier atomics)
+
+`helper_lane`
+> Capabilities required to enable helper-lane demotion
+
+`quad_control`
+> Capabilities required to enable quad group control
+
+`breakpoint`
+> Capabilities required to enable shader breakpoints
+
+`raytracing_allstages`
+> Collection of capabilities for raytracing with all raytracing stages.
+
+`raytracing_anyhit`
+> Collection of capabilities for raytracing with the shader stage of anyhit.
+
+`raytracing_intersection`
+> Collection of capabilities for raytracing with the shader stage of intersection.
+
+`raytracing_anyhit_closesthit`
+> Collection of capabilities for raytracing with the shader stages of anyhit and closesthit.
+
+`raytracing_anyhit_closesthit_intersection`
+> Collection of capabilities for raytracing with the shader stages of anyhit, closesthit, and intersection.
+
+`raytracing_raygen_closesthit_miss`
+> Collection of capabilities for raytracing with the shader stages of raygen, closesthit, and miss.
+
+`raytracing_anyhit_closesthit_intersection_miss`
+> Collection of capabilities for raytracing with the shader stages of anyhit, closesthit, intersection, and miss.
+
+`raytracing_raygen_closesthit_miss_callable`
+> Collection of capabilities for raytracing with the shader stages of raygen, closesthit, miss, and callable.
+
+`raytracing_position`
+> Collection of capabilities for raytracing + ray_tracing_position_fetch and the shader stages of anyhit and closesthit.
+
+`raytracing_motionblur_anyhit_closesthit_intersection_miss`
+> Collection of capabilities for raytracing + motion blur and the shader stages of anyhit, closesthit, intersection, and miss.
+
+`raytracing_motionblur_raygen_closesthit_miss`
+> Collection of capabilities for raytracing + motion blur and the shader stages of raygen, closesthit, and miss.
+
+`rayquery_position`
+> Collection of capabilities for rayquery + ray_tracing_position_fetch.
+
+`ser_raygen`
+> Collection of capabilities for raytracing + shader execution reordering and the shader stage of raygen.
+
+`ser_raygen_closesthit_miss`
+> Collection of capabilities for raytracing + shader execution reordering and the shader stages of raygen, closesthit, and miss.
+
+`ser_any_closesthit_intersection_miss`
+> Collection of capabilities for raytracing + shader execution reordering and the shader stages of anyhit, closesthit, intersection, and miss.
+
+`ser_anyhit_closesthit_intersection`
+> Collection of capabilities for raytracing + shader execution reordering and the shader stages of anyhit, closesthit, and intersection.
+
+`ser_anyhit_closesthit`
+> Collection of capabilities for raytracing + shader execution reordering and the shader stages of anyhit and closesthit.
+
+`ser_motion_raygen_closesthit_miss`
+> Collection of capabilities for raytracing + motion blur + shader execution reordering and the shader stages of raygen, closesthit, and miss.
+
+`ser_motion_raygen`
+> Collection of capabilities for raytracing + motion blur + shader execution reordering and the shader stage of raygen.
+
+Other
+----------------------
+*Capabilities which may be deprecated*
+
+`SPIRV_1_0`
+> Use `spirv_1_0` instead
+
+`SPIRV_1_1`
+> Use `spirv_1_1` instead
+
+`SPIRV_1_2`
+> Use `spirv_1_2` instead
+
+`SPIRV_1_3`
+> Use `spirv_1_3` instead
+
+`SPIRV_1_4`
+> Use `spirv_1_4` instead
+
+`SPIRV_1_5`
+> Use `spirv_1_5` instead
+
+`SPIRV_1_6`
+> Use `spirv_1_6` instead
+
+`DX_4_0`
+> Use `sm_4_0` instead
+
+`DX_4_1`
+> Use `sm_4_1` instead
+
+`DX_5_0`
+> Use `sm_5_0` instead
+
+`DX_5_1`
+> Use `sm_5_1` instead
+
+`DX_6_0`
+> Use `sm_6_0` instead
+
+`DX_6_1`
+> Use `sm_6_1` instead
+
+`DX_6_2`
+> Use `sm_6_2` instead
+
+`DX_6_3`
+> Use `sm_6_3` instead
+
+`DX_6_4`
+> Use `sm_6_4` instead
+
+`DX_6_5`
+> Use `sm_6_5` instead
+
+`DX_6_6`
+> Use `sm_6_6` instead
+
+`DX_6_7`
+> Use `sm_6_7` instead
+
+`DX_6_8`
+> Use `sm_6_8` instead
+
+`GLSL_410_SPIRV_1_0`
+> User should not use this capability
+
+`GLSL_420_SPIRV_1_0`
+> User should not use this capability
+
+`GLSL_430_SPIRV_1_0`
+> User should not use this capability
+
+`METAL_2_3`
+> Use `metallib_2_3` instead
+
+`METAL_2_4`
+> Use `metallib_2_4` instead
+
+`METAL_3_0`
+> Use `metallib_3_0` instead
+
+`METAL_3_1`
+> Use `metallib_3_1` instead
+
+`GLSL_430_SPIRV_1_0_compute`
+> User should not use this capability
+
+`all`
+> User should not use this capability
diff --git a/external/slang/share/doc/slang/user-guide/a3-reference.md b/external/slang/share/doc/slang/user-guide/a3-reference.md
new file mode 100644
index 00000000..d533ea1f
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/a3-reference.md
@@ -0,0 +1,11 @@
+---
+layout: user-guide
+permalink: /user-guide/reference
+---
+
+Reference
+============================
+
+In this chapter:
+1. [Reference for all Capability Profiles](a3-01-Reference-Capability-Profiles.md)
+2. [Reference for all Capability Atoms](a3-02-Reference-Capability-Atoms.md)
diff --git a/external/slang/share/doc/slang/user-guide/index.md b/external/slang/share/doc/slang/user-guide/index.md
new file mode 100644
index 00000000..51aee62a
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/index.md
@@ -0,0 +1,13 @@
+---
+layout: user-guide
+---
+
+Slang User's Guide
+=============
+
+Welcome to the Slang User's Guide, an introduction to the Slang language, compiler, and API. In this guide, you will learn:
+- Slang's language features, including those inherited from HLSL and additional language features to make it easy to work with shaders.
+- The compiler API that controls how to assemble shaders from different pieces of code, and how they are compiled for different targets.
+- The reflection API that allows the host application to query the details of shader code in order to generate the right shader kernel and to set shader parameters correctly.
+
+Note: this documentation is still under active development. While the coverage of language features is complete, we are still working on the remaining chapters on Slang's compilation and reflection API.
diff --git a/external/slang/share/doc/slang/user-guide/nav.html b/external/slang/share/doc/slang/user-guide/nav.html
new file mode 100644
index 00000000..af749864
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/nav.html
@@ -0,0 +1,6 @@
+<nav>
+    <li><a href="/">Slang</a></li>
+    <li><a href="/docs">Docs</a></li>
+    <li><a href="index.html">User's Guide</a></li>
+
+</nav>
diff --git a/external/slang/share/doc/slang/user-guide/toc.html b/external/slang/share/doc/slang/user-guide/toc.html
new file mode 100644
index 00000000..8a314d68
--- /dev/null
+++ b/external/slang/share/doc/slang/user-guide/toc.html
@@ -0,0 +1,269 @@
+<ul class="toc_root_list"><li data-link="index"><span>Slang User's Guide</span>
+<ul class="toc_list">
+<li data-link="introduction"><span>Introduction</span>
+<ul class="toc_list">
+<li data-link="introduction#why-use-slang"><span>Why use Slang?</span></li>
+<li data-link="introduction#who-is-slang-for"><span>Who is Slang for?</span></li>
+<li data-link="introduction#who-is-this-guide-for"><span>Who is this guide for?</span></li>
+<li data-link="introduction#goals-and-non-goals"><span>Goals and Non-Goals</span></li>
+</ul>
+</li>
+<li data-link="get-started"><span>Getting Started with Slang</span>
+<ul class="toc_list">
+<li data-link="get-started#installation"><span>Installation</span></li>
+<li data-link="get-started#your-first-slang-shader"><span>Your first Slang shader</span></li>
+<li data-link="get-started#the-full-example"><span>The full example</span></li>
+</ul>
+</li>
+<li data-link="conventional-features"><span>Conventional Language Features</span>
+<ul class="toc_list">
+<li data-link="conventional-features#types"><span>Types</span></li>
+<li data-link="conventional-features#expressions"><span>Expressions</span></li>
+<li data-link="conventional-features#statements"><span>Statements</span></li>
+<li data-link="conventional-features#functions"><span>Functions</span></li>
+<li data-link="conventional-features#preprocessor"><span>Preprocessor</span></li>
+<li data-link="conventional-features#attributes"><span>Attributes</span></li>
+<li data-link="conventional-features#global-variables-and-shader-parameters"><span>Global Variables and Shader Parameters</span></li>
+<li data-link="conventional-features#shader-entry-points"><span>Shader Entry Points</span></li>
+<li data-link="conventional-features#mixed-shader-entry-points"><span>Mixed Shader Entry Points</span></li>
+<li data-link="conventional-features#auto-generated-constructors"><span>Auto-Generated Constructors</span></li>
+<li data-link="conventional-features#initializer-lists"><span>Initializer Lists</span></li>
+</ul>
+</li>
+<li data-link="convenience-features"><span>Basic Convenience Features</span>
+<ul class="toc_list">
+<li data-link="convenience-features#type-inference-in-variable-definitions"><span>Type Inference in Variable Definitions</span></li>
+<li data-link="convenience-features#immutable-values"><span>Immutable Values</span></li>
+<li data-link="convenience-features#namespaces"><span>Namespaces</span></li>
+<li data-link="convenience-features#member-functions"><span>Member functions</span></li>
+<li data-link="convenience-features#properties"><span>Properties</span></li>
+<li data-link="convenience-features#initializers"><span>Initializers</span></li>
+<li data-link="convenience-features#operator-overloading"><span>Operator Overloading</span></li>
+<li data-link="convenience-features#subscript-operator"><span>Subscript Operator</span></li>
+<li data-link="convenience-features#tuple-types"><span>Tuple Types</span></li>
+<li data-link="convenience-features#optionalt-type"><span>`Optional&lt;T&gt;` type</span></li>
+<li data-link="convenience-features#if_let-syntax"><span>`if_let` syntax</span></li>
+<li data-link="convenience-features#reinterprett-operation"><span>`reinterpret&lt;T&gt;` operation</span></li>
+<li data-link="convenience-features#pointers-limited"><span>Pointers (limited)</span></li>
+<li data-link="convenience-features#descriptorhandle-for-bindless-descriptor-access"><span>`DescriptorHandle` for Bindless Descriptor Access</span></li>
+<li data-link="convenience-features#extensions"><span>Extensions</span></li>
+<li data-link="convenience-features#multi-level-break"><span>Multi-level break</span></li>
+<li data-link="convenience-features#force-inlining"><span>Force inlining</span></li>
+<li data-link="convenience-features#special-scoping-syntax"><span>Special Scoping Syntax</span></li>
+<li data-link="convenience-features#user-defined-attributes-experimental"><span>User Defined Attributes (Experimental)</span></li>
+</ul>
+</li>
+<li data-link="modules"><span>Modules and Access Control</span>
+<ul class="toc_list">
+<li data-link="modules#defining-a-module"><span>Defining a Module</span></li>
+<li data-link="modules#importing-a-module"><span>Importing a Module</span></li>
+<li data-link="modules#access-control"><span>Access Control</span></li>
+<li data-link="modules#organizing-file-structure-of-modules"><span>Organizing File Structure of Modules</span></li>
+<li data-link="modules#legacy-modules"><span>Legacy Modules</span></li>
+</ul>
+</li>
+<li data-link="capabilities"><span>Capabilities</span>
+<ul class="toc_list">
+<li data-link="capabilities#capability-atoms-and-capability-requirements"><span>Capability Atoms and Capability Requirements</span></li>
+<li data-link="capabilities#conflicting-capabilities"><span>Conflicting Capabilities</span></li>
+<li data-link="capabilities#requirements-in-parent-scope"><span>Requirements in Parent Scope</span></li>
+<li data-link="capabilities#inference-of-capability-requirements"><span>Inference of Capability Requirements</span></li>
+<li data-link="capabilities#inference-on-target_switch"><span>Inference on target_switch</span></li>
+<li data-link="capabilities#capability-aliases"><span>Capability Aliases</span></li>
+<li data-link="capabilities#validation-of-capability-requirements"><span>Validation of Capability Requirements</span></li>
+</ul>
+</li>
+<li data-link="interfaces-generics"><span>Interfaces and Generics</span>
+<ul class="toc_list">
+<li data-link="interfaces-generics#interfaces"><span>Interfaces</span></li>
+<li data-link="interfaces-generics#generics"><span>Generics</span></li>
+<li data-link="interfaces-generics#supported-constructs-in-interface-definitions"><span>Supported Constructs in Interface Definitions</span></li>
+<li data-link="interfaces-generics#associated-types"><span>Associated Types</span></li>
+<li data-link="interfaces-generics#generic-value-parameters"><span>Generic Value Parameters</span></li>
+<li data-link="interfaces-generics#type-equality-constraints"><span>Type Equality Constraints</span></li>
+<li data-link="interfaces-generics#interface-typed-values"><span>Interface-typed Values</span></li>
+<li data-link="interfaces-generics#extending-a-type-with-additional-interface-conformances"><span>Extending a Type with Additional Interface Conformances</span></li>
+<li data-link="interfaces-generics#is-and-as-operator"><span>`is` and `as` Operator</span></li>
+<li data-link="interfaces-generics#generic-interfaces"><span>Generic Interfaces</span></li>
+<li data-link="interfaces-generics#generic-extensions"><span>Generic Extensions</span></li>
+<li data-link="interfaces-generics#extensions-to-interfaces"><span>Extensions to Interfaces</span></li>
+<li data-link="interfaces-generics#variadic-generics"><span>Variadic Generics</span></li>
+<li data-link="interfaces-generics#builtin-interfaces"><span>Builtin Interfaces</span></li>
+</ul>
+</li>
+<li data-link="autodiff"><span>Automatic Differentiation</span>
+<ul class="toc_list">
+<li data-link="autodiff#auto-diff-operations-fwd_diff-and-bwd_diff"><span>Auto-diff operations `fwd_diff` and `bwd_diff`</span></li>
+<li data-link="autodiff#differentiable-type-system"><span>Differentiable Type System</span></li>
+<li data-link="autodiff#user-defined-derivative-functions"><span>User-Defined Derivative Functions</span></li>
+<li data-link="autodiff#using-auto-diff-with-generics"><span>Using Auto-diff with Generics</span></li>
+<li data-link="autodiff#using-auto-diff-with-interface-requirements-and-interface-types"><span>Using Auto-diff with Interface Requirements and Interface Types</span></li>
+<li data-link="autodiff#primal-substitute-functions"><span>Primal Substitute Functions</span></li>
+<li data-link="autodiff#working-with-mixed-differentiable-and-non-differentiable-code"><span>Working with Mixed Differentiable and Non-Differentiable Code</span></li>
+<li data-link="autodiff#higher-order-differentiation"><span>Higher-Order Differentiation</span></li>
+<li data-link="autodiff#restrictions-and-known-issues"><span>Restrictions and Known Issues</span></li>
+<li data-link="autodiff#reference"><span>Reference</span></li>
+</ul>
+</li>
+<li data-link="compiling"><span>Compiling Code with Slang</span>
+<ul class="toc_list">
+<li data-link="compiling#concepts"><span>Concepts</span></li>
+<li data-link="compiling#command-line-compilation-with-slangc"><span>Command-Line Compilation with `slangc`</span></li>
+<li data-link="compiling#using-the-compilation-api"><span>Using the Compilation API</span></li>
+<li data-link="compiling#multithreading"><span>Multithreading</span></li>
+<li data-link="compiling#compiler-options"><span>Compiler Options</span></li>
+<li data-link="compiling#debugging"><span>Debugging</span></li>
+</ul>
+</li>
+<li data-link="reflection"><span>Using the Reflection API</span>
+<ul class="toc_list">
+<li data-link="reflection#compiling-a-program"><span>Compiling a Program</span></li>
+<li data-link="reflection#types-and-variables"><span>Types and Variables</span></li>
+<li data-link="reflection#layout-for-types-and-variables"><span>Layout for Types and Variables</span></li>
+<li data-link="reflection#programs-and-scopes"><span>Programs and Scopes</span></li>
+<li data-link="reflection#calculating-cumulative-offsets"><span>Calculating Cumulative Offsets</span></li>
+<li data-link="reflection#determining-whether-parameters-are-used"><span>Determining Whether Parameters Are Used</span></li>
+<li data-link="reflection#conclusion"><span>Conclusion</span></li>
+</ul>
+</li>
+<li data-link="targets"><span>Supported Compilation Targets</span>
+<ul class="toc_list">
+<li data-link="targets#background-and-terminology"><span>Background and Terminology</span></li>
+<li data-link="targets#direct3d-11"><span>Direct3D 11</span></li>
+<li data-link="targets#direct3d-12"><span>Direct3D 12</span></li>
+<li data-link="targets#vulkan"><span>Vulkan</span></li>
+<li data-link="targets#opengl"><span>OpenGL</span></li>
+<li data-link="targets#metal"><span>Metal</span></li>
+<li data-link="targets#cuda-and-optix"><span>CUDA and OptiX</span></li>
+<li data-link="targets#cpu-compute"><span>CPU Compute</span></li>
+<li data-link="targets#webgpu"><span>WebGPU</span></li>
+<li data-link="targets#summary"><span>Summary</span></li>
+</ul>
+</li>
+<li data-link="link-time-specialization"><span>Link-time Specialization and Module Precompilation</span>
+<ul class="toc_list">
+<li data-link="link-time-specialization#link-time-constants"><span>Link-time Constants</span></li>
+<li data-link="link-time-specialization#link-time-types"><span>Link-time Types</span></li>
+<li data-link="link-time-specialization#providing-default-settings"><span>Providing Default Settings</span></li>
+<li data-link="link-time-specialization#restrictions"><span>Restrictions</span></li>
+<li data-link="link-time-specialization#using-precompiling-modules-with-the-api"><span>Using Precompiling Modules with the API</span></li>
+<li data-link="link-time-specialization#additional-remarks"><span>Additional Remarks</span></li>
+</ul>
+</li>
+<li data-link="a1-special-topics"><span>Special Topics</span>
+<ul class="toc_list">
+<li data-link="a1-01-matrix-layout"><span>Handling Matrix Layout Differences on Different Platforms</span>
+<ul class="toc_list">
+<li data-link="a1-01-matrix-layout#two-conventions-of-matrix-transform-math"><span>Two conventions of matrix transform math</span></li>
+<li data-link="a1-01-matrix-layout#discussion"><span>Discussion</span></li>
+<li data-link="a1-01-matrix-layout#matrix-layout"><span>Matrix Layout</span></li>
+<li data-link="a1-01-matrix-layout#overriding-default-matrix-layout"><span>Overriding default matrix layout</span></li>
+</ul>
+</li>
+<li data-link="a1-03-obfuscation"><span>Obfuscation</span>
+<ul class="toc_list">
+<li data-link="a1-03-obfuscation#obfuscation-in-slang"><span>Obfuscation in Slang</span></li>
+<li data-link="a1-03-obfuscation#using-an-obfuscated-module"><span>Using An Obfuscated Module</span></li>
+<li data-link="a1-03-obfuscation#accessing-source-maps"><span>Accessing Source Maps</span></li>
+<li data-link="a1-03-obfuscation#accessing-source-maps-without-files"><span>Accessing Source Maps without Files</span></li>
+<li data-link="a1-03-obfuscation#emit-source-maps"><span>Emit Source Maps</span></li>
+<li data-link="a1-03-obfuscation#issuesfuture-work"><span>Issues/Future Work</span></li>
+</ul>
+</li>
+<li data-link="a1-04-interop"><span>Interoperation with Target-Specific Code</span>
+<ul class="toc_list">
+<li data-link="a1-04-interop#defining-intrinsic-functions-for-textual-targets"><span>Defining Intrinsic Functions for Textual Targets</span></li>
+<li data-link="a1-04-interop#defining-intrinsic-types"><span>Defining Intrinsic Types</span></li>
+<li data-link="a1-04-interop#injecting-preludes"><span>Injecting Preludes</span></li>
+<li data-link="a1-04-interop#managing-cross-platform-code"><span>Managing Cross-Platform Code</span></li>
+<li data-link="a1-04-interop#inline-spirv-assembly"><span>Inline SPIRV Assembly</span></li>
+</ul>
+</li>
+<li data-link="a1-05-uniformity"><span>Uniformity Analysis</span>
+<ul class="toc_list">
+<li data-link="a1-05-uniformity#treat-values-as-uniform"><span>Treat Values as Uniform</span></li>
+<li data-link="a1-05-uniformity#treat-function-return-values-as-non-uniform"><span>Treat Function Return Values as Non-uniform</span></li>
+</ul>
+</li>
+</ul>
+</li>
+<li data-link="a2-target-specific-features"><span>Target-specific features</span>
+<ul class="toc_list">
+<li data-link="spirv-target-specific"><span>SPIR-V specific functionalities</span>
+<ul class="toc_list">
+<li data-link="spirv-target-specific#experimental-support-for-the-older-versions-of-spir-v"><span>Experimental support for the older versions of SPIR-V</span></li>
+<li data-link="spirv-target-specific#combined-texture-sampler"><span>Combined texture sampler</span></li>
+<li data-link="spirv-target-specific#system-value-semantics"><span>System-Value semantics</span></li>
+<li data-link="spirv-target-specific#behavior-of-discard-after-spir-v-16"><span>Behavior of `discard` after SPIR-V 1.6</span></li>
+<li data-link="spirv-target-specific#supported-hlsl-features-when-targeting-spir-v"><span>Supported HLSL features when targeting SPIR-V</span></li>
+<li data-link="spirv-target-specific#unsupported-glsl-keywords-when-targeting-spir-v"><span>Unsupported GLSL keywords when targeting SPIR-V</span></li>
+<li data-link="spirv-target-specific#supported-atomic-types-for-each-target"><span>Supported atomic types for each target</span></li>
+<li data-link="spirv-target-specific#constantbuffer-structuredbuffer-and-byteaddressbuffer"><span>ConstantBuffer, StructuredBuffer and ByteAddressBuffer</span></li>
+<li data-link="spirv-target-specific#parameterblock-for-spir-v-target"><span>ParameterBlock for SPIR-V target</span></li>
+<li data-link="spirv-target-specific#push-constants"><span>Push Constants</span></li>
+<li data-link="spirv-target-specific#specialization-constants"><span>Specialization Constants</span></li>
+<li data-link="spirv-target-specific#spir-v-specific-attributes"><span>SPIR-V specific Attributes </span></li>
+<li data-link="spirv-target-specific#multiple-entry-points-support"><span>Multiple entry points support</span></li>
+<li data-link="spirv-target-specific#global-memory-pointers"><span>Global memory pointers</span></li>
+<li data-link="spirv-target-specific#matrix-type-translation"><span>Matrix type translation</span></li>
+<li data-link="spirv-target-specific#legalization"><span>Legalization</span></li>
+<li data-link="spirv-target-specific#tessellation"><span>Tessellation</span></li>
+<li data-link="spirv-target-specific#spir-v-specific-compiler-options"><span>SPIR-V specific Compiler options</span></li>
+</ul>
+</li>
+<li data-link="metal-target-specific"><span>Metal-specific functionalities</span>
+<ul class="toc_list">
+<li data-link="metal-target-specific#entry-point-parameter-handling"><span>Entry Point Parameter Handling</span></li>
+<li data-link="metal-target-specific#system-value-semantics"><span>System-Value semantics</span></li>
+<li data-link="metal-target-specific#interpolation-modifiers"><span>Interpolation Modifiers</span></li>
+<li data-link="metal-target-specific#resource-types"><span>Resource Types</span></li>
+<li data-link="metal-target-specific#header-inclusions-and-namespace"><span>Header Inclusions and Namespace</span></li>
+<li data-link="metal-target-specific#parameter-blocks-and-argument-buffers"><span>Parameter blocks and Argument Buffers</span></li>
+<li data-link="metal-target-specific#struct-parameter-flattening"><span>Struct Parameter Flattening</span></li>
+<li data-link="metal-target-specific#return-value-handling"><span>Return Value Handling</span></li>
+<li data-link="metal-target-specific#value-type-conversion"><span>Value Type Conversion</span></li>
+<li data-link="metal-target-specific#conservative-rasterization"><span>Conservative Rasterization</span></li>
+<li data-link="metal-target-specific#address-space-assignment"><span>Address Space Assignment</span></li>
+<li data-link="metal-target-specific#explicit-parameter-binding"><span>Explicit Parameter Binding</span></li>
+<li data-link="metal-target-specific#specialization-constants"><span>Specialization Constants</span></li>
+</ul>
+</li>
+<li data-link="wgsl-target-specific"><span>WGSL specific functionalities</span>
+<ul class="toc_list">
+<li data-link="wgsl-target-specific#system-value-semantics"><span>System-Value semantics</span></li>
+<li data-link="wgsl-target-specific#supported-hlsl-features-when-targeting-wgsl"><span>Supported HLSL features when targeting WGSL</span></li>
+<li data-link="wgsl-target-specific#supported-atomic-types"><span>Supported atomic types</span></li>
+<li data-link="wgsl-target-specific#constantbuffer-rwrasterizerorderedstructuredbuffer-rwrasterizerorderedbyteaddressbuffer"><span>ConstantBuffer, (RW/RasterizerOrdered)StructuredBuffer, (RW/RasterizerOrdered)ByteAddressBuffer</span></li>
+<li data-link="wgsl-target-specific#specialization-constants"><span>Specialization Constants</span></li>
+<li data-link="wgsl-target-specific#interlocked-operations"><span>Interlocked operations</span></li>
+<li data-link="wgsl-target-specific#entry-point-parameter-handling"><span>Entry Point Parameter Handling</span></li>
+<li data-link="wgsl-target-specific#parameter-blocks"><span>Parameter blocks</span></li>
+<li data-link="wgsl-target-specific#write-only-textures"><span>Write-only Textures</span></li>
+<li data-link="wgsl-target-specific#pointers"><span>Pointers</span></li>
+<li data-link="wgsl-target-specific#address-space-assignment"><span>Address Space Assignment</span></li>
+<li data-link="wgsl-target-specific#matrix-type-translation"><span>Matrix type translation</span></li>
+<li data-link="wgsl-target-specific#explicit-parameter-binding"><span>Explicit Parameter Binding</span></li>
+<li data-link="wgsl-target-specific#specialization-constants"><span>Specialization Constants</span></li>
+</ul>
+</li>
+</ul>
+</li>
+<li data-link="reference"><span>Reference</span>
+<ul class="toc_list">
+<li data-link="a3-01-reference-capability-profiles"><span>Capability Profiles</span>
+</li>
+<li data-link="a3-02-reference-capability-atoms"><span>Capability Atoms</span>
+<ul class="toc_list">
+<li data-link="a3-02-reference-capability-atoms#targets"><span>Targets</span></li>
+<li data-link="a3-02-reference-capability-atoms#stages"><span>Stages</span></li>
+<li data-link="a3-02-reference-capability-atoms#versions"><span>Versions</span></li>
+<li data-link="a3-02-reference-capability-atoms#extensions"><span>Extensions</span></li>
+<li data-link="a3-02-reference-capability-atoms#compound-capabilities"><span>Compound Capabilities</span></li>
+<li data-link="a3-02-reference-capability-atoms#other"><span>Other</span></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
\ No newline at end of file
diff --git a/external/slang/docs/wave-intrinsics.md b/external/slang/share/doc/slang/wave-intrinsics.md
similarity index 94%
rename from external/slang/docs/wave-intrinsics.md
rename to external/slang/share/doc/slang/wave-intrinsics.md
index 640158f7..aa46f72a 100644
--- a/external/slang/docs/wave-intrinsics.md
+++ b/external/slang/share/doc/slang/wave-intrinsics.md
@@ -31,7 +31,7 @@ Using WaveMask intrinsics is generally more verbose and prone to error than the
 * Might allow for higher performance (for example it gives more control of divergence)
 * Maps most closely to CUDA
 
-On D3D12 and Vulkan the WaveMask instrinsics can be used, but the mask is effectively ignored. For this to work across targets including CUDA, the mask must be calculated such that it exactly matches that of HLSL defined 'active' lanes, else the behavior is undefined. 
+On D3D12 and Vulkan the WaveMask intrinsics can be used, but the mask is effectively ignored. For this to work across targets including CUDA, the mask must be calculated such that it exactly matches that of HLSL defined 'active' lanes, else the behavior is undefined.
 
 The WaveMask intrinsics are a non standard Slang feature, and may change in the future. 
 
@@ -41,7 +41,7 @@ RWStructuredBuffer<int> outputBuffer;
 [numthreads(4, 1, 1)]
 void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
 {
-    // It is the programmers responsibility to determine the inital mask, and that is dependent on the launch
+    // It is the programmer's responsibility to determine the initial mask, and that is dependent on the launch
     // It's common to launch such that all lanes are active - with CUDA this would mean 32 lanes. 
     // Here the launch only has 4 lanes active, and so the initial mask is 0xf.
     const WaveMask mask0 = 0xf;
@@ -212,7 +212,7 @@ T WaveBroadcastLaneAt<T>(T value, constexpr int lane);
 ```
 
 All lanes receive the value specified in lane. Lane must be an active lane, otherwise the result is undefined. 
-This is a more restricive version of `WaveReadLaneAt` - which can take a non constexpr lane, *but* must be the same value for all lanes in the warp. Or 'dynamically uniform' as described in the HLSL documentation. 
+This is a more restrictive version of `WaveReadLaneAt` - which can take a non constexpr lane, *but* must be the same value for all lanes in the warp. Or 'dynamically uniform' as described in the HLSL documentation.
 
 ```
 T WaveShuffle<T>(T value, int lane);
@@ -220,7 +220,7 @@ T WaveShuffle<T>(T value, int lane);
 
 Shuffle is a less restrictive version of `WaveReadLaneAt` in that it has no restriction on the lane value - it does *not* require the value to be same on all lanes. 
 
-There isn't explicit support for WaveShuffle in HLSL, and for now it will emit `WaveReadLaneAt`. As it turns out for a sizable set of hardware WaveReadLaneAt does work correctly when the lane is not 'dynamically uniform'. This is not necessarily the case for hardware general though, so if targetting HLSL it is important to make sure that this does work correctly on your target hardware.
+There isn't explicit support for WaveShuffle in HLSL, and for now it will emit `WaveReadLaneAt`. As it turns out for a sizable set of hardware WaveReadLaneAt does work correctly when the lane is not 'dynamically uniform'. This is not necessarily the case for hardware in general though, so if targeting HLSL it is important to make sure that this does work correctly on your target hardware.
 
 Our intention is that Slang will support the appropriate HLSL mechanism that makes this work on all hardware when it's available.  
 
@@ -338,5 +338,3 @@ T WaveMaskReadLaneAt<T>(WaveMask mask, T value, int lane);
 
 T WaveMaskShuffle<T>(WaveMask mask, T value, int lane);
 ```
-
- 
\ No newline at end of file
diff --git a/external/slang/slang-com-helper.h b/external/slang/slang-com-helper.h
deleted file mode 100644
index fc8b7de5..00000000
--- a/external/slang/slang-com-helper.h
+++ /dev/null
@@ -1,134 +0,0 @@
-#ifndef SLANG_COM_HELPER_H
-#define SLANG_COM_HELPER_H
-
-/** \file slang-com-helper.h
-*/
-
-#include "slang.h"
-#include <atomic>
-
-/* !!!!!!!!!!!!!!!!!!!!! Macros to help checking SlangResult !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
-
-/*! Set SLANG_HANDLE_RESULT_FAIL(x) to code to be executed whenever an error occurs, and is detected by one of the macros */
-#ifndef SLANG_HANDLE_RESULT_FAIL
-#	define SLANG_HANDLE_RESULT_FAIL(x)
-#endif
-
-//! Helper macro, that makes it easy to add result checking to calls in functions/methods that themselves return Result. 
-#define SLANG_RETURN_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { SLANG_HANDLE_RESULT_FAIL(_res); return _res; } }
-//! Helper macro that can be used to test the return value from a call, and will return in a void method/function
-#define SLANG_RETURN_VOID_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { SLANG_HANDLE_RESULT_FAIL(_res); return; } }
-//! Helper macro that will return false on failure.
-#define SLANG_RETURN_FALSE_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { SLANG_HANDLE_RESULT_FAIL(_res); return false; } }
-//! Helper macro that will return nullptr on failure.
-#define SLANG_RETURN_NULL_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { SLANG_HANDLE_RESULT_FAIL(_res); return nullptr; } }
-
-//! Helper macro that will assert if the return code from a call is failure, also returns the failure.
-#define SLANG_ASSERT_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { assert(false); return _res; } }
-//! Helper macro that will assert if the result from a call is a failure, also returns. 
-#define SLANG_ASSERT_VOID_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { assert(false); return; } }
-
-/* !!!!!!!!!!!!!!!!!!!!!!! C++ helpers !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
-
-#if defined(__cplusplus)
-namespace Slang {
-
-// Alias SlangResult to Slang::Result
-typedef SlangResult Result;
-// Alias SlangUUID to Slang::Guid
-typedef SlangUUID Guid;
-
-} // namespace Slang
-
-// Operator == and != for Guid/SlangUUID
-
-SLANG_FORCE_INLINE bool operator==(const Slang::Guid& aIn, const Slang::Guid& bIn)
-{
-    using namespace Slang;
-    // Use the largest type the honors the alignment of Guid
-    typedef uint32_t CmpType;
-    union GuidCompare
-    {
-        Guid guid;
-        CmpType data[sizeof(Guid) / sizeof(CmpType)];
-    };
-    // Type pun - so compiler can 'see' the pun and not break aliasing rules
-    const CmpType* a = reinterpret_cast<const GuidCompare&>(aIn).data;
-    const CmpType* b = reinterpret_cast<const GuidCompare&>(bIn).data;
-    // Make the guid comparison a single branch, by not using short circuit
-    return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])) == 0;
-}
-
-SLANG_FORCE_INLINE bool operator!=(const Slang::Guid& a, const Slang::Guid& b)
-{
-    return !(a == b);
-}
-
-/* !!!!!!!! Macros to simplify implementing COM interfaces !!!!!!!!!!!!!!!!!!!!!!!!!!!! */
-
-/* Assumes underlying implementation has a member m_refCount that is initialized to 0 and can have ++ and -- operate on it. 
-For SLANG_IUNKNOWN_QUERY_INTERFACE to work - must have a method 'getInterface' that returns valid pointers for the Guid, or nullptr 
-if not found. */
-
-#define SLANG_IUNKNOWN_QUERY_INTERFACE \
-SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface(SlangUUID const& uuid, void** outObject) SLANG_OVERRIDE \
-{ \
-    ISlangUnknown* intf = getInterface(uuid); \
-    if (intf) \
-    { \
-        addRef(); \
-        *outObject = intf; \
-        return SLANG_OK;\
-    } \
-    return SLANG_E_NO_INTERFACE;\
-}
-
-#define SLANG_IUNKNOWN_ADD_REF \
-SLANG_NO_THROW uint32_t SLANG_MCALL addRef() \
-{ \
-    return ++m_refCount; \
-}
-
-#define SLANG_IUNKNOWN_RELEASE \
-SLANG_NO_THROW uint32_t SLANG_MCALL release() \
-{ \
-    --m_refCount; \
-    if (m_refCount == 0) \
-    { \
-        delete this; \
-        return 0; \
-    } \
-    return m_refCount; \
-} 
-
-#define SLANG_IUNKNOWN_ALL \
-    SLANG_IUNKNOWN_QUERY_INTERFACE \
-    SLANG_IUNKNOWN_ADD_REF \
-    SLANG_IUNKNOWN_RELEASE 
-
-// ------------------------ RefObject IUnknown -----------------------------
-
-#define SLANG_REF_OBJECT_IUNKNOWN_QUERY_INTERFACE \
-SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface(SlangUUID const& uuid, void** outObject) SLANG_OVERRIDE \
-{ \
-    void* intf = getInterface(uuid); \
-    if (intf) \
-    { \
-        addReference(); \
-        *outObject = intf; \
-        return SLANG_OK;\
-    } \
-    return SLANG_E_NO_INTERFACE;\
-}
-
-#define SLANG_REF_OBJECT_IUNKNOWN_ADD_REF SLANG_NO_THROW uint32_t SLANG_MCALL addRef() SLANG_OVERRIDE { return (uint32_t)addReference(); }
-#define SLANG_REF_OBJECT_IUNKNOWN_RELEASE SLANG_NO_THROW uint32_t SLANG_MCALL release() SLANG_OVERRIDE { return (uint32_t)releaseReference(); }
-
-#    define SLANG_REF_OBJECT_IUNKNOWN_ALL         \
-        SLANG_REF_OBJECT_IUNKNOWN_QUERY_INTERFACE \
-        SLANG_REF_OBJECT_IUNKNOWN_ADD_REF         \
-        SLANG_REF_OBJECT_IUNKNOWN_RELEASE
-
-#endif // defined(__cplusplus)
-
-#endif
diff --git a/external/slang/slang-com-ptr.h b/external/slang/slang-com-ptr.h
deleted file mode 100644
index 00cc9dbb..00000000
--- a/external/slang/slang-com-ptr.h
+++ /dev/null
@@ -1,160 +0,0 @@
-#ifndef SLANG_COM_PTR_H
-#define SLANG_COM_PTR_H
-
-#include "slang-com-helper.h"
-
-#include <assert.h>
-#include <cstddef>
-
-namespace Slang {
-
-/*! \brief ComPtr is a simple smart pointer that manages types which implement COM based interfaces.
-\details A class that implements a COM, must derive from the IUnknown interface or a type that matches
-it's layout exactly (such as ISlangUnknown). Trying to use this template with a class that doesn't follow
-these rules, will lead to undefined behavior.
-This is a 'strong' pointer type, and will AddRef when a non null pointer is set and Release when the pointer
-leaves scope.
-Using 'detach' allows a pointer to be removed from the management of the ComPtr.
-To set the smart pointer to null, there is the method setNull, or alternatively just assign SLANG_NULL/nullptr.
-
-One edge case using the template is that sometimes you want access as a pointer to a pointer. Sometimes this
-is to write into the smart pointer, other times to pass as an array. To handle these different behaviors
-there are the methods readRef and writeRef, which are used instead of the & (ref) operator. For example
-
-\code
-Void doSomething(ID3D12Resource** resources, IndexT numResources);
-// ...
-ComPtr<ID3D12Resource> resources[3];
-doSomething(resources[0].readRef(), SLANG_COUNT_OF(resource));
-\endcode
-
-A more common scenario writing to the pointer
-
-\code
-IUnknown* unk = ...;
-
-ComPtr<ID3D12Resource> resource;
-Result res = unk->QueryInterface(resource.writeRef());
-\endcode
-*/
-
-// Enum to force initializing as an attach (without adding a reference)
-enum InitAttach
-{
-    INIT_ATTACH
-};
-
-template <class T>
-class ComPtr
-{
-public:
-	typedef T Type;
-	typedef ComPtr ThisType;
-	typedef ISlangUnknown* Ptr;
-
-		/// Constructors
-		/// Default Ctor. Sets to nullptr
-	SLANG_FORCE_INLINE ComPtr() :m_ptr(nullptr) {}
-    SLANG_FORCE_INLINE ComPtr(std::nullptr_t) : m_ptr(nullptr) {}
-		/// Sets, and ref counts.
-	SLANG_FORCE_INLINE explicit ComPtr(T* ptr) :m_ptr(ptr) { if (ptr) ((Ptr)ptr)->addRef(); }
-		/// The copy ctor
-	SLANG_FORCE_INLINE ComPtr(const ThisType& rhs) : m_ptr(rhs.m_ptr) { if (m_ptr) ((Ptr)m_ptr)->addRef(); }
-
-        /// Ctor without adding to ref count.
-    SLANG_FORCE_INLINE explicit ComPtr(InitAttach, T* ptr) :m_ptr(ptr) { }
-        /// Ctor without adding to ref count
-    SLANG_FORCE_INLINE ComPtr(InitAttach, const ThisType& rhs) : m_ptr(rhs.m_ptr) { }
-
-#ifdef SLANG_HAS_MOVE_SEMANTICS
-		/// Move Ctor
-	SLANG_FORCE_INLINE ComPtr(ThisType&& rhs) : m_ptr(rhs.m_ptr) { rhs.m_ptr = nullptr; }
-		/// Move assign
-	SLANG_FORCE_INLINE ComPtr& operator=(ThisType&& rhs) { T* swap = m_ptr; m_ptr = rhs.m_ptr; rhs.m_ptr = swap; return *this; }
-#endif
-
-	/// Destructor releases the pointer, assuming it is set
-	SLANG_FORCE_INLINE ~ComPtr() { if (m_ptr) ((Ptr)m_ptr)->release(); }
-
-	// !!! Operators !!!
-
-	  /// Returns the dumb pointer
-	SLANG_FORCE_INLINE operator T *() const { return m_ptr; }
-
-	SLANG_FORCE_INLINE T& operator*() { return *m_ptr; }
-		/// For making method invocations through the smart pointer work through the dumb pointer
-	SLANG_FORCE_INLINE T* operator->() const { return m_ptr; }
-
-		/// Assign
-	SLANG_FORCE_INLINE const ThisType &operator=(const ThisType& rhs);
-		/// Assign from dumb ptr
-	SLANG_FORCE_INLINE T* operator=(T* in);
-
-		/// Get the pointer and don't ref
-	SLANG_FORCE_INLINE T* get() const { return m_ptr; }
-		/// Release a contained nullptr pointer if set
-	SLANG_FORCE_INLINE void setNull();
-
-		/// Detach
-	SLANG_FORCE_INLINE T* detach() { T* ptr = m_ptr; m_ptr = nullptr; return ptr; }
-		/// Set to a pointer without changing the ref count
-	SLANG_FORCE_INLINE void attach(T* in) { m_ptr = in; }
-
-		/// Get ready for writing (nulls contents)
-	SLANG_FORCE_INLINE T** writeRef() { setNull(); return &m_ptr; }
-		/// Get for read access
-	SLANG_FORCE_INLINE T*const* readRef() const { return &m_ptr; }
-
-		/// Swap
-	void swap(ThisType& rhs);
-
-protected:
-	/// Gets the address of the dumb pointer.
-    // Disabled: use writeRef and readRef to get a reference based on usage.
-#ifndef SLANG_COM_PTR_ENABLE_REF_OPERATOR
-	SLANG_FORCE_INLINE T** operator&() = delete;
-#endif
-
-	T* m_ptr;
-};
-
-//----------------------------------------------------------------------------
-template <typename T>
-void ComPtr<T>::setNull()
-{
-	if (m_ptr)
-	{
-		((Ptr)m_ptr)->release();
-		m_ptr = nullptr;
-	}
-}
-//----------------------------------------------------------------------------
-template <typename T>
-const ComPtr<T>& ComPtr<T>::operator=(const ThisType& rhs)
-{
-	if (rhs.m_ptr) ((Ptr)rhs.m_ptr)->addRef();
-	if (m_ptr) ((Ptr)m_ptr)->release();
-	m_ptr = rhs.m_ptr;
-	return *this;
-}
-//----------------------------------------------------------------------------
-template <typename T>
-T* ComPtr<T>::operator=(T* ptr)
-{
-	if (ptr) ((Ptr)ptr)->addRef();
-	if (m_ptr) ((Ptr)m_ptr)->release();
-	m_ptr = ptr;
-	return m_ptr;
-}
-//----------------------------------------------------------------------------
-template <typename T>
-void ComPtr<T>::swap(ThisType& rhs)
-{
-	T* tmp = m_ptr;
-	m_ptr = rhs.m_ptr;
-	rhs.m_ptr = tmp;
-}
-
-} // namespace Slang
-
-#endif // SLANG_COM_PTR_H
diff --git a/external/slang/slang-tag-version.h b/external/slang/slang-tag-version.h
deleted file mode 100644
index 3209026c..00000000
--- a/external/slang/slang-tag-version.h
+++ /dev/null
@@ -1 +0,0 @@
-#define SLANG_TAG_VERSION "v2024.1.22" 
diff --git a/external/slang/slang.h b/external/slang/slang.h
deleted file mode 100644
index cdd20f99..00000000
--- a/external/slang/slang.h
+++ /dev/null
@@ -1,5158 +0,0 @@
-#ifndef SLANG_H
-#define SLANG_H
-
-/** \file slang.h
-
-The Slang API provides services to compile, reflect, and specialize code
-written in the Slang shading language.
-*/
-
-/*
-The following section attempts to detect the compiler and version in use.
-
-If an application defines `SLANG_COMPILER` before including this header,
-they take responsibility for setting any compiler-dependent macros
-used later in the file.
-
-Most applications should not need to touch this section.
-*/
-#ifndef SLANG_COMPILER
-#    define SLANG_COMPILER
-
-/*
-Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/
-NOTE that SLANG_VC holds the compiler version - not just 1 or 0
-*/
-#    if defined(_MSC_VER)
-#        if _MSC_VER >= 1900
-#            define SLANG_VC 14
-#        elif _MSC_VER >= 1800
-#            define SLANG_VC 12
-#        elif _MSC_VER >= 1700
-#            define SLANG_VC 11
-#        elif _MSC_VER >= 1600
-#            define SLANG_VC 10
-#        elif _MSC_VER >= 1500
-#            define SLANG_VC 9
-#        else
-#            error "unknown version of Visual C++ compiler"
-#        endif
-#    elif defined(__clang__)
-#        define SLANG_CLANG 1
-#    elif defined(__SNC__)
-#        define SLANG_SNC 1
-#    elif defined(__ghs__)
-#        define SLANG_GHS 1
-#    elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */
-#        define SLANG_GCC 1
-#    else
-#        error "unknown compiler"
-#    endif
-/*
-Any compilers not detected by the above logic are now now explicitly zeroed out.
-*/
-#    ifndef SLANG_VC
-#        define SLANG_VC 0
-#    endif
-#    ifndef SLANG_CLANG
-#        define SLANG_CLANG 0
-#    endif
-#    ifndef SLANG_SNC
-#        define SLANG_SNC 0
-#    endif
-#    ifndef SLANG_GHS
-#        define SLANG_GHS 0
-#    endif
-#    ifndef SLANG_GCC
-#        define SLANG_GCC 0
-#    endif
-#endif /* SLANG_COMPILER */
-
-/*
-The following section attempts to detect the target platform being compiled for.
-
-If an application defines `SLANG_PLATFORM` before including this header,
-they take responsibility for setting any compiler-dependent macros
-used later in the file.
-
-Most applications should not need to touch this section.
-*/
-#ifndef SLANG_PLATFORM
-#    define SLANG_PLATFORM
-/**
-Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSystems/
-*/
-#    if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_PARTITION_APP
-#        define SLANG_WINRT 1 /* Windows Runtime, either on Windows RT or Windows 8 */
-#    elif defined(XBOXONE)
-#        define SLANG_XBOXONE 1
-#    elif defined(_WIN64) /* note: XBOXONE implies _WIN64 */
-#        define SLANG_WIN64 1
-#    elif defined(_M_PPC)
-#        define SLANG_X360 1
-#    elif defined(_WIN32) /* note: _M_PPC implies _WIN32 */
-#        define SLANG_WIN32 1
-#    elif defined(__ANDROID__)
-#        define SLANG_ANDROID 1
-#    elif defined(__linux__) || defined(__CYGWIN__) /* note: __ANDROID__ implies __linux__ */
-#        define SLANG_LINUX 1
-#    elif defined(__APPLE__)
-#        include "TargetConditionals.h"
-#        if TARGET_OS_MAC
-#            define SLANG_OSX 1
-#        else
-#            define SLANG_IOS 1
-#        endif
-#    elif defined(__CELLOS_LV2__)
-#        define SLANG_PS3 1
-#    elif defined(__ORBIS__)
-#        define SLANG_PS4 1
-#    elif defined(__SNC__) && defined(__arm__)
-#        define SLANG_PSP2 1
-#    elif defined(__ghs__)
-#        define SLANG_WIIU 1
-#    else
-#        error "unknown target platform"
-#    endif
-/*
-Any platforms not detected by the above logic are now now explicitly zeroed out.
-*/
-#    ifndef SLANG_WINRT
-#        define SLANG_WINRT 0
-#    endif
-#    ifndef SLANG_XBOXONE
-#        define SLANG_XBOXONE 0
-#    endif
-#    ifndef SLANG_WIN64
-#        define SLANG_WIN64 0
-#    endif
-#    ifndef SLANG_X360
-#        define SLANG_X360 0
-#    endif
-#    ifndef SLANG_WIN32
-#        define SLANG_WIN32 0
-#    endif
-#    ifndef SLANG_ANDROID
-#        define SLANG_ANDROID 0
-#    endif
-#    ifndef SLANG_LINUX
-#        define SLANG_LINUX 0
-#    endif
-#    ifndef SLANG_IOS
-#        define SLANG_IOS 0
-#    endif
-#    ifndef SLANG_OSX
-#        define SLANG_OSX 0
-#    endif
-#    ifndef SLANG_PS3
-#        define SLANG_PS3 0
-#    endif
-#    ifndef SLANG_PS4
-#        define SLANG_PS4 0
-#    endif
-#    ifndef SLANG_PSP2
-#        define SLANG_PSP2 0
-#    endif
-#    ifndef SLANG_WIIU
-#        define SLANG_WIIU 0
-#    endif
-#endif /* SLANG_PLATFORM */
-
-/* Shorthands for "families" of compilers/platforms */
-#define SLANG_GCC_FAMILY (SLANG_CLANG || SLANG_SNC || SLANG_GHS || SLANG_GCC)
-#define SLANG_WINDOWS_FAMILY (SLANG_WINRT || SLANG_WIN32 || SLANG_WIN64)
-#define SLANG_MICROSOFT_FAMILY (SLANG_XBOXONE || SLANG_X360 || SLANG_WINDOWS_FAMILY)
-#define SLANG_LINUX_FAMILY (SLANG_LINUX || SLANG_ANDROID)
-#define SLANG_APPLE_FAMILY (SLANG_IOS || SLANG_OSX)                  /* equivalent to #if __APPLE__ */
-#define SLANG_UNIX_FAMILY (SLANG_LINUX_FAMILY || SLANG_APPLE_FAMILY) /* shortcut for unix/posix platforms */
-
-/* Macros concerning DirectX */
-#if !defined(SLANG_CONFIG_DX_ON_VK) || !SLANG_CONFIG_DX_ON_VK
-#    define SLANG_ENABLE_DXVK 0
-#    define SLANG_ENABLE_VKD3D 0
-#else
-#    define SLANG_ENABLE_DXVK 1
-#    define SLANG_ENABLE_VKD3D 1
-#endif
-
-#if SLANG_WINDOWS_FAMILY
-#    define SLANG_ENABLE_DIRECTX 1
-#    define SLANG_ENABLE_DXGI_DEBUG 1
-#    define SLANG_ENABLE_DXBC_SUPPORT 1
-#    define SLANG_ENABLE_PIX 1
-#elif SLANG_LINUX_FAMILY
-#    define SLANG_ENABLE_DIRECTX (SLANG_ENABLE_DXVK || SLANG_ENABLE_VKD3D)
-#    define SLANG_ENABLE_DXGI_DEBUG 0
-#    define SLANG_ENABLE_DXBC_SUPPORT 0
-#    define SLANG_ENABLE_PIX 0
-#else
-#    define SLANG_ENABLE_DIRECTX 0
-#    define SLANG_ENABLE_DXGI_DEBUG 0
-#    define SLANG_ENABLE_DXBC_SUPPORT 0
-#    define SLANG_ENABLE_PIX 0
-#endif
-
-/* Macro for declaring if a method is no throw. Should be set before the return parameter. */
-#ifndef SLANG_NO_THROW
-#   if SLANG_WINDOWS_FAMILY && !defined(SLANG_DISABLE_EXCEPTIONS)
-#       define SLANG_NO_THROW __declspec(nothrow)
-#   endif
-#endif
-#ifndef SLANG_NO_THROW
-#   define SLANG_NO_THROW
-#endif
-
-/* The `SLANG_STDCALL` and `SLANG_MCALL` defines are used to set the calling
-convention for interface methods.
-*/
-#ifndef SLANG_STDCALL
-#   if SLANG_MICROSOFT_FAMILY
-#       define SLANG_STDCALL __stdcall
-#   else
-#       define SLANG_STDCALL
-#   endif
-#endif
-#ifndef SLANG_MCALL
-#   define SLANG_MCALL SLANG_STDCALL
-#endif
-
-
-#if !defined(SLANG_STATIC) && !defined(SLANG_DYNAMIC)
-    #define SLANG_DYNAMIC
-#endif
-
-#if defined(_MSC_VER)
-#   define SLANG_DLL_EXPORT __declspec(dllexport)
-#else
-#   if 0 && __GNUC__ >= 4
-// Didn't work on latest gcc on linux.. so disable for now
-// https://gcc.gnu.org/wiki/Visibility
-#       define SLANG_DLL_EXPORT __attribute__ ((dllexport))
-#   else
-#       define SLANG_DLL_EXPORT __attribute__((__visibility__("default")))
-#   endif
-#endif
-
-#if defined(SLANG_DYNAMIC)
-#   if defined(_MSC_VER)
-#       ifdef SLANG_DYNAMIC_EXPORT
-#           define SLANG_API SLANG_DLL_EXPORT
-#       else
-#           define SLANG_API __declspec(dllimport)
-#       endif
-#   else
-        // TODO: need to consider compiler capabilities
-//#     ifdef SLANG_DYNAMIC_EXPORT
-#       define SLANG_API SLANG_DLL_EXPORT 
-//#     endif
-#   endif
-#endif
-
-#ifndef SLANG_API
-#   define SLANG_API
-#endif
-
-// GCC Specific
-#if SLANG_GCC_FAMILY
-
-#	define SLANG_NO_INLINE __attribute__((noinline))
-#	define SLANG_FORCE_INLINE inline __attribute__((always_inline))
-#   define SLANG_BREAKPOINT(id) __builtin_trap();
-#	define SLANG_ALIGN_OF(T)	__alignof__(T)
-
-// Use the builtin directly so we don't need to have an include of stddef.h
-#   define SLANG_OFFSET_OF(T, ELEMENT) __builtin_offsetof(T, ELEMENT) 
-#endif // SLANG_GCC_FAMILY
-
-#ifndef SLANG_OFFSET_OF
-#   define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1)
-#endif
-
-// Microsoft VC specific
-#if SLANG_MICROSOFT_FAMILY
-#	define SLANG_NO_INLINE __declspec(noinline)
-#	define SLANG_FORCE_INLINE __forceinline
-#	define SLANG_BREAKPOINT(id) __debugbreak();
-#	define SLANG_ALIGN_OF(T) __alignof(T)
-
-#   define SLANG_INT64(x) (x##i64)
-#   define SLANG_UINT64(x) (x##ui64)
-#endif // SLANG_MICROSOFT_FAMILY
-
-#ifndef SLANG_FORCE_INLINE
-#	define SLANG_FORCE_INLINE inline
-#endif
-#ifndef SLANG_NO_INLINE
-#	define SLANG_NO_INLINE
-#endif
-
-#ifndef SLANG_COMPILE_TIME_ASSERT
-#   define SLANG_COMPILE_TIME_ASSERT(x) static_assert(x)
-#endif
-
-#ifndef SLANG_OFFSET_OF
-#	define SLANG_OFFSET_OF(X, Y) offsetof(X, Y)
-#endif
-
-#ifndef SLANG_BREAKPOINT
-// Make it crash with a write to 0!
-#   define SLANG_BREAKPOINT(id) (*((int*)0) = int(id));
-#endif
-
-// Use for getting the amount of members of a standard C array.
-// Use 0[x] here to catch the case where x has an overloaded subscript operator
-#define SLANG_COUNT_OF(x) (SlangSSizeT(sizeof(x)/sizeof(0[x])))
-/// SLANG_INLINE exists to have a way to inline consistent with SLANG_ALWAYS_INLINE
-#define SLANG_INLINE inline
-
-// If explicilty disabled and not set, set to not available
-#if !defined(SLANG_HAS_EXCEPTIONS) && defined(SLANG_DISABLE_EXCEPTIONS)
-#   define SLANG_HAS_EXCEPTIONS 0
-#endif
-
-// If not set, the default is exceptions are available
-#ifndef SLANG_HAS_EXCEPTIONS
-#   define SLANG_HAS_EXCEPTIONS 1
-#endif
-
-// Other defines
-#define SLANG_STRINGIZE_HELPER(X) #X
-#define SLANG_STRINGIZE(X) SLANG_STRINGIZE_HELPER(X)
-
-#define SLANG_CONCAT_HELPER(X, Y) X##Y
-#define SLANG_CONCAT(X, Y) SLANG_CONCAT_HELPER(X, Y)
-
-#ifndef SLANG_UNUSED
-#	define SLANG_UNUSED(v) (void)v;
-#endif
-
-// Used for doing constant literals
-#ifndef SLANG_INT64
-#	define SLANG_INT64(x) (x##ll)
-#endif
-#ifndef SLANG_UINT64
-#	define SLANG_UINT64(x) (x##ull)
-#endif
-
-
-#ifdef __cplusplus
-#   define SLANG_EXTERN_C extern "C"
-#else
-#   define SLANG_EXTERN_C
-#endif
-
-#ifdef __cplusplus
-// C++ specific macros
-// Clang
-#if SLANG_CLANG
-#    if (__clang_major__*10 + __clang_minor__) >= 33
-#       define SLANG_HAS_MOVE_SEMANTICS 1
-#       define SLANG_HAS_ENUM_CLASS 1
-#       define SLANG_OVERRIDE override
-#    endif
-
-// Gcc
-#elif SLANG_GCC_FAMILY
-// Check for C++11
-#		if (__cplusplus >= 201103L)
-#			if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405
-#				define SLANG_HAS_MOVE_SEMANTICS 1
-#			endif
-#			if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406
-#				define SLANG_HAS_ENUM_CLASS 1
-#			endif
-#			if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407
-#				define SLANG_OVERRIDE override
-#			endif
-#		endif
-
-// TODO(JS): Not used in previous code. Left here as may be useful on some other version. 
-// #define SLANG_RETURN_NEVER __attribute__((__noreturn__))
-
-#       define SLANG_RETURN_NEVER [[noreturn]]
-
-#	endif // SLANG_GCC_FAMILY
-
-// Visual Studio
-
-#	if SLANG_VC
-// C4481: nonstandard extension used: override specifier 'override'
-#		if _MSC_VER < 1700
-#			pragma warning(disable : 4481)
-#		endif
-#		define SLANG_OVERRIDE	override
-#		if _MSC_VER >= 1600
-#			define SLANG_HAS_MOVE_SEMANTICS 1
-#		endif
-#	    if _MSC_VER >= 1700
-#		    define SLANG_HAS_ENUM_CLASS 1
-#       endif
-
-#   define SLANG_RETURN_NEVER __declspec(noreturn)
-
-#   endif // SLANG_VC
-
-// Set non set
-#   ifndef SLANG_OVERRIDE
-#	    define SLANG_OVERRIDE
-#   endif
-#   ifndef SLANG_HAS_ENUM_CLASS
-#	    define SLANG_HAS_ENUM_CLASS 0
-#   endif
-#   ifndef SLANG_HAS_MOVE_SEMANTICS
-#	    define SLANG_HAS_MOVE_SEMANTICS 0
-#   endif
-
-#endif // __cplusplus
-
-#ifndef SLANG_RETURN_NEVER
-#   define SLANG_RETURN_NEVER [[noreturn]]
-#endif // SLANG_RETURN_NEVER
-
-/* Macros for detecting processor */
-#if defined(_M_ARM) || defined(__ARM_EABI__)
-// This is special case for nVidia tegra
-#   define SLANG_PROCESSOR_ARM 1
-#elif defined(__i386__) || defined(_M_IX86)
-#   define SLANG_PROCESSOR_X86 1
-#elif defined(_M_AMD64) || defined(_M_X64) || defined(__amd64) || defined(__x86_64)
-#   define SLANG_PROCESSOR_X86_64 1
-#elif defined(_PPC_) || defined(__ppc__) || defined(__POWERPC__) || defined(_M_PPC)
-#   if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) || defined(__64BIT__) || defined(_LP64) || defined(__LP64__)
-#       define SLANG_PROCESSOR_POWER_PC_64 1
-#   else
-#       define SLANG_PROCESSOR_POWER_PC 1
-#   endif
-#elif defined(__arm__)
-#   define SLANG_PROCESSOR_ARM 1
-#elif defined(_M_ARM64) || defined(__aarch64__)
-#   define SLANG_PROCESSOR_ARM_64 1
-#endif 
-
-#ifndef SLANG_PROCESSOR_ARM
-#   define SLANG_PROCESSOR_ARM 0
-#endif
-
-#ifndef SLANG_PROCESSOR_ARM_64
-#   define SLANG_PROCESSOR_ARM_64 0
-#endif
-
-#ifndef SLANG_PROCESSOR_X86
-#   define SLANG_PROCESSOR_X86 0
-#endif
-
-#ifndef SLANG_PROCESSOR_X86_64
-#   define SLANG_PROCESSOR_X86_64 0
-#endif
-
-#ifndef SLANG_PROCESSOR_POWER_PC
-#   define SLANG_PROCESSOR_POWER_PC 0
-#endif
-
-#ifndef SLANG_PROCESSOR_POWER_PC_64
-#   define SLANG_PROCESSOR_POWER_PC_64 0
-#endif
-
-// Processor families
-
-#define SLANG_PROCESSOR_FAMILY_X86 (SLANG_PROCESSOR_X86_64 | SLANG_PROCESSOR_X86)
-#define SLANG_PROCESSOR_FAMILY_ARM (SLANG_PROCESSOR_ARM | SLANG_PROCESSOR_ARM_64)
-#define SLANG_PROCESSOR_FAMILY_POWER_PC (SLANG_PROCESSOR_POWER_PC_64 | SLANG_PROCESSOR_POWER_PC)
-
-// Pointer size
-#define SLANG_PTR_IS_64 (SLANG_PROCESSOR_ARM_64 | SLANG_PROCESSOR_X86_64 | SLANG_PROCESSOR_POWER_PC_64)
-#define SLANG_PTR_IS_32 (SLANG_PTR_IS_64 ^ 1)
-
-// Processor features
-#if SLANG_PROCESSOR_FAMILY_X86
-#   define SLANG_LITTLE_ENDIAN 1
-#   define SLANG_UNALIGNED_ACCESS 1
-#elif SLANG_PROCESSOR_FAMILY_ARM
-#   if defined(__ARMEB__)
-#       define SLANG_BIG_ENDIAN 1
-#   else
-#       define SLANG_LITTLE_ENDIAN 1
-#   endif
-#elif SLANG_PROCESSOR_FAMILY_POWER_PC
-#       define SLANG_BIG_ENDIAN 1
-#endif
-
-#ifndef SLANG_LITTLE_ENDIAN
-#   define SLANG_LITTLE_ENDIAN 0
-#endif
-
-#ifndef SLANG_BIG_ENDIAN
-#   define SLANG_BIG_ENDIAN 0
-#endif
-
-#ifndef SLANG_UNALIGNED_ACCESS
-#   define SLANG_UNALIGNED_ACCESS 0
-#endif
-
-// One endianess must be set
-#if ((SLANG_BIG_ENDIAN | SLANG_LITTLE_ENDIAN) == 0)
-#   error "Couldn't determine endianess"
-#endif
-
-#ifndef  SLANG_NO_INTTYPES
-#include <inttypes.h>
-#endif // ! SLANG_NO_INTTYPES
-
-#ifndef  SLANG_NO_STDDEF
-#include <stddef.h>
-#endif // ! SLANG_NO_STDDEF
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-    /*!
-    @mainpage Introduction
-
-    API Reference: slang.h
-
-    @file slang.h
-    */
-
-    typedef uint32_t    SlangUInt32;
-    typedef int32_t     SlangInt32;
-
-    // Use SLANG_PTR_ macros to determine SlangInt/SlangUInt types.
-    // This is used over say using size_t/ptrdiff_t/intptr_t/uintptr_t, because on some targets, these types are distinct from
-    // their uint_t/int_t equivalents and so produce ambiguity with function overloading.
-    //
-    // SlangSizeT is helpful as on some compilers size_t is distinct from a regular integer type and so overloading doesn't work.
-    // Casting to SlangSizeT works around this.
-#if SLANG_PTR_IS_64
-    typedef int64_t    SlangInt;
-    typedef uint64_t   SlangUInt;
-
-    typedef int64_t    SlangSSizeT;
-    typedef uint64_t   SlangSizeT;
-#else
-    typedef int32_t    SlangInt;
-    typedef uint32_t   SlangUInt;
-
-    typedef int32_t    SlangSSizeT;
-    typedef uint32_t   SlangSizeT;
-#endif
-
-    typedef bool SlangBool;
-
-    
-    /*!
-    @brief Severity of a diagnostic generated by the compiler.
-    Values come from the enum below, with higher values representing more severe
-    conditions, and all values >= SLANG_SEVERITY_ERROR indicating compilation
-    failure.
-    */
-    typedef int SlangSeverityIntegral;
-    enum SlangSeverity : SlangSeverityIntegral
-    {
-        SLANG_SEVERITY_DISABLED = 0, /**< A message that is disabled, filtered out. */
-        SLANG_SEVERITY_NOTE,         /**< An informative message. */
-        SLANG_SEVERITY_WARNING,      /**< A warning, which indicates a possible proble. */
-        SLANG_SEVERITY_ERROR,        /**< An error, indicating that compilation failed. */
-        SLANG_SEVERITY_FATAL,        /**< An unrecoverable error, which forced compilation to abort. */
-        SLANG_SEVERITY_INTERNAL,     /**< An internal error, indicating a logic error in the compiler. */
-    };
-
-    typedef int SlangDiagnosticFlags;
-    enum
-    {
-        SLANG_DIAGNOSTIC_FLAG_VERBOSE_PATHS = 0x01,
-        SLANG_DIAGNOSTIC_FLAG_TREAT_WARNINGS_AS_ERRORS = 0x02
-    };
-
-    typedef int SlangBindableResourceIntegral;
-    enum SlangBindableResourceType : SlangBindableResourceIntegral
-    {
-        SLANG_NON_BINDABLE = 0,
-        SLANG_TEXTURE,
-        SLANG_SAMPLER,
-        SLANG_UNIFORM_BUFFER,
-        SLANG_STORAGE_BUFFER,
-    };
-
-    /* NOTE! To keep binary compatibility care is needed with this enum!
-
-    * To add value, only add at the bottom (before COUNT_OF) 
-    * To remove a value, add _DEPRECATED as a suffix, but leave in the list
-    
-    This will make the enum values stable, and compatible with libraries that might not use the latest
-    enum values.
-    */
-    typedef int SlangCompileTargetIntegral;
-    enum SlangCompileTarget : SlangCompileTargetIntegral
-    {
-        SLANG_TARGET_UNKNOWN,
-        SLANG_TARGET_NONE,
-        SLANG_GLSL,
-        SLANG_GLSL_VULKAN_DEPRECATED,              //< deprecated and removed: just use `SLANG_GLSL`.
-        SLANG_GLSL_VULKAN_ONE_DESC_DEPRECATED,     //< deprecated and removed.
-        SLANG_HLSL,
-        SLANG_SPIRV,
-        SLANG_SPIRV_ASM,
-        SLANG_DXBC,
-        SLANG_DXBC_ASM,
-        SLANG_DXIL,
-        SLANG_DXIL_ASM,
-        SLANG_C_SOURCE,                 ///< The C language
-        SLANG_CPP_SOURCE,               ///< C++ code for shader kernels.
-        SLANG_HOST_EXECUTABLE,          ///< Standalone binary executable (for hosting CPU/OS)
-        SLANG_SHADER_SHARED_LIBRARY,    ///< A shared library/Dll for shader kernels (for hosting CPU/OS)
-        SLANG_SHADER_HOST_CALLABLE,     ///< A CPU target that makes the compiled shader code available to be run immediately
-        SLANG_CUDA_SOURCE,              ///< Cuda source
-        SLANG_PTX,                      ///< PTX
-        SLANG_CUDA_OBJECT_CODE,         ///< Object code that contains CUDA functions.
-        SLANG_OBJECT_CODE,              ///< Object code that can be used for later linking
-        SLANG_HOST_CPP_SOURCE,          ///< C++ code for host library or executable.
-        SLANG_HOST_HOST_CALLABLE,       ///< Host callable host code (ie non kernel/shader) 
-        SLANG_CPP_PYTORCH_BINDING,      ///< C++ PyTorch binding code.
-        SLANG_METAL,                    ///< Metal shading language
-        SLANG_METAL_LIB,                ///< Metal library
-        SLANG_METAL_LIB_ASM,            ///< Metal library assembly
-        SLANG_HOST_SHARED_LIBRARY,      ///< A shared library/Dll for host code (for hosting CPU/OS)
-        SLANG_TARGET_COUNT_OF,
-    };
-
-    /* A "container format" describes the way that the outputs
-    for multiple files, entry points, targets, etc. should be
-    combined into a single artifact for output. */
-    typedef int SlangContainerFormatIntegral;
-    enum SlangContainerFormat : SlangContainerFormatIntegral
-    {
-        /* Don't generate a container. */
-        SLANG_CONTAINER_FORMAT_NONE,
-
-        /* Generate a container in the `.slang-module` format,
-        which includes reflection information, compiled kernels, etc. */
-        SLANG_CONTAINER_FORMAT_SLANG_MODULE,
-    };
-
-    typedef int SlangPassThroughIntegral;
-    enum SlangPassThrough : SlangPassThroughIntegral
-    {
-        SLANG_PASS_THROUGH_NONE,
-        SLANG_PASS_THROUGH_FXC,
-        SLANG_PASS_THROUGH_DXC,
-        SLANG_PASS_THROUGH_GLSLANG,
-        SLANG_PASS_THROUGH_SPIRV_DIS,
-        SLANG_PASS_THROUGH_CLANG,                   ///< Clang C/C++ compiler 
-        SLANG_PASS_THROUGH_VISUAL_STUDIO,           ///< Visual studio C/C++ compiler
-        SLANG_PASS_THROUGH_GCC,                     ///< GCC C/C++ compiler
-        SLANG_PASS_THROUGH_GENERIC_C_CPP,           ///< Generic C or C++ compiler, which is decided by the source type
-        SLANG_PASS_THROUGH_NVRTC,                   ///< NVRTC Cuda compiler
-        SLANG_PASS_THROUGH_LLVM,                    ///< LLVM 'compiler' - includes LLVM and Clang
-        SLANG_PASS_THROUGH_SPIRV_OPT,               ///< SPIRV-opt
-        SLANG_PASS_THROUGH_METAL,                   ///< Metal compiler
-        SLANG_PASS_THROUGH_COUNT_OF,
-    };
-
-    /* Defines an archive type used to holds a 'file system' type structure. */
-    typedef int SlangArchiveTypeIntegral;
-    enum SlangArchiveType : SlangArchiveTypeIntegral
-    {
-        SLANG_ARCHIVE_TYPE_UNDEFINED,
-        SLANG_ARCHIVE_TYPE_ZIP,
-        SLANG_ARCHIVE_TYPE_RIFF,                ///< Riff container with no compression
-        SLANG_ARCHIVE_TYPE_RIFF_DEFLATE,
-        SLANG_ARCHIVE_TYPE_RIFF_LZ4,
-        SLANG_ARCHIVE_TYPE_COUNT_OF,
-    };
-
-    /*!
-    Flags to control compilation behavior.
-    */
-    typedef unsigned int SlangCompileFlags;
-    enum
-    {
-        /* Do as little mangling of names as possible, to try to preserve original names */
-        SLANG_COMPILE_FLAG_NO_MANGLING          = 1 << 3,
-
-        /* Skip code generation step, just check the code and generate layout */
-        SLANG_COMPILE_FLAG_NO_CODEGEN           = 1 << 4,
-
-        /* Obfuscate shader names on release products */
-        SLANG_COMPILE_FLAG_OBFUSCATE = 1 << 5,
-
-        /* Deprecated flags: kept around to allow existing applications to
-        compile. Note that the relevant features will still be left in
-        their default state. */
-        SLANG_COMPILE_FLAG_NO_CHECKING          = 0,
-        SLANG_COMPILE_FLAG_SPLIT_MIXED_TYPES    = 0,
-    };
-
-    /*!
-    @brief Flags to control code generation behavior of a compilation target */
-    typedef unsigned int SlangTargetFlags;
-    enum 
-    {
-        /* When compiling for a D3D Shader Model 5.1 or higher target, allocate
-           distinct register spaces for parameter blocks.
-
-           @deprecated This behavior is now enabled unconditionally.
-        */
-        SLANG_TARGET_FLAG_PARAMETER_BLOCKS_USE_REGISTER_SPACES = 1 << 4,
-
-        /* When set, will generate target code that contains all entrypoints defined
-           in the input source or specified via the `spAddEntryPoint` function in a
-           single output module (library/source file).
-        */
-        SLANG_TARGET_FLAG_GENERATE_WHOLE_PROGRAM = 1 << 8,
-
-        /* When set, will dump out the IR between intermediate compilation steps.*/
-        SLANG_TARGET_FLAG_DUMP_IR = 1 << 9,
-
-        /* When set, will generate SPIRV directly rather than via glslang. */
-        SLANG_TARGET_FLAG_GENERATE_SPIRV_DIRECTLY = 1 << 10,
-    };
-    constexpr static SlangTargetFlags kDefaultTargetFlags = SLANG_TARGET_FLAG_GENERATE_SPIRV_DIRECTLY;
-
-    /*!
-    @brief Options to control floating-point precision guarantees for a target.
-    */
-    typedef unsigned int SlangFloatingPointModeIntegral;
-    enum SlangFloatingPointMode : SlangFloatingPointModeIntegral
-    {
-        SLANG_FLOATING_POINT_MODE_DEFAULT = 0,
-        SLANG_FLOATING_POINT_MODE_FAST,
-        SLANG_FLOATING_POINT_MODE_PRECISE,
-    };
-
-    /*!
-    @brief Options to control emission of `#line` directives
-    */
-    typedef unsigned int SlangLineDirectiveModeIntegral;
-    enum SlangLineDirectiveMode : SlangLineDirectiveModeIntegral
-    {
-        SLANG_LINE_DIRECTIVE_MODE_DEFAULT = 0,  /**< Default behavior: pick behavior base on target. */
-        SLANG_LINE_DIRECTIVE_MODE_NONE,         /**< Don't emit line directives at all. */
-        SLANG_LINE_DIRECTIVE_MODE_STANDARD,     /**< Emit standard C-style `#line` directives. */
-        SLANG_LINE_DIRECTIVE_MODE_GLSL,         /**< Emit GLSL-style directives with file *number* instead of name */
-        SLANG_LINE_DIRECTIVE_MODE_SOURCE_MAP,   /**< Use a source map to track line mappings (ie no #line will appear in emitting source) */
-    };
-
-    typedef int SlangSourceLanguageIntegral;
-    enum SlangSourceLanguage : SlangSourceLanguageIntegral
-    {
-        SLANG_SOURCE_LANGUAGE_UNKNOWN,
-        SLANG_SOURCE_LANGUAGE_SLANG,
-        SLANG_SOURCE_LANGUAGE_HLSL,
-        SLANG_SOURCE_LANGUAGE_GLSL,
-        SLANG_SOURCE_LANGUAGE_C,
-        SLANG_SOURCE_LANGUAGE_CPP,
-        SLANG_SOURCE_LANGUAGE_CUDA,
-        SLANG_SOURCE_LANGUAGE_SPIRV,
-        SLANG_SOURCE_LANGUAGE_METAL,
-        SLANG_SOURCE_LANGUAGE_COUNT_OF,
-    };
-
-    typedef unsigned int SlangProfileIDIntegral;
-    enum SlangProfileID : SlangProfileIDIntegral
-    {
-        SLANG_PROFILE_UNKNOWN,
-    };
-
-
-    typedef SlangInt32 SlangCapabilityIDIntegral;
-    enum SlangCapabilityID : SlangCapabilityIDIntegral
-    {
-        SLANG_CAPABILITY_UNKNOWN = 0,
-    };
-
-    typedef unsigned int SlangMatrixLayoutModeIntegral;
-    enum SlangMatrixLayoutMode : SlangMatrixLayoutModeIntegral
-    {
-        SLANG_MATRIX_LAYOUT_MODE_UNKNOWN = 0,
-        SLANG_MATRIX_LAYOUT_ROW_MAJOR,
-        SLANG_MATRIX_LAYOUT_COLUMN_MAJOR,
-    };
-
-    typedef SlangUInt32 SlangStageIntegral;
-    enum SlangStage : SlangStageIntegral
-    {
-        SLANG_STAGE_NONE,
-        SLANG_STAGE_VERTEX,
-        SLANG_STAGE_HULL,
-        SLANG_STAGE_DOMAIN,
-        SLANG_STAGE_GEOMETRY,
-        SLANG_STAGE_FRAGMENT,
-        SLANG_STAGE_COMPUTE,
-        SLANG_STAGE_RAY_GENERATION,
-        SLANG_STAGE_INTERSECTION,
-        SLANG_STAGE_ANY_HIT,
-        SLANG_STAGE_CLOSEST_HIT,
-        SLANG_STAGE_MISS,
-        SLANG_STAGE_CALLABLE,
-        SLANG_STAGE_MESH,
-        SLANG_STAGE_AMPLIFICATION,
-
-        // alias:
-        SLANG_STAGE_PIXEL = SLANG_STAGE_FRAGMENT,
-    };
-
-    typedef SlangUInt32 SlangDebugInfoLevelIntegral;
-    enum SlangDebugInfoLevel : SlangDebugInfoLevelIntegral
-    {
-        SLANG_DEBUG_INFO_LEVEL_NONE = 0,    /**< Don't emit debug information at all. */
-        SLANG_DEBUG_INFO_LEVEL_MINIMAL,     /**< Emit as little debug information as possible, while still supporting stack trackes. */
-        SLANG_DEBUG_INFO_LEVEL_STANDARD,    /**< Emit whatever is the standard level of debug information for each target. */
-        SLANG_DEBUG_INFO_LEVEL_MAXIMAL,     /**< Emit as much debug infromation as possible for each target. */
-        
-    };
-
-    /* Describes the debugging information format produced during a compilation. */
-    typedef SlangUInt32 SlangDebugInfoFormatIntegral;
-    enum SlangDebugInfoFormat : SlangDebugInfoFormatIntegral
-    {
-        SLANG_DEBUG_INFO_FORMAT_DEFAULT,         ///< Use the default debugging format for the target 
-        SLANG_DEBUG_INFO_FORMAT_C7,              ///< CodeView C7 format (typically means debugging infomation is embedded in the binary)
-        SLANG_DEBUG_INFO_FORMAT_PDB,             ///< Program database
-        
-        SLANG_DEBUG_INFO_FORMAT_STABS,          ///< Stabs
-        SLANG_DEBUG_INFO_FORMAT_COFF,           ///< COFF debug info
-        SLANG_DEBUG_INFO_FORMAT_DWARF,          ///< DWARF debug info (we may want to support specifying the version)
-
-        SLANG_DEBUG_INFO_FORMAT_COUNT_OF,
-    };
-
-    typedef SlangUInt32 SlangOptimizationLevelIntegral;
-    enum SlangOptimizationLevel : SlangOptimizationLevelIntegral
-    {
-        SLANG_OPTIMIZATION_LEVEL_NONE = 0,  /**< Don't optimize at all. */
-        SLANG_OPTIMIZATION_LEVEL_DEFAULT,   /**< Default optimization level: balance code quality and compilation time. */
-        SLANG_OPTIMIZATION_LEVEL_HIGH,      /**< Optimize aggressively. */
-        SLANG_OPTIMIZATION_LEVEL_MAXIMAL,   /**< Include optimizations that may take a very long time, or may involve severe space-vs-speed tradeoffs */
-    };
-
-    // All compiler option names supported by Slang.
-    namespace slang
-    {
-        enum class CompilerOptionName
-        {
-            MacroDefine,        // stringValue0: macro name;  stringValue1: macro value
-            DepFile,
-            EntryPointName,
-            Specialize,
-            Help,
-            HelpStyle,
-            Include,            // stringValue: additional include path.
-            Language,
-            MatrixLayoutColumn, // bool
-            MatrixLayoutRow,    // bool
-            ZeroInitialize,     // bool
-            IgnoreCapabilities, // bool
-            RestrictiveCapabilityCheck, // bool
-            ModuleName,         // stringValue0: module name.
-            Output,
-            Profile,            // intValue0: profile
-            Stage,              // intValue0: stage
-            Target,             // intValue0: CodeGenTarget
-            Version,
-            WarningsAsErrors,   // stringValue0: "all" or comma separated list of warning codes or names.
-            DisableWarnings,    // stringValue0: comma separated list of warning codes or names.
-            EnableWarning,      // stringValue0: warning code or name.
-            DisableWarning,     // stringValue0: warning code or name.
-            DumpWarningDiagnostics,
-            InputFilesRemain,
-            EmitIr,                // bool
-            ReportDownstreamTime,  // bool
-            ReportPerfBenchmark,   // bool
-            SkipSPIRVValidation,   // bool
-            SourceEmbedStyle,
-            SourceEmbedName,
-            SourceEmbedLanguage,
-            DisableShortCircuit,   // bool
-            MinimumSlangOptimization, // bool
-            DisableNonEssentialValidations, // bool
-            DisableSourceMap,       // bool
-            UnscopedEnum,           // bool
-            PreserveParameters,       // bool: preserve all resource parameters in the output code.
-
-            // Target
-
-            Capability,                 // intValue0: CapabilityName
-            DefaultImageFormatUnknown,  // bool
-            DisableDynamicDispatch,     // bool
-            DisableSpecialization,      // bool
-            FloatingPointMode,          // intValue0: FloatingPointMode
-            DebugInformation,           // intValue0: DebugInfoLevel
-            LineDirectiveMode,
-            Optimization,               // intValue0: OptimizationLevel
-            Obfuscate,                  // bool
-
-            VulkanBindShift,            // intValue0 (higher 8 bits): kind; intValue0(lower bits): set; intValue1: shift
-            VulkanBindGlobals,          // intValue0: index; intValue1: set
-            VulkanInvertY,              // bool
-            VulkanUseDxPositionW,       // bool
-            VulkanUseEntryPointName,    // bool
-            VulkanUseGLLayout,          // bool
-            VulkanEmitReflection,       // bool
-
-            GLSLForceScalarLayout,      // bool
-            EnableEffectAnnotations,    // bool
-
-            EmitSpirvViaGLSL,           // bool
-            EmitSpirvDirectly,          // bool
-            SPIRVCoreGrammarJSON,       // stringValue0: json path
-            IncompleteLibrary,          // bool, when set, will not issue an error when the linked program has unresolved extern function symbols.
-
-            // Downstream
-
-            CompilerPath,
-            DefaultDownstreamCompiler,
-            DownstreamArgs,             // stringValue0: downstream compiler name. stringValue1: argument list, one per line.
-            PassThrough,
-
-            // Repro
-
-            DumpRepro,
-            DumpReproOnError,
-            ExtractRepro,
-            LoadRepro,
-            LoadReproDirectory,
-            ReproFallbackDirectory,
-
-            // Debugging
-
-            DumpAst,
-            DumpIntermediatePrefix,
-            DumpIntermediates,      // bool
-            DumpIr,                 // bool
-            DumpIrIds,
-            PreprocessorOutput,
-            OutputIncludes,
-            ReproFileSystem,
-            SerialIr,               // bool
-            SkipCodeGen,            // bool
-            ValidateIr,             // bool
-            VerbosePaths,
-            VerifyDebugSerialIr,
-            NoCodeGen,              // Not used.
-
-            // Experimental
-
-            FileSystem,
-            Heterogeneous,
-            NoMangle,
-            NoHLSLBinding,
-            NoHLSLPackConstantBufferElements,
-            ValidateUniformity,
-            AllowGLSL,
-
-            // Internal
-
-            ArchiveType,
-            CompileStdLib,
-            Doc,
-            IrCompression,
-            LoadStdLib,
-            ReferenceModule,
-            SaveStdLib,
-            SaveStdLibBinSource,
-            TrackLiveness,
-            LoopInversion,              // bool, enable loop inversion optimization
-
-            // Deprecated
-            ParameterBlocksUseRegisterSpaces,
-
-            CountOfParsableOptions,
-
-            // Used in parsed options only.
-            DebugInformationFormat,     // intValue0: DebugInfoFormat
-            VulkanBindShiftAll,         // intValue0: kind; intValue1: shift
-            GenerateWholeProgram,       // bool
-            UseUpToDateBinaryModule,    // bool, when set, will only load
-                                        // precompiled modules if it is up-to-date with its source.
-
-            CountOf,
-        };
-
-        enum class CompilerOptionValueKind
-        {
-            Int,
-            String
-        };
-
-        struct CompilerOptionValue
-        {
-            CompilerOptionValueKind kind = CompilerOptionValueKind::Int;
-            int32_t intValue0 = 0;
-            int32_t intValue1 = 0;
-            const char* stringValue0 = nullptr;
-            const char* stringValue1 = nullptr;
-        };
-
-        struct CompilerOptionEntry
-        {
-            CompilerOptionName name;
-            CompilerOptionValue value;
-        };
-    }
-
-    /** A result code for a Slang API operation.
-
-    This type is generally compatible with the Windows API `HRESULT` type. In particular, negative values indicate
-    failure results, while zero or positive results indicate success.
-
-    In general, Slang APIs always return a zero result on success, unless documented otherwise. Strictly speaking
-    a negative value indicates an error, a positive (or 0) value indicates success. This can be tested for with the macros
-    SLANG_SUCCEEDED(x) or SLANG_FAILED(x).
-
-    It can represent if the call was successful or not. It can also specify in an extensible manner what facility
-    produced the result (as the integral 'facility') as well as what caused it (as an integral 'code').
-    Under the covers SlangResult is represented as a int32_t.
-
-    SlangResult is designed to be compatible with COM HRESULT.
-
-    It's layout in bits is as follows
-
-    Severity | Facility | Code
-    ---------|----------|-----
-    31       |    30-16 | 15-0
-
-    Severity - 1 fail, 0 is success - as SlangResult is signed 32 bits, means negative number indicates failure.
-    Facility is where the error originated from. Code is the code specific to the facility.
-
-    Result codes have the following styles,
-    1) SLANG_name
-    2) SLANG_s_f_name
-    3) SLANG_s_name
-
-    where s is S for success, E for error
-    f is the short version of the facility name
-
-    Style 1 is reserved for SLANG_OK and SLANG_FAIL as they are so commonly used.
-
-    It is acceptable to expand 'f' to a longer name to differentiate a name or drop if unique without it.
-    ie for a facility 'DRIVER' it might make sense to have an error of the form SLANG_E_DRIVER_OUT_OF_MEMORY
-    */
-
-    typedef int32_t SlangResult;
-
-    //! Use to test if a result was failure. Never use result != SLANG_OK to test for failure, as there may be successful codes != SLANG_OK.
-#define SLANG_FAILED(status) ((status) < 0)
-    //! Use to test if a result succeeded. Never use result == SLANG_OK to test for success, as will detect other successful codes as a failure.
-#define SLANG_SUCCEEDED(status) ((status) >= 0)
-
-    //! Get the facility the result is associated with
-#define SLANG_GET_RESULT_FACILITY(r)    ((int32_t)(((r) >> 16) & 0x7fff))
-    //! Get the result code for the facility
-#define SLANG_GET_RESULT_CODE(r)        ((int32_t)((r) & 0xffff))
-
-#define SLANG_MAKE_ERROR(fac, code)        ((((int32_t)(fac)) << 16) | ((int32_t)(code)) | int32_t(0x80000000))
-#define SLANG_MAKE_SUCCESS(fac, code)    ((((int32_t)(fac)) << 16) | ((int32_t)(code)))
-
-    /*************************** Facilities ************************************/
-
-    //! Facilities compatible with windows COM - only use if known code is compatible
-#define SLANG_FACILITY_WIN_GENERAL      0
-#define SLANG_FACILITY_WIN_INTERFACE    4
-#define SLANG_FACILITY_WIN_API          7
-
-    //! Base facility -> so as to not clash with HRESULT values (values in 0x200 range do not appear used)
-#define SLANG_FACILITY_BASE         0x200
-
-    /*! Facilities numbers must be unique across a project to make the resulting result a unique number.
-    It can be useful to have a consistent short name for a facility, as used in the name prefix */
-#define SLANG_FACILITY_CORE             SLANG_FACILITY_BASE
-    /* Facility for codes, that are not uniquely defined/protected. Can be used to pass back a specific error without requiring system wide facility uniqueness. Codes
-    should never be part of a public API. */
-#define SLANG_FACILITY_INTERNAL         SLANG_FACILITY_BASE + 1
-
-    /// Base for external facilities. Facilities should be unique across modules.
-#define SLANG_FACILITY_EXTERNAL_BASE 0x210
-
-    /* ************************ Win COM compatible Results ******************************/
-    // https://msdn.microsoft.com/en-us/library/windows/desktop/aa378137(v=vs.85).aspx
-
-    //! SLANG_OK indicates success, and is equivalent to SLANG_MAKE_SUCCESS(SLANG_FACILITY_WIN_GENERAL, 0)
-#define SLANG_OK                          0
-    //! SLANG_FAIL is the generic failure code - meaning a serious error occurred and the call couldn't complete
-#define SLANG_FAIL                          SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_GENERAL, 0x4005)
-
-#define SLANG_MAKE_WIN_GENERAL_ERROR(code)  SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_GENERAL, code)
-
-    //! Functionality is not implemented
-#define SLANG_E_NOT_IMPLEMENTED             SLANG_MAKE_WIN_GENERAL_ERROR(0x4001)
-    //! Interface not be found
-#define SLANG_E_NO_INTERFACE                SLANG_MAKE_WIN_GENERAL_ERROR(0x4002)
-    //! Operation was aborted (did not correctly complete)
-#define SLANG_E_ABORT                       SLANG_MAKE_WIN_GENERAL_ERROR(0x4004) 
-
-    //! Indicates that a handle passed in as parameter to a method is invalid.
-#define SLANG_E_INVALID_HANDLE              SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_API, 6)
-    //! Indicates that an argument passed in as parameter to a method is invalid.
-#define SLANG_E_INVALID_ARG                 SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_API, 0x57)
-    //! Operation could not complete - ran out of memory
-#define SLANG_E_OUT_OF_MEMORY               SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_API, 0xe)
-
-    /* *************************** other Results **************************************/
-
-#define SLANG_MAKE_CORE_ERROR(code)         SLANG_MAKE_ERROR(SLANG_FACILITY_CORE, code)
-
-    // Supplied buffer is too small to be able to complete
-#define SLANG_E_BUFFER_TOO_SMALL            SLANG_MAKE_CORE_ERROR(1)
-    //! Used to identify a Result that has yet to be initialized.
-    //! It defaults to failure such that if used incorrectly will fail, as similar in concept to using an uninitialized variable.
-#define SLANG_E_UNINITIALIZED               SLANG_MAKE_CORE_ERROR(2)
-    //! Returned from an async method meaning the output is invalid (thus an error), but a result for the request is pending, and will be returned on a subsequent call with the async handle.
-#define SLANG_E_PENDING                     SLANG_MAKE_CORE_ERROR(3)
-    //! Indicates a file/resource could not be opened
-#define SLANG_E_CANNOT_OPEN                 SLANG_MAKE_CORE_ERROR(4)
-    //! Indicates a file/resource could not be found
-#define SLANG_E_NOT_FOUND                   SLANG_MAKE_CORE_ERROR(5)
-    //! An unhandled internal failure (typically from unhandled exception)
-#define SLANG_E_INTERNAL_FAIL               SLANG_MAKE_CORE_ERROR(6)
-    //! Could not complete because some underlying feature (hardware or software) was not available 
-#define SLANG_E_NOT_AVAILABLE               SLANG_MAKE_CORE_ERROR(7)
-        //! Could not complete because the operation times out. 
-#define SLANG_E_TIME_OUT                    SLANG_MAKE_CORE_ERROR(8)
-
-    /** A "Universally Unique Identifier" (UUID)
-
-    The Slang API uses UUIDs to identify interfaces when
-    using `queryInterface`.
-
-    This type is compatible with the `GUID` type defined
-    by the Component Object Model (COM), but Slang is
-    not dependent on COM.
-    */
-    struct SlangUUID
-    {
-        uint32_t data1;
-        uint16_t data2;
-        uint16_t data3;
-        uint8_t  data4[8];
-    };
-
-// Place at the start of an interface with the guid.
-// Guid should be specified as SLANG_COM_INTERFACE(0x00000000, 0x0000, 0x0000, { 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46 })
-// NOTE: it's the typical guid struct definition, without the surrounding {}
-// It is not necessary to use the multiple parameters (we can wrap in parens), but this is simple.
-#define SLANG_COM_INTERFACE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
-    public: \
-    SLANG_FORCE_INLINE constexpr static SlangUUID getTypeGuid() \
-    { \
-        return { a, b, c, d0, d1, d2, d3, d4, d5, d6, d7 }; \
-    }
-
-// Sometimes it's useful to associate a guid with a class to identify it. This macro can used for this,
-// and the guid extracted via the getTypeGuid() function defined in the type
-#define SLANG_CLASS_GUID(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
-    SLANG_FORCE_INLINE constexpr static SlangUUID getTypeGuid() \
-    { \
-        return { a, b, c, d0, d1, d2, d3, d4, d5, d6, d7 }; \
-    }
-
-// Helper to fill in pairs of GUIDs and return pointers. This ensures that the
-// type of the GUID passed matches the pointer type, and that it is derived
-// from ISlangUnknown,
-// TODO(c++20): would is_derived_from be more appropriate here for private inheritance of ISlangUnknown?
-//
-// with     : void createFoo(SlangUUID, void**);
-//            Slang::ComPtr<Bar> myBar;
-// call with: createFoo(SLANG_IID_PPV_ARGS(myBar.writeRef()))
-// to call  : createFoo(Bar::getTypeGuid(), (void**)(myBar.writeRef()))
-#define SLANG_IID_PPV_ARGS(ppType) \
-    std::decay_t<decltype(**(ppType))>::getTypeGuid(), \
-    ((void)[]{static_assert(std::is_base_of_v<ISlangUnknown, std::decay_t<decltype(**(ppType))>>);}, reinterpret_cast<void**>(ppType))
-
-
-    /** Base interface for components exchanged through the API.
-
-    This interface definition is compatible with the COM `IUnknown`,
-    and uses the same UUID, but Slang does not require applications
-    to use or initialize COM.
-    */
-    struct ISlangUnknown
-    {
-        SLANG_COM_INTERFACE(0x00000000, 0x0000, 0x0000, { 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46 })
-
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface(SlangUUID const& uuid, void** outObject) = 0;
-        virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() = 0;
-        virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() = 0;
-
-        /*
-        Inline methods are provided to allow the above operations to be called
-        using their traditional COM names/signatures:
-        */
-        SlangResult QueryInterface(struct _GUID const& uuid, void** outObject) { return queryInterface(*(SlangUUID const*)&uuid, outObject); }
-        uint32_t AddRef() { return addRef(); }
-        uint32_t Release() { return release(); }
-    };
-    #define SLANG_UUID_ISlangUnknown ISlangUnknown::getTypeGuid()
-
-
-    /* An interface to provide a mechanism to cast, that doesn't require ref counting
-    and doesn't have to return a pointer to a ISlangUnknown derived class */
-    class ISlangCastable : public ISlangUnknown
-    {
-        SLANG_COM_INTERFACE(0x87ede0e1, 0x4852, 0x44b0, { 0x8b, 0xf2, 0xcb, 0x31, 0x87, 0x4d, 0xe2, 0x39 });
-
-            /// Can be used to cast to interfaces without reference counting. 
-            /// Also provides access to internal implementations, when they provide a guid
-            /// Can simulate a 'generated' interface as long as kept in scope by cast from. 
-        virtual SLANG_NO_THROW void* SLANG_MCALL castAs(const SlangUUID& guid) = 0;
-    };
-
-    class ISlangClonable : public ISlangCastable
-    {
-        SLANG_COM_INTERFACE(0x1ec36168, 0xe9f4, 0x430d, { 0xbb, 0x17, 0x4, 0x8a, 0x80, 0x46, 0xb3, 0x1f });
-
-            /// Note the use of guid is for the desired interface/object.
-            /// The object is returned *not* ref counted. Any type that can implements the interface, 
-            /// derives from ICastable, and so (not withstanding some other issue) will always return
-            /// an ICastable interface which other interfaces/types are accessible from via castAs
-        SLANG_NO_THROW virtual void* SLANG_MCALL clone(const SlangUUID& guid) = 0;
-    };
-
-    /** A "blob" of binary data.
-
-    This interface definition is compatible with the `ID3DBlob` and `ID3D10Blob` interfaces.
-    */
-    struct ISlangBlob : public ISlangUnknown
-    {
-        SLANG_COM_INTERFACE(0x8BA5FB08, 0x5195, 0x40e2, { 0xAC, 0x58, 0x0D, 0x98, 0x9C, 0x3A, 0x01, 0x02 })
-
-        virtual SLANG_NO_THROW void const* SLANG_MCALL getBufferPointer() = 0;
-        virtual SLANG_NO_THROW size_t SLANG_MCALL getBufferSize() = 0;
-    };
-    #define SLANG_UUID_ISlangBlob ISlangBlob::getTypeGuid()
-
-    /* Can be requested from ISlangCastable cast to indicate the contained chars are null terminated.  
-    */
-    struct SlangTerminatedChars
-    {
-        SLANG_CLASS_GUID(0xbe0db1a8, 0x3594, 0x4603, { 0xa7, 0x8b, 0xc4, 0x86, 0x84, 0x30, 0xdf, 0xbb });
-        operator const char*() const { return chars; }
-        char chars[1];
-    };
-
-    /** A (real or virtual) file system.
-
-    Slang can make use of this interface whenever it would otherwise try to load files
-    from disk, allowing applications to hook and/or override filesystem access from
-    the compiler.
-
-    It is the responsibility of 
-    the caller of any method that returns a ISlangBlob to release the blob when it is no 
-    longer used (using 'release').
-    */
-
-    struct ISlangFileSystem : public ISlangCastable
-    {
-        SLANG_COM_INTERFACE(0x003A09FC, 0x3A4D, 0x4BA0, { 0xAD, 0x60, 0x1F, 0xD8, 0x63, 0xA9, 0x15, 0xAB })
-
-        /** Load a file from `path` and return a blob of its contents
-        @param path The path to load from, as a null-terminated UTF-8 string.
-        @param outBlob A destination pointer to receive the blob of the file contents.
-        @returns A `SlangResult` to indicate success or failure in loading the file.
-
-        NOTE! This is a *binary* load - the blob should contain the exact same bytes
-        as are found in the backing file. 
-
-        If load is successful, the implementation should create a blob to hold
-        the file's content, store it to `outBlob`, and return 0.
-        If the load fails, the implementation should return a failure status
-        (any negative value will do).
-        */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL loadFile(
-            char const*     path,
-            ISlangBlob** outBlob) = 0;
-    };
-    #define SLANG_UUID_ISlangFileSystem ISlangFileSystem::getTypeGuid()
-
-
-    typedef void(*SlangFuncPtr)(void);
-
-    /** 
-    (DEPRECATED) ISlangSharedLibrary
-    */
-    struct ISlangSharedLibrary_Dep1: public ISlangUnknown
-    {
-        SLANG_COM_INTERFACE( 0x9c9d5bc5, 0xeb61, 0x496f,{ 0x80, 0xd7, 0xd1, 0x47, 0xc4, 0xa2, 0x37, 0x30 })
-
-        virtual SLANG_NO_THROW void* SLANG_MCALL findSymbolAddressByName(char const* name) = 0;
-    };
-    #define SLANG_UUID_ISlangSharedLibrary_Dep1 ISlangSharedLibrary_Dep1::getTypeGuid()
-
-    /** An interface that can be used to encapsulate access to a shared library. An implementation
-    does not have to implement the library as a shared library
-    */
-    struct ISlangSharedLibrary : public ISlangCastable
-    {
-        SLANG_COM_INTERFACE(0x70dbc7c4, 0xdc3b, 0x4a07, { 0xae, 0x7e, 0x75, 0x2a, 0xf6, 0xa8, 0x15, 0x55 })
-
-        /** Get a function by name. If the library is unloaded will only return nullptr.
-        @param name The name of the function
-        @return The function pointer related to the name or nullptr if not found
-        */
-        SLANG_FORCE_INLINE SlangFuncPtr findFuncByName(char const* name) { return (SlangFuncPtr)findSymbolAddressByName(name); }
-
-        /** Get a symbol by name. If the library is unloaded will only return nullptr.
-        @param name The name of the symbol
-        @return The pointer related to the name or nullptr if not found
-        */
-        virtual SLANG_NO_THROW void* SLANG_MCALL findSymbolAddressByName(char const* name) = 0;
-    };
-    #define SLANG_UUID_ISlangSharedLibrary ISlangSharedLibrary::getTypeGuid()
-
-    struct ISlangSharedLibraryLoader: public ISlangUnknown
-    {
-        SLANG_COM_INTERFACE(0x6264ab2b, 0xa3e8, 0x4a06, { 0x97, 0xf1, 0x49, 0xbc, 0x2d, 0x2a, 0xb1, 0x4d })
-
-            /** Load a shared library. In typical usage the library name should *not* contain any platform
-            specific elements. For example on windows a dll name should *not* be passed with a '.dll' extension,
-            and similarly on linux a shared library should *not* be passed with the 'lib' prefix and '.so' extension
-            @path path The unadorned filename and/or path for the shared library
-            @ param sharedLibraryOut Holds the shared library if successfully loaded */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL loadSharedLibrary(
-            const char*     path,
-            ISlangSharedLibrary** sharedLibraryOut) = 0;
-    };
-    #define SLANG_UUID_ISlangSharedLibraryLoader ISlangSharedLibraryLoader::getTypeGuid()
-    
-    /* Type that identifies how a path should be interpreted */
-    typedef unsigned int SlangPathTypeIntegral;
-    enum SlangPathType : SlangPathTypeIntegral
-    {
-        SLANG_PATH_TYPE_DIRECTORY,      /**< Path specified specifies a directory. */
-        SLANG_PATH_TYPE_FILE,           /**< Path specified is to a file. */
-    };
-
-    /* Callback to enumerate the contents of of a directory in a ISlangFileSystemExt.
-    The name is the name of a file system object (directory/file) in the specified path (ie it is without a path) */
-    typedef void (*FileSystemContentsCallBack)(SlangPathType pathType, const char* name, void* userData);
-
-    /* Determines how paths map to files on the OS file system */
-    enum class OSPathKind : uint8_t
-    {
-        None,                ///< Paths do not map to the file system
-        Direct,              ///< Paths map directly to the file system
-        OperatingSystem,     ///< Only paths gained via PathKind::OperatingSystem map to the operating system file system
-    };
-
-    /* Used to determine what kind of path is required from an input path */
-    enum class PathKind
-    {
-            /// Given a path, returns a simplified version of that path.  
-            /// This typically means removing '..' and/or '.' from the path.
-            /// A simplified path must point to the same object as the original.
-        Simplified,             
-
-            /// Given a path, returns a 'canonical path' to the item. 
-            /// This may be the operating system 'canonical path' that is the unique path to the item.
-            /// 
-            /// If the item exists the returned canonical path should always be usable to access the item.
-            /// 
-            /// If the item the path specifies doesn't exist, the canonical path may not be returnable
-            /// or be a path simplification.             
-            /// Not all file systems support canonical paths.
-        Canonical,
-
-            /// Given a path returns a path such that it is suitable to be displayed to the user.
-            /// 
-            /// For example if the file system is a zip file - it might include the path to the zip
-            /// container as well as the path to the specific file.
-            /// 
-            /// NOTE! The display path won't necessarily work on the file system to access the item
-        Display,
-
-            /// Get the path to the item on the *operating system* file system, if available.
-        OperatingSystem,
-
-        CountOf,
-    };
-
-    /** An extended file system abstraction.
-    
-    Implementing and using this interface over ISlangFileSystem gives much more control over how paths
-    are managed, as well as how it is determined if two files 'are the same'.
-
-    All paths as input char*, or output as ISlangBlobs are always encoded as UTF-8 strings.
-    Blobs that contain strings are always zero terminated.
-    */
-    struct ISlangFileSystemExt : public ISlangFileSystem
-    {
-        SLANG_COM_INTERFACE(0x5fb632d2, 0x979d, 0x4481, { 0x9f, 0xee, 0x66, 0x3c, 0x3f, 0x14, 0x49, 0xe1 })
-
-        /** Get a uniqueIdentity which uniquely identifies an object of the file system.
-           
-        Given a path, returns a 'uniqueIdentity' which ideally is the same value for the same object on the file system.
-
-        The uniqueIdentity is used to compare if two paths are the same - which amongst other things allows Slang to
-        cache source contents internally. It is also used for #pragma once functionality.
-
-        A *requirement* is for any implementation is that two paths can only return the same uniqueIdentity if the
-        contents of the two files are *identical*. If an implementation breaks this constraint it can produce incorrect compilation.
-        If an implementation cannot *strictly* identify *the same* files, this will only have an effect on #pragma once behavior.
-
-        The string for the uniqueIdentity is held zero terminated in the ISlangBlob of outUniqueIdentity.
-   
-        Note that there are many ways a uniqueIdentity may be generated for a file. For example it could be the
-        'canonical path' - assuming it is available and unambiguous for a file system. Another possible mechanism
-        could be to store the filename combined with the file date time to uniquely identify it.
-     
-        The client must ensure the blob be released when no longer used, otherwise memory will leak.
-
-        NOTE! Ideally this method would be called 'getPathUniqueIdentity' but for historical reasons and
-        backward compatibility it's name remains with 'File' even though an implementation should be made to work
-        with directories too.
-
-        @param path
-        @param outUniqueIdentity
-        @returns A `SlangResult` to indicate success or failure getting the uniqueIdentity.
-        */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getFileUniqueIdentity(
-            const char* path,
-            ISlangBlob** outUniqueIdentity) = 0;
-
-        /** Calculate a path combining the 'fromPath' with 'path'
-
-        The client must ensure the blob be released when no longer used, otherwise memory will leak.
-
-        @param fromPathType How to interpret the from path - as a file or a directory.
-        @param fromPath The from path. 
-        @param path Path to be determined relative to the fromPath
-        @param pathOut Holds the string which is the relative path. The string is held in the blob zero terminated.  
-        @returns A `SlangResult` to indicate success or failure in loading the file.
-        */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL calcCombinedPath(
-            SlangPathType fromPathType,
-            const char* fromPath,
-            const char* path,
-            ISlangBlob** pathOut) = 0;          
-            
-        /** Gets the type of path that path is on the file system. 
-        @param path
-        @param pathTypeOut
-        @returns SLANG_OK if located and type is known, else an error. SLANG_E_NOT_FOUND if not found.
-        */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getPathType(
-            const char* path, 
-            SlangPathType* pathTypeOut) = 0;
-
-        /** Get a path based on the kind.
-
-        @param kind The kind of path wanted
-        @param path The input path
-        @param outPath The output path held in a blob
-        @returns SLANG_OK if successfully simplified the path (SLANG_E_NOT_IMPLEMENTED if not implemented, or some other error code)
-        */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getPath(
-            PathKind kind,
-            const char* path,
-            ISlangBlob** outPath) = 0;
-
-        /** Clears any cached information */
-        virtual SLANG_NO_THROW void SLANG_MCALL clearCache() = 0;
-
-        /** Enumerate the contents of the path
-        
-        Note that for normal Slang operation it isn't necessary to enumerate contents this can return SLANG_E_NOT_IMPLEMENTED.
-        
-        @param The path to enumerate
-        @param callback This callback is called for each entry in the path. 
-        @param userData This is passed to the callback
-        @returns SLANG_OK if successful 
-        */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL enumeratePathContents(
-            const char* path,
-            FileSystemContentsCallBack callback,
-            void* userData) = 0;
-
-        /** Returns how paths map to the OS file system
-        
-        @returns OSPathKind that describes how paths map to the Operating System file system
-        */
-        virtual SLANG_NO_THROW OSPathKind SLANG_MCALL getOSPathKind() = 0;
-    };
-
-    #define SLANG_UUID_ISlangFileSystemExt ISlangFileSystemExt::getTypeGuid()
-
-    struct ISlangMutableFileSystem : public ISlangFileSystemExt
-    {
-        SLANG_COM_INTERFACE(0xa058675c, 0x1d65, 0x452a, { 0x84, 0x58, 0xcc, 0xde, 0xd1, 0x42, 0x71, 0x5 })
-
-        /** Write data to the specified path.
-
-        @param path The path for data to be saved to
-        @param data The data to be saved
-        @param size The size of the data in bytes
-        @returns SLANG_OK if successful (SLANG_E_NOT_IMPLEMENTED if not implemented, or some other error code)
-        */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL saveFile(
-            const char* path,
-            const void* data,
-            size_t size) = 0;
-
-        /** Write data in the form of a blob to the specified path.
-
-        Depending on the implementation writing a blob might be faster/use less memory. It is assumed the 
-        blob is *immutable* and that an implementation can reference count it.
-
-        It is not guaranteed loading the same file will return the *same* blob - just a blob with same 
-        contents.
-
-        @param path The path for data to be saved to
-        @param dataBlob The data to be saved
-        @returns SLANG_OK if successful (SLANG_E_NOT_IMPLEMENTED if not implemented, or some other error code)
-        */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL saveFileBlob(
-            const char* path,
-            ISlangBlob* dataBlob) = 0;
-
-        /** Remove the entry in the path (directory of file). Will only delete an empty directory, if not empty
-        will return an error.
-
-        @param path The path to remove 
-        @returns SLANG_OK if successful 
-        */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL remove(
-            const char* path) = 0;
-
-        /** Create a directory.
-
-        The path to the directory must exist
-
-        @param path To the directory to create. The parent path *must* exist otherwise will return an error.
-        @returns SLANG_OK if successful 
-        */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL createDirectory(
-            const char* path) = 0;
-    };
-
-    #define SLANG_UUID_ISlangMutableFileSystem ISlangMutableFileSystem::getTypeGuid()
-
-    /* Identifies different types of writer target*/
-    typedef unsigned int SlangWriterChannelIntegral;
-    enum SlangWriterChannel : SlangWriterChannelIntegral
-    {
-        SLANG_WRITER_CHANNEL_DIAGNOSTIC,
-        SLANG_WRITER_CHANNEL_STD_OUTPUT,
-        SLANG_WRITER_CHANNEL_STD_ERROR,
-        SLANG_WRITER_CHANNEL_COUNT_OF,
-    };
-
-    typedef unsigned int SlangWriterModeIntegral;
-    enum SlangWriterMode : SlangWriterModeIntegral
-    {
-        SLANG_WRITER_MODE_TEXT,
-        SLANG_WRITER_MODE_BINARY,
-    };
-
-    /** A stream typically of text, used for outputting diagnostic as well as other information.
-    */
-    struct ISlangWriter : public ISlangUnknown
-    {
-        SLANG_COM_INTERFACE(0xec457f0e, 0x9add, 0x4e6b,{ 0x85, 0x1c, 0xd7, 0xfa, 0x71, 0x6d, 0x15, 0xfd })
-
-            /** Begin an append buffer.
-            NOTE! Only one append buffer can be active at any time.
-            @param maxNumChars The maximum of chars that will be appended
-            @returns The start of the buffer for appending to. */    
-        virtual SLANG_NO_THROW char* SLANG_MCALL beginAppendBuffer(size_t maxNumChars) = 0;
-            /** Ends the append buffer, and is equivalent to a write of the append buffer.
-            NOTE! That an endAppendBuffer is not necessary if there are no characters to write.
-            @param buffer is the start of the data to append and must be identical to last value returned from beginAppendBuffer
-            @param numChars must be a value less than or equal to what was returned from last call to beginAppendBuffer
-            @returns Result, will be SLANG_OK on success */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL endAppendBuffer(char* buffer, size_t numChars) = 0;
-            /** Write text to the writer
-            @param chars The characters to write out
-            @param numChars The amount of characters
-            @returns SLANG_OK on success */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL write(const char* chars, size_t numChars) = 0;
-            /** Flushes any content to the output */
-        virtual SLANG_NO_THROW void SLANG_MCALL flush() = 0;
-            /** Determines if the writer stream is to the console, and can be used to alter the output 
-            @returns Returns true if is a console writer */
-        virtual SLANG_NO_THROW SlangBool SLANG_MCALL isConsole() = 0;
-            /** Set the mode for the writer to use
-            @param mode The mode to use
-            @returns SLANG_OK on success */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL setMode(SlangWriterMode mode) = 0;
-    };
-    
-    #define SLANG_UUID_ISlangWriter ISlangWriter::getTypeGuid()
-
-    struct ISlangProfiler : public ISlangUnknown
-    {
-        SLANG_COM_INTERFACE(0x197772c7, 0x0155, 0x4b91, { 0x84, 0xe8, 0x66, 0x68, 0xba, 0xff, 0x06, 0x19 })
-        virtual SLANG_NO_THROW size_t SLANG_MCALL getEntryCount() = 0;
-        virtual SLANG_NO_THROW const char* SLANG_MCALL getEntryName(uint32_t index) = 0;
-        virtual SLANG_NO_THROW long SLANG_MCALL getEntryTimeMS(uint32_t index) = 0;
-        virtual SLANG_NO_THROW uint32_t SLANG_MCALL getEntryInvocationTimes(uint32_t index) = 0;
-    };
-    #define SLANG_UUID_ISlangProfiler ISlangProfiler::getTypeGuid()
-
-    namespace slang {
-    struct IGlobalSession;
-    struct ICompileRequest;
-
-    } // namespace slang
-
-    /*!
-    @brief An instance of the Slang library.
-    */
-    typedef slang::IGlobalSession SlangSession;
-    
-
-    typedef struct SlangProgramLayout SlangProgramLayout;
-
-    /*!
-    @brief A request for one or more compilation actions to be performed.
-    */
-    typedef struct slang::ICompileRequest SlangCompileRequest;
-
-
-    /*!
-    @brief Initialize an instance of the Slang library.
-    */
-    SLANG_API SlangSession* spCreateSession(const char* deprecated = 0);
-
-    /*!
-    @brief Clean up after an instance of the Slang library.
-    */
-    SLANG_API void spDestroySession(
-        SlangSession*   session);
-
-    /** @see slang::IGlobalSession::setSharedLibraryLoader
-    */
-    SLANG_API void spSessionSetSharedLibraryLoader(
-        SlangSession*               session,
-        ISlangSharedLibraryLoader*  loader);
-
-    /** @see slang::IGlobalSession::getSharedLibraryLoader
-    */
-    SLANG_API ISlangSharedLibraryLoader* spSessionGetSharedLibraryLoader(
-        SlangSession*   session);
-
-    /** @see slang::IGlobalSession::checkCompileTargetSupport
-    */
-    SLANG_API SlangResult spSessionCheckCompileTargetSupport(
-        SlangSession*       session,
-        SlangCompileTarget  target);
-
-    /** @see slang::IGlobalSession::checkPassThroughSupport
-    */
-    SLANG_API SlangResult spSessionCheckPassThroughSupport(
-        SlangSession*       session,
-        SlangPassThrough    passThrough
-    );
-
-    /** @see slang::IGlobalSession::addBuiltins
-    */
-    SLANG_API void spAddBuiltins(
-        SlangSession*   session,
-        char const*     sourcePath,
-        char const*     sourceString);
-
-        /*!
-    @brief Callback type used for diagnostic output. 
-    */
-    typedef void(*SlangDiagnosticCallback)(
-        char const* message,
-        void*       userData);
-
-    /*!
-    @brief Get the build version 'tag' string. The string is the same as produced via `git describe --tags`
-    for the project. If Slang is built separately from the automated build scripts
-    the contents will by default be 'unknown'. Any string can be set by changing the
-    contents of 'slang-tag-version.h' file and recompiling the project.
-
-    This function will return exactly the same result as the method getBuildTag string on IGlobalSession.
-
-    An advantage of using this function over the method is that doing so does not require the creation of
-    a session, which can be a fairly costly operation.
-
-    @return The build tag string
-    */
-    SLANG_API const char* spGetBuildTagString();
-
-    /* @see slang::IGlobalSession::createCompileRequest
-    */
-    SLANG_API SlangCompileRequest* spCreateCompileRequest(
-        SlangSession* session);
-
-    /*!
-    @brief Destroy a compile request.
-    Note a request is a COM object and can be destroyed via 'Release'.
-    */
-    SLANG_API void spDestroyCompileRequest(
-        SlangCompileRequest*    request);
-
-    /*! @see slang::ICompileRequest::setFileSystem */
-    SLANG_API void spSetFileSystem(
-        SlangCompileRequest*    request,
-        ISlangFileSystem*       fileSystem);
-
-    /*! @see slang::ICompileRequest::setCompileFlags */
-    SLANG_API void spSetCompileFlags(
-        SlangCompileRequest*    request,
-        SlangCompileFlags       flags);
-
-    /*! @see slang::ICompileRequest::getCompileFlags */
-    SLANG_API SlangCompileFlags spGetCompileFlags(
-        SlangCompileRequest*    request);
-
-    /*! @see slang::ICompileRequest::setDumpIntermediates */
-    SLANG_API void spSetDumpIntermediates(
-        SlangCompileRequest*    request,
-        int                     enable);
-
-    /*! @see slang::ICompileRequest::setDumpIntermediatePrefix */
-    SLANG_API void spSetDumpIntermediatePrefix(
-        SlangCompileRequest*    request,
-        const char* prefix);
-
-    /*! DEPRECATED: use `spSetTargetLineDirectiveMode` instead.
-        @see slang::ICompileRequest::setLineDirectiveMode */
-    SLANG_API void spSetLineDirectiveMode(
-        SlangCompileRequest*    request,
-        SlangLineDirectiveMode  mode);
-        
-    /*! @see slang::ICompileRequest::setTargetLineDirectiveMode */
-    SLANG_API void spSetTargetLineDirectiveMode(
-        SlangCompileRequest*    request,
-        int targetIndex,
-        SlangLineDirectiveMode  mode);
-
-    /*! @see slang::ICompileRequest::setTargetLineDirectiveMode */
-    SLANG_API void spSetTargetForceGLSLScalarBufferLayout(
-        SlangCompileRequest*    request,
-        int targetIndex,
-        bool forceScalarLayout);
-
-    /*! @see slang::ICompileRequest::setTargetUseMinimumSlangOptimization */
-    SLANG_API void spSetTargetUseMinimumSlangOptimization(
-        slang::ICompileRequest* request,
-        int targetIndex,
-        bool val);
-
-    /*! @see slang::ICompileRequest::setIngoreCapabilityCheck */
-    SLANG_API void spSetIgnoreCapabilityCheck(
-        slang::ICompileRequest* request,
-        bool val);
-
-    /*! @see slang::ICompileRequest::setCodeGenTarget */
-    SLANG_API void spSetCodeGenTarget(
-        SlangCompileRequest*    request,
-        SlangCompileTarget target);
-
-    /*! @see slang::ICompileRequest::addCodeGenTarget */
-    SLANG_API int spAddCodeGenTarget(
-        SlangCompileRequest*    request,
-        SlangCompileTarget      target);
-
-    /*! @see slang::ICompileRequest::setTargetProfile */
-    SLANG_API void spSetTargetProfile(
-        SlangCompileRequest*    request,
-        int                     targetIndex,
-        SlangProfileID          profile);
-
-    /*! @see slang::ICompileRequest::setTargetFlags */
-    SLANG_API void spSetTargetFlags(
-        SlangCompileRequest*    request,
-        int                     targetIndex,
-        SlangTargetFlags        flags);
-
-
-
-    /*! @see slang::ICompileRequest::setTargetFloatingPointMode */
-    SLANG_API void spSetTargetFloatingPointMode(
-        SlangCompileRequest*    request,
-        int                     targetIndex,
-        SlangFloatingPointMode  mode);
-
-    /*! @see slang::ICompileRequest::addTargetCapability */
-    SLANG_API void spAddTargetCapability(
-        slang::ICompileRequest* request,
-        int                     targetIndex,
-        SlangCapabilityID       capability);
-
-    /* DEPRECATED: use `spSetMatrixLayoutMode` instead. */
-    SLANG_API void spSetTargetMatrixLayoutMode(
-        SlangCompileRequest*    request,
-        int                     targetIndex,
-        SlangMatrixLayoutMode   mode);
-
-    /*! @see slang::ICompileRequest::setMatrixLayoutMode */
-    SLANG_API void spSetMatrixLayoutMode(
-        SlangCompileRequest*    request,
-        SlangMatrixLayoutMode   mode);
-
-    /*! @see slang::ICompileRequest::setDebugInfoLevel */
-    SLANG_API void spSetDebugInfoLevel(
-        SlangCompileRequest*    request,
-        SlangDebugInfoLevel     level);
-
-    /*! @see slang::ICompileRequest::setDebugInfoFormat */
-    SLANG_API void spSetDebugInfoFormat(
-        SlangCompileRequest*    request,
-        SlangDebugInfoFormat        format);
-
-    /*! @see slang::ICompileRequest::setOptimizationLevel */
-    SLANG_API void spSetOptimizationLevel(
-        SlangCompileRequest*    request,
-        SlangOptimizationLevel  level);
-
-
-    
-    /*! @see slang::ICompileRequest::setOutputContainerFormat */
-    SLANG_API void spSetOutputContainerFormat(
-        SlangCompileRequest*    request,
-        SlangContainerFormat    format);
-
-    /*! @see slang::ICompileRequest::setPassThrough */
-    SLANG_API void spSetPassThrough(
-        SlangCompileRequest*    request,
-        SlangPassThrough        passThrough);
-
-     /*! @see slang::ICompileRequest::setDiagnosticCallback */
-    SLANG_API void spSetDiagnosticCallback(
-        SlangCompileRequest*    request,
-        SlangDiagnosticCallback callback,
-        void const*             userData);
-
-    /*! @see slang::ICompileRequest::setWriter */
-    SLANG_API void spSetWriter(
-        SlangCompileRequest*    request,
-        SlangWriterChannel      channel, 
-        ISlangWriter*           writer);
-
-    /*! @see slang::ICompileRequest::getWriter */
-    SLANG_API ISlangWriter* spGetWriter(
-        SlangCompileRequest*    request,
-        SlangWriterChannel      channel);
-
-    /*! @see slang::ICompileRequest::addSearchPath */
-    SLANG_API void spAddSearchPath(
-        SlangCompileRequest*    request,
-        const char*             searchDir);
-
-   /*! @see slang::ICompileRequest::addPreprocessorDefine */
-    SLANG_API void spAddPreprocessorDefine(
-        SlangCompileRequest*    request,
-        const char*             key,
-        const char*             value);
-
-    /*! @see slang::ICompileRequest::processCommandLineArguments */
-    SLANG_API SlangResult spProcessCommandLineArguments(
-        SlangCompileRequest*    request,
-        char const* const*      args,
-        int                     argCount);
-
-    /*! @see slang::ICompileRequest::addTranslationUnit */
-    SLANG_API int spAddTranslationUnit(
-        SlangCompileRequest*    request,
-        SlangSourceLanguage     language,
-        char const*             name);
-
-    
-    /*! @see slang::ICompileRequest::setDefaultModuleName */
-    SLANG_API void spSetDefaultModuleName(
-        SlangCompileRequest*    request,
-        const char* defaultModuleName);
-
-    /*! @see slang::ICompileRequest::addPreprocessorDefine */
-    SLANG_API void spTranslationUnit_addPreprocessorDefine(
-        SlangCompileRequest*    request,
-        int                     translationUnitIndex,
-        const char*             key,
-        const char*             value);
-
-
-    /*! @see slang::ICompileRequest::addTranslationUnitSourceFile */
-    SLANG_API void spAddTranslationUnitSourceFile(
-        SlangCompileRequest*    request,
-        int                     translationUnitIndex,
-        char const*             path);
-
-    /*! @see slang::ICompileRequest::addTranslationUnitSourceString */
-    SLANG_API void spAddTranslationUnitSourceString(
-        SlangCompileRequest*    request,
-        int                     translationUnitIndex,
-        char const*             path,
-        char const*             source);
-
-
-    /*! @see slang::ICompileRequest::addLibraryReference */
-    SLANG_API SlangResult spAddLibraryReference(
-        SlangCompileRequest*    request,
-        const char* basePath,
-        const void* libData,
-        size_t libDataSize);
-
-    /*! @see slang::ICompileRequest::addTranslationUnitSourceStringSpan */
-    SLANG_API void spAddTranslationUnitSourceStringSpan(
-        SlangCompileRequest*    request,
-        int                     translationUnitIndex,
-        char const*             path,
-        char const*             sourceBegin,
-        char const*             sourceEnd);
-
-    /*! @see slang::ICompileRequest::addTranslationUnitSourceBlob */
-    SLANG_API void spAddTranslationUnitSourceBlob(
-        SlangCompileRequest*    request,
-        int                     translationUnitIndex,
-        char const*             path,
-        ISlangBlob*             sourceBlob);
-
-    /*! @see slang::IGlobalSession::findProfile */
-    SLANG_API SlangProfileID spFindProfile(
-        SlangSession*   session,
-        char const*     name);
-
-    /*! @see slang::IGlobalSession::findCapability */
-    SLANG_API SlangCapabilityID spFindCapability(
-        SlangSession*   session,
-        char const*     name);
-
-    /*! @see slang::ICompileRequest::addEntryPoint */
-    SLANG_API int spAddEntryPoint(
-        SlangCompileRequest*    request,
-        int                     translationUnitIndex,
-        char const*             name,
-        SlangStage              stage);
-
-    /*! @see slang::ICompileRequest::addEntryPointEx */
-    SLANG_API int spAddEntryPointEx(
-        SlangCompileRequest*    request,
-        int                     translationUnitIndex,
-        char const*             name,
-        SlangStage              stage,
-        int                     genericArgCount,
-        char const**            genericArgs);
-
-    /*! @see slang::ICompileRequest::setGlobalGenericArgs */
-    SLANG_API SlangResult spSetGlobalGenericArgs(
-        SlangCompileRequest*    request,
-        int                     genericArgCount,
-        char const**            genericArgs);
-
-    /*! @see slang::ICompileRequest::setTypeNameForGlobalExistentialTypeParam */
-    SLANG_API SlangResult spSetTypeNameForGlobalExistentialTypeParam(
-        SlangCompileRequest*    request,
-        int                     slotIndex,
-        char const*             typeName);
-
-    /*! @see slang::ICompileRequest::setTypeNameForEntryPointExistentialTypeParam */
-    SLANG_API SlangResult spSetTypeNameForEntryPointExistentialTypeParam(
-        SlangCompileRequest*    request,
-        int                     entryPointIndex,
-        int                     slotIndex,
-        char const*             typeName);
-
-    /*! @see slang::ICompileRequest::compile */
-    SLANG_API SlangResult spCompile(
-        SlangCompileRequest*    request);
-
-
-    /*! @see slang::ICompileRequest::getDiagnosticOutput */
-    SLANG_API char const* spGetDiagnosticOutput(
-        SlangCompileRequest*    request);
-
-    /*! @see slang::ICompileRequest::getDiagnosticOutputBlob */
-    SLANG_API SlangResult spGetDiagnosticOutputBlob(
-        SlangCompileRequest*    request,
-        ISlangBlob**            outBlob);
-
-
-    /*! @see slang::ICompileRequest::getDependencyFileCount */
-    SLANG_API int
-    spGetDependencyFileCount(
-        SlangCompileRequest*    request);
-
-    /*! @see slang::ICompileRequest::getDependencyFilePath */
-    SLANG_API char const*
-    spGetDependencyFilePath(
-        SlangCompileRequest*    request,
-        int                     index);
-
-    /*! @see slang::ICompileRequest::getTranslationUnitCount */
-    SLANG_API int
-    spGetTranslationUnitCount(
-        SlangCompileRequest*    request);
-
-    /*! @see slang::ICompileRequest::getEntryPointSource */
-    SLANG_API char const* spGetEntryPointSource(
-        SlangCompileRequest*    request,
-        int                     entryPointIndex);
-
-    /*! @see slang::ICompileRequest::getEntryPointCode */
-    SLANG_API void const* spGetEntryPointCode(
-        SlangCompileRequest*    request,
-        int                     entryPointIndex,
-        size_t*                 outSize);
-
-    /*! @see slang::ICompileRequest::getEntryPointCodeBlob */
-    SLANG_API SlangResult spGetEntryPointCodeBlob(
-        SlangCompileRequest*    request,
-        int                     entryPointIndex,
-        int                     targetIndex,
-        ISlangBlob**            outBlob);
-
-    /*! @see slang::ICompileRequest::getEntryPointHostCallable */
-    SLANG_API SlangResult spGetEntryPointHostCallable(
-        SlangCompileRequest*    request,
-        int                     entryPointIndex,
-        int                     targetIndex,
-        ISlangSharedLibrary**   outSharedLibrary);
-
-    /*! @see slang::ICompileRequest::getTargetCodeBlob */
-    SLANG_API SlangResult spGetTargetCodeBlob(
-        SlangCompileRequest*    request,
-        int                     targetIndex,
-        ISlangBlob**            outBlob);
-
-    /*! @see slang::ICompileRequest::getTargetHostCallable */
-    SLANG_API SlangResult spGetTargetHostCallable(
-        SlangCompileRequest*    request,
-        int                     targetIndex,
-        ISlangSharedLibrary**   outSharedLibrary);
-
-    /*! @see slang::ICompileRequest::getCompileRequestCode */
-    SLANG_API void const* spGetCompileRequestCode(
-        SlangCompileRequest*    request,
-        size_t*                 outSize);
-
-    /*! @see slang::ICompileRequest::getContainerCode */
-    SLANG_API SlangResult spGetContainerCode(
-        SlangCompileRequest*    request,
-        ISlangBlob**            outBlob);
-
-    /*! @see slang::ICompileRequest::loadRepro */
-    SLANG_API SlangResult spLoadRepro(
-        SlangCompileRequest* request,
-        ISlangFileSystem* fileSystem,
-        const void* data,
-        size_t size);
-
-    /*! @see slang::ICompileRequest::saveRepro */
-    SLANG_API SlangResult spSaveRepro(
-        SlangCompileRequest* request,
-        ISlangBlob** outBlob
-    );
-
-    /*! @see slang::ICompileRequest::enableReproCapture */
-    SLANG_API SlangResult spEnableReproCapture(
-        SlangCompileRequest* request);
-
-    /*! @see slang::ICompileRequest::getCompileTimeProfile */
-    SLANG_API SlangResult spGetCompileTimeProfile(
-        SlangCompileRequest* request,
-        ISlangProfiler** compileTimeProfile,
-        bool shouldClear);
-
-
-    /** Extract contents of a repro.
-
-    Writes the contained files and manifest with their 'unique' names into fileSystem. For more details read the
-    docs/repro.md documentation. 
-
-    @param session          The slang session
-    @param reproData        Holds the repro data
-    @param reproDataSize    The size of the repro data
-    @param fileSystem       File system that the contents of the repro will be written to
-    @returns                A `SlangResult` to indicate success or failure.
-    */
-    SLANG_API SlangResult spExtractRepro(
-        SlangSession* session,
-        const void* reproData,
-        size_t reproDataSize,
-        ISlangMutableFileSystem* fileSystem);
-
-    /* Turns a repro into a file system.
-
-    Makes the contents of the repro available as a file system - that is able to access the files with the same
-    paths as were used on the original repro file system. 
-
-    @param session          The slang session
-    @param reproData        The repro data
-    @param reproDataSize    The size of the repro data
-    @param replaceFileSystem  Will attempt to load by unique names from this file system before using contents of the repro. Optional.
-    @param outFileSystem    The file system that can be used to access contents
-    @returns                A `SlangResult` to indicate success or failure.
-    */
-    SLANG_API SlangResult spLoadReproAsFileSystem(
-        SlangSession* session,
-        const void* reproData,
-        size_t reproDataSize,
-        ISlangFileSystem* replaceFileSystem,
-        ISlangFileSystemExt** outFileSystem);
-
-    /*! @see slang::ICompileRequest::overrideDiagnosticSeverity */
-    SLANG_API void spOverrideDiagnosticSeverity(
-        SlangCompileRequest* request,
-        SlangInt messageID,
-        SlangSeverity overrideSeverity);
-
-    /*! @see slang::ICompileRequest::getDiagnosticFlags */
-    SLANG_API SlangDiagnosticFlags spGetDiagnosticFlags(SlangCompileRequest* request);
-
-    /*! @see slang::ICompileRequest::setDiagnosticFlags */
-    SLANG_API void spSetDiagnosticFlags(SlangCompileRequest* request, SlangDiagnosticFlags flags);
-
-    /*
-    Forward declarations of types used in the reflection interface;
-    */
-
-    typedef struct SlangProgramLayout SlangProgramLayout;
-    typedef struct SlangEntryPoint SlangEntryPoint;
-    typedef struct SlangEntryPointLayout SlangEntryPointLayout;
-
-    typedef struct SlangReflectionModifier          SlangReflectionModifier;
-    typedef struct SlangReflectionType              SlangReflectionType;
-    typedef struct SlangReflectionTypeLayout        SlangReflectionTypeLayout;
-    typedef struct SlangReflectionVariable          SlangReflectionVariable;
-    typedef struct SlangReflectionVariableLayout    SlangReflectionVariableLayout;
-    typedef struct SlangReflectionTypeParameter     SlangReflectionTypeParameter;
-    typedef struct SlangReflectionUserAttribute     SlangReflectionUserAttribute;
-
-    /*
-    Type aliases to maintain backward compatibility.
-    */
-    typedef SlangProgramLayout SlangReflection;
-    typedef SlangEntryPointLayout SlangReflectionEntryPoint;
-
-    // get reflection data from a compilation request
-    SLANG_API SlangReflection* spGetReflection(
-        SlangCompileRequest*    request);
-
-    // type reflection
-
-    typedef unsigned int SlangTypeKindIntegral;
-    enum SlangTypeKind : SlangTypeKindIntegral
-    {
-        SLANG_TYPE_KIND_NONE,
-        SLANG_TYPE_KIND_STRUCT,
-        SLANG_TYPE_KIND_ARRAY,
-        SLANG_TYPE_KIND_MATRIX,
-        SLANG_TYPE_KIND_VECTOR,
-        SLANG_TYPE_KIND_SCALAR,
-        SLANG_TYPE_KIND_CONSTANT_BUFFER,
-        SLANG_TYPE_KIND_RESOURCE,
-        SLANG_TYPE_KIND_SAMPLER_STATE,
-        SLANG_TYPE_KIND_TEXTURE_BUFFER,
-        SLANG_TYPE_KIND_SHADER_STORAGE_BUFFER,
-        SLANG_TYPE_KIND_PARAMETER_BLOCK,
-        SLANG_TYPE_KIND_GENERIC_TYPE_PARAMETER,
-        SLANG_TYPE_KIND_INTERFACE,
-        SLANG_TYPE_KIND_OUTPUT_STREAM,
-        SLANG_TYPE_KIND_MESH_OUTPUT,
-        SLANG_TYPE_KIND_SPECIALIZED,
-        SLANG_TYPE_KIND_FEEDBACK,
-        SLANG_TYPE_KIND_POINTER,
-        SLANG_TYPE_KIND_COUNT,
-    };
-
-    typedef unsigned int SlangScalarTypeIntegral;
-    enum SlangScalarType : SlangScalarTypeIntegral
-    {
-        SLANG_SCALAR_TYPE_NONE,
-        SLANG_SCALAR_TYPE_VOID,
-        SLANG_SCALAR_TYPE_BOOL,
-        SLANG_SCALAR_TYPE_INT32,
-        SLANG_SCALAR_TYPE_UINT32,
-        SLANG_SCALAR_TYPE_INT64,
-        SLANG_SCALAR_TYPE_UINT64,
-        SLANG_SCALAR_TYPE_FLOAT16,
-        SLANG_SCALAR_TYPE_FLOAT32,
-        SLANG_SCALAR_TYPE_FLOAT64,
-        SLANG_SCALAR_TYPE_INT8,
-        SLANG_SCALAR_TYPE_UINT8,
-        SLANG_SCALAR_TYPE_INT16,
-        SLANG_SCALAR_TYPE_UINT16,
-        SLANG_SCALAR_TYPE_INTPTR,
-        SLANG_SCALAR_TYPE_UINTPTR
-    };
-
-#ifndef SLANG_RESOURCE_SHAPE
-#    define SLANG_RESOURCE_SHAPE
-    typedef unsigned int SlangResourceShapeIntegral;
-    enum SlangResourceShape : SlangResourceShapeIntegral
-    {
-        SLANG_RESOURCE_BASE_SHAPE_MASK      = 0x0F,
-
-        SLANG_RESOURCE_NONE                 = 0x00,
-
-        SLANG_TEXTURE_1D                    = 0x01,
-        SLANG_TEXTURE_2D                    = 0x02,
-        SLANG_TEXTURE_3D                    = 0x03,
-        SLANG_TEXTURE_CUBE                  = 0x04,
-        SLANG_TEXTURE_BUFFER                = 0x05,
-
-        SLANG_STRUCTURED_BUFFER             = 0x06,
-        SLANG_BYTE_ADDRESS_BUFFER           = 0x07,
-        SLANG_RESOURCE_UNKNOWN              = 0x08,
-        SLANG_ACCELERATION_STRUCTURE        = 0x09,
-        SLANG_TEXTURE_SUBPASS               = 0x0A,
-
-        SLANG_RESOURCE_EXT_SHAPE_MASK       = 0xF0,
-
-        SLANG_TEXTURE_FEEDBACK_FLAG         = 0x10,
-        SLANG_TEXTURE_SHADOW_FLAG           = 0x20,
-        SLANG_TEXTURE_ARRAY_FLAG            = 0x40,
-        SLANG_TEXTURE_MULTISAMPLE_FLAG      = 0x80,
-
-        SLANG_TEXTURE_1D_ARRAY              = SLANG_TEXTURE_1D   | SLANG_TEXTURE_ARRAY_FLAG,
-        SLANG_TEXTURE_2D_ARRAY              = SLANG_TEXTURE_2D   | SLANG_TEXTURE_ARRAY_FLAG,
-        SLANG_TEXTURE_CUBE_ARRAY            = SLANG_TEXTURE_CUBE | SLANG_TEXTURE_ARRAY_FLAG,
-
-        SLANG_TEXTURE_2D_MULTISAMPLE        = SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG,
-        SLANG_TEXTURE_2D_MULTISAMPLE_ARRAY  = SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG | SLANG_TEXTURE_ARRAY_FLAG,
-        SLANG_TEXTURE_SUBPASS_MULTISAMPLE   = SLANG_TEXTURE_SUBPASS | SLANG_TEXTURE_MULTISAMPLE_FLAG,
-    };
-#endif
-    typedef unsigned int SlangResourceAccessIntegral;
-    enum SlangResourceAccess : SlangResourceAccessIntegral
-    {
-        SLANG_RESOURCE_ACCESS_NONE,
-        SLANG_RESOURCE_ACCESS_READ,
-        SLANG_RESOURCE_ACCESS_READ_WRITE,
-        SLANG_RESOURCE_ACCESS_RASTER_ORDERED,
-        SLANG_RESOURCE_ACCESS_APPEND,
-        SLANG_RESOURCE_ACCESS_CONSUME,
-        SLANG_RESOURCE_ACCESS_WRITE,
-        SLANG_RESOURCE_ACCESS_FEEDBACK,
-        SLANG_RESOURCE_ACCESS_UNKNOWN = 0x7FFFFFFF,
-    };
-
-    typedef unsigned int SlangParameterCategoryIntegral;
-    enum SlangParameterCategory : SlangParameterCategoryIntegral
-    {
-        SLANG_PARAMETER_CATEGORY_NONE,
-        SLANG_PARAMETER_CATEGORY_MIXED,
-        SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER,
-        SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE,
-        SLANG_PARAMETER_CATEGORY_UNORDERED_ACCESS,
-        SLANG_PARAMETER_CATEGORY_VARYING_INPUT,
-        SLANG_PARAMETER_CATEGORY_VARYING_OUTPUT,
-        SLANG_PARAMETER_CATEGORY_SAMPLER_STATE,
-        SLANG_PARAMETER_CATEGORY_UNIFORM,
-        SLANG_PARAMETER_CATEGORY_DESCRIPTOR_TABLE_SLOT,
-        SLANG_PARAMETER_CATEGORY_SPECIALIZATION_CONSTANT,
-        SLANG_PARAMETER_CATEGORY_PUSH_CONSTANT_BUFFER,
-
-        // HLSL register `space`, Vulkan GLSL `set`
-        SLANG_PARAMETER_CATEGORY_REGISTER_SPACE,
-
-        // TODO: Ellie, Both APIs treat mesh outputs as more or less varying output,
-        // Does it deserve to be represented here??
-
-        // A parameter whose type is to be specialized by a global generic type argument
-        SLANG_PARAMETER_CATEGORY_GENERIC,
-
-        SLANG_PARAMETER_CATEGORY_RAY_PAYLOAD,
-        SLANG_PARAMETER_CATEGORY_HIT_ATTRIBUTES,
-        SLANG_PARAMETER_CATEGORY_CALLABLE_PAYLOAD,
-        SLANG_PARAMETER_CATEGORY_SHADER_RECORD,
-
-        // An existential type parameter represents a "hole" that
-        // needs to be filled with a concrete type to enable
-        // generation of specialized code.
-        //
-        // Consider this example:
-        //
-        //      struct MyParams
-        //      {
-        //          IMaterial material;
-        //          ILight lights[3];
-        //      };
-        //
-        // This `MyParams` type introduces two existential type parameters:
-        // one for `material` and one for `lights`. Even though `lights`
-        // is an array, it only introduces one type parameter, because
-        // we need to hae a *single* concrete type for all the array
-        // elements to be able to generate specialized code.
-        //
-        SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM,
-
-        // An existential object parameter represents a value
-        // that needs to be passed in to provide data for some
-        // interface-type shader paameter.
-        //
-        // Consider this example:
-        //
-        //      struct MyParams
-        //      {
-        //          IMaterial material;
-        //          ILight lights[3];
-        //      };
-        //
-        // This `MyParams` type introduces four existential object parameters:
-        // one for `material` and three for `lights` (one for each array
-        // element). This is consistent with the number of interface-type
-        // "objects" that are being passed through to the shader.
-        //
-        SLANG_PARAMETER_CATEGORY_EXISTENTIAL_OBJECT_PARAM,
-
-        // The register space offset for the sub-elements that occupies register spaces.
-        SLANG_PARAMETER_CATEGORY_SUB_ELEMENT_REGISTER_SPACE,
-
-        // The input_attachment_index subpass occupancy tracker
-        SLANG_PARAMETER_CATEGORY_SUBPASS,
-
-        // Metal resource binding points.
-        SLANG_PARAMETER_CATEGORY_METAL_ARGUMENT_BUFFER_ELEMENT,
-
-        // Metal [[attribute]] inputs.
-        SLANG_PARAMETER_CATEGORY_METAL_ATTRIBUTE,
-
-        // Metal [[payload]] inputs
-        SLANG_PARAMETER_CATEGORY_METAL_PAYLOAD,
-
-        //
-        SLANG_PARAMETER_CATEGORY_COUNT,
-
-        // Aliases for Metal-specific categories.
-        SLANG_PARAMETER_CATEGORY_METAL_BUFFER = SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER,
-        SLANG_PARAMETER_CATEGORY_METAL_TEXTURE = SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE,
-        SLANG_PARAMETER_CATEGORY_METAL_SAMPLER = SLANG_PARAMETER_CATEGORY_SAMPLER_STATE,
-
-        // DEPRECATED:
-        SLANG_PARAMETER_CATEGORY_VERTEX_INPUT = SLANG_PARAMETER_CATEGORY_VARYING_INPUT,
-        SLANG_PARAMETER_CATEGORY_FRAGMENT_OUTPUT = SLANG_PARAMETER_CATEGORY_VARYING_OUTPUT,
-        SLANG_PARAMETER_CATEGORY_COUNT_V1 = SLANG_PARAMETER_CATEGORY_SUBPASS,
-    };
-
-    /** Types of API-managed bindings that a parameter might use.
-    
-    `SlangBindingType` represents the distinct types of binding ranges that might be
-    understood by an underlying graphics API or cross-API abstraction layer.
-    Several of the enumeration cases here correspond to cases of `VkDescriptorType`
-    defined by the Vulkan API. Note however that the values of this enumeration
-    are not the same as those of any particular API.
-
-    The `SlangBindingType` enumeration is distinct from `SlangParameterCategory`
-    because `SlangParameterCategory` differentiates the types of parameters for
-    the purposes of layout, where the layout rules of some targets will treat
-    parameters of different types as occupying the same binding space for layout
-    (e.g., in SPIR-V both a `Texture2D` and `SamplerState` use the same space of
-    `binding` indices, and are not allowed to overlap), while those same types
-    map to different types of bindingsin the API (e.g., both textures and samplers
-    use different `VkDescriptorType` values).
-
-    When you want to answer "what register/binding did this parameter use?" you
-    should use `SlangParameterCategory`.
-
-    When you wnat to answer "what type of descriptor range should this parameter use?"
-    you should use `SlangBindingType`.
-    */
-    typedef SlangUInt32 SlangBindingTypeIntegral;
-    enum SlangBindingType : SlangBindingTypeIntegral
-    {
-        SLANG_BINDING_TYPE_UNKNOWN = 0,
-
-        SLANG_BINDING_TYPE_SAMPLER,
-        SLANG_BINDING_TYPE_TEXTURE,
-        SLANG_BINDING_TYPE_CONSTANT_BUFFER,
-        SLANG_BINDING_TYPE_PARAMETER_BLOCK,
-        SLANG_BINDING_TYPE_TYPED_BUFFER,
-        SLANG_BINDING_TYPE_RAW_BUFFER,
-        SLANG_BINDING_TYPE_COMBINED_TEXTURE_SAMPLER,
-        SLANG_BINDING_TYPE_INPUT_RENDER_TARGET,
-        SLANG_BINDING_TYPE_INLINE_UNIFORM_DATA,
-        SLANG_BINDING_TYPE_RAY_TRACING_ACCELERATION_STRUCTURE,
-
-        SLANG_BINDING_TYPE_VARYING_INPUT,
-        SLANG_BINDING_TYPE_VARYING_OUTPUT,
-
-        SLANG_BINDING_TYPE_EXISTENTIAL_VALUE,
-        SLANG_BINDING_TYPE_PUSH_CONSTANT,
-
-        SLANG_BINDING_TYPE_MUTABLE_FLAG = 0x100,
-
-        SLANG_BINDING_TYPE_MUTABLE_TETURE = SLANG_BINDING_TYPE_TEXTURE | SLANG_BINDING_TYPE_MUTABLE_FLAG,
-        SLANG_BINDING_TYPE_MUTABLE_TYPED_BUFFER = SLANG_BINDING_TYPE_TYPED_BUFFER | SLANG_BINDING_TYPE_MUTABLE_FLAG,
-        SLANG_BINDING_TYPE_MUTABLE_RAW_BUFFER = SLANG_BINDING_TYPE_RAW_BUFFER | SLANG_BINDING_TYPE_MUTABLE_FLAG,
-
-        SLANG_BINDING_TYPE_BASE_MASK = 0x00FF,
-        SLANG_BINDING_TYPE_EXT_MASK  = 0xFF00,
-    };
-
-    typedef SlangUInt32 SlangLayoutRulesIntegral;
-    enum SlangLayoutRules : SlangLayoutRulesIntegral
-    {
-        SLANG_LAYOUT_RULES_DEFAULT,
-    };
-
-    typedef SlangUInt32 SlangModifierIDIntegral;
-    enum SlangModifierID : SlangModifierIDIntegral
-    {
-        SLANG_MODIFIER_SHARED,
-    };
-
-    // User Attribute
-    SLANG_API char const* spReflectionUserAttribute_GetName(SlangReflectionUserAttribute* attrib);
-    SLANG_API unsigned int spReflectionUserAttribute_GetArgumentCount(SlangReflectionUserAttribute* attrib);
-    SLANG_API SlangReflectionType* spReflectionUserAttribute_GetArgumentType(SlangReflectionUserAttribute* attrib, unsigned int index);
-    SLANG_API SlangResult spReflectionUserAttribute_GetArgumentValueInt(SlangReflectionUserAttribute* attrib, unsigned int index, int * rs);
-    SLANG_API SlangResult spReflectionUserAttribute_GetArgumentValueFloat(SlangReflectionUserAttribute* attrib, unsigned int index, float * rs);
-
-    /** Returns the string-typed value of a user attribute argument
-        The string returned is not null-terminated. The length of the string is returned via `outSize`.
-        If index of out of range, or if the specified argument is not a string, the function will return nullptr.
-    */
-    SLANG_API const char* spReflectionUserAttribute_GetArgumentValueString(SlangReflectionUserAttribute* attrib, unsigned int index, size_t * outSize);
-
-    // Type Reflection
-
-    SLANG_API SlangTypeKind spReflectionType_GetKind(SlangReflectionType* type);
-    SLANG_API unsigned int spReflectionType_GetUserAttributeCount(SlangReflectionType* type);
-    SLANG_API SlangReflectionUserAttribute* spReflectionType_GetUserAttribute(SlangReflectionType* type, unsigned int index);
-    SLANG_API SlangReflectionUserAttribute* spReflectionType_FindUserAttributeByName(SlangReflectionType* type, char const* name);
-
-    SLANG_API unsigned int spReflectionType_GetFieldCount(SlangReflectionType* type);
-    SLANG_API SlangReflectionVariable* spReflectionType_GetFieldByIndex(SlangReflectionType* type, unsigned index);
-
-        /** Returns the number of elements in the given type.
-
-        This operation is valid for vector and array types. For other types it returns zero.
-
-        When invoked on an unbounded-size array it will return `SLANG_UNBOUNDED_SIZE`,
-        which is defined to be `~size_t(0)`.
-
-        If the size of a type cannot be statically computed, perhaps because it depends on
-        a generic parameter that has not been bound to a specific value, this function returns zero.
-        */
-    SLANG_API size_t spReflectionType_GetElementCount(SlangReflectionType* type);
-
-    #define SLANG_UNBOUNDED_SIZE (~size_t(0))
-
-    SLANG_API SlangReflectionType* spReflectionType_GetElementType(SlangReflectionType* type);
-
-    SLANG_API unsigned int spReflectionType_GetRowCount(SlangReflectionType* type);
-    SLANG_API unsigned int spReflectionType_GetColumnCount(SlangReflectionType* type);
-    SLANG_API SlangScalarType spReflectionType_GetScalarType(SlangReflectionType* type);
-
-    SLANG_API SlangResourceShape spReflectionType_GetResourceShape(SlangReflectionType* type);
-    SLANG_API SlangResourceAccess spReflectionType_GetResourceAccess(SlangReflectionType* type);
-    SLANG_API SlangReflectionType* spReflectionType_GetResourceResultType(SlangReflectionType* type);
-
-    SLANG_API char const* spReflectionType_GetName(SlangReflectionType* type);
-
-    // Type Layout Reflection
-
-    SLANG_API SlangReflectionType* spReflectionTypeLayout_GetType(SlangReflectionTypeLayout* type);
-    SLANG_API SlangTypeKind spReflectionTypeLayout_getKind(SlangReflectionTypeLayout* type);
-    SLANG_API size_t spReflectionTypeLayout_GetSize(SlangReflectionTypeLayout* type, SlangParameterCategory category);
-    SLANG_API size_t spReflectionTypeLayout_GetStride(SlangReflectionTypeLayout* type, SlangParameterCategory category);
-    SLANG_API int32_t spReflectionTypeLayout_getAlignment(SlangReflectionTypeLayout* type, SlangParameterCategory category);
-
-    SLANG_API uint32_t spReflectionTypeLayout_GetFieldCount(SlangReflectionTypeLayout* type);
-    SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_GetFieldByIndex(SlangReflectionTypeLayout* type, unsigned index);
-
-    SLANG_API SlangInt spReflectionTypeLayout_findFieldIndexByName(SlangReflectionTypeLayout* typeLayout, const char* nameBegin, const char* nameEnd);
-
-    SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_GetExplicitCounter(SlangReflectionTypeLayout* typeLayout);
-
-    SLANG_API size_t spReflectionTypeLayout_GetElementStride(SlangReflectionTypeLayout* type, SlangParameterCategory category);
-    SLANG_API SlangReflectionTypeLayout* spReflectionTypeLayout_GetElementTypeLayout(SlangReflectionTypeLayout* type);
-    SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_GetElementVarLayout(SlangReflectionTypeLayout* type);
-    SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_getContainerVarLayout(SlangReflectionTypeLayout* type);
-
-    SLANG_API SlangParameterCategory spReflectionTypeLayout_GetParameterCategory(SlangReflectionTypeLayout* type);
-
-    SLANG_API unsigned spReflectionTypeLayout_GetCategoryCount(SlangReflectionTypeLayout* type);
-    SLANG_API SlangParameterCategory spReflectionTypeLayout_GetCategoryByIndex(SlangReflectionTypeLayout* type, unsigned index);
-
-    SLANG_API SlangMatrixLayoutMode spReflectionTypeLayout_GetMatrixLayoutMode(SlangReflectionTypeLayout* type);
-
-    SLANG_API int spReflectionTypeLayout_getGenericParamIndex(SlangReflectionTypeLayout* type);
-
-    SLANG_API SlangReflectionTypeLayout* spReflectionTypeLayout_getPendingDataTypeLayout(SlangReflectionTypeLayout* type);
-
-    SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_getSpecializedTypePendingDataVarLayout(SlangReflectionTypeLayout* type);
-    SLANG_API SlangInt spReflectionType_getSpecializedTypeArgCount(SlangReflectionType* type);
-    SLANG_API SlangReflectionType* spReflectionType_getSpecializedTypeArgType(SlangReflectionType* type, SlangInt index);
-
-    SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeCount(SlangReflectionTypeLayout* typeLayout);
-    SLANG_API SlangBindingType spReflectionTypeLayout_getBindingRangeType(SlangReflectionTypeLayout* typeLayout, SlangInt index);
-    SLANG_API SlangInt spReflectionTypeLayout_isBindingRangeSpecializable(SlangReflectionTypeLayout* typeLayout, SlangInt index);
-    SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeBindingCount(SlangReflectionTypeLayout* typeLayout, SlangInt index);
-    SLANG_API SlangReflectionTypeLayout* spReflectionTypeLayout_getBindingRangeLeafTypeLayout(SlangReflectionTypeLayout* typeLayout, SlangInt index);
-    SLANG_API SlangReflectionVariable* spReflectionTypeLayout_getBindingRangeLeafVariable(SlangReflectionTypeLayout* typeLayout, SlangInt index);
-    SLANG_API SlangInt spReflectionTypeLayout_getFieldBindingRangeOffset(SlangReflectionTypeLayout* typeLayout, SlangInt fieldIndex);
-    SLANG_API SlangInt spReflectionTypeLayout_getExplicitCounterBindingRangeOffset(SlangReflectionTypeLayout* inTypeLayout);
-
-    SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeDescriptorSetIndex(SlangReflectionTypeLayout* typeLayout, SlangInt index);
-    SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeFirstDescriptorRangeIndex(SlangReflectionTypeLayout* typeLayout, SlangInt index);
-    SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeDescriptorRangeCount(SlangReflectionTypeLayout* typeLayout, SlangInt index);
-
-    SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetCount(SlangReflectionTypeLayout* typeLayout);
-    SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetSpaceOffset(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex);
-    SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetDescriptorRangeCount(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex);
-    SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetDescriptorRangeIndexOffset(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex, SlangInt rangeIndex);
-    SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetDescriptorRangeDescriptorCount(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex, SlangInt rangeIndex);
-    SLANG_API SlangBindingType spReflectionTypeLayout_getDescriptorSetDescriptorRangeType(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex, SlangInt rangeIndex);
-    SLANG_API SlangParameterCategory spReflectionTypeLayout_getDescriptorSetDescriptorRangeCategory(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex, SlangInt rangeIndex);
-
-    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeCount(SlangReflectionTypeLayout* typeLayout);
-    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeBindingRangeIndex(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex);
-    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeSpaceOffset(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex);
-    SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_getSubObjectRangeOffset(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex);
-
-#if 0
-    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeCount(SlangReflectionTypeLayout* typeLayout);
-    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeObjectCount(SlangReflectionTypeLayout* typeLayout, SlangInt index);
-    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeBindingRangeIndex(SlangReflectionTypeLayout* typeLayout, SlangInt index);
-    SLANG_API SlangReflectionTypeLayout* spReflectionTypeLayout_getSubObjectRangeTypeLayout(SlangReflectionTypeLayout* typeLayout, SlangInt index);
-
-    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeCount(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex);
-    SLANG_API SlangBindingType spReflectionTypeLayout_getSubObjectRangeDescriptorRangeBindingType(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject);
-    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeBindingCount(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject);
-    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeIndexOffset(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject);
-    SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeSpaceOffset(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject);
-#endif
-
-    // Variable Reflection
-
-    SLANG_API char const* spReflectionVariable_GetName(SlangReflectionVariable* var);
-    SLANG_API SlangReflectionType* spReflectionVariable_GetType(SlangReflectionVariable* var);
-    SLANG_API SlangReflectionModifier* spReflectionVariable_FindModifier(SlangReflectionVariable* var, SlangModifierID modifierID);
-    SLANG_API unsigned int spReflectionVariable_GetUserAttributeCount(SlangReflectionVariable* var);
-    SLANG_API SlangReflectionUserAttribute* spReflectionVariable_GetUserAttribute(SlangReflectionVariable* var, unsigned int index);
-    SLANG_API SlangReflectionUserAttribute* spReflectionVariable_FindUserAttributeByName(SlangReflectionVariable* var, SlangSession * session, char const* name);
-
-    // Variable Layout Reflection
-
-    SLANG_API SlangReflectionVariable* spReflectionVariableLayout_GetVariable(SlangReflectionVariableLayout* var);
-
-    SLANG_API SlangReflectionTypeLayout* spReflectionVariableLayout_GetTypeLayout(SlangReflectionVariableLayout* var);
-
-    SLANG_API size_t spReflectionVariableLayout_GetOffset(SlangReflectionVariableLayout* var, SlangParameterCategory category);
-    SLANG_API size_t spReflectionVariableLayout_GetSpace(SlangReflectionVariableLayout* var, SlangParameterCategory category);
-
-    SLANG_API char const* spReflectionVariableLayout_GetSemanticName(SlangReflectionVariableLayout* var);
-    SLANG_API size_t spReflectionVariableLayout_GetSemanticIndex(SlangReflectionVariableLayout* var);
-
-    /** Get the stage that a variable belongs to (if any).
-
-    A variable "belongs" to a specific stage when it is a varying input/output
-    parameter either defined as part of the parameter list for an entry
-    point *or* at the global scope of a stage-specific GLSL code file (e.g.,
-    an `in` parameter in a GLSL `.vs` file belongs to the vertex stage).
-    */
-    SLANG_API SlangStage spReflectionVariableLayout_getStage(
-        SlangReflectionVariableLayout* var);
-
-
-    SLANG_API SlangReflectionVariableLayout* spReflectionVariableLayout_getPendingDataLayout(SlangReflectionVariableLayout* var);
-
-    // Shader Parameter Reflection
-
-    typedef SlangReflectionVariableLayout SlangReflectionParameter;
-
-    SLANG_API unsigned spReflectionParameter_GetBindingIndex(SlangReflectionParameter* parameter);
-    SLANG_API unsigned spReflectionParameter_GetBindingSpace(SlangReflectionParameter* parameter);
-
-    SLANG_API SlangResult spIsParameterLocationUsed(
-        SlangCompileRequest* request,
-        SlangInt entryPointIndex,
-        SlangInt targetIndex,
-        SlangParameterCategory category, // is this a `t` register? `s` register?
-        SlangUInt spaceIndex,      // `space` for D3D12, `set` for Vulkan
-        SlangUInt registerIndex,   // `register` for D3D12, `binding` for Vulkan
-        bool& outUsed);
-
-    // Entry Point Reflection
-
-    SLANG_API char const* spReflectionEntryPoint_getName(
-        SlangReflectionEntryPoint* entryPoint);
-
-    SLANG_API char const* spReflectionEntryPoint_getNameOverride(
-        SlangReflectionEntryPoint* entryPoint);
-
-    SLANG_API unsigned spReflectionEntryPoint_getParameterCount(
-        SlangReflectionEntryPoint* entryPoint);
-
-    SLANG_API SlangReflectionVariableLayout* spReflectionEntryPoint_getParameterByIndex(
-        SlangReflectionEntryPoint*  entryPoint,
-        unsigned                    index);
-
-    SLANG_API SlangStage spReflectionEntryPoint_getStage(SlangReflectionEntryPoint* entryPoint);
-
-    SLANG_API void spReflectionEntryPoint_getComputeThreadGroupSize(
-        SlangReflectionEntryPoint*  entryPoint,
-        SlangUInt                   axisCount,
-        SlangUInt*                  outSizeAlongAxis);
-
-    SLANG_API void spReflectionEntryPoint_getComputeWaveSize(
-        SlangReflectionEntryPoint* entryPoint,
-        SlangUInt* outWaveSize);
-
-    SLANG_API int spReflectionEntryPoint_usesAnySampleRateInput(
-        SlangReflectionEntryPoint* entryPoint);
-
-    SLANG_API SlangReflectionVariableLayout* spReflectionEntryPoint_getVarLayout(
-        SlangReflectionEntryPoint* entryPoint);
-
-    SLANG_API SlangReflectionVariableLayout* spReflectionEntryPoint_getResultVarLayout(
-        SlangReflectionEntryPoint* entryPoint);
-
-    SLANG_API int spReflectionEntryPoint_hasDefaultConstantBuffer(
-        SlangReflectionEntryPoint* entryPoint);
-
-    // SlangReflectionTypeParameter
-    SLANG_API char const* spReflectionTypeParameter_GetName(SlangReflectionTypeParameter* typeParam);
-    SLANG_API unsigned spReflectionTypeParameter_GetIndex(SlangReflectionTypeParameter* typeParam);
-    SLANG_API unsigned spReflectionTypeParameter_GetConstraintCount(SlangReflectionTypeParameter* typeParam);
-    SLANG_API SlangReflectionType* spReflectionTypeParameter_GetConstraintByIndex(SlangReflectionTypeParameter* typeParam, unsigned int index);
-
-    // Shader Reflection
-
-    SLANG_API unsigned spReflection_GetParameterCount(SlangReflection* reflection);
-    SLANG_API SlangReflectionParameter* spReflection_GetParameterByIndex(SlangReflection* reflection, unsigned index);
-
-    SLANG_API unsigned int spReflection_GetTypeParameterCount(SlangReflection* reflection);
-    SLANG_API SlangReflectionTypeParameter* spReflection_GetTypeParameterByIndex(SlangReflection* reflection, unsigned int index);
-    SLANG_API SlangReflectionTypeParameter* spReflection_FindTypeParameter(SlangReflection* reflection, char const* name);
-
-    SLANG_API SlangReflectionType* spReflection_FindTypeByName(SlangReflection* reflection, char const* name);
-    SLANG_API SlangReflectionTypeLayout* spReflection_GetTypeLayout(SlangReflection* reflection, SlangReflectionType* reflectionType, SlangLayoutRules rules);
-
-    SLANG_API SlangUInt spReflection_getEntryPointCount(SlangReflection* reflection);
-    SLANG_API SlangReflectionEntryPoint* spReflection_getEntryPointByIndex(SlangReflection* reflection, SlangUInt index);
-    SLANG_API SlangReflectionEntryPoint* spReflection_findEntryPointByName(SlangReflection* reflection, char const* name);
-
-    SLANG_API SlangUInt spReflection_getGlobalConstantBufferBinding(SlangReflection* reflection);
-    SLANG_API size_t spReflection_getGlobalConstantBufferSize(SlangReflection* reflection);
-
-    SLANG_API  SlangReflectionType* spReflection_specializeType(
-        SlangReflection*            reflection,
-        SlangReflectionType*        type,
-        SlangInt                    specializationArgCount,
-        SlangReflectionType* const* specializationArgs,
-        ISlangBlob**                outDiagnostics);
-
-        /// Get the number of hashed strings
-    SLANG_API SlangUInt spReflection_getHashedStringCount(
-        SlangReflection*  reflection);
-
-        /// Get a hashed string. The number of chars is written in outCount.
-        /// The count does *NOT* including terminating 0. The returned string will be 0 terminated. 
-    SLANG_API const char* spReflection_getHashedString(
-        SlangReflection*  reflection,
-        SlangUInt index,
-        size_t* outCount);
-
-        /// Compute a string hash.
-        /// Count should *NOT* include terminating zero.
-    SLANG_API SlangUInt32 spComputeStringHash(const char* chars, size_t count);
-
-        /// Get a type layout representing reflection information for the global-scope prameters.
-    SLANG_API SlangReflectionTypeLayout* spReflection_getGlobalParamsTypeLayout(
-        SlangReflection* reflection);
-
-        /// Get a variable layout representing reflection information for the global-scope prameters.
-    SLANG_API SlangReflectionVariableLayout* spReflection_getGlobalParamsVarLayout(
-        SlangReflection* reflection);
-
-}
-#ifdef __cplusplus
-
-namespace slang
-{
-    struct ISession;
-}
-
-SLANG_API slang::ISession* spReflection_GetSession(SlangReflection* reflection);
-
-/* Helper interfaces for C++ users */
-namespace slang
-{
-    struct BufferReflection;
-    struct TypeLayoutReflection;
-    struct TypeReflection;
-    struct VariableLayoutReflection;
-    struct VariableReflection;
-    
-    struct UserAttribute
-    {
-        char const* getName()
-        {
-            return spReflectionUserAttribute_GetName((SlangReflectionUserAttribute*)this);
-        }
-        uint32_t getArgumentCount()
-        {
-            return (uint32_t)spReflectionUserAttribute_GetArgumentCount((SlangReflectionUserAttribute*)this);
-        }
-        TypeReflection* getArgumentType(uint32_t index)
-        {
-            return (TypeReflection*)spReflectionUserAttribute_GetArgumentType((SlangReflectionUserAttribute*)this, index);
-        }
-        SlangResult getArgumentValueInt(uint32_t index, int * value)
-        {
-            return spReflectionUserAttribute_GetArgumentValueInt((SlangReflectionUserAttribute*)this, index, value);
-        }
-        SlangResult getArgumentValueFloat(uint32_t index, float * value)
-        {
-            return spReflectionUserAttribute_GetArgumentValueFloat((SlangReflectionUserAttribute*)this, index, value);
-        }
-        const char* getArgumentValueString(uint32_t index, size_t * outSize)
-        {
-            return spReflectionUserAttribute_GetArgumentValueString((SlangReflectionUserAttribute*)this, index, outSize);
-        }
-    };
-
-    struct TypeReflection
-    {
-        enum class Kind
-        {
-            None    = SLANG_TYPE_KIND_NONE,
-            Struct  = SLANG_TYPE_KIND_STRUCT,
-            Array   = SLANG_TYPE_KIND_ARRAY,
-            Matrix  = SLANG_TYPE_KIND_MATRIX,
-            Vector  = SLANG_TYPE_KIND_VECTOR,
-            Scalar  = SLANG_TYPE_KIND_SCALAR,
-            ConstantBuffer = SLANG_TYPE_KIND_CONSTANT_BUFFER,
-            Resource = SLANG_TYPE_KIND_RESOURCE,
-            SamplerState = SLANG_TYPE_KIND_SAMPLER_STATE,
-            TextureBuffer = SLANG_TYPE_KIND_TEXTURE_BUFFER,
-            ShaderStorageBuffer = SLANG_TYPE_KIND_SHADER_STORAGE_BUFFER,
-            ParameterBlock = SLANG_TYPE_KIND_PARAMETER_BLOCK,
-            GenericTypeParameter = SLANG_TYPE_KIND_GENERIC_TYPE_PARAMETER,
-            Interface = SLANG_TYPE_KIND_INTERFACE,
-            OutputStream = SLANG_TYPE_KIND_OUTPUT_STREAM,
-            Specialized = SLANG_TYPE_KIND_SPECIALIZED,
-            Feedback = SLANG_TYPE_KIND_FEEDBACK,
-            Pointer = SLANG_TYPE_KIND_POINTER,
-        };
-
-        enum ScalarType : SlangScalarTypeIntegral
-        {
-            None    = SLANG_SCALAR_TYPE_NONE,
-            Void    = SLANG_SCALAR_TYPE_VOID,
-            Bool    = SLANG_SCALAR_TYPE_BOOL,
-            Int32   = SLANG_SCALAR_TYPE_INT32,
-            UInt32  = SLANG_SCALAR_TYPE_UINT32,
-            Int64   = SLANG_SCALAR_TYPE_INT64,
-            UInt64  = SLANG_SCALAR_TYPE_UINT64,
-            Float16 = SLANG_SCALAR_TYPE_FLOAT16,
-            Float32 = SLANG_SCALAR_TYPE_FLOAT32,
-            Float64 = SLANG_SCALAR_TYPE_FLOAT64,
-            Int8    = SLANG_SCALAR_TYPE_INT8,
-            UInt8   = SLANG_SCALAR_TYPE_UINT8,
-            Int16   = SLANG_SCALAR_TYPE_INT16,
-            UInt16  = SLANG_SCALAR_TYPE_UINT16,
-        };
-
-        Kind getKind()
-        {
-            return (Kind) spReflectionType_GetKind((SlangReflectionType*) this);
-        }
-
-        // only useful if `getKind() == Kind::Struct`
-        unsigned int getFieldCount()
-        {
-            return spReflectionType_GetFieldCount((SlangReflectionType*) this);
-        }
-
-        VariableReflection* getFieldByIndex(unsigned int index)
-        {
-            return (VariableReflection*) spReflectionType_GetFieldByIndex((SlangReflectionType*) this, index);
-        }
-
-        bool isArray() { return getKind() == TypeReflection::Kind::Array; }
-
-        TypeReflection* unwrapArray()
-        {
-            TypeReflection* type = this;
-            while( type->isArray() )
-            {
-                type = type->getElementType();
-            }
-            return type;
-        }
-
-        // only useful if `getKind() == Kind::Array`
-        size_t getElementCount()
-        {
-            return spReflectionType_GetElementCount((SlangReflectionType*) this);
-        }
-
-        size_t getTotalArrayElementCount()
-        {
-            if(!isArray()) return 0;
-            size_t result = 1;
-            TypeReflection* type = this;
-            for(;;)
-            {
-                if(!type->isArray())
-                    return result;
-
-                result *= type->getElementCount();
-                type = type->getElementType();
-            }
-        }
-
-        TypeReflection* getElementType()
-        {
-            return (TypeReflection*) spReflectionType_GetElementType((SlangReflectionType*) this);
-        }
-
-        unsigned getRowCount()
-        {
-            return spReflectionType_GetRowCount((SlangReflectionType*) this);
-        }
-
-        unsigned getColumnCount()
-        {
-            return spReflectionType_GetColumnCount((SlangReflectionType*) this);
-        }
-
-        ScalarType getScalarType()
-        {
-            return (ScalarType) spReflectionType_GetScalarType((SlangReflectionType*) this);
-        }
-
-        TypeReflection* getResourceResultType()
-        {
-            return (TypeReflection*) spReflectionType_GetResourceResultType((SlangReflectionType*) this);
-        }
-
-        SlangResourceShape getResourceShape()
-        {
-            return spReflectionType_GetResourceShape((SlangReflectionType*) this);
-        }
-
-        SlangResourceAccess getResourceAccess()
-        {
-            return spReflectionType_GetResourceAccess((SlangReflectionType*) this);
-        }
-
-        char const* getName()
-        {
-            return spReflectionType_GetName((SlangReflectionType*) this);
-        }
-
-        unsigned int getUserAttributeCount()
-        {
-            return spReflectionType_GetUserAttributeCount((SlangReflectionType*)this);
-        }
-        UserAttribute* getUserAttributeByIndex(unsigned int index)
-        {
-            return (UserAttribute*)spReflectionType_GetUserAttribute((SlangReflectionType*)this, index);
-        }
-        UserAttribute* findUserAttributeByName(char const* name)
-        {
-            return (UserAttribute*)spReflectionType_FindUserAttributeByName((SlangReflectionType*)this, name);
-        }
-    };
-
-    enum ParameterCategory : SlangParameterCategoryIntegral
-    {
-        // TODO: these aren't scoped...
-        None = SLANG_PARAMETER_CATEGORY_NONE,
-        Mixed = SLANG_PARAMETER_CATEGORY_MIXED,
-        ConstantBuffer = SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER,
-        ShaderResource = SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE,
-        UnorderedAccess = SLANG_PARAMETER_CATEGORY_UNORDERED_ACCESS,
-        VaryingInput = SLANG_PARAMETER_CATEGORY_VARYING_INPUT,
-        VaryingOutput = SLANG_PARAMETER_CATEGORY_VARYING_OUTPUT,
-        SamplerState = SLANG_PARAMETER_CATEGORY_SAMPLER_STATE,
-        Uniform = SLANG_PARAMETER_CATEGORY_UNIFORM,
-        DescriptorTableSlot = SLANG_PARAMETER_CATEGORY_DESCRIPTOR_TABLE_SLOT,
-        SpecializationConstant = SLANG_PARAMETER_CATEGORY_SPECIALIZATION_CONSTANT,
-        PushConstantBuffer = SLANG_PARAMETER_CATEGORY_PUSH_CONSTANT_BUFFER,
-        RegisterSpace = SLANG_PARAMETER_CATEGORY_REGISTER_SPACE,
-        GenericResource = SLANG_PARAMETER_CATEGORY_GENERIC,
-
-        RayPayload = SLANG_PARAMETER_CATEGORY_RAY_PAYLOAD,
-        HitAttributes = SLANG_PARAMETER_CATEGORY_HIT_ATTRIBUTES,
-        CallablePayload = SLANG_PARAMETER_CATEGORY_CALLABLE_PAYLOAD,
-
-        ShaderRecord = SLANG_PARAMETER_CATEGORY_SHADER_RECORD,
-
-        ExistentialTypeParam = SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM,
-        ExistentialObjectParam = SLANG_PARAMETER_CATEGORY_EXISTENTIAL_OBJECT_PARAM,
-
-        SubElementRegisterSpace = SLANG_PARAMETER_CATEGORY_SUB_ELEMENT_REGISTER_SPACE,
-
-        InputAttachmentIndex = SLANG_PARAMETER_CATEGORY_SUBPASS,
-
-        MetalBuffer = SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER,
-        MetalTexture = SLANG_PARAMETER_CATEGORY_METAL_TEXTURE,
-        MetalArgumentBufferElement = SLANG_PARAMETER_CATEGORY_METAL_ARGUMENT_BUFFER_ELEMENT,
-        MetalAttribute = SLANG_PARAMETER_CATEGORY_METAL_ATTRIBUTE,
-        MetalPayload = SLANG_PARAMETER_CATEGORY_METAL_PAYLOAD,
-
-        // DEPRECATED:
-        VertexInput = SLANG_PARAMETER_CATEGORY_VERTEX_INPUT,
-        FragmentOutput = SLANG_PARAMETER_CATEGORY_FRAGMENT_OUTPUT,
-    };
-
-    enum class BindingType : SlangBindingTypeIntegral
-    {
-        Unknown                             = SLANG_BINDING_TYPE_UNKNOWN,
-
-        Sampler                             = SLANG_BINDING_TYPE_SAMPLER,
-        Texture                             = SLANG_BINDING_TYPE_TEXTURE,
-        ConstantBuffer                      = SLANG_BINDING_TYPE_CONSTANT_BUFFER,
-        ParameterBlock                      = SLANG_BINDING_TYPE_PARAMETER_BLOCK,
-        TypedBuffer                         = SLANG_BINDING_TYPE_TYPED_BUFFER,
-        RawBuffer                           = SLANG_BINDING_TYPE_RAW_BUFFER,
-        CombinedTextureSampler              = SLANG_BINDING_TYPE_COMBINED_TEXTURE_SAMPLER,
-        InputRenderTarget                   = SLANG_BINDING_TYPE_INPUT_RENDER_TARGET,
-        InlineUniformData                   = SLANG_BINDING_TYPE_INLINE_UNIFORM_DATA,
-        RayTracingAccelerationStructure     = SLANG_BINDING_TYPE_RAY_TRACING_ACCELERATION_STRUCTURE,
-        VaryingInput                        = SLANG_BINDING_TYPE_VARYING_INPUT,
-        VaryingOutput                       = SLANG_BINDING_TYPE_VARYING_OUTPUT,
-        ExistentialValue                    = SLANG_BINDING_TYPE_EXISTENTIAL_VALUE,
-        PushConstant                        = SLANG_BINDING_TYPE_PUSH_CONSTANT,
-
-        MutableFlag                         = SLANG_BINDING_TYPE_MUTABLE_FLAG,
-
-        MutableTexture                      = SLANG_BINDING_TYPE_MUTABLE_TETURE,
-        MutableTypedBuffer                  = SLANG_BINDING_TYPE_MUTABLE_TYPED_BUFFER,
-        MutableRawBuffer                    = SLANG_BINDING_TYPE_MUTABLE_RAW_BUFFER,
-
-        BaseMask                            = SLANG_BINDING_TYPE_BASE_MASK,
-        ExtMask                             = SLANG_BINDING_TYPE_EXT_MASK,
-    };
-
-    struct TypeLayoutReflection
-    {
-        TypeReflection* getType()
-        {
-            return (TypeReflection*) spReflectionTypeLayout_GetType((SlangReflectionTypeLayout*) this);
-        }
-
-        TypeReflection::Kind getKind()
-        {
-            return (TypeReflection::Kind) spReflectionTypeLayout_getKind((SlangReflectionTypeLayout*) this);
-        }
-
-        size_t getSize(SlangParameterCategory category = SLANG_PARAMETER_CATEGORY_UNIFORM)
-        {
-            return spReflectionTypeLayout_GetSize((SlangReflectionTypeLayout*) this, category);
-        }
-
-        size_t getStride(SlangParameterCategory category = SLANG_PARAMETER_CATEGORY_UNIFORM)
-        {
-            return spReflectionTypeLayout_GetStride((SlangReflectionTypeLayout*) this, category);
-        }
-
-        int32_t getAlignment(SlangParameterCategory category = SLANG_PARAMETER_CATEGORY_UNIFORM)
-        {
-            return spReflectionTypeLayout_getAlignment((SlangReflectionTypeLayout*) this, category);
-        }
-
-        unsigned int getFieldCount()
-        {
-            return spReflectionTypeLayout_GetFieldCount((SlangReflectionTypeLayout*)this);
-        }
-
-        VariableLayoutReflection* getFieldByIndex(unsigned int index)
-        {
-            return (VariableLayoutReflection*) spReflectionTypeLayout_GetFieldByIndex((SlangReflectionTypeLayout*) this, index);
-        }
-
-        SlangInt findFieldIndexByName(char const* nameBegin, char const* nameEnd = nullptr)
-        {
-            return spReflectionTypeLayout_findFieldIndexByName((SlangReflectionTypeLayout*) this, nameBegin, nameEnd);
-        }
-
-        VariableLayoutReflection* getExplicitCounter()
-        {
-            return (VariableLayoutReflection*) spReflectionTypeLayout_GetExplicitCounter((SlangReflectionTypeLayout*) this);
-        }
-
-        bool isArray() { return getType()->isArray(); }
-
-        TypeLayoutReflection* unwrapArray()
-        {
-            TypeLayoutReflection* typeLayout = this;
-            while( typeLayout->isArray() )
-            {
-                typeLayout = typeLayout->getElementTypeLayout();
-            }
-            return typeLayout;
-        }
-
-        // only useful if `getKind() == Kind::Array`
-        size_t getElementCount()
-        {
-            return getType()->getElementCount();
-        }
-
-        size_t getTotalArrayElementCount()
-        {
-            return getType()->getTotalArrayElementCount();
-        }
-
-        size_t getElementStride(SlangParameterCategory category)
-        {
-            return spReflectionTypeLayout_GetElementStride((SlangReflectionTypeLayout*) this, category);
-        }
-
-        TypeLayoutReflection* getElementTypeLayout()
-        {
-            return (TypeLayoutReflection*) spReflectionTypeLayout_GetElementTypeLayout((SlangReflectionTypeLayout*) this);
-        }
-
-        VariableLayoutReflection* getElementVarLayout()
-        {
-            return (VariableLayoutReflection*)spReflectionTypeLayout_GetElementVarLayout((SlangReflectionTypeLayout*) this);
-        }
-
-        VariableLayoutReflection* getContainerVarLayout()
-        {
-            return (VariableLayoutReflection*)spReflectionTypeLayout_getContainerVarLayout((SlangReflectionTypeLayout*) this);
-        }
-
-        // How is this type supposed to be bound?
-        ParameterCategory getParameterCategory()
-        {
-            return (ParameterCategory) spReflectionTypeLayout_GetParameterCategory((SlangReflectionTypeLayout*) this);
-        }
-
-        unsigned int getCategoryCount()
-        {
-            return spReflectionTypeLayout_GetCategoryCount((SlangReflectionTypeLayout*) this);
-        }
-
-        ParameterCategory getCategoryByIndex(unsigned int index)
-        {
-            return (ParameterCategory) spReflectionTypeLayout_GetCategoryByIndex((SlangReflectionTypeLayout*) this, index);
-        }
-
-        unsigned getRowCount()
-        {
-            return getType()->getRowCount();
-        }
-
-        unsigned getColumnCount()
-        {
-            return getType()->getColumnCount();
-        }
-
-        TypeReflection::ScalarType getScalarType()
-        {
-            return getType()->getScalarType();
-        }
-
-        TypeReflection* getResourceResultType()
-        {
-            return getType()->getResourceResultType();
-        }
-
-        SlangResourceShape getResourceShape()
-        {
-            return getType()->getResourceShape();
-        }
-
-        SlangResourceAccess getResourceAccess()
-        {
-            return getType()->getResourceAccess();
-        }
-
-        char const* getName()
-        {
-            return getType()->getName();
-        }
-
-        SlangMatrixLayoutMode getMatrixLayoutMode()
-        {
-            return spReflectionTypeLayout_GetMatrixLayoutMode((SlangReflectionTypeLayout*) this);
-        }
-
-        int getGenericParamIndex()
-        {
-            return spReflectionTypeLayout_getGenericParamIndex(
-                (SlangReflectionTypeLayout*) this);
-        }
-
-        TypeLayoutReflection* getPendingDataTypeLayout()
-        {
-            return (TypeLayoutReflection*) spReflectionTypeLayout_getPendingDataTypeLayout(
-                (SlangReflectionTypeLayout*) this);
-        }
-
-        VariableLayoutReflection* getSpecializedTypePendingDataVarLayout()
-        {
-            return (VariableLayoutReflection*) spReflectionTypeLayout_getSpecializedTypePendingDataVarLayout(
-                (SlangReflectionTypeLayout*) this);
-        }
-
-        SlangInt getBindingRangeCount()
-        {
-            return spReflectionTypeLayout_getBindingRangeCount(
-                (SlangReflectionTypeLayout*) this);
-        }
-
-        BindingType getBindingRangeType(SlangInt index)
-        {
-            return (BindingType) spReflectionTypeLayout_getBindingRangeType(
-                (SlangReflectionTypeLayout*) this,
-                index);
-        }
-
-        bool isBindingRangeSpecializable(SlangInt index)
-        {
-            return (bool)spReflectionTypeLayout_isBindingRangeSpecializable(
-                (SlangReflectionTypeLayout*)this,
-                index);
-
-        }
-
-        SlangInt getBindingRangeBindingCount(SlangInt index)
-        {
-            return spReflectionTypeLayout_getBindingRangeBindingCount(
-                (SlangReflectionTypeLayout*) this,
-                index);
-        }
-
-        /*
-        SlangInt getBindingRangeIndexOffset(SlangInt index)
-        {
-            return spReflectionTypeLayout_getBindingRangeIndexOffset(
-                (SlangReflectionTypeLayout*) this,
-                index);
-        }
-
-        SlangInt getBindingRangeSpaceOffset(SlangInt index)
-        {
-            return spReflectionTypeLayout_getBindingRangeSpaceOffset(
-                (SlangReflectionTypeLayout*) this,
-                index);
-        }
-        */
-
-        SlangInt getFieldBindingRangeOffset(SlangInt fieldIndex)
-        {
-            return spReflectionTypeLayout_getFieldBindingRangeOffset(
-                (SlangReflectionTypeLayout*) this,
-                fieldIndex);
-        }
-
-        SlangInt getExplicitCounterBindingRangeOffset()
-        {
-            return spReflectionTypeLayout_getExplicitCounterBindingRangeOffset(
-                (SlangReflectionTypeLayout*) this);
-        }
-
-        TypeLayoutReflection* getBindingRangeLeafTypeLayout(SlangInt index)
-        {
-            return (TypeLayoutReflection*) spReflectionTypeLayout_getBindingRangeLeafTypeLayout(
-                (SlangReflectionTypeLayout*) this,
-                index);
-        }
-
-        VariableReflection* getBindingRangeLeafVariable(SlangInt index)
-        {
-            return (VariableReflection*)spReflectionTypeLayout_getBindingRangeLeafVariable(
-                (SlangReflectionTypeLayout*)this, index);
-        }
-
-        SlangInt getBindingRangeDescriptorSetIndex(SlangInt index)
-        {
-            return spReflectionTypeLayout_getBindingRangeDescriptorSetIndex(
-                (SlangReflectionTypeLayout*) this,
-                index);
-        }
-
-        SlangInt getBindingRangeFirstDescriptorRangeIndex(SlangInt index)
-        {
-            return spReflectionTypeLayout_getBindingRangeFirstDescriptorRangeIndex(
-                (SlangReflectionTypeLayout*) this,
-                index);
-        }
-
-        SlangInt getBindingRangeDescriptorRangeCount(SlangInt index)
-        {
-            return spReflectionTypeLayout_getBindingRangeDescriptorRangeCount(
-                (SlangReflectionTypeLayout*) this,
-                index);
-        }
-
-        SlangInt getDescriptorSetCount()
-        {
-            return spReflectionTypeLayout_getDescriptorSetCount(
-                (SlangReflectionTypeLayout*) this);
-        }
-
-        SlangInt getDescriptorSetSpaceOffset(SlangInt setIndex)
-        {
-            return spReflectionTypeLayout_getDescriptorSetSpaceOffset(
-                (SlangReflectionTypeLayout*) this,
-                setIndex);
-        }
-
-        SlangInt getDescriptorSetDescriptorRangeCount(SlangInt setIndex)
-        {
-            return spReflectionTypeLayout_getDescriptorSetDescriptorRangeCount(
-                (SlangReflectionTypeLayout*) this,
-                setIndex);
-        }
-
-        SlangInt getDescriptorSetDescriptorRangeIndexOffset(SlangInt setIndex, SlangInt rangeIndex)
-        {
-            return spReflectionTypeLayout_getDescriptorSetDescriptorRangeIndexOffset(
-                (SlangReflectionTypeLayout*) this,
-                setIndex,
-                rangeIndex);
-        }
-
-        SlangInt getDescriptorSetDescriptorRangeDescriptorCount(SlangInt setIndex, SlangInt rangeIndex)
-        {
-            return spReflectionTypeLayout_getDescriptorSetDescriptorRangeDescriptorCount(
-                (SlangReflectionTypeLayout*) this,
-                setIndex,
-                rangeIndex);
-        }
-
-        BindingType getDescriptorSetDescriptorRangeType(SlangInt setIndex, SlangInt rangeIndex)
-        {
-            return (BindingType) spReflectionTypeLayout_getDescriptorSetDescriptorRangeType(
-                (SlangReflectionTypeLayout*) this,
-                setIndex,
-                rangeIndex);
-        }
-
-        ParameterCategory getDescriptorSetDescriptorRangeCategory(SlangInt setIndex, SlangInt rangeIndex)
-        {
-            return (ParameterCategory) spReflectionTypeLayout_getDescriptorSetDescriptorRangeCategory(
-                (SlangReflectionTypeLayout*) this,
-                setIndex,
-                rangeIndex);
-        }
-
-        SlangInt getSubObjectRangeCount()
-        {
-            return spReflectionTypeLayout_getSubObjectRangeCount(
-                (SlangReflectionTypeLayout*) this);
-        }
-
-        SlangInt getSubObjectRangeBindingRangeIndex(SlangInt subObjectRangeIndex)
-        {
-            return spReflectionTypeLayout_getSubObjectRangeBindingRangeIndex(
-                (SlangReflectionTypeLayout*) this,
-                subObjectRangeIndex);
-        }
-
-        SlangInt getSubObjectRangeSpaceOffset(SlangInt subObjectRangeIndex)
-        {
-            return spReflectionTypeLayout_getSubObjectRangeSpaceOffset(
-                (SlangReflectionTypeLayout*) this,
-                subObjectRangeIndex);
-        }
-
-        VariableLayoutReflection* getSubObjectRangeOffset(SlangInt subObjectRangeIndex)
-        {
-            return (VariableLayoutReflection*) spReflectionTypeLayout_getSubObjectRangeOffset(
-                (SlangReflectionTypeLayout*) this,
-                subObjectRangeIndex);
-        }
-    };
-
-    struct Modifier
-    {
-        enum ID : SlangModifierIDIntegral
-        {
-            Shared = SLANG_MODIFIER_SHARED,
-        };
-    };
-
-    struct VariableReflection
-    {
-        char const* getName()
-        {
-            return spReflectionVariable_GetName((SlangReflectionVariable*) this);
-        }
-
-        TypeReflection* getType()
-        {
-            return (TypeReflection*) spReflectionVariable_GetType((SlangReflectionVariable*) this);
-        }
-
-        Modifier* findModifier(Modifier::ID id)
-        {
-            return (Modifier*) spReflectionVariable_FindModifier((SlangReflectionVariable*) this, (SlangModifierID) id);
-        }
-
-        unsigned int getUserAttributeCount()
-        {
-            return spReflectionVariable_GetUserAttributeCount((SlangReflectionVariable*)this);
-        }
-        UserAttribute* getUserAttributeByIndex(unsigned int index)
-        {
-            return (UserAttribute*)spReflectionVariable_GetUserAttribute((SlangReflectionVariable*)this, index);
-        }
-        UserAttribute* findUserAttributeByName(SlangSession* session, char const* name)
-        {
-            return (UserAttribute*)spReflectionVariable_FindUserAttributeByName((SlangReflectionVariable*)this, session, name);
-        }
-    };
-
-    struct VariableLayoutReflection
-    {
-        VariableReflection* getVariable()
-        {
-            return (VariableReflection*) spReflectionVariableLayout_GetVariable((SlangReflectionVariableLayout*) this);
-        }
-
-        char const* getName()
-        {
-            return getVariable()->getName();
-        }
-
-        Modifier* findModifier(Modifier::ID id)
-        {
-            return getVariable()->findModifier(id);
-        }
-
-        TypeLayoutReflection* getTypeLayout()
-        {
-            return (TypeLayoutReflection*) spReflectionVariableLayout_GetTypeLayout((SlangReflectionVariableLayout*) this);
-        }
-
-        ParameterCategory getCategory()
-        {
-            return getTypeLayout()->getParameterCategory();
-        }
-
-        unsigned int getCategoryCount()
-        {
-            return getTypeLayout()->getCategoryCount();
-        }
-
-        ParameterCategory getCategoryByIndex(unsigned int index)
-        {
-            return getTypeLayout()->getCategoryByIndex(index);
-        }
-
-
-        size_t getOffset(SlangParameterCategory category = SLANG_PARAMETER_CATEGORY_UNIFORM)
-        {
-            return spReflectionVariableLayout_GetOffset((SlangReflectionVariableLayout*) this, category);
-        }
-
-        TypeReflection* getType()
-        {
-            return getVariable()->getType();
-        }
-
-        unsigned getBindingIndex()
-        {
-            return spReflectionParameter_GetBindingIndex((SlangReflectionVariableLayout*) this);
-        }
-
-        unsigned getBindingSpace()
-        {
-            return spReflectionParameter_GetBindingSpace((SlangReflectionVariableLayout*) this);
-        }
-
-        size_t getBindingSpace(SlangParameterCategory category)
-        {
-            return spReflectionVariableLayout_GetSpace((SlangReflectionVariableLayout*) this, category);
-        }
-
-        char const* getSemanticName()
-        {
-            return spReflectionVariableLayout_GetSemanticName((SlangReflectionVariableLayout*) this);
-        }
-
-        size_t getSemanticIndex()
-        {
-            return spReflectionVariableLayout_GetSemanticIndex((SlangReflectionVariableLayout*) this);
-        }
-
-        SlangStage getStage()
-        {
-            return spReflectionVariableLayout_getStage((SlangReflectionVariableLayout*) this);
-        }
-
-        VariableLayoutReflection* getPendingDataLayout()
-        {
-            return (VariableLayoutReflection*) spReflectionVariableLayout_getPendingDataLayout((SlangReflectionVariableLayout*) this);
-        }
-    };
-
-    struct EntryPointReflection
-    {
-        char const* getName()
-        {
-            return spReflectionEntryPoint_getName((SlangReflectionEntryPoint*) this);
-        }
-
-        char const* getNameOverride()
-        {
-            return spReflectionEntryPoint_getNameOverride((SlangReflectionEntryPoint*)this);
-        }
-
-        unsigned getParameterCount()
-        {
-            return spReflectionEntryPoint_getParameterCount((SlangReflectionEntryPoint*) this);
-        }
-
-        VariableLayoutReflection* getParameterByIndex(unsigned index)
-        {
-            return (VariableLayoutReflection*) spReflectionEntryPoint_getParameterByIndex((SlangReflectionEntryPoint*) this, index);
-        }
-
-        SlangStage getStage()
-        {
-            return spReflectionEntryPoint_getStage((SlangReflectionEntryPoint*) this);
-        }
-
-        void getComputeThreadGroupSize(
-            SlangUInt   axisCount,
-            SlangUInt*  outSizeAlongAxis)
-        {
-            return spReflectionEntryPoint_getComputeThreadGroupSize((SlangReflectionEntryPoint*) this, axisCount, outSizeAlongAxis);
-        }
-
-        void getComputeWaveSize(
-            SlangUInt* outWaveSize)
-        {
-            return spReflectionEntryPoint_getComputeWaveSize((SlangReflectionEntryPoint*)this, outWaveSize);
-        }
-
-        bool usesAnySampleRateInput()
-        {
-            return 0 != spReflectionEntryPoint_usesAnySampleRateInput((SlangReflectionEntryPoint*) this);
-        }
-
-        VariableLayoutReflection* getVarLayout()
-        {
-            return (VariableLayoutReflection*) spReflectionEntryPoint_getVarLayout((SlangReflectionEntryPoint*) this);
-        }
-
-        TypeLayoutReflection* getTypeLayout()
-        {
-            return getVarLayout()->getTypeLayout();
-        }
-
-        VariableLayoutReflection* getResultVarLayout()
-        {
-            return (VariableLayoutReflection*) spReflectionEntryPoint_getResultVarLayout((SlangReflectionEntryPoint*) this);
-        }
-
-        bool hasDefaultConstantBuffer()
-        {
-            return spReflectionEntryPoint_hasDefaultConstantBuffer((SlangReflectionEntryPoint*) this) != 0;
-        }
-    };
-    typedef EntryPointReflection EntryPointLayout;
-
-    struct TypeParameterReflection
-    {
-        char const* getName()
-        {
-            return spReflectionTypeParameter_GetName((SlangReflectionTypeParameter*) this);
-        }
-        unsigned getIndex()
-        {
-            return spReflectionTypeParameter_GetIndex((SlangReflectionTypeParameter*) this);
-        }
-        unsigned getConstraintCount()
-        {
-            return spReflectionTypeParameter_GetConstraintCount((SlangReflectionTypeParameter*) this);
-        }
-        TypeReflection* getConstraintByIndex(int index)
-        {
-            return (TypeReflection*)spReflectionTypeParameter_GetConstraintByIndex((SlangReflectionTypeParameter*) this, index);
-        }
-    };
-
-    enum class LayoutRules : SlangLayoutRulesIntegral
-    {
-        Default = SLANG_LAYOUT_RULES_DEFAULT,
-    };
-
-    typedef struct ShaderReflection ProgramLayout;
-
-    struct ShaderReflection
-    {
-        unsigned getParameterCount()
-        {
-            return spReflection_GetParameterCount((SlangReflection*) this);
-        }
-
-        unsigned getTypeParameterCount()
-        {
-            return spReflection_GetTypeParameterCount((SlangReflection*) this);
-        }
-
-        slang::ISession* getSession()
-        {
-            return spReflection_GetSession((SlangReflection*)this);
-        }
-
-        TypeParameterReflection* getTypeParameterByIndex(unsigned index)
-        {
-            return (TypeParameterReflection*)spReflection_GetTypeParameterByIndex((SlangReflection*) this, index);
-        }
-
-        TypeParameterReflection* findTypeParameter(char const* name)
-        {
-            return (TypeParameterReflection*)spReflection_FindTypeParameter((SlangReflection*)this, name);
-        }
-
-        VariableLayoutReflection* getParameterByIndex(unsigned index)
-        {
-            return (VariableLayoutReflection*) spReflection_GetParameterByIndex((SlangReflection*) this, index);
-        }
-
-        static ProgramLayout* get(SlangCompileRequest* request)
-        {
-            return (ProgramLayout*) spGetReflection(request);
-        }
-
-        SlangUInt getEntryPointCount()
-        {
-            return spReflection_getEntryPointCount((SlangReflection*) this);
-        }
-
-        EntryPointReflection* getEntryPointByIndex(SlangUInt index)
-        {
-            return (EntryPointReflection*) spReflection_getEntryPointByIndex((SlangReflection*) this, index);
-        }
-
-        SlangUInt getGlobalConstantBufferBinding()
-        {
-            return spReflection_getGlobalConstantBufferBinding((SlangReflection*)this);
-        }
-
-        size_t getGlobalConstantBufferSize()
-        {
-            return spReflection_getGlobalConstantBufferSize((SlangReflection*)this);
-        }
-
-        TypeReflection* findTypeByName(const char* name)
-        {
-            return (TypeReflection*)spReflection_FindTypeByName(
-                (SlangReflection*) this,
-                name);
-        }
-
-        TypeLayoutReflection* getTypeLayout(
-            TypeReflection* type,
-            LayoutRules     rules = LayoutRules::Default)
-        {
-            return (TypeLayoutReflection*)spReflection_GetTypeLayout(
-                (SlangReflection*) this,
-                (SlangReflectionType*)type,
-                SlangLayoutRules(rules));
-        }
-
-        EntryPointReflection* findEntryPointByName(const char* name)
-        {
-            return (EntryPointReflection*)spReflection_findEntryPointByName(
-                (SlangReflection*) this,
-                name);
-        }
-
-        TypeReflection* specializeType(
-            TypeReflection*         type,
-            SlangInt                specializationArgCount,
-            TypeReflection* const*  specializationArgs,
-            ISlangBlob**            outDiagnostics)
-        {
-            return (TypeReflection*) spReflection_specializeType(
-                (SlangReflection*) this,
-                (SlangReflectionType*) type,
-                specializationArgCount,
-                (SlangReflectionType* const*) specializationArgs,
-                outDiagnostics);
-        }
-
-        SlangUInt getHashedStringCount() const { return spReflection_getHashedStringCount((SlangReflection*)this); }
-
-        const char* getHashedString(SlangUInt index, size_t* outCount) const
-        {
-            return spReflection_getHashedString((SlangReflection*)this, index, outCount);
-        }
-
-        TypeLayoutReflection* getGlobalParamsTypeLayout()
-        {
-            return (TypeLayoutReflection*) spReflection_getGlobalParamsTypeLayout((SlangReflection*) this);
-        }
-
-        VariableLayoutReflection* getGlobalParamsVarLayout()
-        {
-            return (VariableLayoutReflection*) spReflection_getGlobalParamsVarLayout((SlangReflection*) this);
-        }
-    };
-
-    typedef uint32_t CompileStdLibFlags;
-    struct CompileStdLibFlag
-    {
-        enum Enum : CompileStdLibFlags
-        {
-            WriteDocumentation = 0x1,
-        };
-    };
-
-    typedef ISlangBlob IBlob;
-
-    struct IComponentType;
-    struct ITypeConformance;
-    struct IGlobalSession;
-    struct IModule;
-
-    struct SessionDesc;
-    struct SpecializationArg;
-    struct TargetDesc;
-
-        /** A global session for interaction with the Slang library.
-
-        An application may create and re-use a single global session across
-        multiple sessions, in order to amortize startups costs (in current
-        Slang this is mostly the cost of loading the Slang standard library).
-
-        The global session is currently *not* thread-safe and objects created from
-        a single global session should only be used from a single thread at
-        a time.
-        */
-    struct IGlobalSession : public ISlangUnknown
-    {
-        SLANG_COM_INTERFACE(0xc140b5fd, 0xc78, 0x452e, { 0xba, 0x7c, 0x1a, 0x1e, 0x70, 0xc7, 0xf7, 0x1c })
-
-            /** Create a new session for loading and compiling code.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL createSession(
-            SessionDesc const&  desc,
-            ISession**          outSession) = 0;
-
-            /** Look up the internal ID of a profile by its `name`.
-
-            Profile IDs are *not* guaranteed to be stable across versions
-            of the Slang library, so clients are expected to look up
-            profiles by name at runtime.
-            */
-        virtual SLANG_NO_THROW SlangProfileID SLANG_MCALL findProfile(
-            char const*     name) = 0;
-
-            /** Set the path that downstream compilers (aka back end compilers) will
-            be looked from.
-            @param passThrough Identifies the downstream compiler
-            @param path The path to find the downstream compiler (shared library/dll/executable)
-
-            For back ends that are dlls/shared libraries, it will mean the path will
-            be prefixed with the path when calls are made out to ISlangSharedLibraryLoader.
-            For executables - it will look for executables along the path */
-        virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerPath(
-            SlangPassThrough passThrough,
-            char const* path) = 0;
-
-            /** DEPRECATED: Use setLanguagePrelude
-
-            Set the 'prelude' for generated code for a 'downstream compiler'.
-            @param passThrough The downstream compiler for generated code that will have the prelude applied to it. 
-            @param preludeText The text added pre-pended verbatim before the generated source
-
-            That for pass-through usage, prelude is not pre-pended, preludes are for code generation only. 
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerPrelude(
-            SlangPassThrough passThrough,
-            const char* preludeText) = 0;
-
-            /** DEPRECATED: Use getLanguagePrelude
-
-            Get the 'prelude' for generated code for a 'downstream compiler'.
-            @param passThrough The downstream compiler for generated code that will have the prelude applied to it. 
-            @param outPrelude  On exit holds a blob that holds the string of the prelude.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL getDownstreamCompilerPrelude(
-            SlangPassThrough passThrough,
-            ISlangBlob** outPrelude) = 0;
-
-            /** Get the build version 'tag' string. The string is the same as produced via `git describe --tags`
-            for the project. If Slang is built separately from the automated build scripts
-            the contents will by default be 'unknown'. Any string can be set by changing the
-            contents of 'slang-tag-version.h' file and recompiling the project.
-
-            This method will return exactly the same result as the free function spGetBuildTagString.
-
-            @return The build tag string
-            */
-        virtual SLANG_NO_THROW const char* SLANG_MCALL getBuildTagString() = 0;
-
-            /* For a given source language set the default compiler.
-            If a default cannot be chosen (for example the target cannot be achieved by the default),
-            the default will not be used. 
-
-            @param sourceLanguage the source language 
-            @param defaultCompiler the default compiler for that language
-            @return 
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL setDefaultDownstreamCompiler(
-            SlangSourceLanguage sourceLanguage,
-            SlangPassThrough defaultCompiler) = 0;
-
-            /* For a source type get the default compiler 
-
-            @param sourceLanguage the source language 
-            @return The downstream compiler for that source language */
-        virtual SlangPassThrough SLANG_MCALL getDefaultDownstreamCompiler(
-            SlangSourceLanguage sourceLanguage) = 0;
-
-            /* Set the 'prelude' placed before generated code for a specific language type.
-            
-            @param sourceLanguage The language the prelude should be inserted on.
-            @param preludeText The text added pre-pended verbatim before the generated source
-
-            Note! That for pass-through usage, prelude is not pre-pended, preludes are for code generation only. 
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setLanguagePrelude(
-            SlangSourceLanguage sourceLanguage,
-            const char* preludeText) = 0;
-
-            /** Get the 'prelude' associated with a specific source language. 
-            @param sourceLanguage The language the prelude should be inserted on.
-            @param outPrelude  On exit holds a blob that holds the string of the prelude.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL getLanguagePrelude(
-            SlangSourceLanguage sourceLanguage,
-            ISlangBlob** outPrelude) = 0;
-
-            /** Create a compile request.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL createCompileRequest(
-            slang::ICompileRequest** outCompileRequest) = 0;
-
-            /** Add new builtin declarations to be used in subsequent compiles.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL addBuiltins(
-            char const*     sourcePath,
-            char const*     sourceString) = 0;
-
-            /** Set the session shared library loader. If this changes the loader, it may cause shared libraries to be unloaded
-            @param loader The loader to set. Setting nullptr sets the default loader. 
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setSharedLibraryLoader(
-            ISlangSharedLibraryLoader* loader) = 0;
-
-            /** Gets the currently set shared library loader
-            @return Gets the currently set loader. If returns nullptr, it's the default loader
-            */
-        virtual SLANG_NO_THROW ISlangSharedLibraryLoader* SLANG_MCALL getSharedLibraryLoader() = 0;
-
-            /** Returns SLANG_OK if a the compilation target is supported for this session
-            
-            @param target The compilation target to test
-            @return SLANG_OK if the target is available
-            SLANG_E_NOT_IMPLEMENTED if not implemented in this build
-            SLANG_E_NOT_FOUND if other resources (such as shared libraries) required to make target work could not be found
-            SLANG_FAIL other kinds of failures */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL checkCompileTargetSupport(
-            SlangCompileTarget  target) = 0;
-
-            /** Returns SLANG_OK if a the pass through support is supported for this session
-            @param session Session
-            @param target The compilation target to test
-            @return SLANG_OK if the target is available
-            SLANG_E_NOT_IMPLEMENTED if not implemented in this build
-            SLANG_E_NOT_FOUND if other resources (such as shared libraries) required to make target work could not be found
-            SLANG_FAIL other kinds of failures */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL checkPassThroughSupport(
-            SlangPassThrough    passThrough) = 0;
-
-            /** Compile from (embedded source) the StdLib on the session.
-            Will return a failure if there is already a StdLib available
-            NOTE! API is experimental and not ready for production code
-            @param flags to control compilation
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL compileStdLib(CompileStdLibFlags flags) = 0;
-
-            /** Load the StdLib. Currently loads modules from the file system. 
-            @param stdLib Start address of the serialized stdlib
-            @param stdLibSizeInBytes The size in bytes of the serialized stdlib
-
-            NOTE! API is experimental and not ready for production code
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL loadStdLib(const void* stdLib, size_t stdLibSizeInBytes) = 0;
-
-            /** Save the StdLib modules to the file system
-            @param archiveType The type of archive used to hold the stdlib
-            @param outBlob The serialized blob containing the standard library
-
-            NOTE! API is experimental and not ready for production code  */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL saveStdLib(SlangArchiveType archiveType, ISlangBlob** outBlob) = 0;
-
-            /** Look up the internal ID of a capability by its `name`.
-
-            Capability IDs are *not* guaranteed to be stable across versions
-            of the Slang library, so clients are expected to look up
-            capabilities by name at runtime.
-            */
-        virtual SLANG_NO_THROW SlangCapabilityID SLANG_MCALL findCapability(
-            char const*     name) = 0;
-
-            /** Set the downstream/pass through compiler to be used for a transition from the source type to the target type
-            @param source The source 'code gen target'
-            @param target The target 'code gen target'
-            @param compiler The compiler/pass through to use for the transition from source to target
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerForTransition(SlangCompileTarget source, SlangCompileTarget target, SlangPassThrough compiler) = 0;
-
-            /** Get the downstream/pass through compiler for a transition specified by source and target
-            @param source The source 'code gen target'
-            @param target The target 'code gen target'
-            @return The compiler that is used for the transition. Returns SLANG_PASS_THROUGH_NONE it is not defined
-            */
-        virtual SLANG_NO_THROW SlangPassThrough SLANG_MCALL getDownstreamCompilerForTransition(SlangCompileTarget source, SlangCompileTarget target) = 0;
-
-            /** Get the time in seconds spent in the slang and downstream compiler.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL getCompilerElapsedTime(double* outTotalTime, double* outDownstreamTime) = 0;
-
-            /** Specify a spirv.core.grammar.json file to load and use when
-             * parsing and checking any SPIR-V code
-             */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL setSPIRVCoreGrammar(
-            char const* jsonPath) = 0;
-
-            /** Parse slangc command line options into a SessionDesc that can be used to create a session
-            *   with all the compiler options specified in the command line.
-            *   @param argc The number of command line arguments.
-            *   @param argv An input array of command line arguments to parse.
-            *   @param outSessionDesc A pointer to a SessionDesc struct to receive parsed session desc.
-            *   @param outAuxAllocation Auxillary memory allocated to hold data used in the sesion desc.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL parseCommandLineArguments(
-            int argc, const char* const* argv, SessionDesc* outSessionDesc, ISlangUnknown** outAuxAllocation) = 0;
-
-            /** Computes a digest that uniquely identifies the session description.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getSessionDescDigest(SessionDesc* sessionDesc, ISlangBlob** outBlob) = 0;
-    };
-
-    #define SLANG_UUID_IGlobalSession IGlobalSession::getTypeGuid()
-
-    /*!
-    @brief A request for one or more compilation actions to be performed.
-    */
-    struct ICompileRequest : public ISlangUnknown
-    {
-        SLANG_COM_INTERFACE( 0x96d33993, 0x317c, 0x4db5, { 0xaf, 0xd8, 0x66, 0x6e, 0xe7, 0x72, 0x48, 0xe2 } )
-   
-            /** Set the filesystem hook to use for a compile request
-
-            The provided `fileSystem` will be used to load any files that
-            need to be loaded during processing of the compile `request`.
-            This includes:
-
-              - Source files loaded via `spAddTranslationUnitSourceFile`
-              - Files referenced via `#include`
-              - Files loaded to resolve `#import` operations
-                */
-        virtual SLANG_NO_THROW void SLANG_MCALL setFileSystem(
-            ISlangFileSystem*       fileSystem) = 0;
-
-            /*!
-            @brief Set flags to be used for compilation.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setCompileFlags(
-            SlangCompileFlags       flags) = 0;
-
-            /*!
-            @brief Returns the compilation flags previously set with `setCompileFlags`
-            */
-        virtual SLANG_NO_THROW SlangCompileFlags SLANG_MCALL getCompileFlags() = 0;
-
-            /*!
-            @brief Set whether to dump intermediate results (for debugging) or not.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setDumpIntermediates(
-            int                     enable) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setDumpIntermediatePrefix(
-            const char* prefix) = 0;
-
-            /*!
-            @brief Set whether (and how) `#line` directives should be output.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setLineDirectiveMode(
-            SlangLineDirectiveMode  mode) = 0;
-
-            /*!
-            @brief Sets the target for code generation.
-            @param target The code generation target. Possible values are:
-            - SLANG_GLSL. Generates GLSL code.
-            - SLANG_HLSL. Generates HLSL code.
-            - SLANG_SPIRV. Generates SPIR-V code.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setCodeGenTarget(
-            SlangCompileTarget target) = 0;
-
-            /*!
-            @brief Add a code-generation target to be used.
-            */
-        virtual SLANG_NO_THROW int SLANG_MCALL addCodeGenTarget(
-            SlangCompileTarget      target) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setTargetProfile(
-            int                     targetIndex,
-            SlangProfileID          profile) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setTargetFlags(
-            int                     targetIndex,
-            SlangTargetFlags        flags) = 0;
-
-
-            /*!
-            @brief Set the floating point mode (e.g., precise or fast) to use a target.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setTargetFloatingPointMode(
-            int                     targetIndex,
-            SlangFloatingPointMode  mode) = 0;
-
-            /* DEPRECATED: use `spSetMatrixLayoutMode` instead. */
-        virtual SLANG_NO_THROW void SLANG_MCALL setTargetMatrixLayoutMode(
-            int                     targetIndex,
-            SlangMatrixLayoutMode   mode) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setMatrixLayoutMode(
-            SlangMatrixLayoutMode   mode) = 0;
-
-            /*!
-            @brief Set the level of debug information to produce.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setDebugInfoLevel(
-            SlangDebugInfoLevel     level) = 0;
-
-            /*!
-            @brief Set the level of optimization to perform.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setOptimizationLevel(
-            SlangOptimizationLevel  level) = 0;
-
-
-    
-            /*!
-            @brief Set the container format to be used for binary output.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setOutputContainerFormat(
-            SlangContainerFormat    format) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setPassThrough(
-            SlangPassThrough        passThrough) = 0;
-
-    
-        virtual SLANG_NO_THROW void SLANG_MCALL setDiagnosticCallback(
-            SlangDiagnosticCallback callback,
-            void const*             userData) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setWriter(
-            SlangWriterChannel      channel, 
-            ISlangWriter*           writer) = 0;
-
-        virtual SLANG_NO_THROW ISlangWriter* SLANG_MCALL getWriter(
-            SlangWriterChannel      channel) = 0;
-
-            /*!
-            @brief Add a path to use when searching for referenced files.
-            This will be used for both `#include` directives and also for explicit `__import` declarations.
-            @param ctx The compilation context.
-            @param searchDir The additional search directory.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL addSearchPath(
-            const char*             searchDir) = 0;
-
-            /*!
-            @brief Add a macro definition to be used during preprocessing.
-            @param key The name of the macro to define.
-            @param value The value of the macro to define.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL addPreprocessorDefine(
-            const char*             key,
-            const char*             value) = 0;
-
-            /*!
-            @brief Set options using arguments as if specified via command line.
-            @return Returns SlangResult. On success SLANG_SUCCEEDED(result) is true.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL processCommandLineArguments(
-            char const* const*      args,
-            int                     argCount) = 0;
-
-            /** Add a distinct translation unit to the compilation request
-
-            `name` is optional. 
-            Returns the zero-based index of the translation unit created.
-            */
-        virtual SLANG_NO_THROW int SLANG_MCALL addTranslationUnit(
-            SlangSourceLanguage     language,
-            char const*             name) = 0;
-
-    
-            /** Set a default module name. Translation units will default to this module name if one is not
-            passed. If not set each translation unit will get a unique name. 
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setDefaultModuleName(
-            const char* defaultModuleName) = 0;
-
-            /** Add a preprocessor definition that is scoped to a single translation unit.
-
-            @param translationUnitIndex The index of the translation unit to get the definition.
-            @param key The name of the macro to define.
-            @param value The value of the macro to define.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitPreprocessorDefine(
-            int                     translationUnitIndex,
-            const char*             key,
-            const char*             value) = 0;
-
-
-            /** Add a source file to the given translation unit.
-
-            If a user-defined file system has been specified via
-            `spSetFileSystem`, then it will be used to load the
-            file at `path`. Otherwise, Slang will use the OS
-            file system.
-
-            This function does *not* search for a file using
-            the registered search paths (`spAddSearchPath`),
-            and instead using the given `path` as-is.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitSourceFile(
-            int                     translationUnitIndex,
-            char const*             path) = 0;
-
-            /** Add a source string to the given translation unit.
-
-            @param translationUnitIndex The index of the translation unit to add source to.
-            @param path The file-system path that should be assumed for the source code.
-            @param source A null-terminated UTF-8 encoded string of source code.
-
-            The implementation will make a copy of the source code data.
-            An application may free the buffer immediately after this call returns.
-
-            The `path` will be used in any diagnostic output, as well
-            as to determine the base path when resolving relative
-            `#include`s.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitSourceString(
-            int                     translationUnitIndex,
-            char const*             path,
-            char const*             source) = 0;
-
-
-            /** Add a slang library - such that its contents can be referenced during linking.
-            This is equivalent to the -r command line option.
-
-            @param basePath The base path used to lookup referenced modules.
-            @param libData The library data
-            @param libDataSize The size of the library data
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL addLibraryReference(
-            const char* basePath,
-            const void* libData,
-            size_t libDataSize) = 0;
-
-            /** Add a source string to the given translation unit.
-
-            @param translationUnitIndex The index of the translation unit to add source to.
-            @param path The file-system path that should be assumed for the source code.
-            @param sourceBegin A pointer to a buffer of UTF-8 encoded source code.
-            @param sourceEnd A pointer to to the end of the buffer specified in `sourceBegin`
-
-            The implementation will make a copy of the source code data.
-            An application may free the buffer immediately after this call returns.
-
-            The `path` will be used in any diagnostic output, as well
-            as to determine the base path when resolving relative
-            `#include`s.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitSourceStringSpan(
-            int                     translationUnitIndex,
-            char const*             path,
-            char const*             sourceBegin,
-            char const*             sourceEnd) = 0;
-
-            /** Add a blob of source code to the given translation unit.
-
-            @param translationUnitIndex The index of the translation unit to add source to.
-            @param path The file-system path that should be assumed for the source code.
-            @param sourceBlob A blob containing UTF-8 encoded source code.
-            @param sourceEnd A pointer to to the end of the buffer specified in `sourceBegin`
-
-            The compile request will retain a reference to the blob.
-
-            The `path` will be used in any diagnostic output, as well
-            as to determine the base path when resolving relative
-            `#include`s.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitSourceBlob(
-            int                     translationUnitIndex,
-            char const*             path,
-            ISlangBlob*             sourceBlob) = 0;
-
-            /** Add an entry point in a particular translation unit
-            */
-        virtual SLANG_NO_THROW int SLANG_MCALL addEntryPoint(
-            int                     translationUnitIndex,
-            char const*             name,
-            SlangStage              stage) = 0;
-
-            /** Add an entry point in a particular translation unit,
-                with additional arguments that specify the concrete
-                type names for entry-point generic type parameters.
-            */
-        virtual SLANG_NO_THROW int SLANG_MCALL addEntryPointEx(
-            int                     translationUnitIndex,
-            char const*             name,
-            SlangStage              stage,
-            int                     genericArgCount,
-            char const**            genericArgs) = 0;
-
-            /** Specify the arguments to use for global generic parameters.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL setGlobalGenericArgs(
-            int                     genericArgCount,
-            char const**            genericArgs) = 0;
-
-            /** Specify the concrete type to be used for a global "existential slot."
-
-            Every shader parameter (or leaf field of a `struct`-type shader parameter)
-            that has an interface or array-of-interface type introduces an existential
-            slot. The number of slots consumed by a shader parameter, and the starting
-            slot of each parameter can be queried via the reflection API using
-            `SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM`.
-
-            In order to generate specialized code, a concrete type needs to be specified
-            for each existential slot. This function specifies the name of the type
-            (or in general a type *expression*) to use for a specific slot at the
-            global scope.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL setTypeNameForGlobalExistentialTypeParam(
-            int                     slotIndex,
-            char const*             typeName) = 0;
-
-            /** Specify the concrete type to be used for an entry-point "existential slot."
-
-            Every shader parameter (or leaf field of a `struct`-type shader parameter)
-            that has an interface or array-of-interface type introduces an existential
-            slot. The number of slots consumed by a shader parameter, and the starting
-            slot of each parameter can be queried via the reflection API using
-            `SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM`.
-
-            In order to generate specialized code, a concrete type needs to be specified
-            for each existential slot. This function specifies the name of the type
-            (or in general a type *expression*) to use for a specific slot at the
-            entry-point scope.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL setTypeNameForEntryPointExistentialTypeParam(
-            int                     entryPointIndex,
-            int                     slotIndex,
-            char const*             typeName) = 0;
-
-            /** Enable or disable an experimental, best-effort GLSL frontend
-             */
-        virtual SLANG_NO_THROW void SLANG_MCALL setAllowGLSLInput(
-            bool                    value) = 0;
-
-            /** Execute the compilation request.
-
-            @returns  SlangResult, SLANG_OK on success. Use SLANG_SUCCEEDED() and SLANG_FAILED() to test SlangResult.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL compile() = 0;
-
-
-            /** Get any diagnostic messages reported by the compiler.
-
-            @returns A null-terminated UTF-8 encoded string of diagnostic messages.
-
-            The returned pointer is only guaranteed to be valid
-            until `request` is destroyed. Applications that wish to
-            hold on to the diagnostic output for longer should use
-            `getDiagnosticOutputBlob`.
-            */
-        virtual SLANG_NO_THROW char const* SLANG_MCALL getDiagnosticOutput() = 0;
-
-            /** Get diagnostic messages reported by the compiler.
-
-            @param outBlob A pointer to receive a blob holding a nul-terminated UTF-8 encoded string of diagnostic messages.
-            @returns A `SlangResult` indicating success or failure.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getDiagnosticOutputBlob(
-            ISlangBlob**            outBlob) = 0;
-
-
-            /** Get the number of files that this compilation depended on.
-
-            This includes both the explicit source files, as well as any
-            additional files that were transitively referenced (e.g., via
-            a `#include` directive).
-            */
-        virtual SLANG_NO_THROW int SLANG_MCALL getDependencyFileCount() = 0;
-
-            /** Get the path to a file this compilation depended on.
-            */
-        virtual SLANG_NO_THROW char const* SLANG_MCALL getDependencyFilePath(
-            int                     index) = 0;
-
-            /** Get the number of translation units associated with the compilation request
-            */
-        virtual SLANG_NO_THROW int SLANG_MCALL getTranslationUnitCount() = 0;
-
-            /** Get the output source code associated with a specific entry point.
-
-            The lifetime of the output pointer is the same as `request`.
-            */
-        virtual SLANG_NO_THROW char const* SLANG_MCALL getEntryPointSource(
-            int                     entryPointIndex) = 0;
-
-            /** Get the output bytecode associated with a specific entry point.
-
-            The lifetime of the output pointer is the same as `request`.
-            */
-        virtual SLANG_NO_THROW void const* SLANG_MCALL getEntryPointCode(
-            int                     entryPointIndex,
-            size_t*                 outSize) = 0;
-
-            /** Get the output code associated with a specific entry point.
-
-            @param entryPointIndex The index of the entry point to get code for.
-            @param targetIndex The index of the target to get code for (default: zero).
-            @param outBlob A pointer that will receive the blob of code
-            @returns A `SlangResult` to indicate success or failure.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointCodeBlob(
-            int                     entryPointIndex,
-            int                     targetIndex,
-            ISlangBlob**            outBlob) = 0;
-
-            /** Get entry point 'callable' functions accessible through the ISlangSharedLibrary interface.
-
-            That the functions remain in scope as long as the ISlangSharedLibrary interface is in scope.
-
-            NOTE! Requires a compilation target of SLANG_HOST_CALLABLE.
-    
-            @param entryPointIndex  The index of the entry point to get code for.
-            @param targetIndex      The index of the target to get code for (default: zero).
-            @param outSharedLibrary A pointer to a ISharedLibrary interface which functions can be queried on.
-            @returns                A `SlangResult` to indicate success or failure.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointHostCallable(
-            int                     entryPointIndex,
-            int                     targetIndex,
-            ISlangSharedLibrary**   outSharedLibrary) = 0;
-
-            /** Get the output code associated with a specific target.
-
-            @param targetIndex The index of the target to get code for (default: zero).
-            @param outBlob A pointer that will receive the blob of code
-            @returns A `SlangResult` to indicate success or failure.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTargetCodeBlob(
-            int                     targetIndex,
-            ISlangBlob**            outBlob) = 0;
-
-            /** Get 'callable' functions for a target accessible through the ISlangSharedLibrary interface.
-
-            That the functions remain in scope as long as the ISlangSharedLibrary interface is in scope.
-
-            NOTE! Requires a compilation target of SLANG_HOST_CALLABLE.
-    
-            @param targetIndex      The index of the target to get code for (default: zero).
-            @param outSharedLibrary A pointer to a ISharedLibrary interface which functions can be queried on.
-            @returns                A `SlangResult` to indicate success or failure.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTargetHostCallable(
-            int                     targetIndex,
-            ISlangSharedLibrary**   outSharedLibrary) = 0;
-
-            /** Get the output bytecode associated with an entire compile request.
-
-            The lifetime of the output pointer is the same as `request` and the last spCompile.
-
-            @param outSize          The size of the containers contents in bytes. Will be zero if there is no code available.
-            @returns                Pointer to start of the contained data, or nullptr if there is no code available.
-            */
-        virtual SLANG_NO_THROW void const* SLANG_MCALL getCompileRequestCode(
-            size_t*                 outSize) = 0;
-
-            /** Get the compilation result as a file system.
-            The result is not written to the actual OS file system, but is made avaiable as an 
-            in memory representation.
-            */
-        virtual SLANG_NO_THROW ISlangMutableFileSystem* SLANG_MCALL getCompileRequestResultAsFileSystem() = 0;
-
-            /** Return the container code as a blob. The container blob is created as part of a compilation (with spCompile),
-            and a container is produced with a suitable ContainerFormat. 
-
-            @param outSize          The blob containing the container data. 
-            @returns                A `SlangResult` to indicate success or failure.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getContainerCode(
-            ISlangBlob**            outBlob) = 0;
-
-            /** Load repro from memory specified.
-
-            Should only be performed on a newly created request.
-
-            NOTE! When using the fileSystem, files will be loaded via their `unique names` as if they are part of the flat file system. This
-            mechanism is described more fully in docs/repro.md.
-
-            @param fileSystem       An (optional) filesystem. Pass nullptr to just use contents of repro held in data.
-            @param data             The data to load from.
-            @param size             The size of the data to load from. 
-            @returns                A `SlangResult` to indicate success or failure.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL loadRepro(
-            ISlangFileSystem* fileSystem,
-            const void* data,
-            size_t size) = 0;
-
-            /** Save repro state. Should *typically* be performed after spCompile, so that everything
-            that is needed for a compilation is available. 
-
-            @param outBlob          Blob that will hold the serialized state
-            @returns                A `SlangResult` to indicate success or failure.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL saveRepro(
-            ISlangBlob** outBlob) = 0;
-
-            /** Enable repro capture.
-
-            Should be set after any ISlangFileSystem has been set, but before any compilation. It ensures that everything
-            that the ISlangFileSystem accesses will be correctly recorded.
-            Note that if a ISlangFileSystem/ISlangFileSystemExt isn't explicitly set (ie the default is used), then the
-            request will automatically be set up to record everything appropriate. 
-
-            @returns                A `SlangResult` to indicate success or failure.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL enableReproCapture() = 0;
-
-            /** Get the (linked) program for a compile request.
-
-            The linked program will include all of the global-scope modules for the
-            translation units in the program, plus any modules that they `import`
-            (transitively), specialized to any global specialization arguments that
-            were provided via the API.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getProgram(
-            slang::IComponentType** outProgram) = 0;
-
-            /** Get the (partially linked) component type for an entry point.
-
-            The returned component type will include the entry point at the
-            given index, and will be specialized using any specialization arguments
-            that were provided for it via the API.
-
-            The returned component will *not* include the modules representing
-            the global scope and its dependencies/specialization, so a client
-            program will typically want to compose this component type with
-            the one returned by `spCompileRequest_getProgram` to get a complete
-            and usable component type from which kernel code can be requested.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPoint(
-            SlangInt                entryPointIndex,
-            slang::IComponentType** outEntryPoint) = 0;
-
-            /** Get the (un-linked) module for a translation unit.
-
-            The returned module will not be linked against any dependencies,
-            nor against any entry points (even entry points declared inside
-            the module). Similarly, the module will not be specialized
-            to the arguments that might have been provided via the API.
-
-            This function provides an atomic unit of loaded code that
-            is suitable for looking up types and entry points in the
-            given module, and for linking together to produce a composite
-            program that matches the needs of an application.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getModule(
-            SlangInt                translationUnitIndex,
-            slang::IModule**        outModule) = 0;
-
-            /** Get the `ISession` handle behind the `SlangCompileRequest`.
-            TODO(JS): Arguably this should just return the session pointer.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getSession(
-            slang::ISession** outSession) = 0;
-
-            /** get reflection data from a compilation request */
-        virtual SLANG_NO_THROW SlangReflection* SLANG_MCALL getReflection() = 0;
-
-            /** Make output specially handled for command line output */
-        virtual SLANG_NO_THROW void SLANG_MCALL setCommandLineCompilerMode() = 0;
-
-            /** Add a defined capability that should be assumed available on the target */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL addTargetCapability(
-            SlangInt            targetIndex,
-            SlangCapabilityID   capability) = 0;
-
-            /** Get the (linked) program for a compile request, including all entry points.
-
-            The resulting program will include all of the global-scope modules for the
-            translation units in the program, plus any modules that they `import`
-            (transitively), specialized to any global specialization arguments that
-            were provided via the API, as well as all entry points specified for compilation,
-            specialized to their entry-point specialization arguments.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getProgramWithEntryPoints(
-            slang::IComponentType** outProgram) = 0;
-
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL isParameterLocationUsed(
-            SlangInt entryPointIndex,
-            SlangInt targetIndex,
-            SlangParameterCategory category,
-            SlangUInt spaceIndex,
-            SlangUInt registerIndex,
-            bool& outUsed) = 0;
-
-            /** Set the line directive mode for a target.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setTargetLineDirectiveMode(
-            SlangInt targetIndex,
-            SlangLineDirectiveMode mode) = 0;
-
-            /** Set whether to use scalar buffer layouts for GLSL/Vulkan targets.
-                If true, the generated GLSL/Vulkan code will use `scalar` layout for storage buffers.
-                If false, the resulting code will std430 for storage buffers.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL setTargetForceGLSLScalarBufferLayout(int targetIndex, bool forceScalarLayout) = 0;
-
-            /** Overrides the severity of a specific diagnostic message.
-
-            @param messageID            Numeric identifier of the message to override,
-                                        as defined in the 1st parameter of the DIAGNOSTIC macro.
-            @param overrideSeverity     New severity of the message. If the message is originally Error or Fatal,
-                                        the new severity cannot be lower than that.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL overrideDiagnosticSeverity(
-            SlangInt messageID,
-            SlangSeverity overrideSeverity) = 0;
-
-            /** Returns the currently active flags of the request's diagnostic sink. */
-        virtual SLANG_NO_THROW SlangDiagnosticFlags SLANG_MCALL getDiagnosticFlags() = 0;
-
-            /** Sets the flags of the request's diagnostic sink.
-                The previously specified flags are discarded. */
-        virtual SLANG_NO_THROW void SLANG_MCALL setDiagnosticFlags(SlangDiagnosticFlags flags) = 0;
-
-            /** Set the debug format to be used for debugging information */
-        virtual SLANG_NO_THROW void SLANG_MCALL setDebugInfoFormat(SlangDebugInfoFormat debugFormat) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setEnableEffectAnnotations(bool value) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setReportDownstreamTime(bool value) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setReportPerfBenchmark(bool value) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setSkipSPIRVValidation(bool value) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setTargetUseMinimumSlangOptimization(int targetIndex, bool value) = 0;
-
-        virtual SLANG_NO_THROW void SLANG_MCALL setIgnoreCapabilityCheck(bool value) = 0;
-
-        // return a copy of internal profiling results, and if `shouldClear` is true, clear the internal profiling results before returning.
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getCompileTimeProfile(ISlangProfiler** compileTimeProfile, bool shouldClear) = 0;
-
-    };
-
-    #define SLANG_UUID_ICompileRequest ICompileRequest::getTypeGuid()
-
-        /** Description of a code generation target.
-        */
-    struct TargetDesc
-    {
-            /** The size of this structure, in bytes.
-            */
-        size_t structureSize = sizeof(TargetDesc);
-
-            /** The target format to generate code for (e.g., SPIR-V, DXIL, etc.)
-            */
-        SlangCompileTarget      format = SLANG_TARGET_UNKNOWN;
-
-            /** The compilation profile supported by the target (e.g., "Shader Model 5.1")
-            */
-        SlangProfileID          profile = SLANG_PROFILE_UNKNOWN;
-
-            /** Flags for the code generation target. Currently unused. */
-        SlangTargetFlags        flags = kDefaultTargetFlags;
-
-            /** Default mode to use for floating-point operations on the target.
-            */
-        SlangFloatingPointMode  floatingPointMode = SLANG_FLOATING_POINT_MODE_DEFAULT;
-
-            /** The line directive mode for output source code.
-            */
-        SlangLineDirectiveMode lineDirectiveMode = SLANG_LINE_DIRECTIVE_MODE_DEFAULT;
-
-            /** Whether to force `scalar` layout for glsl shader storage buffers.
-            */
-        bool forceGLSLScalarBufferLayout = false;
-
-            /** Pointer to an array of compiler option entries, whose size is compilerOptionEntryCount.
-            */
-        CompilerOptionEntry* compilerOptionEntries = nullptr;
-
-            /** Number of additional compiler option entries.
-            */
-        uint32_t compilerOptionEntryCount = 0;
-
-    };
-
-    typedef uint32_t SessionFlags;
-    enum
-    {
-        kSessionFlags_None = 0
-    };
-
-    struct PreprocessorMacroDesc
-    {
-        const char* name;
-        const char* value;
-    };
-
-    struct SessionDesc
-    {
-            /** The size of this structure, in bytes.
-             */
-        size_t structureSize = sizeof(SessionDesc);
-
-            /** Code generation targets to include in the session.
-            */
-        TargetDesc const*   targets = nullptr;
-        SlangInt            targetCount = 0;
-
-            /** Flags to configure the session.
-            */
-        SessionFlags flags = kSessionFlags_None;
-
-            /** Default layout to assume for variables with matrix types.
-            */
-        SlangMatrixLayoutMode defaultMatrixLayoutMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR;
-
-            /** Paths to use when searching for `#include`d or `import`ed files.
-            */
-        char const* const*  searchPaths = nullptr;
-        SlangInt            searchPathCount = 0;
-
-        PreprocessorMacroDesc const*    preprocessorMacros = nullptr;
-        SlangInt                        preprocessorMacroCount = 0;
-
-        ISlangFileSystem* fileSystem = nullptr;
-
-        bool enableEffectAnnotations = false;
-        bool allowGLSLSyntax = false;
-
-        /** Pointer to an array of compiler option entries, whose size is compilerOptionEntryCount.
-        */
-        CompilerOptionEntry* compilerOptionEntries = nullptr;
-
-        /** Number of additional compiler option entries.
-        */
-        uint32_t compilerOptionEntryCount = 0;
-
-    };
-
-    enum class ContainerType
-    {
-        None, UnsizedArray, StructuredBuffer, ConstantBuffer, ParameterBlock
-    };
-
-        /** A session provides a scope for code that is loaded.
-
-        A session can be used to load modules of Slang source code,
-        and to request target-specific compiled binaries and layout
-        information.
-
-        In order to be able to load code, the session owns a set
-        of active "search paths" for resolving `#include` directives
-        and `import` declrations, as well as a set of global
-        preprocessor definitions that will be used for all code
-        that gets `import`ed in the session.
-
-        If multiple user shaders are loaded in the same session,
-        and import the same module (e.g., two source files do `import X`)
-        then there will only be one copy of `X` loaded within the session.
-
-        In order to be able to generate target code, the session
-        owns a list of available compilation targets, which specify
-        code generation options.
-
-        Code loaded and compiled within a session is owned by the session
-        and will remain resident in memory until the session is released.
-        Applications wishing to control the memory usage for compiled
-        and loaded code should use multiple sessions.
-        */
-    struct ISession : public ISlangUnknown
-    {
-        SLANG_COM_INTERFACE( 0x67618701, 0xd116, 0x468f, { 0xab, 0x3b, 0x47, 0x4b, 0xed, 0xce, 0xe, 0x3d } )
-
-            /** Get the global session thas was used to create this session.
-            */
-        virtual SLANG_NO_THROW IGlobalSession* SLANG_MCALL getGlobalSession() = 0;
-
-            /** Load a module as it would be by code using `import`.
-            */
-        virtual SLANG_NO_THROW IModule* SLANG_MCALL loadModule(
-            const char* moduleName,
-            IBlob**     outDiagnostics = nullptr) = 0;
-
-            /** Load a module from Slang source code.
-            */
-        virtual SLANG_NO_THROW IModule* SLANG_MCALL loadModuleFromSource(
-            const char* moduleName,
-            const char* path,
-            slang::IBlob* source,
-            slang::IBlob** outDiagnostics = nullptr) = 0;
-
-            /** Combine multiple component types to create a composite component type.
-
-            The `componentTypes` array must contain `componentTypeCount` pointers
-            to component types that were loaded or created using the same session.
-
-            The shader parameters and specialization parameters of the composite will
-            be the union of those in `componentTypes`. The relative order of child
-            component types is significant, and will affect the order in which
-            parameters are reflected and laid out.
-
-            The entry-point functions of the composite will be the union of those in
-            `componentTypes`, and will follow the ordering of `componentTypes`.
-
-            The requirements of the composite component type will be a subset of
-            those in `componentTypes`. If an entry in `componentTypes` has a requirement
-            that can be satisfied by another entry, then the composition will
-            satisfy the requirement and it will not appear as a requirement of
-            the composite. If multiple entries in `componentTypes` have a requirement
-            for the same type, then only the first such requirement will be retained
-            on the composite. The relative ordering of requirements on the composite
-            will otherwise match that of `componentTypes`.
-
-            If any diagnostics are generated during creation of the composite, they
-            will be written to `outDiagnostics`. If an error is encountered, the
-            function will return null.
-
-            It is an error to create a composite component type that recursively
-            aggregates the a single module more than once.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL createCompositeComponentType(
-            IComponentType* const*  componentTypes,
-            SlangInt                componentTypeCount,
-            IComponentType**        outCompositeComponentType,
-            ISlangBlob**            outDiagnostics = nullptr) = 0;
-
-            /** Specialize a type based on type arguments.
-            */
-        virtual SLANG_NO_THROW TypeReflection* SLANG_MCALL specializeType(
-            TypeReflection*             type,
-            SpecializationArg const*    specializationArgs,
-            SlangInt                    specializationArgCount,
-            ISlangBlob**                outDiagnostics = nullptr) = 0;
-
-
-            /** Get the layout `type` on the chosen `target`.
-            */
-        virtual SLANG_NO_THROW TypeLayoutReflection* SLANG_MCALL getTypeLayout(
-            TypeReflection* type,
-            SlangInt        targetIndex = 0,
-            LayoutRules     rules = LayoutRules::Default,
-            ISlangBlob**    outDiagnostics = nullptr) = 0;
-
-            /** Get a container type from `elementType`. For example, given type `T`, returns
-                a type that represents `StructuredBuffer<T>`.
-
-                @param `elementType`: the element type to wrap around.
-                @param `containerType`: the type of the container to wrap `elementType` in.
-                @param `outDiagnostics`: a blob to receive diagnostic messages.
-            */
-        virtual SLANG_NO_THROW TypeReflection* SLANG_MCALL getContainerType(
-            TypeReflection* elementType,
-            ContainerType containerType,
-            ISlangBlob** outDiagnostics = nullptr) = 0;
-
-            /** Return a `TypeReflection` that represents the `__Dynamic` type.
-                This type can be used as a specialization argument to indicate using
-                dynamic dispatch.
-            */
-        virtual SLANG_NO_THROW TypeReflection* SLANG_MCALL getDynamicType() = 0;
-
-            /** Get the mangled name for a type RTTI object.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTypeRTTIMangledName(
-            TypeReflection* type,
-            ISlangBlob** outNameBlob) = 0;
-
-            /** Get the mangled name for a type witness.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTypeConformanceWitnessMangledName(
-            TypeReflection* type,
-            TypeReflection* interfaceType,
-            ISlangBlob** outNameBlob) = 0;
-
-            /** Get the sequential ID used to identify a type witness in a dynamic object.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTypeConformanceWitnessSequentialID(
-            slang::TypeReflection* type,
-            slang::TypeReflection* interfaceType,
-            uint32_t*              outId) = 0;
-
-            /** Create a request to load/compile front-end code.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL createCompileRequest(
-            SlangCompileRequest**   outCompileRequest) = 0;
-
-        
-            /** Creates a `IComponentType` that represents a type's conformance to an interface.
-                The retrieved `ITypeConformance` objects can be included in a composite `IComponentType`
-                to explicitly specify which implementation types should be included in the final compiled
-                code. For example, if an module defines `IMaterial` interface and `AMaterial`,
-                `BMaterial`, `CMaterial` types that implements the interface, the user can exclude
-                `CMaterial` implementation from the resulting shader code by explcitly adding
-                `AMaterial:IMaterial` and `BMaterial:IMaterial` conformances to a composite
-                `IComponentType` and get entry point code from it. The resulting code will not have
-                anything related to `CMaterial` in the dynamic dispatch logic. If the user does not
-                explicitly include any `TypeConformances` to an interface type, all implementations to
-                that interface will be included by default. By linking a `ITypeConformance`, the user is
-                also given the opportunity to specify the dispatch ID of the implementation type. If
-                `conformanceIdOverride` is -1, there will be no override behavior and Slang will
-                automatically assign IDs to implementation types. The automatically assigned IDs can be
-                queried via `ISession::getTypeConformanceWitnessSequentialID`.
-
-                Returns SLANG_OK if succeeds, or SLANG_FAIL if `type` does not conform to `interfaceType`.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL createTypeConformanceComponentType(
-            slang::TypeReflection* type,
-            slang::TypeReflection* interfaceType,
-            ITypeConformance** outConformance,
-            SlangInt conformanceIdOverride,
-            ISlangBlob** outDiagnostics) = 0;
-
-            /** Load a module from a Slang module blob.
-            */
-        virtual SLANG_NO_THROW IModule* SLANG_MCALL loadModuleFromIRBlob(
-            const char* moduleName,
-            const char* path,
-            slang::IBlob* source,
-            slang::IBlob** outDiagnostics = nullptr) = 0;
-
-        virtual SLANG_NO_THROW SlangInt SLANG_MCALL getLoadedModuleCount() = 0;
-        virtual SLANG_NO_THROW IModule* SLANG_MCALL getLoadedModule(SlangInt index) = 0;
-
-            /** Checks if a precompiled binary module is up-to-date with the current compiler
-            *   option settings and the source file contents.
-            */
-        virtual SLANG_NO_THROW bool SLANG_MCALL isBinaryModuleUpToDate(
-            const char* modulePath, slang::IBlob* binaryModuleBlob) = 0;
-
-            /** Load a module from a string.
-            */
-        virtual SLANG_NO_THROW IModule* SLANG_MCALL loadModuleFromSourceString(
-            const char* moduleName,
-            const char* path,
-            const char* string,
-            slang::IBlob** outDiagnostics = nullptr) = 0;
-    };
-
-    #define SLANG_UUID_ISession ISession::getTypeGuid()
-
-        /** A component type is a unit of shader code layout, reflection, and linking.
-
-        A component type is a unit of shader code that can be included into
-        a linked and compiled shader program. Each component type may have:
-
-        * Zero or more uniform shader parameters, representing textures,
-          buffers, etc. that the code in the component depends on.
-
-        * Zero or more *specialization* parameters, which are type or
-          value parameters that can be used to synthesize specialized
-          versions of the component type.
-
-        * Zero or more entry points, which are the individually invocable
-          kernels that can have final code generated.
-
-        * Zero or more *requirements*, which are other component
-          types on which the component type depends.
-
-        One example of a component type is a module of Slang code:
-
-        * The global-scope shader parameters declared in the module are
-          the parameters when considered as a component type.
-
-        * Any global-scope generic or interface type parameters introduce
-          specialization parameters for the module.
-
-        * A module does not by default include any entry points when
-          considered as a component type (although the code of the
-          module might *declare* some entry points).
-
-        * Any other modules that are `import`ed in the source code
-          become requirements of the module, when considered as a
-          component type.
-
-        An entry point is another example of a component type:
-
-        * The `uniform` parameters of the entry point function are
-          its shader parameters when considered as a component type.
-
-        * Any generic or interface-type parameters of the entry point
-          introduce specialization parameters.
-
-        * An entry point component type exposes a single entry point (itself).
-
-        * An entry point has one requirement for the module in which
-          it was defined.
-
-        Component types can be manipulated in a few ways:
-
-        * Multiple component types can be combined into a composite, which
-          combines all of their code, parameters, etc.
-
-        * A component type can be specialized, by "plugging in" types and
-          values for its specialization parameters.
-
-        * A component type can be laid out for a particular target, giving
-          offsets/bindings to the shader parameters it contains.
-
-        * Generated kernel code can be requested for entry points.
-
-        */
-    struct IComponentType : public ISlangUnknown
-    {
-        SLANG_COM_INTERFACE(0x5bc42be8, 0x5c50, 0x4929, { 0x9e, 0x5e, 0xd1, 0x5e, 0x7c, 0x24, 0x1, 0x5f })
-
-            /** Get the runtime session that this component type belongs to.
-            */
-        virtual SLANG_NO_THROW ISession* SLANG_MCALL getSession() = 0;
-
-            /** Get the layout for this program for the chosen `targetIndex`.
-
-            The resulting layout will establish offsets/bindings for all
-            of the global and entry-point shader parameters in the
-            component type.
-
-            If this component type has specialization parameters (that is,
-            it is not fully specialized), then the resulting layout may
-            be incomplete, and plugging in arguments for generic specialization
-            parameters may result in a component type that doesn't have
-            a compatible layout. If the component type only uses
-            interface-type specialization parameters, then the layout
-            for a specialization should be compatible with an unspecialized
-            layout (all parameters in the unspecialized layout will have
-            the same offset/binding in the specialized layout).
-
-            If this component type is combined into a composite, then
-            the absolute offsets/bindings of parameters may not stay the same.
-            If the shader parameters in a component type don't make
-            use of explicit binding annotations (e.g., `register(...)`),
-            then the *relative* offset of shader parameters will stay
-            the same when it is used in a composition.
-            */
-        virtual SLANG_NO_THROW ProgramLayout* SLANG_MCALL getLayout(
-            SlangInt    targetIndex = 0,
-            IBlob**     outDiagnostics = nullptr) = 0;
-
-            /** Get the number of (unspecialized) specialization parameters for the component type.
-            */
-        virtual SLANG_NO_THROW SlangInt SLANG_MCALL getSpecializationParamCount() = 0;
-
-            /** Get the compiled code for the entry point at `entryPointIndex` for the chosen `targetIndex`
-
-            Entry point code can only be computed for a component type that
-            has no specialization parameters (it must be fully specialized)
-            and that has no requirements (it must be fully linked).
-
-            If code has not already been generated for the given entry point and target,
-            then a compilation error may be detected, in which case `outDiagnostics`
-            (if non-null) will be filled in with a blob of messages diagnosing the error.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointCode(
-            SlangInt    entryPointIndex,
-            SlangInt    targetIndex,
-            IBlob**     outCode,
-            IBlob**     outDiagnostics = nullptr) = 0;
-
-            /** Get the compilation result as a file system.
-
-            Has the same requirements as getEntryPointCode.
-
-            The result is not written to the actual OS file system, but is made avaiable as an
-            in memory representation.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getResultAsFileSystem(
-            SlangInt    entryPointIndex,
-            SlangInt    targetIndex, 
-            ISlangMutableFileSystem** outFileSystem) = 0;
-
-            /** Compute a hash for the entry point at `entryPointIndex` for the chosen `targetIndex`.
-
-            This computes a hash based on all the dependencies for this component type as well as the
-            target settings affecting the compiler backend. The computed hash is used as a key for caching
-            the output of the compiler backend to implement shader caching.
-            */
-        virtual SLANG_NO_THROW void SLANG_MCALL getEntryPointHash(
-            SlangInt    entryPointIndex,
-            SlangInt    targetIndex,
-            IBlob**     outHash) = 0;
-
-            /** Specialize the component by binding its specialization parameters to concrete arguments.
-
-            The `specializationArgs` array must have `specializationArgCount` entries, and
-            this must match the number of specialization parameters on this component type.
-
-            If any diagnostics (error or warnings) are produced, they will be written to `outDiagnostics`.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL specialize(
-            SpecializationArg const*    specializationArgs,
-            SlangInt                    specializationArgCount,
-            IComponentType**            outSpecializedComponentType,
-            ISlangBlob**                outDiagnostics = nullptr) = 0;
-
-            /** Link this component type against all of its unsatisifed dependencies.
-            
-            A component type may have unsatisfied dependencies. For example, a module
-            depends on any other modules it `import`s, and an entry point depends
-            on the module that defined it.
-
-            A user can manually satisfy dependencies by creating a composite
-            component type, and when doing so they retain full control over
-            the relative ordering of shader parameters in the resulting layout.
-
-            It is an error to try to generate/access compiled kernel code for
-            a component type with unresolved dependencies, so if dependencies
-            remain after whatever manual composition steps an application
-            cares to peform, the `link()` function can be used to automatically
-            compose in any remaining dependencies. The order of parameters
-            (and hence the global layout) that results will be deterministic,
-            but is not currently documented.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL link(
-            IComponentType**            outLinkedComponentType,
-            ISlangBlob**                outDiagnostics = nullptr) = 0;
-
-            /** Get entry point 'callable' functions accessible through the ISlangSharedLibrary interface.
-
-            The functions remain in scope as long as the ISlangSharedLibrary interface is in scope.
-
-            NOTE! Requires a compilation target of SLANG_HOST_CALLABLE.
-    
-            @param entryPointIndex  The index of the entry point to get code for.
-            @param targetIndex      The index of the target to get code for (default: zero).
-            @param outSharedLibrary A pointer to a ISharedLibrary interface which functions can be queried on.
-            @returns                A `SlangResult` to indicate success or failure.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointHostCallable(
-            int                     entryPointIndex,
-            int                     targetIndex,
-            ISlangSharedLibrary**   outSharedLibrary,
-            slang::IBlob**          outDiagnostics = 0) = 0;
-
-            /** Get a new ComponentType object that represents a renamed entry point.
-
-            The current object must be a single EntryPoint, or a CompositeComponentType or
-            SpecializedComponentType that contains one EntryPoint component.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL renameEntryPoint(
-            const char* newName, IComponentType** outEntryPoint) = 0;
-        
-            /** Link and specify additional compiler options when generating code
-            *   from the linked program.
-            */
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL linkWithOptions(
-            IComponentType** outLinkedComponentType,
-            uint32_t compilerOptionEntryCount,
-            CompilerOptionEntry* compilerOptionEntries,
-            ISlangBlob** outDiagnostics = nullptr) = 0;
-
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTargetCode(
-            SlangInt targetIndex,
-            IBlob** outCode,
-            IBlob** outDiagnostics = nullptr) = 0;
-    };
-    #define SLANG_UUID_IComponentType IComponentType::getTypeGuid()
-
-    struct IEntryPoint : public IComponentType
-    {
-        SLANG_COM_INTERFACE(0x8f241361, 0xf5bd, 0x4ca0, { 0xa3, 0xac, 0x2, 0xf7, 0xfa, 0x24, 0x2, 0xb8 })
-    };
-
-    #define SLANG_UUID_IEntryPoint IEntryPoint::getTypeGuid()
-
-    struct ITypeConformance : public IComponentType
-    {
-        SLANG_COM_INTERFACE(0x73eb3147, 0xe544, 0x41b5, { 0xb8, 0xf0, 0xa2, 0x44, 0xdf, 0x21, 0x94, 0xb })
-    };
-    #define SLANG_UUID_ITypeConformance ITypeConformance::getTypeGuid()
-
-        /** A module is the granularity of shader code compilation and loading.
-
-        In most cases a module corresponds to a single compile "translation unit."
-        This will often be a single `.slang` or `.hlsl` file and everything it
-        `#include`s.
-
-        Notably, a module `M` does *not* include the things it `import`s, as these
-        as distinct modules that `M` depends on. There is a directed graph of
-        module dependencies, and all modules in the graph must belong to the
-        same session (`ISession`).
-
-        A module establishes a namespace for looking up types, functions, etc.
-        */
-    struct IModule : public IComponentType
-    {
-        SLANG_COM_INTERFACE(0xc720e64, 0x8722, 0x4d31, { 0x89, 0x90, 0x63, 0x8a, 0x98, 0xb1, 0xc2, 0x79 })
-
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL findEntryPointByName(
-            char const*     name,
-            IEntryPoint**   outEntryPoint) = 0;
-
-        /// Get number of entry points defined in the module. An entry point defined in a module
-        /// is by default not included in the linkage, so calls to `IComponentType::getEntryPointCount`
-        /// on an `IModule` instance will always return 0. However `IModule::getDefinedEntryPointCount`
-        /// will return the number of defined entry points.
-        virtual SLANG_NO_THROW SlangInt32 SLANG_MCALL getDefinedEntryPointCount() = 0;
-        /// Get the name of an entry point defined in the module.
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL
-            getDefinedEntryPoint(SlangInt32 index, IEntryPoint** outEntryPoint) = 0;
-
-        /// Get a serialized representation of the checked module.
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL serialize(ISlangBlob** outSerializedBlob) = 0;
-
-        /// Write the serialized representation of this module to a file.
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL writeToFile(char const* fileName) = 0;
-
-        /// Get the name of the module.
-        virtual SLANG_NO_THROW const char* SLANG_MCALL getName() = 0;
-
-        /// Get the path of the module.
-        virtual SLANG_NO_THROW const char* SLANG_MCALL getFilePath() = 0;
-
-        /// Get the unique identity of the module.
-        virtual SLANG_NO_THROW const char* SLANG_MCALL getUniqueIdentity() = 0;
-
-        /// Find and validate an entry point by name, even if the function is
-        /// not marked with the `[shader("...")]` attribute.
-        virtual SLANG_NO_THROW SlangResult SLANG_MCALL findAndCheckEntryPoint(
-            char const* name,
-            SlangStage stage,
-            IEntryPoint** outEntryPoint,
-            ISlangBlob** outDiagnostics) = 0;
-    };
-    
-    #define SLANG_UUID_IModule IModule::getTypeGuid()
-
-        /** Argument used for specialization to types/values.
-        */
-    struct SpecializationArg
-    {
-        enum class Kind : int32_t
-        {
-            Unknown,    /**< An invalid specialization argument. */
-            Type,       /**< Specialize to a type. */
-        };
-
-        /** The kind of specialization argument. */
-        Kind kind;
-        union
-        {
-            /** A type specialization argument, used for `Kind::Type`. */
-            TypeReflection* type;
-        };
-
-        static SpecializationArg fromType(TypeReflection* inType)
-        {
-            SpecializationArg rs;
-            rs.kind = Kind::Type;
-            rs.type = inType;
-            return rs;
-        }
-    };
-}
-
-// Passed into functions to create globalSession to identify the API version client code is
-// using. 
-#define SLANG_API_VERSION 0
-
-/* Create a global session, with built in StdLib.
-
-@param apiVersion Pass in SLANG_API_VERSION
-@param outGlobalSession (out)The created global session. 
-*/
-SLANG_EXTERN_C SLANG_API SlangResult slang_createGlobalSession(
-    SlangInt                apiVersion,
-    slang::IGlobalSession** outGlobalSession);
-
-/* Create a global session, but do not set up the stdlib. The stdlib can
-then be loaded via loadStdLib or compileStdLib
-
-@param apiVersion Pass in SLANG_API_VERSION
-@param outGlobalSession (out)The created global session that doesn't have a StdLib setup.
-
-NOTE! API is experimental and not ready for production code 
-*/
-SLANG_EXTERN_C SLANG_API SlangResult slang_createGlobalSessionWithoutStdLib(
-    SlangInt                apiVersion,
-    slang::IGlobalSession** outGlobalSession);
-
-/* Returns a blob that contains the serialized stdlib.
-Returns nullptr if there isn't an embedded stdlib.
-*/
-SLANG_API ISlangBlob* slang_getEmbeddedStdLib();
-
-namespace slang
-{
-    inline SlangResult createGlobalSession(
-        slang::IGlobalSession** outGlobalSession)
-    {
-        return slang_createGlobalSession(SLANG_API_VERSION, outGlobalSession);
-    }
-}
-
-/** @see slang::ICompileRequest::getProgram
-*/
-SLANG_EXTERN_C SLANG_API SlangResult spCompileRequest_getProgram(
-    SlangCompileRequest*    request,
-    slang::IComponentType** outProgram);
-
-/** @see slang::ICompileRequest::getProgramWithEntryPoints
-*/
-SLANG_EXTERN_C SLANG_API SlangResult spCompileRequest_getProgramWithEntryPoints(
-    SlangCompileRequest*    request,
-    slang::IComponentType** outProgram);
-
-/** @see slang::ICompileRequest::getEntryPoint
-*/
-SLANG_EXTERN_C SLANG_API SlangResult spCompileRequest_getEntryPoint(
-    SlangCompileRequest*    request,
-    SlangInt                entryPointIndex,
-    slang::IComponentType** outEntryPoint);
-
-/** @see slang::ICompileRequest::getModule
-*/
-SLANG_EXTERN_C SLANG_API SlangResult spCompileRequest_getModule(
-    SlangCompileRequest*    request,
-    SlangInt                translationUnitIndex,
-    slang::IModule**        outModule);
-
-/** @see slang::ICompileRequest::getSession
-*/
-SLANG_EXTERN_C SLANG_API SlangResult spCompileRequest_getSession(
-    SlangCompileRequest* request,
-    slang::ISession** outSession);
-#endif
-
-/* DEPRECATED DEFINITIONS
-
-Everything below this point represents deprecated APIs/definition that are only
-being kept around for source/binary compatibility with old client code. New
-code should not use any of these declarations, and the Slang API will drop these
-declarations over time.
-*/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define SLANG_ERROR_INSUFFICIENT_BUFFER SLANG_E_BUFFER_TOO_SMALL
-#define SLANG_ERROR_INVALID_PARAMETER SLANG_E_INVALID_ARG
-
-SLANG_API char const* spGetTranslationUnitSource(
-    SlangCompileRequest*    request,
-    int                     translationUnitIndex);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/gigihelp.html b/gigihelp.html
index e35c86ea..1aec60d4 100644
--- a/gigihelp.html
+++ b/gigihelp.html
@@ -64,6 +64,8 @@ <h1>Enums</h1>
 <tr><td>Bool</td><td>bool</td></tr>
 <tr><td>Float4x4</td><td>float[4][4]</td></tr>
 <tr><td>Uint_16</td><td>a 16 bit uint</td></tr>
+<tr><td>Int_64</td><td>a 64 bit int</td></tr>
+<tr><td>Uint_64</td><td>a 64 bit uint</td></tr>
 <tr><td>Count</td><td></td></tr>
 </table>
 <br/>
@@ -80,6 +82,8 @@ <h1>Enums</h1>
 <tr><td>Float2</td><td>float[2]</td></tr>
 <tr><td>Float3</td><td>float[3]</td></tr>
 <tr><td>Float4</td><td>float[4]</td></tr>
+<tr><td>Int_64</td><td>int64_t</td></tr>
+<tr><td>Uint_64</td><td>uint64_t</td></tr>
 </table>
 <br/>
 
@@ -144,6 +148,25 @@ <h1>Enums</h1>
 </table>
 <br/>
 
+<b>GigiSlangOptimizationLevel : The optimization level</b><br/><br/>
+<table>
+<tr><th colspan=2>GigiSlangOptimizationLevel</th></tr>
+<tr><td>None</td><td>Don't optimize at all.</td></tr>
+<tr><td>Default</td><td>Default optimization level: balance code quality and compilation time.</td></tr>
+<tr><td>High</td><td>Optimize aggressively.</td></tr>
+<tr><td>Maximum</td><td>Include optimizations that may take a very long time, or may involve severe space-vs-speed tradeoffs.</td></tr>
+</table>
+<br/>
+
+<b>GigiSlangFloatingPointMode : Floating point mode</b><br/><br/>
+<table>
+<tr><th colspan=2>GigiSlangFloatingPointMode</th></tr>
+<tr><td>Default</td><td></td></tr>
+<tr><td>Fast</td><td></td></tr>
+<tr><td>Precise</td><td></td></tr>
+</table>
+<br/>
+
 <b>VariableVisibility : The visibility of the variable</b><br/><br/>
 <table>
 <tr><th colspan=2>VariableVisibility</th></tr>
@@ -730,10 +753,16 @@ <h1>Structs</h1>
 </table>
 <br/>
 
-<b>SlangOptions : A declaration of a shader</b><br/><br/>
+<b>SlangOptions : Slang options</b><br/><br/>
 <table>
 <tr><th colspan=3>SlangOptions</th></tr>
 <tr><td>bool process</td><td>false</td><td>if true, this shader will be processed by slang</td></tr>
+<tr><td>bool noNameMangling</td><td>false</td><td>Do as little mangling of names as possible, to try to preserve original names.</td></tr>
+<tr><td>bool lineDirectives</td><td>true</td><td>Whether to output line directives in the shader.</td></tr>
+<tr><td>bool warningsAsErrors</td><td>false</td><td>Treat warnings as errors.</td></tr>
+<tr><td>bool verbosePaths</td><td>false</td><td>Display more detailed path information in diagnostic output.</td></tr>
+<tr><td>GigiSlangFloatingPointMode floatingPointMode</td><td>GigiSlangFloatingPointMode::Default</td><td>Floating point mode</td></tr>
+<tr><td>GigiSlangOptimizationLevel optimizationLevel</td><td>GigiSlangOptimizationLevel::Default</td><td>Optimization level</td></tr>
 </table>
 <br/>
 
@@ -792,6 +821,7 @@ <h1>Structs</h1>
 <tr><td><i>int enumIndex</i></td><td>-1</td><td>Calculated for convenience.</td></tr>
 <tr><td><i>std::string originalName</i></td><td>""</td><td>The name before renames and sanitization</td></tr>
 <tr><td><i>std::string scope</i></td><td>""</td><td>The scope that the node lives in. A possibly nested list of subgraph node names, seperated by a dot.</td></tr>
+<tr><td><i>bool system</i></td><td>false</td><td>Set to true if the runtime overrides the value.</td></tr>
 <tr><td>VariableUIHint UIHint</td><td>VariableUIHint::Count</td><td>Any hints for UI</td></tr>
 </table>
 <br/>
diff --git a/gigischema.json b/gigischema.json
index 3b419240..b6bc6991 100644
--- a/gigischema.json
+++ b/gigischema.json
@@ -32,7 +32,7 @@
           "type": {
             "description": "The type of the variable",
             "type": "string",
-            "enum": ["Int", "Int2", "Int3", "Int4", "Uint", "Uint2", "Uint3", "Uint4", "Float", "Float2", "Float3", "Float4", "Bool", "Float4x4", "Uint_16", "Count"]
+            "enum": ["Int", "Int2", "Int3", "Int4", "Uint", "Uint2", "Uint3", "Uint4", "Float", "Float2", "Float3", "Float4", "Bool", "Float4x4", "Uint_16", "Int_64", "Uint_64", "Count"]
           },
           "Const": {
             "description": "If true, the variable is declared const and cannot change at runtime",
@@ -181,6 +181,32 @@
               "process": {
                 "description": "if true, this shader will be processed by slang",
                 "type": "boolean"
+              },
+              "noNameMangling": {
+                "description": "Do as little mangling of names as possible, to try to preserve original names.",
+                "type": "boolean"
+              },
+              "lineDirectives": {
+                "description": "Whether to output line directives in the shader.",
+                "type": "boolean"
+              },
+              "warningsAsErrors": {
+                "description": "Treat warnings as errors.",
+                "type": "boolean"
+              },
+              "verbosePaths": {
+                "description": "Display more detailed path information in diagnostic output.",
+                "type": "boolean"
+              },
+              "floatingPointMode": {
+                "description": "Floating point mode",
+                "type": "string",
+                "enum": ["Default", "Fast", "Precise"]
+              },
+              "optimizationLevel": {
+                "description": "Optimization level",
+                "type": "string",
+                "enum": ["None", "Default", "High", "Maximum"]
               }
             }
           },
@@ -229,7 +255,7 @@
                     "type": {
                       "description": "The data type of the buffer if a simple type",
                       "type": "string",
-                      "enum": ["Int", "Int2", "Int3", "Int4", "Uint", "Uint2", "Uint3", "Uint4", "Float", "Float2", "Float3", "Float4", "Bool", "Float4x4", "Uint_16", "Count"]
+                      "enum": ["Int", "Int2", "Int3", "Int4", "Uint", "Uint2", "Uint3", "Uint4", "Float", "Float2", "Float3", "Float4", "Bool", "Float4x4", "Uint_16", "Int_64", "Uint_64", "Count"]
                     },
                     "typeStruct": {
                       "description": "The data type of the buffer if a struct type",
@@ -267,7 +293,7 @@
                     "viewType": {
                       "description": "The dimensionality of the texture",
                       "type": "string",
-                      "enum": ["Int", "Int4", "Uint", "Uint2", "Uint4", "Float", "Float2", "Float3", "Float4"]
+                      "enum": ["Int", "Int4", "Uint", "Uint2", "Uint4", "Float", "Float2", "Float3", "Float4", "Int_64", "Uint_64"]
                     },
                     "globallyCoherent": {
                       "description": "Set this to true if you want the resource to be declared as globallycoherent.",
@@ -366,7 +392,7 @@
                 "type": {
                   "description": "The type of the field",
                   "type": "string",
-                  "enum": ["Int", "Int2", "Int3", "Int4", "Uint", "Uint2", "Uint3", "Uint4", "Float", "Float2", "Float3", "Float4", "Bool", "Float4x4", "Uint_16", "Count"]
+                  "enum": ["Int", "Int2", "Int3", "Int4", "Uint", "Uint2", "Uint3", "Uint4", "Float", "Float2", "Float3", "Float4", "Bool", "Float4x4", "Uint_16", "Int_64", "Uint_64", "Count"]
                 },
                 "dflt": {
                   "description": "The default value",
@@ -528,7 +554,7 @@
                   "type": {
                     "description": "The buffer type. May become a typed or structured buffer, depending on the type chosen.",
                     "type": "string",
-                    "enum": ["Int", "Int2", "Int3", "Int4", "Uint", "Uint2", "Uint3", "Uint4", "Float", "Float2", "Float3", "Float4", "Bool", "Float4x4", "Uint_16", "Count"]
+                    "enum": ["Int", "Int2", "Int3", "Int4", "Uint", "Uint2", "Uint3", "Uint4", "Float", "Float2", "Float3", "Float4", "Bool", "Float4x4", "Uint_16", "Int_64", "Uint_64", "Count"]
                   }
                 }
               },
diff --git a/renderdoc.dll b/renderdoc.dll
index 6aab5c73..9ca7e01e 100644
Binary files a/renderdoc.dll and b/renderdoc.dll differ
diff --git a/slang.dll b/slang.dll
index 16370f02..96493165 100644
Binary files a/slang.dll and b/slang.dll differ