From 305b62cf4f7e2a4ca3cc56109003aed6bde61c25 Mon Sep 17 00:00:00 2001
From: Mikko Rasa <tdb@tdb.fi>
Date: Tue, 6 Apr 2021 12:54:06 +0300
Subject: [PATCH] Adjust GLSL builtins

Remove some functions which are complicated to implement in SPIR-V due
to the use of ad-hoc struct types or pointers.  I may add them back
later if there turns out to be some use for them.

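Add inline definitions for some overloads taking mixed vector and scalar
arguments.  SPIR-V operations require all operands to have the same
number of elements, so each of these overloads converts its scalar
arguments to vectors and calls the all-vector form.  Builtin functions
are now always inlined, even if they are called more than once.

Also rename the parameters of reflect and refract so that the incident
vector I comes before the normal N, as in the GLSL specification, and
declare gl_InvocationID next to gl_PrimitiveIDIn.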
---
 builtin_data/_builtin.glsl | 102 +++++++++++++++----------------------
 scripts/builtin_funcs.py   |  40 +++++++--------
 source/glsl/optimize.cpp   |   4 +-
 source/glsl/optimize.h     |   2 +-
 4 files changed, 62 insertions(+), 86 deletions(-)

diff --git a/builtin_data/_builtin.glsl b/builtin_data/_builtin.glsl
index aa36d5c6..124a117b 100644
--- a/builtin_data/_builtin.glsl
+++ b/builtin_data/_builtin.glsl
@@ -169,13 +169,9 @@ float mod(float x, float y);
 vec2 mod(vec2 x, vec2 y);
 vec3 mod(vec3 x, vec3 y);
 vec4 mod(vec4 x, vec4 y);
-vec2 mod(vec2 x, float y);
-vec3 mod(vec3 x, float y);
-vec4 mod(vec4 x, float y);
-float modf(float x, out float y);
-vec2 modf(vec2 x, out vec2 y);
-vec3 modf(vec3 x, out vec3 y);
-vec4 modf(vec4 x, out vec4 y);
+vec2 mod(vec2 x, float y) { return mod(x, vec2(y)); }
+vec3 mod(vec3 x, float y) { return mod(x, vec3(y)); }
+vec4 mod(vec4 x, float y) { return mod(x, vec4(y)); }
 float min(float x, float y);
 vec2 min(vec2 x, vec2 y);
 vec3 min(vec3 x, vec3 y);
@@ -184,12 +180,12 @@ int min(int x, int y);
 ivec2 min(ivec2 x, ivec2 y);
 ivec3 min(ivec3 x, ivec3 y);
 ivec4 min(ivec4 x, ivec4 y);
-vec2 min(vec2 x, float y);
-vec3 min(vec3 x, float y);
-vec4 min(vec4 x, float y);
-ivec2 min(ivec2 x, int y);
-ivec3 min(ivec3 x, int y);
-ivec4 min(ivec4 x, int y);
+vec2 min(vec2 x, float y) { return min(x, vec2(y)); }
+vec3 min(vec3 x, float y) { return min(x, vec3(y)); }
+vec4 min(vec4 x, float y) { return min(x, vec4(y)); }
+ivec2 min(ivec2 x, int y) { return min(x, ivec2(y)); }
+ivec3 min(ivec3 x, int y) { return min(x, ivec3(y)); }
+ivec4 min(ivec4 x, int y) { return min(x, ivec4(y)); }
 float max(float x, float y);
 vec2 max(vec2 x, vec2 y);
 vec3 max(vec3 x, vec3 y);
@@ -198,12 +194,12 @@ int max(int x, int y);
 ivec2 max(ivec2 x, ivec2 y);
 ivec3 max(ivec3 x, ivec3 y);
 ivec4 max(ivec4 x, ivec4 y);
-vec2 max(vec2 x, float y);
-vec3 max(vec3 x, float y);
-vec4 max(vec4 x, float y);
-ivec2 max(ivec2 x, int y);
-ivec3 max(ivec3 x, int y);
-ivec4 max(ivec4 x, int y);
+vec2 max(vec2 x, float y) { return max(x, vec2(y)); }
+vec3 max(vec3 x, float y) { return max(x, vec3(y)); }
+vec4 max(vec4 x, float y) { return max(x, vec4(y)); }
+ivec2 max(ivec2 x, int y) { return max(x, ivec2(y)); }
+ivec3 max(ivec3 x, int y) { return max(x, ivec3(y)); }
+ivec4 max(ivec4 x, int y) { return max(x, ivec4(y)); }
 float clamp(float x, float minVal, float maxVal);
 vec2 clamp(vec2 x, vec2 minVal, vec2 maxVal);
 vec3 clamp(vec3 x, vec3 minVal, vec3 maxVal);
@@ -212,19 +208,19 @@ int clamp(int x, int minVal, int maxVal);
 ivec2 clamp(ivec2 x, ivec2 minVal, ivec2 maxVal);
 ivec3 clamp(ivec3 x, ivec3 minVal, ivec3 maxVal);
 ivec4 clamp(ivec4 x, ivec4 minVal, ivec4 maxVal);
-vec2 clamp(vec2 x, float minVal, float maxVal);
-vec3 clamp(vec3 x, float minVal, float maxVal);
-vec4 clamp(vec4 x, float minVal, float maxVal);
-ivec2 clamp(ivec2 x, int minVal, int maxVal);
-ivec3 clamp(ivec3 x, int minVal, int maxVal);
-ivec4 clamp(ivec4 x, int minVal, int maxVal);
+vec2 clamp(vec2 x, float minVal, float maxVal) { return clamp(x, vec2(minVal), vec2(maxVal)); }
+vec3 clamp(vec3 x, float minVal, float maxVal) { return clamp(x, vec3(minVal), vec3(maxVal)); }
+vec4 clamp(vec4 x, float minVal, float maxVal) { return clamp(x, vec4(minVal), vec4(maxVal)); }
+ivec2 clamp(ivec2 x, int minVal, int maxVal) { return clamp(x, ivec2(minVal), ivec2(maxVal)); }
+ivec3 clamp(ivec3 x, int minVal, int maxVal) { return clamp(x, ivec3(minVal), ivec3(maxVal)); }
+ivec4 clamp(ivec4 x, int minVal, int maxVal) { return clamp(x, ivec4(minVal), ivec4(maxVal)); }
 float mix(float x, float y, float a);
 vec2 mix(vec2 x, vec2 y, vec2 a);
 vec3 mix(vec3 x, vec3 y, vec3 a);
 vec4 mix(vec4 x, vec4 y, vec4 a);
-vec2 mix(vec2 x, vec2 y, float a);
-vec3 mix(vec3 x, vec3 y, float a);
-vec4 mix(vec4 x, vec4 y, float a);
+vec2 mix(vec2 x, vec2 y, float a) { return mix(x, y, vec2(a)); }
+vec3 mix(vec3 x, vec3 y, float a) { return mix(x, y, vec3(a)); }
+vec4 mix(vec4 x, vec4 y, float a) { return mix(x, y, vec4(a)); }
 float mix(float x, float y, bool a);
 vec2 mix(vec2 x, vec2 y, bvec2 a);
 vec3 mix(vec3 x, vec3 y, bvec3 a);
@@ -237,16 +234,16 @@ float step(float edge, float x);
 vec2 step(vec2 edge, vec2 x);
 vec3 step(vec3 edge, vec3 x);
 vec4 step(vec4 edge, vec4 x);
-vec2 step(float edge, vec2 x);
-vec3 step(float edge, vec3 x);
-vec4 step(float edge, vec4 x);
+vec2 step(float edge, vec2 x) { return step(vec2(edge), x); }
+vec3 step(float edge, vec3 x) { return step(vec3(edge), x); }
+vec4 step(float edge, vec4 x) { return step(vec4(edge), x); }
 float smoothstep(float edge0, float edge1, float x);
 vec2 smoothstep(vec2 edge0, vec2 edge1, vec2 x);
 vec3 smoothstep(vec3 edge0, vec3 edge1, vec3 x);
 vec4 smoothstep(vec4 edge0, vec4 edge1, vec4 x);
-vec2 smoothstep(float edge0, float edge1, vec2 x);
-vec3 smoothstep(float edge0, float edge1, vec3 x);
-vec4 smoothstep(float edge0, float edge1, vec4 x);
+vec2 smoothstep(float edge0, float edge1, vec2 x) { return smoothstep(vec2(edge0), vec2(edge1), x); }
+vec3 smoothstep(float edge0, float edge1, vec3 x) { return smoothstep(vec3(edge0), vec3(edge1), x); }
+vec4 smoothstep(float edge0, float edge1, vec4 x) { return smoothstep(vec4(edge0), vec4(edge1), x); }
 bool isnan(float x);
 bvec2 isnan(vec2 x);
 bvec3 isnan(vec3 x);
@@ -255,26 +252,10 @@ bool isinf(float x);
 bvec2 isinf(vec2 x);
 bvec3 isinf(vec3 x);
 bvec4 isinf(vec4 x);
-int floatBitsToInt(float value);
-ivec2 floatBitsToInt(vec2 value);
-ivec3 floatBitsToInt(vec3 value);
-ivec4 floatBitsToInt(vec4 value);
-float intBitsToFloat(int value);
-vec2 intBitsToFloat(ivec2 value);
-vec3 intBitsToFloat(ivec3 value);
-vec4 intBitsToFloat(ivec4 value);
 float fma(float a, float b, float c);
 vec2 fma(vec2 a, vec2 b, vec2 c);
 vec3 fma(vec3 a, vec3 b, vec3 c);
 vec4 fma(vec4 a, vec4 b, vec4 c);
-float frexp(float x, out int exp);
-vec2 frexp(vec2 x, out ivec2 exp);
-vec3 frexp(vec3 x, out ivec3 exp);
-vec4 frexp(vec4 x, out ivec4 exp);
-float ldexp(float x, int exp);
-vec2 ldexp(vec2 x, ivec2 exp);
-vec3 ldexp(vec3 x, ivec3 exp);
-vec4 ldexp(vec4 x, ivec4 exp);
 float length(float x);
 float length(vec2 x);
 float length(vec3 x);
@@ -296,14 +277,14 @@ float faceforward(float N, float I, float Nref);
 vec2 faceforward(vec2 N, vec2 I, vec2 Nref);
 vec3 faceforward(vec3 N, vec3 I, vec3 Nref);
 vec4 faceforward(vec4 N, vec4 I, vec4 Nref);
-float reflect(float N, float I);
-vec2 reflect(vec2 N, vec2 I);
-vec3 reflect(vec3 N, vec3 I);
-vec4 reflect(vec4 N, vec4 I);
-float refract(float N, float I, float eta);
-vec2 refract(vec2 N, vec2 I, float eta);
-vec3 refract(vec3 N, vec3 I, float eta);
-vec4 refract(vec4 N, vec4 I, float eta);
+float reflect(float I, float N);
+vec2 reflect(vec2 I, vec2 N);
+vec3 reflect(vec3 I, vec3 N);
+vec4 reflect(vec4 I, vec4 N);
+float refract(float I, float N, float eta);
+vec2 refract(vec2 I, vec2 N, float eta);
+vec3 refract(vec3 I, vec3 N, float eta);
+vec4 refract(vec4 I, vec4 N, float eta);
 mat2 matrixCompMult(mat2 x, mat2 y);
 mat3 matrixCompMult(mat3 x, mat3 y);
 mat4 matrixCompMult(mat4 x, mat4 y);
@@ -382,10 +363,6 @@ bool all(bvec4 x);
 bool not(bvec2 x);
 bool not(bvec3 x);
 bool not(bvec4 x);
-void imulExtended(int x, int y, out int msb, out int lsb);
-void imulExtended(ivec2 x, ivec2 y, out ivec2 msb, out ivec2 lsb);
-void imulExtended(ivec3 x, ivec3 y, out ivec3 msb, out ivec3 lsb);
-void imulExtended(ivec4 x, ivec4 y, out ivec4 msb, out ivec4 lsb);
 int bitfieldExtract(int value, int offset, int bits);
 ivec2 bitfieldExtract(ivec2 value, int offset, int bits);
 ivec3 bitfieldExtract(ivec3 value, int offset, int bits);
@@ -468,6 +445,7 @@ in gl_PerVertex
   float gl_ClipDistance[];
 } gl_in[];
 in int gl_PrimitiveIDIn;
+in int gl_InvocationID;
 out gl_PerVertex
 {
   vec4 gl_Position;
diff --git a/scripts/builtin_funcs.py b/scripts/builtin_funcs.py
index e7618877..24cc3643 100755
--- a/scripts/builtin_funcs.py
+++ b/scripts/builtin_funcs.py
@@ -41,6 +41,7 @@ traits = {
 
 float32vectypes = ("vec2", "vec3", "vec4")
 float32types = ("float",)+float32vectypes
+floatvectypes = float32vectypes
 floattypes = float32types
 int32vectypes = ("ivec2", "ivec3", "ivec4")
 int32types = ("int",)+int32vectypes
@@ -91,28 +92,23 @@ shared_funcs = [
 	("T ceil(T x)", floattypes),
 	("T fract(T x)", floattypes),
 	("T mod(T x, T y)", floattypes),
-	("T mod(T x, T::Base y)", floattypes),
-	("T modf(T x, out T y)", floattypes),
+	("T mod(T x, T::Base y)", "mod(x, T(y))", floatvectypes),
 	("T min(T x, T y)", arithmetictypes),
-	("T min(T x, T::Base y)", arithmetictypes),
+	("T min(T x, T::Base y)", "min(x, T(y))", arithmeticvectypes),
 	("T max(T x, T y)", arithmetictypes),
-	("T max(T x, T::Base y)", arithmetictypes),
+	("T max(T x, T::Base y)", "max(x, T(y))", arithmeticvectypes),
 	("T clamp(T x, T minVal, T maxVal)", arithmetictypes),
-	("T clamp(T x, T::Base minVal, T::Base maxVal)", arithmetictypes),
+	("T clamp(T x, T::Base minVal, T::Base maxVal)", "clamp(x, T(minVal), T(maxVal))", arithmeticvectypes),
 	("T mix(T x, T y, T a)", floattypes),
-	("T mix(T x, T y, T::Base a)", floattypes),
+	("T mix(T x, T y, T::Base a)", "mix(x, y, T(a))", floatvectypes),
 	("T mix(T x, T y, bool[T::Dim] a)", arithmetictypes),
 	("T step(T edge, T x)", floattypes),
-	("T step(T::Base edge, T x)", floattypes),
+	("T step(T::Base edge, T x)", "step(T(edge), x)", floatvectypes),
 	("T smoothstep(T edge0, T edge1, T x)", floattypes),
-	("T smoothstep(T::Base edge0, T::Base edge1, T x)", floattypes),
+	("T smoothstep(T::Base edge0, T::Base edge1, T x)", "smoothstep(T(edge0), T(edge1), x)", floatvectypes),
 	("bool[T::Dim] isnan(T x)", floattypes),
 	("bool[T::Dim] isinf(T x)", floattypes),
-	("int[T::Dim] floatBitsToInt(T value)", float32types),
-	("T intBitsToFloat(int[T::Dim] value)", float32types),
 	("T fma(T a, T b, T c)", floattypes),
-	("T frexp(T x, out int[T::Dim] exp)", floattypes),
-	("T ldexp(T x, int[T::Dim] exp)", floattypes),
 
 	# Geometric
 	("T::Base length(T x)", floattypes),
@@ -121,8 +117,8 @@ shared_funcs = [
 	"vec3 cross(vec3 x)",
 	("T normalize(T x)", floattypes),
 	("T faceforward(T N, T I, T Nref)", floattypes),
-	("T reflect(T N, T I)", floattypes),
-	("T refract(T N, T I, float eta)", floattypes),
+	("T reflect(T I, T N)", floattypes),
+	("T refract(T I, T N, float eta)", floattypes),
 
 	# Matrix
 	("T matrixCompMult(T x, T y)", matrixtypes),
@@ -143,7 +139,6 @@ shared_funcs = [
 	("bool not(T x)", boolvectypes),
 
 	# Integer
-	("void imulExtended(T x, T y, out T msb, out T lsb)", int32types),
 	("T bitfieldExtract(T value, int offset, int bits)", int32types),
 	("T bitfieldInsert(T value, T insert, int offset, int bits)", int32types),
 	("T bitfieldReverse(T value)", int32types),
@@ -235,10 +230,9 @@ def expand_template(template, gentype):
 			result += " "
 		special = not t[0].isalpha()
 
-		if t==",":
-			result += ", "
-		else:
-			result += t
+		result += t
+		if t[-1]==",":
+			result += " "
 
 		i += advance
 
@@ -249,10 +243,14 @@ def generate_functions(funcs):
 	generated = set()
 	for f in funcs:
 		if type(f)==tuple:
-			for t in f[1]:
+			for t in f[-1]:
 				decl = expand_template(f[0], t)
+				if len(f)>=3:
+					decl += " {{ return {}; }}".format(expand_template(f[1], t))
+				else:
+					decl += ";"
 				if not decl in generated:
-					out_lines.append(decl+";\n")
+					out_lines.append(decl+"\n")
 					generated.add(decl)
 		else:
 			out_lines.append(f+";\n")
diff --git a/source/glsl/optimize.cpp b/source/glsl/optimize.cpp
index 927047cc..0f765c2a 100644
--- a/source/glsl/optimize.cpp
+++ b/source/glsl/optimize.cpp
@@ -75,7 +75,7 @@ void InlineableFunctionLocator::visit(FunctionCall &call)
 		++count;
 		/* Don't inline functions which are called more than once or are called
 		recursively. */
-		if(count>1 || def==current_function)
+		if((count>1 && def->source!=BUILTIN_SOURCE) || def==current_function)
 			inlineable.erase(def);
 	}
 
@@ -89,7 +89,7 @@ void InlineableFunctionLocator::visit(FunctionDeclaration &func)
 		has_out_params = ((*i)->interface=="out");
 
 	unsigned &count = refcounts[func.definition];
-	if(count<=1 && !has_out_params)
+	if((count<=1 || func.source==BUILTIN_SOURCE) && !has_out_params)
 		inlineable.insert(func.definition);
 
 	SetForScope<FunctionDeclaration *> set(current_function, &func);
diff --git a/source/glsl/optimize.h b/source/glsl/optimize.h
index 249a859a..b7b39176 100644
--- a/source/glsl/optimize.h
+++ b/source/glsl/optimize.h
@@ -27,7 +27,7 @@ private:
 
 /** Finds functions which are candidates for inlining.  Currently this means
 functions which have no flow control statements, no more than one return
-statement, and are only called once. */
+statement, and are either builtins or only called once. */
 class InlineableFunctionLocator: private TraversingVisitor
 {
 private:
-- 
2.45.2