Fixed translation of matrix+scalar, matrix-scalar, matrix/scalar operations on Metal.

aras-p · aras-p · commit 14946bd06405 · 2015-06-30T08:48:25.000+03:00
diff --git a/Changelog.md b/Changelog.md
@@ -8,6 +8,7 @@ Fixes:
 
 * Fixed some cases of different precision matrix assignments being miscompiled on Metal.
 * Fixed yet more issues with translation of weird loops.
+* Fixed translation of matrix+scalar, matrix-scalar, matrix/scalar operations on Metal.
 
 
 2015 05
diff --git a/src/glsl/ir_print_metal_visitor.cpp b/src/glsl/ir_print_metal_visitor.cpp
@@ -101,6 +101,7 @@ struct metal_print_context
 	, paramsStr(ralloc_strdup(buffer, ""))
 	, writingParams(false)
 	, matrixCastsDone(false)
+	, matrixConstructorsDone(false)
 	, shadowSamplerDone(false)
 	, textureCounter(0)
 	, attributeCounter(0)
@@ -118,6 +119,7 @@ struct metal_print_context
 	string_buffer paramsStr;
 	bool writingParams;
 	bool matrixCastsDone;
+	bool matrixConstructorsDone;
 	bool shadowSamplerDone;
 	int textureCounter;
 	int attributeCounter;
@@ -940,21 +942,57 @@ void ir_print_metal_visitor::visit(ir_expression *ir)
 	bool op0cast = ir->operands[0] && is_different_precision(arg_prec, ir->operands[0]->get_precision());
 	bool op1cast = ir->operands[1] && is_different_precision(arg_prec, ir->operands[1]->get_precision());
 	bool op2cast = ir->operands[2] && is_different_precision(arg_prec, ir->operands[2]->get_precision());
+	const bool op0matrix = ir->operands[0] && ir->operands[0]->type->is_matrix();
+	const bool op1matrix = ir->operands[1] && ir->operands[1]->type->is_matrix();
+	bool op0castTo1 = false;
+	bool op1castTo0 = false;
 
 	// Metal does not support matrix precision casts, so when any of the arguments is a matrix,
 	// take precision from it. This isn't fully robust now, but oh well.
-	if (op0cast && ir->operands[0]->type->is_matrix() && !op1cast)
+	if (op0cast && op0matrix && !op1cast)
 	{
 		op0cast = false;
 		arg_prec = ir->operands[0]->get_precision();
 		op1cast = ir->operands[1] && is_different_precision(arg_prec, ir->operands[1]->get_precision());
 	}
-	if (op1cast && ir->operands[1]->type->is_matrix() && !op0cast)
+	if (op1cast && op1matrix && !op0cast)
 	{
 		op1cast = false;
 		arg_prec = ir->operands[1]->get_precision();
 		op0cast = ir->operands[0] && is_different_precision(arg_prec, ir->operands[0]->get_precision());
 	}
+
+	// Metal does not have matrix+scalar and matrix-scalar operations; we need to create matrices
+	// out of the non-matrix argument.
+	if (ir->operation == ir_binop_add || ir->operation == ir_binop_sub)
+	{
+		if (op0matrix && !op1matrix)
+		{
+			op1cast = true;
+			op1castTo0 = true;
+		}
+		if (op1matrix && !op0matrix)
+		{
+			op0cast = true;
+			op0castTo1 = true;
+		}
+		if (op1castTo0 || op0castTo1)
+		{
+			if (!ctx.matrixConstructorsDone)
+			{
+				ctx.prefixStr.asprintf_append(
+											  "inline float4x4 _xlinit_float4x4(float v) { return float4x4(float4(v), float4(v), float4(v), float4(v)); }\n"
+											  "inline float3x3 _xlinit_float3x3(float v) { return float3x3(float3(v), float3(v), float3(v)); }\n"
+											  "inline float2x2 _xlinit_float2x2(float v) { return float2x2(float2(v), float2(v)); }\n"
+											  "inline half4x4 _xlinit_half4x4(half v) { return half4x4(half4(v), half4(v), half4(v), half4(v)); }\n"
+											  "inline half3x3 _xlinit_half3x3(half v) { return half3x3(half3(v), half3(v), half3(v)); }\n"
+											  "inline half2x2 _xlinit_half2x2(half v) { return half2x2(half2(v), half2(v)); }\n"
+											  );
+				ctx.matrixConstructorsDone = true;
+			}
+		}
+	}
+	
 	
 	const bool rescast = is_different_precision(arg_prec, res_prec) && !ir->type->is_boolean();
 	if (rescast)
@@ -1000,6 +1038,7 @@ void ir_print_metal_visitor::visit(ir_expression *ir)
 	}
 	else if (is_binop_func_like(ir->operation, ir->type))
 	{
+		// binary operation that must be printed like a function, "foo(a,b)"
 		if (ir->operation == ir_binop_mod)
 		{
 			buffer.asprintf_append ("(");
@@ -1025,23 +1064,58 @@ void ir_print_metal_visitor::visit(ir_expression *ir)
 		if (ir->operation == ir_binop_mod)
             buffer.asprintf_append ("))");
 	}
+	else if (ir->get_num_operands() == 2 && ir->operation == ir_binop_div && op0matrix && !op1matrix)
+	{
+		// "matrix/scalar" - Metal does not have it, so print multiply by inverse instead
+		buffer.asprintf_append ("(");
+		ir->operands[0]->accept(this);
+		const bool halfCast = (arg_prec == glsl_precision_medium || arg_prec == glsl_precision_low);
+		buffer.asprintf_append (halfCast ? " * (1.0h/half(" : " * (1.0/(");
+		ir->operands[1]->accept(this);
+		buffer.asprintf_append (")))");
+	}
 	else if (ir->get_num_operands() == 2)
 	{
+		// regular binary operator
 		buffer.asprintf_append ("(");
 		if (ir->operands[0])
 		{
-			if (op0cast)
+			if (op0castTo1)
+			{
+				buffer.asprintf_append ("_xlinit_");
+				print_type_precision(buffer, ir->operands[1]->type, arg_prec, false);
+				buffer.asprintf_append ("(");
+			}
+			else if (op0cast)
+			{
 				print_cast (buffer, arg_prec, ir->operands[0]);
+			}
 			ir->operands[0]->accept(this);
+			if (op0castTo1)
+			{
+				buffer.asprintf_append (")");
+			}
 		}
 
 		buffer.asprintf_append (" %s ", operator_glsl_strs[ir->operation]);
 
 		if (ir->operands[1])
 		{
-			if (op1cast)
+			if (op1castTo0)
+			{
+				buffer.asprintf_append ("_xlinit_");
+				print_type_precision(buffer, ir->operands[0]->type, arg_prec, false);
+				buffer.asprintf_append ("(");
+			}
+			else if (op1cast)
+			{
 				print_cast (buffer, arg_prec, ir->operands[1]);
+			}
 			ir->operands[1]->accept(this);
+			if (op1castTo0)
+			{
+				buffer.asprintf_append (")");
+			}
 		}
 		buffer.asprintf_append (")");
 	}
diff --git a/tests/fragment/matrix-ops-outES3Metal.txt b/tests/fragment/matrix-ops-outES3Metal.txt
@@ -1,5 +1,11 @@
 #include <metal_stdlib>
 using namespace metal;
+inline float4x4 _xlinit_float4x4(float v) { return float4x4(float4(v), float4(v), float4(v), float4(v)); }
+inline float3x3 _xlinit_float3x3(float v) { return float3x3(float3(v), float3(v), float3(v)); }
+inline float2x2 _xlinit_float2x2(float v) { return float2x2(float2(v), float2(v)); }
+inline half4x4 _xlinit_half4x4(half v) { return half4x4(half4(v), half4(v), half4(v), half4(v)); }
+inline half3x3 _xlinit_half3x3(half v) { return half3x3(half3(v), half3(v), half3(v)); }
+inline half2x2 _xlinit_half2x2(half v) { return half2x2(half2(v), half2(v)); }
 inline float4x4 _xlcast_float4x4(half4x4 v) { return float4x4(float4(v[0]), float4(v[1]), float4(v[2]), float4(v[3])); }
 inline float3x3 _xlcast_float3x3(half3x3 v) { return float3x3(float3(v[0]), float3(v[1]), float3(v[2])); }
 inline float2x2 _xlcast_float2x2(half2x2 v) { return float2x2(float2(v[0]), float2(v[1])); }
@@ -33,21 +39,21 @@ fragment xlatMtlShaderOutput xlatMtlMain (xlatMtlShaderInput _mtl_i [[stage_in]]
   tmpvar_6[1].x = -(tmpvar_3);
   tmpvar_6[1].y = tmpvar_5;
   rotationMatrix_2 = (tmpvar_6 * 2.0);
-  rotationMatrix_2 = (rotationMatrix_2 - 1.0);
-  rotationMatrix_2 = (tmpvar_3 - rotationMatrix_2);
-  rotationMatrix_2 = (tmpvar_5 + rotationMatrix_2);
-  rotationMatrix_2 = (rotationMatrix_2 / tmpvar_3);
+  rotationMatrix_2 = (rotationMatrix_2 - _xlinit_float2x2(1.0));
+  rotationMatrix_2 = (_xlinit_float2x2(tmpvar_3) - rotationMatrix_2);
+  rotationMatrix_2 = (_xlinit_float2x2(tmpvar_5) + rotationMatrix_2);
+  rotationMatrix_2 = (rotationMatrix_2 * (1.0/(tmpvar_3)));
   float2x2 tmpvar_7;
   tmpvar_7[0].x = tmpvar_5;
   tmpvar_7[0].y = tmpvar_3;
   tmpvar_7[1].x = -(tmpvar_3);
   tmpvar_7[1].y = tmpvar_5;
   halfMatrix_1 = _xlcast_half2x2(tmpvar_7);
   halfMatrix_1 = (halfMatrix_1 * (half)2.0);
-  halfMatrix_1 = (halfMatrix_1 - (half)1.0);
-  halfMatrix_1 = _xlcast_half2x2(((float2x2)((half)tmpvar_3 - halfMatrix_1)));
-  halfMatrix_1 = _xlcast_half2x2(((float2x2)((half)tmpvar_5 + halfMatrix_1)));
-  halfMatrix_1 = _xlcast_half2x2(((float2x2)(halfMatrix_1 / (half)tmpvar_3)));
+  halfMatrix_1 = (halfMatrix_1 - _xlinit_half2x2(1.0));
+  halfMatrix_1 = _xlcast_half2x2(((float2x2)(_xlinit_half2x2(tmpvar_3) - halfMatrix_1)));
+  halfMatrix_1 = _xlcast_half2x2(((float2x2)(_xlinit_half2x2(tmpvar_5) + halfMatrix_1)));
+  halfMatrix_1 = _xlcast_half2x2(((float2x2)(halfMatrix_1 * (1.0h/half(tmpvar_3)))));
   float4 tmpvar_8;
   tmpvar_8.xy = (rotationMatrix_2 * _mtl_i.uv);
   tmpvar_8.zw = ((float2)(halfMatrix_1 * (half2)_mtl_i.uv));