Merge pull request #8 from kgamecarter/master

deepakkumar1984 · web-flow · commit 88926c056c85 · 2019-12-14T09:30:30.000+10:30
Add half type, vload vstore function
diff --git a/examples/AmplifierExamples/Kernels/SimpleKernels.cs b/examples/AmplifierExamples/Kernels/SimpleKernels.cs
@@ -8,14 +8,23 @@ namespace AmplifierExamples.Kernels
     class SimpleKernels : OpenCLFunctions
     {
         [OpenCLKernel]
-        void AddData([Global, Input]float[] a, [Global] float[] b, [Global, Output]float[] r)
+        void AddData([Global, Input]float[] a, [Global]float[] b, [Global, Output]float[] r)
         {
             int i = get_global_id(0);
             b[i] = 0.5f * b[i];
             r[i] = a[i] + b[i];
             a[i] += 2; // result will not copy out
         }
 
+        // Kernel: for the element selected by this work-item's global id, reads a and b
+        // through vload_half, adds the two float values and writes the sum back into b
+        // through vstore_half. Nothing is written to a.
+        // NOTE(review): vload_half/vstore_half are defined outside this file; presumably they
+        // follow the OpenCL built-ins (half <-> float conversion at element offset i) - confirm.
+        [OpenCLKernel]
+        void AddHalf([Global, Input]half[] a, [Global]half[] b)
+        {
+            int i = get_global_id(0);           // index of this work-item in dimension 0
+            float af = vload_half(i, a);        // element i of a, as float
+            float bf = vload_half(i, b);        // element i of b, as float
+            vstore_half(af + bf, i, b);         // b[i] receives the sum; b is both input and output
+        }
+
         [OpenCLKernel]
         void Fill([Global] float[] x, float value)
         {
diff --git a/examples/AmplifierExamples/SimpleKernelEx.cs b/examples/AmplifierExamples/SimpleKernelEx.cs
@@ -1,4 +1,5 @@
 ﻿using Amplifier;
+using Amplifier.OpenCL;
 using AmplifierExamples.Kernels;
 using System;
 using System.Collections.Generic;
@@ -34,9 +35,9 @@ public void Execute()
             }
 
             //Create variable a, b and r
-            Array x = new float[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
-            Array y = new float[9];
-            Array z = new float[9];
+            var x = new float[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+            var y = new float[9];
+            var z = new float[9];
 
             //Get the execution engine
             var exec = compiler.GetExec();
@@ -47,6 +48,12 @@ public void Execute()
             //Execute AddData kernel method
             exec.AddData(x, y, z);
 
+            //Execute AddHalf kernel method
+            var xhalf = Array.ConvertAll(x, v => (half)v);
+            var yhalf = Array.ConvertAll(y, v => (half)v);
+            exec.AddHalf(xhalf, yhalf);
+            z = Array.ConvertAll(yhalf, v => (float)v);
+
             //Execuete SAXPY kernel method
             exec.SAXPY(x, y, 2f);
 
diff --git a/src/Amplifier.Net/OpenCL/DataTypes/ScalarDataTypes.cs b/src/Amplifier.Net/OpenCL/DataTypes/ScalarDataTypes.cs
@@ -35,4 +35,126 @@ public static implicit operator uintptr_t(uint d)
             return new uintptr_t();
         }
     }
+
+    /// <summary>
+    /// 16-bit floating-point value (1 sign bit, 5 exponent bits, 10 fraction bits) kept as
+    /// its raw bit pattern, with explicit conversions to and from <see cref="float"/>.
+    /// </summary>
+    public struct half
+    {
+        // Raw bit pattern: sign in bit 15, biased exponent in bits 14-10, fraction in bits 9-0.
+        private ushort Value;
+
+        /// <summary>Formats the value by first converting it to <see cref="float"/>.</summary>
+        /// <returns>The string form of the equivalent single-precision value.</returns>
+        public override string ToString()
+        {
+            return ((float)this).ToString();
+        }
+
+        /// <summary>Explicit narrowing conversion; delegates to the float constructor.</summary>
+        /// <param name="d">Single-precision value to convert.</param>
+        /// <returns>The half produced by the constructor for <paramref name="d"/>.</returns>
+        public static explicit operator half(float d)
+        {
+            return new half(d);
+        }
+
+        /// <summary>
+        /// Widening conversion from half to float. Infinity, NaN, subnormals (including
+        /// signed zero) and normal values are each handled by a dedicated branch.
+        /// </summary>
+        /// <param name="d">Half value to convert.</param>
+        /// <returns>The single-precision value with the same numeric meaning; any NaN maps to <see cref="float.NaN"/>.</returns>
+        public unsafe static explicit operator float(half d)
+        {
+            bool isPos          = (d.Value & Float16Params.SignMask) == 0;
+            uint biasedExponent = (d.Value & Float16Params.ExpMask) >> Float16Params.ExpOffset;
+            uint frac           = (d.Value & Float16Params.FracMask);
+            bool isInf          = biasedExponent == Float16Params.BiasedExpMax && (frac == 0);
+
+            if (isInf)
+            {
+                return isPos ? float.PositiveInfinity : float.NegativeInfinity;
+            }
+
+            // All-ones exponent with a non-zero fraction is NaN; the payload and sign are not kept.
+            bool isNan = biasedExponent == Float16Params.BiasedExpMax && (frac != 0);
+            if (isNan)
+            {
+                return float.NaN;
+            }
+
+            // Exponent field 0 covers subnormals and zero: value = frac * 2^-24, with the sign applied.
+            bool isSubnormal = biasedExponent == 0;
+            if (isSubnormal)
+            {
+                return frac * Float16Params.SmallestSubnormalAsFloat * (isPos ? 1.0f : -1.0f);
+            }
+
+            // Normal value: re-bias the exponent and left-align the 10 fraction bits in the 23-bit mantissa.
+            int unbiasedExp         = (int)biasedExponent - Float16Params.ExpBias;
+            uint biasedF32Exponent  = (uint)(unbiasedExp + Float32Params.ExpBias);
+
+            uint bits;
+
+            bits = (isPos ? 0u : 1u << Float32Params.SignOffset)
+                    | (biasedF32Exponent << Float32Params.ExpOffset)
+                    | (frac << (Float32Params.ExpOffset - Float16Params.ExpOffset));
+
+            // Reinterpret the assembled bit pattern as a float.
+            return *(float*)&bits;
+        }
+
+        /// <summary>
+        /// Builds a half from a float. NaN maps to a positive NaN pattern, infinities keep
+        /// their sign, finite magnitudes above the largest finite half are clamped to it
+        /// (+/-65504), and magnitudes below the smallest normal half become subnormals or zero.
+        /// Low-order mantissa bits that do not fit are discarded (no rounding).
+        /// </summary>
+        /// <param name="d">Single-precision value to convert.</param>
+        public unsafe half(float d)
+        {
+            // Reinterpret the float as its raw 32-bit pattern.
+            uint bits = *(uint*)&d;
+
+            uint fAbsBits = bits & Float32Params.AbsValueMask;
+            bool isNeg    = (bits & Float32Params.SignBitMask) != 0;
+            // Move the float sign bit (bit 31) down to the half sign position (bit 15).
+            uint sign     = (bits & Float32Params.SignBitMask) >> (Float16Params.NumFracBits + Float16Params.NumExpBits + 1);
+            uint half;
+
+            if (float.IsNaN(d))
+            {
+                half = (Float16Params.ExpMask | Float16Params.FracMask);
+            }
+            else if (float.IsInfinity(d))
+            {
+                half = isNeg ? Float16Params.SignMask | Float16Params.ExpMask : Float16Params.ExpMask;
+            }
+            else if (fAbsBits > Float16Params.MaxNormal)
+            {
+                // Clamp to max float 16 value: the largest finite exponent field is
+                // BiasedExpMax - 1. Using the all-ones exponent here together with a full
+                // fraction would encode NaN instead of 65504.
+                half = sign | ((Float16Params.BiasedExpMax - 1u) << Float16Params.NumFracBits) | Float16Params.FracMask;
+            }
+            else if (fAbsBits < Float16Params.MinNormal)
+            {
+                // Below 2^-14: make the implicit leading 1 explicit, then shift the mantissa
+                // right by the exponent deficit to obtain the subnormal fraction.
+                uint fracBits    = (fAbsBits & Float32Params.MantissaMask) | (1 << Float32Params.NumMantissaBits);
+                int nshift       = Float16Params.Emin + Float32Params.Emax - (int)(fAbsBits >> Float32Params.NumMantissaBits);
+                // Guard needed because C# masks 32-bit shift counts to their low 5 bits.
+                uint shiftedBits = nshift < 24 ? fracBits >> nshift : 0;
+                half             = sign | (shiftedBits >> Float16Params.FracBitsDiff);
+            }
+            else
+            {
+                // Normal range: re-bias the exponent (wrapping add) and drop the 13 extra mantissa bits.
+                half = sign | ((fAbsBits + Float16Params.BiasDiff) >> Float16Params.FracBitsDiff);
+            }
+            this.Value = (ushort)half;
+        }
+
+        // Layout constants for the 16-bit format, plus float32-encoded range limits used while narrowing.
+        private static class Float16Params
+        {
+            public const uint BitSize = 16;                                                   // total number of bits in the representation
+            public const int NumFracBits = 10;                                                // number of fractional (mantissa) bits
+            public const int NumExpBits = 5;                                                  // number of (biased) exponent bits
+            public const uint SignBit = 15;                                                   // position of the sign bit
+            public const uint SignMask = 1 << 15;                                             // mask to extract sign bit
+            public const uint FracMask = (1 << 10) - 1;                                       // mask to extract the fractional (mantissa) bits
+            public const uint ExpMask = ((1 << 5) - 1) << 10;                                 // mask to extract the exponent bits
+            public const uint Emax = (1 << (5 - 1)) - 1;                                      // max value for the exponent
+            public const int Emin = -((1 << (5 - 1)) - 1) + 1;                                // min value for the exponent
+            public const uint MaxNormal = ((((1 << (5 - 1)) - 1) + 127) << 23) | 0x7FE000;    // float32 bit pattern of the largest finite 16 bit float (65504)
+            public const uint MinNormal = ((-((1 << (5 - 1)) - 1) + 1) + 127) << 23;          // float32 bit pattern of the smallest normal 16 bit float (2^-14)
+            public const uint BiasDiff = unchecked((uint)(((1 << (5 - 1)) - 1) - 127) << 23); // difference in bias between the float16 and float32 exponent
+            public const int FracBitsDiff = 23 - 10;                                          // difference in number of fractional bits between float16/float32
+
+            public const int ExpBias = 15;                                // exponent bias of the 16 bit format
+            public const int ExpOffset = 10;                              // bit position where the exponent field starts
+            public const ushort BiasedExpMax = (1 << 5) - 1;              // all-ones exponent field (infinity / NaN)
+            public const float SmallestSubnormalAsFloat = 5.96046448e-8f; // 2^-24, the value of one subnormal fraction step
+        }
+
+        // Layout constants for the 32-bit single-precision format.
+        private static class Float32Params
+        {
+            public const uint AbsValueMask = 0x7FFFFFFF; // ANDing with this value gives the abs value
+            public const uint SignBitMask = 0x80000000;  // ANDing with this value gives the sign
+            public const int Emax = 127;                 // max value for the exponent
+            public const int NumMantissaBits = 23;       // 23 bit mantissa on single precision floats
+            public const uint MantissaMask = 0x007FFFFF; // 23 bit mantissa on single precision floats
+
+            public const int SignOffset = 31;            // bit position of the sign bit
+            public const int ExpBias = 127;              // exponent bias of the 32 bit format
+            public const int ExpOffset = 23;             // bit position where the exponent field starts
+        }
+    }
 }
diff --git a/src/Amplifier.Net/OpenCL/Functions/MiscVector.cs b/src/Amplifier.Net/OpenCL/Functions/MiscVector.cs

Original file line number	Diff line number	Diff line change
`@@ -8,14 +8,23 @@ namespace AmplifierExamples.Kernels`
`8`	`8`	`class SimpleKernels : OpenCLFunctions`
`9`	`9`	`{`
`10`	`10`	`[OpenCLKernel]`
`11`		`- void AddData([Global, Input]float[] a, [Global] float[] b, [Global, Output]float[] r)`
	`11`	`+ void AddData([Global, Input]float[] a, [Global]float[] b, [Global, Output]float[] r)`
`12`	`12`	`{`
`13`	`13`	`int i = get_global_id(0);`
`14`	`14`	`b[i] = 0.5f * b[i];`
`15`	`15`	`r[i] = a[i] + b[i];`
`16`	`16`	`a[i] += 2; // result will not copy out`
`17`	`17`	`}`
`18`	`18`
	`19`	`+ [OpenCLKernel]`
	`20`	`+ void AddHalf([Global, Input]half[] a, [Global]half[] b)`
	`21`	`+ {`
	`22`	`+ int i = get_global_id(0);`
	`23`	`+ float af = vload_half(i, a);`
	`24`	`+ float bf = vload_half(i, b);`
	`25`	`+ vstore_half(af + bf, i, b);`
	`26`	`+ }`
	`27`	`+`
`19`	`28`	`[OpenCLKernel]`
`20`	`29`	`void Fill([Global] float[] x, float value)`
`21`	`30`	`{`