Author: Marius Cornea, Intel Corporation
We are providing two files, RECIP14.c and RECIP28EXP2.c, containing reference implementations for the scalar versions of 10 approximation instructions introduced in the "Intel® Architecture Instruction Set Extensions Programming Reference" document (see https://software.intel.com/en-us/isa-extensions). The files can be downloaded from the links provided above.
RECIP14.c contains emulation routines for the underlying algorithms of:
- VRCP14PD - Compute Approximate Reciprocals of Packed Float64 Values with relative error of less than 2^-14
- VRCP14SD - Compute Approximate Reciprocal of Scalar Float64 Value with relative error of less than 2^-14
- VRCP14PS - Compute Approximate Reciprocals of Packed Float32 Values with relative error of less than 2^-14
- VRCP14SS - Compute Approximate Reciprocal of Scalar Float32 Value with relative error of less than 2^-14
- VRSQRT14PD - Compute Approximate Reciprocals of Square Roots of Packed Float64 Values with relative error of less than 2^-14
- VRSQRT14SD - Compute Approximate Reciprocal of Square Root of Scalar Float64 Value with relative error of less than 2^-14
- VRSQRT14PS - Compute Approximate Reciprocals of Square Roots of Packed Float32 Values with relative error of less than 2^-14
- VRSQRT14SS - Compute Approximate Reciprocal of Square Root of Scalar Float32 Value with relative error of less than 2^-14
The corresponding emulation routines (only scalar versions) are:
- RCP14S - reciprocal approximation for Float32
- RCP14D - reciprocal approximation for Float64
- RSQRT14S - reciprocal square root approximation for Float32
- RSQRT14D - reciprocal square root approximation for Float64
RECIP28EXP2.c contains emulation routines for the underlying algorithms of:
- VRCP28PD - Approximation to the Reciprocal of Packed Double Precision Floating-Point Values with Less Than 2^-28 Relative Error
- VRCP28SD - Approximation to the Reciprocal of Scalar Double Precision Floating-Point Value with Less Than 2^-28 Relative Error
- VRCP28PS - Approximation to the Reciprocal of Packed Single Precision Floating-Point Values with Less Than 2^-28 Relative Error
- VRCP28SS - Approximation to the Reciprocal of Scalar Single Precision Floating-Point Value with Less Than 2^-28 Relative Error
- VRSQRT28PD - Approximation to the Reciprocal Square Root of Packed Double Precision Floating-Point Values with Less Than 2^-28 Relative Error
- VRSQRT28SD - Approximation to the Reciprocal Square Root of Scalar Double Precision Floating-Point Value with Less Than 2^-28 Relative Error
- VRSQRT28PS - Approximation to the Reciprocal Square Root of Packed Single Precision Floating-Point Values with Less Than 2^-28 Relative Error
- VRSQRT28SS - Approximation to the Reciprocal Square Root of Scalar Single Precision Floating-Point Value with Less Than 2^-28 Relative Error
- VEXP2PD - Approximation to the Exponential 2^x of Packed Double Precision Floating-Point Values with Less Than 2^-23 Relative Error
- VEXP2PS - Approximation to the Exponential 2^x of Packed Single Precision Floating-Point Values with Less Than 2^-23 Relative Error
The corresponding emulation routines (only scalar versions) are:
- RCP28S - reciprocal approximation for Float32
- RCP28D - reciprocal approximation for Float64
- RSQRT28S - reciprocal square root approximation for Float32
- RSQRT28D - reciprocal square root approximation for Float64
- EXP2S - Base-2 exponential approximation for Float32
- EXP2D - Base-2 exponential approximation for Float64
The reference functions have to be compiled with the DAZ and FTZ modes turned off (e.g. with the Intel compiler for Linux, using the -no-ftz option), and have to be run with the rounding mode set to round-to-nearest and with floating-point exceptions masked.
Usage example for RCP14S and RCP14D
The following example may be compiled with any of the following (or other, equivalent) commands:
icc -no-ftz -Wall -Werror main.c RECIP14.c
gcc -m32 -Wall -Werror main.c RECIP14.c -lm
gcc -Wall -Werror main.c RECIP14.c -lm
where main.c is shown below:
#include <stdio.h>

/* Overlays an unsigned int on a float so the same value can be printed
   both as a number (.f) and as a hexadecimal bit pattern (.u). */
typedef union {
  unsigned int u;
  float f;
} type32;

/* Overlays an unsigned long long on a double, for the same purpose. */
typedef union {
  unsigned long long u;
  double f;
} type64;

/* Emulation routines provided by RECIP14.c: the result is stored through
   dst; the interpretation of mxcsr is defined by that file. */
extern void RCP14S (unsigned int mxcsr, type32 *dst, type32 src);
extern void RCP14D (unsigned int mxcsr, type64 *dst, type64 src);

/* Calls RCP14S and RCP14D on the input 3.0 and prints each operand and
   result in decimal and in hexadecimal. Always returns 0. */
int main (void)
{
  type32 dst32, src32;
  type64 dst64, src64;
  unsigned int mxcsr = 0x00000000;

  printf ("MXCSR = %8.8x\n", mxcsr);

  /* Single-precision reciprocal approximation. */
  src32.f = 3.0;
  RCP14S (mxcsr, &dst32, src32);
  printf ("RCP14S(%f = %8.8x HEX) = (%f = %8.8x HEX)\n",
          src32.f, src32.u, dst32.f, dst32.u);

  /* Double-precision reciprocal approximation. */
  src64.f = 3.0;
  RCP14D (mxcsr, &dst64, src64);
  printf ("RCP14D(%f = %16.16llx HEX) = (%f = %16.16llx HEX)\n",
          src64.f, src64.u, dst64.f, dst64.u);

  return 0;
}
Usage example for RSQRT14S and RSQRT14D
The following example may be compiled with any of the following (or other, equivalent) commands:
icc -no-ftz -Wall -Werror main.c RECIP14.c
gcc -m32 -Wall -Werror main.c RECIP14.c -lm
gcc -Wall -Werror main.c RECIP14.c -lm
where main.c is shown below:
#include <stdio.h>

/* Overlays an unsigned int on a float so the same value can be printed
   both as a number (.f) and as a hexadecimal bit pattern (.u). */
typedef union {
  unsigned int u;
  float f;
} type32;

/* Overlays an unsigned long long on a double, for the same purpose. */
typedef union {
  unsigned long long u;
  double f;
} type64;

/* Emulation routines provided by RECIP14.c: the result is stored through
   dst; the interpretation of mxcsr is defined by that file. */
extern void RSQRT14S (unsigned int mxcsr, type32 *dst, type32 src);
extern void RSQRT14D (unsigned int mxcsr, type64 *dst, type64 src);

/* Calls RSQRT14S and RSQRT14D on the input 2.0 and prints each operand and
   result in decimal and in hexadecimal. Always returns 0. */
int main (void)
{
  type32 dst32, src32;
  type64 dst64, src64;
  unsigned int mxcsr = 0x00000000;

  printf ("MXCSR = %8.8x\n", mxcsr);

  /* Single-precision reciprocal square root approximation. */
  src32.f = 2.0;
  RSQRT14S (mxcsr, &dst32, src32);
  printf ("RSQRT14S(%f = %8.8x HEX) = (%f = %8.8x HEX)\n",
          src32.f, src32.u, dst32.f, dst32.u);

  /* Double-precision reciprocal square root approximation. */
  src64.f = 2.0;
  RSQRT14D (mxcsr, &dst64, src64);
  printf ("RSQRT14D(%f = %16.16llx HEX) = (%f = %16.16llx HEX)\n",
          src64.f, src64.u, dst64.f, dst64.u);

  return 0;
}
Usage example for RCP28S and RCP28D
The following example may be compiled with any of the following (or other, equivalent) commands:
icc -no-ftz -Wall -Werror main.c RECIP28EXP2.c
gcc -m32 -Wall -Werror main.c RECIP28EXP2.c -lm
gcc -Wall -Werror main.c RECIP28EXP2.c -lm
where main.c is shown below:
#include <stdio.h>

/* Overlays an unsigned int on a float so the same value can be printed
   both as a number (.f) and as a hexadecimal bit pattern (.u). */
typedef union {
  unsigned int u;
  float f;
} type32;

/* Overlays an unsigned long long on a double, for the same purpose. */
typedef union {
  unsigned long long u;
  double f;
} type64;

/* Emulation routines provided by RECIP28EXP2.c: the result is stored
   through dst and a flags word is returned. */
extern unsigned int RCP28S (type32 *dst, type32 src);
extern unsigned int RCP28D (type64 *dst, type64 src);

/* Calls RCP28S and RCP28D on the input 3.0 and prints each operand, the
   result, and the returned flags. Always returns 0. */
int main (void)
{
  type32 dst32, src32;
  type64 dst64, src64;
  /* NOTE(review): "PUOZDI" presumably names the flag bits from most to
     least significant; confirm against RECIP28EXP2.c. */
  unsigned int flags = 0x00000000; // PUOZDI

  printf ("FLAGS = %2.2x\n", flags);

  /* Single-precision reciprocal approximation. */
  src32.f = 3.0;
  flags = RCP28S (&dst32, src32);
  printf ("RCP28S(%f = %8.8x HEX) = (%f = %8.8x HEX) flags = %2.2x\n",
          src32.f, src32.u, dst32.f, dst32.u, flags);

  /* Double-precision reciprocal approximation. */
  src64.f = 3.0;
  flags = RCP28D (&dst64, src64);
  printf ("RCP28D(%f = %16.16llx HEX) = (%f = %16.16llx HEX) flags = %2.2x\n",
          src64.f, src64.u, dst64.f, dst64.u, flags);

  return 0;
}
Usage example for RSQRT28S and RSQRT28D
The following example may be compiled with any of the following (or other, equivalent) commands:
icc -no-ftz -Wall -Werror main.c RECIP28EXP2.c
gcc -m32 -Wall -Werror main.c RECIP28EXP2.c -lm
gcc -Wall -Werror main.c RECIP28EXP2.c -lm
where main.c is shown below:
#include <stdio.h>

/* Overlays an unsigned int on a float so the same value can be printed
   both as a number (.f) and as a hexadecimal bit pattern (.u). */
typedef union {
  unsigned int u;
  float f;
} type32;

/* Overlays an unsigned long long on a double, for the same purpose. */
typedef union {
  unsigned long long u;
  double f;
} type64;

/* Emulation routines provided by RECIP28EXP2.c: the result is stored
   through dst and a flags word is returned. */
extern unsigned int RSQRT28S (type32 *dst, type32 src);
extern unsigned int RSQRT28D (type64 *dst, type64 src);

/* Calls RSQRT28S and RSQRT28D on the input 2.0 and prints each operand,
   the result, and the returned flags. Always returns 0. */
int main (void)
{
  type32 dst32, src32;
  type64 dst64, src64;
  /* NOTE(review): "PUOZDI" presumably names the flag bits from most to
     least significant; confirm against RECIP28EXP2.c. */
  unsigned int flags = 0x00000000; // PUOZDI

  printf ("FLAGS = %2.2x\n", flags);

  /* Single-precision reciprocal square root approximation. */
  src32.f = 2.0;
  flags = RSQRT28S (&dst32, src32);
  printf ("RSQRT28S(%f = %8.8x HEX) = (%f = %8.8x HEX) flags = %2.2x\n",
          src32.f, src32.u, dst32.f, dst32.u, flags);

  /* Double-precision reciprocal square root approximation. */
  src64.f = 2.0;
  flags = RSQRT28D (&dst64, src64);
  printf ("RSQRT28D(%f = %16.16llx HEX) = (%f = %16.16llx HEX) flags = %2.2x\n",
          src64.f, src64.u, dst64.f, dst64.u, flags);

  return 0;
}
Usage example for EXP2S and EXP2D
The following example may be compiled with any of the following (or other, equivalent) commands:
icc -no-ftz -Wall -Werror main.c RECIP28EXP2.c
gcc -m32 -Wall -Werror main.c RECIP28EXP2.c -lm
gcc -Wall -Werror main.c RECIP28EXP2.c -lm
where main.c is shown below:
#include <stdio.h>

/* Overlays an unsigned int on a float so the same value can be printed
   both as a number (.f) and as a hexadecimal bit pattern (.u). */
typedef union {
  unsigned int u;
  float f;
} type32;

/* Overlays an unsigned long long on a double, for the same purpose. */
typedef union {
  unsigned long long u;
  double f;
} type64;

/* Emulation routines provided by RECIP28EXP2.c: the result is stored
   through dst and a flags word is returned. */
extern unsigned int EXP2S (type32 *dst, type32 src);
extern unsigned int EXP2D (type64 *dst, type64 src);

/* Calls EXP2S and EXP2D on the input 1.5 and prints each operand, the
   result, and the returned flags. Always returns 0. */
int main (void)
{
  type32 dst32, src32;
  type64 dst64, src64;
  /* NOTE(review): "PUOZDI" presumably names the flag bits from most to
     least significant; confirm against RECIP28EXP2.c. */
  unsigned int flags = 0x00000000; // PUOZDI

  printf ("FLAGS = %2.2x\n", flags);

  /* Single-precision base-2 exponential approximation. */
  src32.f = 1.5;
  flags = EXP2S (&dst32, src32);
  printf ("EXP2S(%f = %8.8x HEX) = (%f = %8.8x HEX) flags = %2.2x\n",
          src32.f, src32.u, dst32.f, dst32.u, flags);

  /* Double-precision base-2 exponential approximation. */
  src64.f = 1.5;
  flags = EXP2D (&dst64, src64);
  printf ("EXP2D(%f = %16.16llx HEX) = (%f = %16.16llx HEX) flags = %2.2x\n",
          src64.f, src64.u, dst64.f, dst64.u, flags);

  return 0;
}