/*
 * Coefficients of the odd polynomial evaluated by sin7():
 *
 *     x * (a0 + a1*x^2 + a2*x^4 + ... + a7*x^14)
 *
 * Numerically a_k = (-1)^k / (2k+1)!, i.e. the Taylor coefficients of
 * sin(x) about 0 up to the x^15 term.  a0 (== 1) is implicit in sin7(),
 * which adds the bare x term directly; the constant is kept for completeness.
 */
static double const a0 = +1.0;
static double const a1 = -1.666666666666666666666666666666666667e-1;
static double const a2 = +8.333333333333333333333333333333333333e-3;
static double const a3 = -1.984126984126984126984126984126984127e-4;
static double const a4 = +2.755731922398589065255731922398589065e-6;
static double const a5 = -2.505210838544171877505210838544171878e-8;
static double const a6 = +1.605904383682161459939237717015494793e-10;
static double const a7 = -7.647163731819816475901131985788070444e-13;

/*
 * sin7 - evaluate the degree-15 polynomial x + a1*x^3 + ... + a7*x^15.
 *
 * The polynomial is split into two independent halves so the two Horner
 * chains do not depend on each other:
 *
 *     A = x + x^3 * (a1 + a2*x^2 + a3*x^4)         (terms up to x^7)
 *     B =           a4 + a5*x^2 + a6*x^4 + a7*x^6  (scaled by x^9)
 *
 * and the result is A + x^9 * B.
 *
 * No argument reduction is done here; the function simply evaluates the
 * polynomial at whatever x it is given.
 */
double sin7(double x)
{
    double x2 = x * x;
    double x3 = x2 * x;
    /* Disabled experiment (presumably a register/scheduling barrier --
     * confirm before re-enabling): __asm__("" : "+f" (x3)); */
    double x4 = x2 * x2;
    double x8 = x4 * x4;
    double x9 = x8 * x;
    /* Disabled experiment, as above: __asm__("" : "+f" (x9)); */

    /* Low half: x - x^3/3! + x^5/5! - x^7/7!. */
    double A = x + x3 * (a1 + x2 * (a2 + x2 * a3));
    /* High half, still to be multiplied by x^9. */
    double B = a4 + x2 * (a5 + x2 * (a6 + x2 * a7));

    return A + x9 * B;
}

/*
 * SNC disassembly (47.73ns)
 * -------------------------
 * 00016530 3C600001 lis   r3,0x1
 * 00016534 FC410072 fmul  f2,f1,f1
 * 00016538 386364F8 addi  r3,r3,0x64F8       01 (00016530) REG
 * 0001653C C8630000 lfd   f3,0x0(r3)         04 (00016538) REG LSU
 * 00016540 C8830008 lfd   f4,0x8(r3)
 * 00016544 C8A30018 lfd   f5,0x18(r3)        PIPE
 * 00016548 C8C30020 lfd   f6,0x20(r3)
 * 0001654C FCE200B2 fmul  f7,f2,f2           PIPE
 * 00016550 C9030010 lfd   f8,0x10(r3)
 * 00016554 FC6220FA fmadd f3,f2,f3,f4        PIPE
 * 00016558 C9230028 lfd   f9,0x28(r3)
 * 0001655C FCA2317A fmadd f5,f2,f5,f6        PIPE
 * 00016560 C8C30030 lfd   f6,0x30(r3)
 * 00016564 FD4100B2 fmul  f10,f1,f2          PIPE
 * 00016568 FC8701F2 fmul  f4,f7,f7           03 (0001654C) REG
 * 0001656C FC6240FA fmadd f3,f2,f3,f8        01 (00016554) REG PIPE
 * 00016570 FCA2497A fmadd f5,f2,f5,f9        01 (0001655C) REG
 * 00016574 FC810132 fmul  f4,f1,f4           05 (00016568) REG PIPE
 * 00016578 FC2A08FA fmadd f1,f10,f3,f1       01 (0001656C) REG
 * 0001657C FC42317A fmadd f2,f2,f5,f6        01 (00016570) REG PIPE
 * 00016580 FC22093A fmadd f1,f2,f4,f1        09 (0001657C) REG
 * 00016584 4E800020 blr                      08
 *
 * GCC disassembly (42.14ns)
 * -------------------------
 * 00010B14 FDA00890 fmr   f13,f1             PIPE
 * 00010B18 C80280C0 lfd   f0,-0x7F40(r2)
 * 00010B1C FC210072 fmul  f1,f1,f1           PIPE
 * 00010B20 C98280D0 lfd   f12,-0x7F30(r2)
 * 00010B24 C94280B8 lfd   f10,-0x7F48(r2)    PIPE
 * 00010B28 C9628080 lfd   f11,-0x7F80(r2)
 * 00010B2C C92280C8 lfd   f9,-0x7F38(r2)     PIPE
 * 00010B30 C90280D8 lfd   f8,-0x7F28(r2)
 * 00010B34 C8E280E0 lfd   f7,-0x7F20(r2)     PIPE
 * 00010B38 FD4102BA fmadd f10,f1,f10,f0      03 (00010B1C) REG
 * 00010B3C FD6162FA fmadd f11,f1,f11,f12     PIPE
 * 00010B40 FC010072 fmul  f0,f1,f1
 * 00010B44 FD8D0072 fmul  f12,f13,f1         PIPE
 * 00010B48 FD414AB8 fmsub f10,f1,f10,f9      06 (00010B38) REG
 * 00010B4C FD6142F8 fmsub f11,f1,f11,f8      PIPE
 * 00010B50 FC000032 fmul  f0,f0,f0
 * 00010B54 FD8C6ABA fmadd f12,f12,f10,f13    07 (00010B48) REG PIPE
 * 00010B58 FC213AFA fmadd f1,f1,f11,f7
 * 00010B5C FDAD0032 fmul  f13,f13,f0         PIPE
 * 00010B60 FC2D607A fmadd f1,f13,f1,f12      09 (00010B5C) REG
 * 00010B64 4E800020 blr                      08
 */

/*
 * Constants for sin8().  sin8() evaluates
 *
 *     d0 * x * (y^7 + b5*y^5 + b4*y^4 + b3*y^3 + b4*e0*y^2 + b1*y + b0)
 *
 * with y = x*x + c0.  There is no y^6 term and no separate b2: the y^2
 * coefficient is formed as b4 * e0.  d0 has the same value as a7.
 */
static double const b0 = +174470112000.0;
static double const b1 = -17377416000.0;
static double const b3 = +36817200.0;
static double const b4 = -579600.0;
static double const b5 = +13860.0;
static double const c0 = -30.0;
static double const d0 = -7.647163731819816475901131985788070444e-13;
static double const e0 = 347.478260869565217391304347826086956521739;

/*
 * sin8 - alternative evaluation scheme in the shifted variable
 * y = x*x - 30, with the leading coefficient d0 factored out.
 *
 * NOTE(review): expanding the expression in x appears to reproduce the
 * same degree-15 polynomial as sin7() (the shift by 30 is what removes the
 * y^6 term); the two are not bit-identical because of different rounding.
 *
 * As with sin7(), no argument reduction is performed.
 */
double sin8(double x)
{
    double xp = d0 * x;          /* leading coefficient times x */
    double y  = x * x + c0;      /* shifted variable, c0 == -30 */
    double y2 = y * y;
    double yp = b3 + y2 * (b5 + y2);   /* b3 + b5*y^2 + y^4 */
    double y3 = y2 * y;
    /* Disabled experiment (presumably a register/scheduling barrier --
     * confirm before re-enabling):
     * __asm__("" : "+f" (y), "+f" (y2), "+f" (y3), "+f" (yp), "+f" (xp)); */

    /*
     * yp is reused here instead of spelling out the same sub-expression a
     * second time; the order of the additions is kept as originally
     * written so the rounding sequence is unchanged.
     */
    return (b0 + b1 * y + y3 * yp + b4 * y2 * (e0 + y2)) * xp;
}

/*
 * SNC disassembly (52.72ns)
 * -------------------------
 * 000165C8 3C600001 lis   r3,0x1
 * 000165CC 38636588 addi  r3,r3,0x6588       01 (000165C8) REG PIPE
 * 000165D0 C8430008 lfd   f2,0x8(r3)         03 (000165CC) REG LSU
 * 000165D4 C8630020 lfd   f3,0x20(r3)        PIPE
 * 000165D8 FC41107A fmadd f2,f1,f1,f2
 * 000165DC C8830010 lfd   f4,0x10(r3)
 * 000165E0 C8A30018 lfd   f5,0x18(r3)
 * 000165E4 C8C30028 lfd   f6,0x28(r3)        PIPE
 * 000165E8 C8E30000 lfd   f7,0x0(r3)
 * 000165EC C9030030 lfd   f8,0x30(r3)        PIPE
 * 000165F0 FC2101F2 fmul  f1,f1,f7
 * 000165F4 FD4200B2 fmul  f10,f2,f2          04 (000165D8) REG PIPE
 * 000165F8 FC82293A fmadd f4,f2,f4,f5
 * 000165FC C8A30038 lfd   f5,0x38(r3)
 * 00016600 FC6A182A fadd  f3,f10,f3          08 (000165F4) REG
 * 00016604 FC4202B2 fmul  f2,f2,f10          PIPE
 * 00016608 FCEA0232 fmul  f7,f10,f8
 * 0001660C FCAA282A fadd  f5,f10,f5          PIPE
 * 00016610 FC6A30FA fmadd f3,f10,f3,f6       06 (00016600) REG
 * 00016614 FC4220FA fmadd f2,f2,f3,f4        09 (00016610) REG PIPE
 * 00016618 FC47117A fmadd f2,f7,f5,f2        09 (00016614) REG
 * 0001661C FC220072 fmul  f1,f2,f1           09 (00016618) REG PIPE
 * 00016620 4E800020 blr                      08
 *
 * GCC disassembly (50.78ns)
 * -------------------------
 * 00010AB8 FD600890 fmr   f11,f1
 * 00010ABC C82280B0 lfd   f1,-0x7F50(r2)
 * 00010AC0 C9A28090 lfd   f13,-0x7F70(r2)
 * 00010AC4 C1428098 lfs   f10,-0x7F68(r2)    PIPE
 * 00010AC8 C122809C lfs   f9,-0x7F64(r2)
 * 00010ACC C9828080 lfd   f12,-0x7F80(r2)    PIPE
 * 00010AD0 C0E280A0 lfs   f7,-0x7F60(r2)
 * 00010AD4 C8C280A8 lfd   f6,-0x7F58(r2)     PIPE
 * 00010AD8 C9028088 lfd   f8,-0x7F78(r2)
 * 00010ADC FC2B0AF8 fmsub f1,f11,f11,f1      02 (00010AB8) REG PIPE
 * 00010AE0 FD6B0332 fmul  f11,f11,f12
 * 00010AE4 FC010072 fmul  f0,f1,f1           08 (00010ADC) REG PIPE
 * 00010AE8 FD4002B2 fmul  f10,f0,f10         09 (00010AE4) REG
 * 00010AEC FDA0682A fadd  f13,f0,f13         PIPE
 * 00010AF0 FD20482A fadd  f9,f0,f9
 * 00010AF4 FD810032 fmul  f12,f1,f0          PIPE
 * 00010AF8 FDAD02B2 fmul  f13,f13,f10        07 (00010AEC) REG
 * 00010AFC FC003A7A fmadd f0,f0,f9,f7        PIPE
 * 00010B00 FC216A3A fmadd f1,f1,f8,f13       08 (00010AF8) REG
 * 00010B04 FC00333A fmadd f0,f0,f12,f6       PIPE
 * 00010B08 FC21002A fadd  f1,f1,f0           09 (00010B04) REG
 * 00010B0C FC2B0072 fmul  f1,f11,f1          09 (00010B08) REG PIPE
 * 00010B10 4E800020 blr                      08
 *
 */