#include <cassert>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <iomanip>
#include <iostream>

using namespace std;

// Forward declarations of the polynomial variants benchmarked in main().
// Each one carries the GNU `noinline` attribute, so the compiler will not
// inline it into a caller -- presumably so that every timed loop pays for a
// real function call and the variants are compared on equal footing.
__attribute__((noinline)) double sin1(double x);
__attribute__((noinline)) double sin2(double x);
__attribute__((noinline)) double sin3(double x);
__attribute__((noinline)) double sin4(double x);
__attribute__((noinline)) double sin5(double x);
__attribute__((noinline)) double sin6(double x);
__attribute__((noinline)) double sin7(double x);
__attribute__((noinline)) double sin8(double x);

// Coefficients of the odd polynomial evaluated by sin1..sin7:
//     p(x) = a0*x + a1*x^3 + a2*x^5 + ... + a7*x^15
// a_k multiplies x^(2k+1) and equals (-1)^k / (2k+1)!, i.e. the Maclaurin
// series of sine truncated after the x^15 term.
static constexpr double a0 =  1.0;
static constexpr double a1 = -1.666666666666666666666666666666666667e-1;
static constexpr double a2 =  8.333333333333333333333333333333333333e-3;
static constexpr double a3 = -1.984126984126984126984126984126984127e-4;
static constexpr double a4 =  2.755731922398589065255731922398589065e-6;
static constexpr double a5 = -2.505210838544171877505210838544171878e-8;
static constexpr double a6 =  1.605904383682161459939237717015494793e-10;
static constexpr double a7 = -7.647163731819816475901131985788070444e-13;

// Constants of the factored form used by sin8, which evaluates
//     d0 * x * (b0 + b1*y + b4*e0*y^2 + b3*y^3 + b4*y^4 + b5*y^5 + y^7)
// with y = x*x + c0.  b_k is the coefficient of y^k; there is no b2 because
// the y^2 coefficient is expressed as b4 * e0.
static constexpr double b0 =  174470112000.0;
static constexpr double b1 = -17377416000.0;
static constexpr double b3 =  36817200.0;
static constexpr double b4 = -579600.0;
static constexpr double b5 =  13860.0;
// Shift applied to x*x to form y.
static constexpr double c0 = -30.0;
// Overall scale factor of the factored form; same value as a7.
static constexpr double d0 = -7.647163731819816475901131985788070444e-13;
// Ratio that turns b4 into the y^2 coefficient (b4 * e0).
static constexpr double e0 = 347.478260869565217391304347826086956521739;


// Variant 1: term-by-term evaluation with every power written out.
//
// Returns a0*x + a1*x^3 + a2*x^5 + ... + a7*x^15, where each power of x is
// spelled as a repeated product and the terms are added from the lowest to
// the highest degree.  No intermediate value is named in the source; any
// sharing of partial products is left to the compiler.
double sin1(double x)
{
    return x * a0
         + x * x * x * a1
         + x * x * x * x * x * a2
         + x * x * x * x * x * x * x * a3
         + x * x * x * x * x * x * x * x * x * a4
         + x * x * x * x * x * x * x * x * x * x * x * a5
         + x * x * x * x * x * x * x * x * x * x * x * x * x * a6
         + x * x * x * x * x * x * x * x * x * x * x * x * x * x * x * a7;
}

// Variant 2: term-by-term evaluation with a running power.
//
// `y` starts at x and is multiplied by x*x after each term, so it holds
// x, x^3, x^5, ... in turn.  Each step adds coefficient * y to `ret`, giving
// a0*x + a1*x^3 + ... + a7*x^15 accumulated from the lowest degree upwards.
double sin2(double x)
{
    double ret = 0.0;
    double y = x;
    double x2 = x * x;
    ret += a0 * y; y *= x2;
    ret += a1 * y; y *= x2;
    ret += a2 * y; y *= x2;
    ret += a3 * y; y *= x2;
    ret += a4 * y; y *= x2;
    ret += a5 * y; y *= x2;
    ret += a6 * y; y *= x2;
    // Last term: no further power is needed, so y is not advanced again.
    ret += a7 * y;
    return ret;
}

// Variant 3: Horner's scheme in x^2.
//
// Evaluates a0 + x2*(a1 + x2*(a2 + ... + x2*a7)) from the innermost
// (highest-degree) coefficient outwards and multiplies the result by x.
// Every step depends on the previous one, forming a single dependency chain.
double sin3(double x)
{
    double x2 = x * x;
    return x * (a0 + x2 * (a1 + x2 * (a2 + x2 * (a3 + x2 * (a4 + x2 * (a5 + x2 * (a6 + x2 * a7)))))));
}

// Variant 4: two interleaved Horner chains in x^4.
//
// The coefficients are split by index parity:
//   A = a0 + a2*x^4 + a4*x^8 + a6*x^12   (even indices)
//   B = a1 + a3*x^4 + a5*x^8 + a7*x^12   (odd indices)
// and recombined as x * (A + x^2 * B).  A and B do not depend on each other,
// presumably so the two chains can execute in parallel.
double sin4(double x)
{
    double x2 = x * x;
    double x4 = x2 * x2;
    double A = a0 + x4 * (a2 + x4 * (a4 + x4 * a6));
    double B = a1 + x4 * (a3 + x4 * (a5 + x4 * a7));
    return x * (A + x2 * B);
}

// Variant 5: three interleaved Horner chains in x^6.
//
// The coefficients are split by index modulo 3:
//   A = a0 + a3*x^6 + a6*x^12
//   B = a1 + a4*x^6 + a7*x^12
//   C = a2 + a5*x^6
// and recombined as x * (A + x^2 * B + x^4 * C).  The three chains are
// mutually independent.
double sin5(double x)
{
    double x2 = x * x;
    double x4 = x2 * x2;
    double x6 = x4 * x2;
    double A = a0 + x6 * (a3 + x6 * a6);
    double B = a1 + x6 * (a4 + x6 * a7);
    double C = a2 + x6 * a5;
    return x * (A + x2 * B + x4 * C);
}

// Variant 6: low half / high half split.
//
// The eight coefficients are cut into two consecutive groups, each evaluated
// with Horner's scheme in x^2:
//   A = a0 + a1*x^2 + a2*x^4 + a3*x^6
//   B = a4 + a5*x^2 + a6*x^4 + a7*x^6
// The high half is shifted up by x^8 and the sum is multiplied by x:
//   x * (A + x^8 * B).
double sin6(double x)
{
    double x2 = x * x;
    double x4 = x2 * x2;
    double x8 = x4 * x4;
    double A = a0 + x2 * (a1 + x2 * (a2 + x2 * a3));
    double B = a4 + x2 * (a5 + x2 * (a6 + x2 * a7));
    return x * (A + x8 * B);
}

// Variant 7: low/high split with the factor x folded into the powers.
//
// Uses the same coefficient grouping as sin6, but instead of multiplying the
// final sum by x it precomputes odd powers:
//   A = x^3 * (a1 + a2*x^2 + a3*x^4)          -> terms x^3 .. x^7
//   B = a4 + a5*x^2 + a6*x^4 + a7*x^6         -> scaled by x^9 below
//   C = a0 * x                                -> linear term
// and returns A + C + x^9 * B.
double sin7(double x)
{
    double x2 = x * x;
    double x3 = x2 * x;
    double x4 = x2 * x2;
    double x8 = x4 * x4;
    double x9 = x8 * x;
    // Empty asm statement listing the powers as read-write operands: the
    // compiler has to treat them as opaque values from here on, so they must
    // all be computed before the coefficient chains below start.
    // NOTE(review): "x" is a machine-dependent register constraint (an SSE
    // register on x86) -- confirm before building for another target.
    __asm__("" : "+x" (x2), "+x" (x3), "+x" (x4), "+x" (x8), "+x" (x9));
    double A = x3 * (a1 + x2 * (a2 + x2 * a3));
    double B = a4 + x2 * (a5 + x2 * (a6 + x2 * a7));
    double C = a0 * x;
    return A + C + x9 * B;
}

// Variant 8: factored form in the shifted variable y = x*x + c0.
//
// Returns
//     d0 * x * (b0 + b1*y + b4*e0*y^2 + b3*y^3 + b4*y^4 + b5*y^5 + y^7)
// grouped as
//     b0 + b1*y  +  y^3 * (b3 + y^2*(b5 + y^2))  +  b4*y^2 * (e0 + y^2).
// The polynomial in y is monic and has no y^6 term, so fewer coefficient
// multiplications are needed than in the a0..a7 forms.
//
// `yp` holds the inner factor b3 + y^2*(b5 + y^2).  It is computed once, ahead
// of the barrier, and reused in the return expression; recomputing it there
// would do the same work twice inside the timed function.
double sin8(double x)
{
    double xp = d0 * x;
    double y = x * x + c0;
    double y2 = y * y;
    double yp = b3 + y2 * (b5 + y2);
    double y3 = y2 * y;
    // Empty asm statement listing the intermediates as read-write operands:
    // the compiler has to treat them as opaque values from here on, so they
    // are all computed before the final combination.
    // NOTE(review): "x" is a machine-dependent register constraint (an SSE
    // register on x86) -- confirm before building for another target.
    __asm__("" : "+x" (y), "+x" (y2), "+x" (y3), "+x" (yp), "+x" (xp));
    return (b0 + b1 * y + y3 * yp + b4 * y2 * (e0 + y2)) * xp;
}


// Runs fn(run * inv) for run = 0 .. iterations-1 and measures the loop.
//
// fn         callable taking and returning double; passed as a template
//            parameter so that every call site gets its own instantiation
//            with a call target known at compile time.
// iterations number of calls to time.
// inv        scale applied to the loop counter to form the argument.
// total      receives the sum of all results; the caller must consume it so
//            the loop has an observable effect.
//
// Returns the elapsed time multiplied by inv, in nanoseconds (the average
// cost of one iteration when inv == 1.0 / iterations).
template <typename Fn>
static double bench_ns_per_iteration(Fn fn, std::size_t iterations, double inv, double& total)
{
    // steady_clock is monotonic, which is what interval measurement needs;
    // high_resolution_clock may alias a clock that can be adjusted.
    typedef std::chrono::steady_clock bench_clock;

    double acc = 0.0;
    bench_clock::time_point const start = bench_clock::now();
    for (std::size_t run = 0; run < iterations; run++)
        acc += fn(run * inv);
    bench_clock::time_point const stop = bench_clock::now();

    total = acc;
    // A floating-point nanosecond duration converts from any clock period,
    // unlike an implicit conversion to the integer std::chrono::nanoseconds.
    return std::chrono::duration<double, std::nano>(stop - start).count() * inv;
}

// Times fn, prints "<label>: <ns per call minus norm> ns" and returns the sum
// of all results.  norm is the per-iteration cost of the empty baseline loop.
template <typename Fn>
static double bench_report(char const* label, Fn fn, std::size_t iterations, double inv, double norm)
{
    double total = 0.0;
    double const ns = bench_ns_per_iteration(fn, iterations, inv, total);
    std::cout << label << ": " << ns - norm << " ns" << std::endl;
    return total;
}

// Benchmarks the library sine and the eight polynomial variants over the
// arguments run / iterations in [0, 1), printing the time per call (with the
// baseline loop cost subtracted) followed by the accumulated sums, which allow
// the variants' results to be compared.
int main()
{
    std::size_t const iterations = 10000000;
    double const inv = 1.0 / iterations;

    // Baseline: the same loop and accumulation without any function under test.
    double sum = 0.0;
    double const norm = bench_ns_per_iteration([](double v) { return v; }, iterations, inv, sum);

    double const sum0 = bench_report("sin",  [](double v) { return std::sin(v); }, iterations, inv, norm);
    double const sum1 = bench_report("sin1", [](double v) { return sin1(v); }, iterations, inv, norm);
    double const sum2 = bench_report("sin2", [](double v) { return sin2(v); }, iterations, inv, norm);
    double const sum3 = bench_report("sin3", [](double v) { return sin3(v); }, iterations, inv, norm);
    double const sum4 = bench_report("sin4", [](double v) { return sin4(v); }, iterations, inv, norm);
    double const sum5 = bench_report("sin5", [](double v) { return sin5(v); }, iterations, inv, norm);
    double const sum6 = bench_report("sin6", [](double v) { return sin6(v); }, iterations, inv, norm);
    double const sum7 = bench_report("sin7", [](double v) { return sin7(v); }, iterations, inv, norm);
    double const sum8 = bench_report("sin8", [](double v) { return sin8(v); }, iterations, inv, norm);

    std::cout << std::setprecision(20);
    std::cout << sum0 << '\n'
              << sum1 << '\n'
              << sum2 << '\n'
              << sum3 << '\n'
              << sum4 << '\n'
              << sum5 << '\n'
              << sum6 << '\n'
              << sum7 << '\n'
              << sum8 << '\n';

    // Folding every sum (including the otherwise unused baseline sum) into the
    // exit status keeps all timed loops observable.  The comparison yields 0
    // unless the grand total is exactly 0.0.
    return sum + sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 == 0.0;
}

