Timing Test of Multi-Threaded Newton Method

Loading [MathJax]/extensions/tex2jax.js

multi_newton_time

@(@\newcommand{\W}[1]{ \; #1 \; } \newcommand{\R}[1]{ {\rm #1} } \newcommand{\B}[1]{ {\bf #1} } \newcommand{\D}[2]{ \frac{\partial #1}{\partial #2} } \newcommand{\DD}[3]{ \frac{\partial^2 #1}{\partial #2 \partial #3} } \newcommand{\Dpow}[2]{ \frac{\partial^{#1}}{\partial {#2}^{#1}} } \newcommand{\dpow}[2]{ \frac{ {\rm d}^{#1}}{{\rm d}\, {#2}^{#1}} }@)@. This is cppad-20221105 documentation. Here is a link to its current documentation . Timing Test of Multi-Threaded Newton Method
Syntax

ok = multi_newton_time(time_out, test_time, num_threads,

    num_zero, num_sub, num_sum, use_ad

)

Purpose
Runs correctness and timing test for a multi-threaded Newton method. This test uses Newton's method to determine all the zeros of the sine function on an interval. CppAD, or hand coded derivatives, can be used to calculate the derivatives used by Newton's method. The calculation can be done in parallel on the different sub-intervals. In addition, the calculation can be done without multi-threading.

Thread
It is assumed that this function is called by thread zero in sequential mode; i.e., not in_parallel .

ok
This return value has prototype



    bool ok

If it is true, multi_newton_time passed the correctness test. Otherwise it is false.

time_out
This argument has prototype



    double& time_out

The input value of the argument does not matter. Upon return it is the number of wall clock seconds required for the multi-threaded Newton method can compute all the zeros.

test_time
Is the minimum amount of wall clock time that the test should take. The number of repeats for the test will be increased until this time is reached. The reported time_out is the total wall clock time divided by the number of repeats.

num_threads
This argument has prototype



    size_t num_threads

It specifies the number of threads that are available for this test. If it is zero, the test is run without multi-threading and



    1 == thread_alloc::num_threads()

when multi_newton_time is called. If it is non-zero, the test is run with multi-threading and



    num_threads == thread_alloc::num_threads()

when multi_newton_time is called.

num_zero
This argument has prototype



    size_t num_zero

and it must be greater than one. It specifies the actual number of zeros in the test function @(@ \sin(x) @)@. To be specific, multi_newton_time will attempt to determine all of the values of @(@ x @)@ for which @(@ \sin(x) = 0 @)@ and @(@ x @)@ is in the interval



    [ 0 , (num_zero - 1) * pi ]

.

num_sub
This argument has prototype



    size_t num_sub

It specifies the number of sub-intervals to divide the total interval into. It must be greater than num_zero (so that the correctness test can check we have found all the zeros).

num_sum
This argument has prototype



    size_t num_sum

and must be greater than zero. The actual function used by the Newton method is @[@ f(x) = \frac{1}{n} \sum_{i=1}^{n} \sin (x) @]@ where @(@ n @)@ is equal to num_sum . Larger values of num_sum simulate a case where the evaluation of the function @(@ f(x) @)@ takes more time.

use_ad
This argument has prototype



    bool user_ad

If use_ad is true, then derivatives will be computed using CppAD. Note that this derivative computation includes re-taping the function for each value of @(@ x @)@ (even though re-taping is not necessary).

If use_ad is false, derivatives will be computed using a hand coded routine.

Source



namespace { // empty namespace

    // values correspond to arguments in previous call to multi_newton_time
    size_t num_zero_;   // number of zeros of f(x) in the total interval
    size_t num_sub_;    // number of sub-intervals to split calculation into
    size_t num_sum_;    // larger values make f(x) take longer to calculate

    // value of xout corresponding to most recent call to test_once
    vector<double> xout_;

    // A version of the sine function that can be made as slow as we like
    template <class Float>
    Float f_eval(Float x)
    {   Float sum = 0.;
        size_t i;
        for(i = 0; i < num_sum_; i++)
            sum += sin(x);

        return sum / Float(num_sum_);
    }

    // Direct calculation of derivative with same number of floating point
    // operations as for f_eval.
    double df_direct(double x)
    {   double sum = 0.;
        size_t i;
        for(i = 0; i < num_sum_; i++)
            sum += cos(x);

        return sum / double(num_sum_);
    }

    // AD calculation of detivative
    void fun_ad(double x, double& f, double& df)
    {   using CppAD::AD;

        // use vector because it uses fast multi-threaded memory alloc
        vector< AD<double> > X(1), Y(1);
        X[0] = x;
        CppAD::Independent(X);
        Y[0] = f_eval(X[0]);
        CppAD::ADFun<double> F(X, Y);
        vector<double> dx(1), dy(1);
        dx[0] = 1.;
        dy    = F.Forward(1, dx);
        f     = Value( Y[0] );
        df    = dy[0];
        return;
    }

    // evaulate the function and its derivative
    void fun_no(double x, double& f, double& df)
    {   f  = f_eval(x);
        df = df_direct(x);
        return;
    }


    // Run computation of all the zeros once
    void test_once(void)
    {   if(  num_zero_ == 0 )
        {   std::cerr << "multi_newton_time: num_zero == 0" << std::endl;
            exit(1);
        }
        double pi      = 4. * std::atan(1.);
        double xlow    = 0.;
        double xup     = double(num_zero_ - 1) * pi;
        double eps     =
            xup * 100. * CppAD::numeric_limits<double>::epsilon();
        size_t max_itr = 20;

        // note that fun_ is set to fun_ad or fun_no by multi_newton_time
        bool ok = multi_newton_run(
            xout_       ,
            fun_        ,
            num_sub_    ,
            xlow        ,
            xup         ,
            eps         ,
            max_itr     ,
            num_threads_
        );
        if( ! ok )
        {   std::cerr << "multi_newton: error" << std::endl;
            exit(1);
        }
        return;
    }

    // Repeat computation of all the zeros a specied number of times
    void test_repeat(size_t repeat)
    {   size_t i;
        for(i = 0; i < repeat; i++)
            test_once();
        return;
    }
} // end empty namespace


// This is the only routine that is accessible outside of this file
bool multi_newton_time(
    double& time_out      ,
    double  test_time     ,
    size_t  num_threads   ,
    size_t  num_zero      ,
    size_t  num_sub       ,
    size_t  num_sum       ,
    bool    use_ad
)
{
    bool ok = true;
    ok     &= thread_alloc::thread_num() == 0;
    ok     &= num_sub > num_zero;

    // Set local namespace environment variables
    num_threads_  = num_threads;
    if( use_ad )
        fun_ = fun_ad;
    else
        fun_ = fun_no;
    //
    num_zero_     = num_zero;
    num_sub_      = num_sub;
    num_sum_      = num_sum;

    // create team of threads
    ok &= thread_alloc::in_parallel() == false;
    if( num_threads > 0 )
    {   team_create(num_threads);
        ok &= num_threads == thread_alloc::num_threads();
    }
    else
    {   ok &= 1 == thread_alloc::num_threads();
    }

    // run the test case and set time return value
    time_out = CppAD::time_test(test_repeat, test_time);

    // destroy team of threads
    if( num_threads > 0 )
        team_destroy();
    ok &= thread_alloc::in_parallel() == false;
    //
    // correctness check
    double pi      = 4. * std::atan(1.);
    double xup     = double(num_zero_ - 1) * pi;
    double eps     = xup * 100. * CppAD::numeric_limits<double>::epsilon();
    ok        &= (xout_.size() == num_zero);
    size_t i   = 0;
    for(i = 0; i < xout_.size(); i++)
        ok &= std::fabs( xout_[i] - pi * double(i)) <= 2 * eps;

    // xout_ is a static variable, so clear it to free its memory
    xout_.clear();

    // return correctness check result
    return  ok;
}

Input File: example/multi_thread/multi_newton.cpp