Multi-Threaded chkpoint

multi_chkpoint_one_setup

@(@\newcommand{\W}[1]{ \; #1 \; } \newcommand{\R}[1]{ {\rm #1} } \newcommand{\B}[1]{ {\bf #1} } \newcommand{\D}[2]{ \frac{\partial #1}{\partial #2} } \newcommand{\DD}[3]{ \frac{\partial^2 #1}{\partial #2 \partial #3} } \newcommand{\Dpow}[2]{ \frac{\partial^{#1}}{\partial {#2}^{#1}} } \newcommand{\dpow}[2]{ \frac{ {\rm d}^{#1}}{{\rm d}\, {#2}^{#1}} }@)@This is cppad-20221105 documentation. Here is a link to its current documentation. Multi-Threaded chkpoint_one Set Up.

Syntax
ok = multi_chkpoint_one_setup(y_squared)

Purpose
This routine divides the computation among the individual threads.

Thread
It is assumed that this function is called by thread zero and all the other threads are blocked (waiting).

y_squared
This argument has prototype



    const vector<double>& y_squared

and its size is equal to the number of equations to solve. Its elements are the values whose square roots are being computed.

ok
This return value has prototype



    bool ok

If it is false, multi_chkpoint_one_setup detected an error.

Source


namespace {
bool multi_chkpoint_one_setup(const vector<double>& y_squared)
{   size_t num_threads = std::max(num_threads_, size_t(1));
    bool   ok          = num_threads == thread_alloc::num_threads();
    ok                &= thread_alloc::thread_num() == 0;
    //
    // declare independent variable variable vector
    vector<a_double> ax(1);
    ax[0] = 2.0;
    CppAD::Independent(ax);
    //
    // argument and result for checkpoint algorithm
    vector<a_double> au(2), ay(1);
    au[0] = ax[0];                  // y_initial
    au[1] = ax[0];                  // y_squared

    // put user checkpoint function in recording
    (*a_square_root_)(au, ay);

    // f(u) = sqrt(u)
    CppAD::ADFun<double> fun(ax, ay);
    //
    // number of square roots for each thread
    size_t per_thread = (y_squared.size() + num_threads - 1) / num_threads;
    size_t y_index    = 0;
    //
    for(size_t thread_num = 0; thread_num < num_threads; thread_num++)
    {   // allocate separate memory for each thread to avoid false sharing
        size_t min_bytes(sizeof(work_one_t)), cap_bytes;
        void* v_ptr = thread_alloc::get_memory(min_bytes, cap_bytes);
        work_all_[thread_num] = static_cast<work_one_t*>(v_ptr);
        //
        // Run constructor on work_all_[thread_num]->fun
        work_all_[thread_num]->fun = new CppAD::ADFun<double>;
        //
        // Run constructor on work_all_[thread_num] vectors
        work_all_[thread_num]->y_squared = new vector<double>;
        work_all_[thread_num]->square_root = new vector<double>;
        //
        // Each worker gets a separate copy of fun. This is necessary because
        // the Taylor coefficients will be set by each thread.
        *(work_all_[thread_num]->fun) = fun;
        //
        // values we are computing square root of for this thread
        ok &=  0 == work_all_[thread_num]->y_squared->size();
        for(size_t i = 0; i < per_thread; i++)
        if( y_index < y_squared.size() )
            work_all_[thread_num]->y_squared->push_back(y_squared[y_index++]);
        //
        // set to false in case this thread's worker does not get called
        work_all_[thread_num]->ok = false;
    }
    ok &= y_index == y_squared.size();
    //
    return ok;
}
}

Input File: example/multi_thread/multi_chkpoint_one.cpp