Set Up Multi-Threaded Newton Method

multi_newton_setup

@(@\newcommand{\W}[1]{ \; #1 \; } \newcommand{\R}[1]{ {\rm #1} } \newcommand{\B}[1]{ {\bf #1} } \newcommand{\D}[2]{ \frac{\partial #1}{\partial #2} } \newcommand{\DD}[3]{ \frac{\partial^2 #1}{\partial #2 \partial #3} } \newcommand{\Dpow}[2]{ \frac{\partial^{#1}}{\partial {#2}^{#1}} } \newcommand{\dpow}[2]{ \frac{ {\rm d}^{#1}}{{\rm d}\, {#2}^{#1}} }@)@This is cppad-20221105 documentation. Here is a link to its current documentation. Set Up Multi-Threaded Newton Method
Syntax

ok = multi_newton_setup(

    num_sub, xlow, xup, epsilon, max_itr, num_threads

)

Purpose
This routine does the setup for splitting the task of finding all the zeros in an interval into separate pieces, one for each thread; each thread is assigned its own contiguous group of sub-intervals.

Thread
It is assumed that this function is called by thread zero, and all the other threads are blocked (waiting).

num_sub
See num_sub in multi_newton_run.

xlow
See xlow in multi_newton_run.

xup
See xup in multi_newton_run.

epsilon
See epsilon in multi_newton_run.

max_itr
See max_itr in multi_newton_run.

num_threads
See num_threads in multi_newton_run.

Source


namespace {
// Set up the per-thread work records for the multi-threaded Newton method.
//
// The interval [xlow, xup] is divided into num_sub sub-intervals of equal
// length, and those sub-intervals are distributed over the threads in
// contiguous groups; the first (num_sub % num_threads) threads each get one
// extra sub-interval. One work_one_t record is allocated per thread and
// stored in work_all_; this routine does not free those records.
//
// Parameters:
//   num_sub     total number of sub-intervals; must be non-zero, because
//               it is used as the divisor for the sub-interval length.
//   xlow        lower limit of the entire interval.
//   xup         upper limit of the entire interval.
//   epsilon     stored in epsilon_ (same for all threads).
//   max_itr     stored in max_itr_ (same for all threads).
//   num_threads number of threads to set up work for; a value of zero is
//               treated as one.
//
// Returns true if all of the following hold, and false otherwise:
//   - the (adjusted) thread count equals thread_alloc::num_threads(),
//   - the calling thread is thread number zero,
//   - work_all_ was empty on entry,
//   - each new record's x vector is empty,
//   - the per-thread sub-interval counts add up to num_sub.
// Setup is carried through to the end even when one of these checks fails.
bool multi_newton_setup(
    size_t num_sub                              ,
    double xlow                                 ,
    double xup                                  ,
    double epsilon                              ,
    size_t max_itr                              ,
    size_t num_threads                          )
{
    // Use the caller's thread count, with zero mapped to one thread.
    // (The previous code read the file-level num_threads_ here, which
    // silently ignored the num_threads argument.)
    num_threads  = std::max(num_threads, size_t(1));
    bool ok      = num_threads == thread_alloc::num_threads();
    ok          &= thread_alloc::thread_num() == 0;

    // inputs that are same for all threads
    epsilon_ = epsilon;
    max_itr_ = max_itr;

    // resize the work vector to accommodate the number of threads
    // (it is expected to be empty when setup is called)
    ok &= work_all_.size() == 0;
    work_all_.resize(num_threads);

    // length of each sub interval (assumes num_sub > 0)
    sub_length_ = (xup - xlow) / double(num_sub);

    // determine values that are specific to each thread
    size_t num_min   = num_sub / num_threads; // minimum num_sub
    size_t num_more  = num_sub % num_threads; // number that have one more
    size_t sum_num   = 0;  // sum with respect to thread of num_sub
    size_t thread_num, num_sub_thread;
    for(thread_num = 0; thread_num < num_threads; thread_num++)
    {
# if  USE_THREAD_ALLOC_FOR_WORK_ALL
        // allocate separate memory for this thread to avoid false sharing
        size_t min_bytes(sizeof(work_one_t)), cap_bytes;
        void* v_ptr = thread_alloc::get_memory(min_bytes, cap_bytes);
        work_all_[thread_num] = static_cast<work_one_t*>(v_ptr);

        // thread_alloc is a raw memory allocator; i.e., it does not call
        // the constructor for the objects it creates. The vector
        // class requires its constructor to be called so we do it here
        new(& (work_all_[thread_num]->x) ) vector<double>();
# else
        work_all_[thread_num] = new work_one_t;
# endif

        // number of sub-intervals for this thread
        if( thread_num < num_more  )
            num_sub_thread = num_min + 1;
        else
            num_sub_thread = num_min;

        // when thread_num == 0, xlow_thread == xlow
        double xlow_thread = xlow + double(sum_num) * sub_length_;

        // when thread_num == num_threads - 1, xup_thread = xup
        // (set explicitly so the last upper limit is exactly xup and
        // not a value computed from sub_length_)
        double xup_thread =
            xlow + double(sum_num + num_sub_thread) * sub_length_;
        if( thread_num == num_threads - 1 )
            xup_thread = xup;

        // update sum_num for next time through loop
        sum_num += num_sub_thread;

        // input information specific to this thread
        work_all_[thread_num]->num_sub = num_sub_thread;
        work_all_[thread_num]->xlow    = xlow_thread;
        work_all_[thread_num]->xup     = xup_thread;
        ok &= work_all_[thread_num]->x.size() == 0;

        // in case this thread does not get called
        work_all_[thread_num]->ok = false;
    }
    // every sub-interval must have been assigned to exactly one thread
    ok &= sum_num == num_sub;
    return ok;
}
}

Input File: example/multi_thread/multi_newton.cpp