|
Prev
| Next
|
|
|
|
|
|
cppad_mat_mul.cpp |
|
@(@\newcommand{\W}[1]{ \; #1 \; }
\newcommand{\R}[1]{ {\rm #1} }
\newcommand{\B}[1]{ {\bf #1} }
\newcommand{\D}[2]{ \frac{\partial #1}{\partial #2} }
\newcommand{\DD}[3]{ \frac{\partial^2 #1}{\partial #2 \partial #3} }
\newcommand{\Dpow}[2]{ \frac{\partial^{#1}}{\partial {#2}^{#1}} }
\newcommand{\dpow}[2]{ \frac{ {\rm d}^{#1}}{{\rm d}\, {#2}^{#1}} }@)@

This is the cppad-20221105 documentation; a link to the current
documentation is available on the CppAD project page.
CppAD Speed, Matrix Multiplication
Specifications
See link_mat_mul.
Implementation
# include <cppad/cppad.hpp>
# include <cppad/speed/mat_sum_sq.hpp>
# include <cppad/speed/uniform_01.hpp>
# include <cppad/example/atomic_four/mat_mul/mat_mul.hpp>
// Note that CppAD uses global_option["memory"] at the main program level
# include <map>
extern std::map<std::string, bool> global_option;
// see comments in main program for this external
extern size_t global_cppad_thread_alloc_inuse;
// Speed-test driver for CppAD matrix multiplication; see the
// link_mat_mul specifications.
//
// size   : row and column dimension of the (square) matrices
// repeat : number of times to repeat the test
// x      : [out] entries of the matrix X used for the last repetition
//          (length size * size)
// z      : [out] z[0] = sum of the elements of the product X * X
// dz     : [out] gradient of z[0] w.r.t. x, computed by reverse mode
//
// Returns true if every enabled entry of global_option is supported by
// this test, false otherwise.
bool link_mat_mul(
    size_t size ,
    size_t repeat ,
    CppAD::vector<double>& x ,
    CppAD::vector<double>& z ,
    CppAD::vector<double>& dz
)
{   global_cppad_thread_alloc_inuse = 0;
    // --------------------------------------------------------------------
    // check global options: any option that is set (true) but not in the
    // valid list below means this test cannot honor the request
    const char* valid[] = { "memory", "onetape", "optimize", "atomic"};
    size_t n_valid = sizeof(valid) / sizeof(valid[0]);
    typedef std::map<std::string, bool>::iterator iterator;
    //
    for(iterator itr=global_option.begin(); itr!=global_option.end(); ++itr)
    {   if( itr->second )
        {   bool ok = false;
            for(size_t i = 0; i < n_valid; i++)
                ok |= itr->first == valid[i];
            if( ! ok )
                return false;
        }
    }
    // --------------------------------------------------------------------
    // optimization options: no conditional skips or compare operators
    std::string optimize_options =
        "no_conditional_skip no_compare_op no_print_for_op";
    // -----------------------------------------------------
    // setup
    typedef CppAD::AD<double> ADScalar;
    typedef CppAD::vector<ADScalar> ADVector;
    size_t j;               // temporary index
    size_t m = 1;           // number of dependent variables
    size_t n = size * size; // number of independent variables
    ADVector X(n);          // AD domain space vector
    ADVector Y(n);          // Store product matrix
    ADVector Z(m);          // AD range space vector
    CppAD::ADFun<double> f; // AD function object
    // reverse mode weight vector (single dependent variable, weight one)
    CppAD::vector<double> w(1);
    w[0] = 1.;
    // atomic function information: ax packs left and right factors,
    // ay receives the product matrix
    CppAD::vector<ADScalar> ax(2 * n), ay(n);
    CppAD::atomic_mat_mul<double> atom_mul("atom_mul");
    //
    // do not even record comparison operators
    size_t abort_op_index = 0;
    bool record_compare = false;
    // ------------------------------------------------------
    if( ! global_option["onetape"] ) while(repeat--)
    {   // get the next matrix
        CppAD::uniform_01(n, x);
        for( j = 0; j < n; j++)
            X[j] = x[j];
        // declare independent variables
        Independent(X, abort_op_index, record_compare);
        // do computations
        if( ! global_option["atomic"] )
            mat_sum_sq(size, X, Y, Z);
        else
        {   for(j = 0; j < n; j++)
            {   ax[j] = X[j];
                ax[n + j] = X[j];
            }
            // Y = X * X
            size_t call_id = atom_mul.set(size, size, size);
            atom_mul(call_id, ax, ay);
            Z[0] = 0.;
            for(j = 0; j < n; j++)
                Z[0] += ay[j];
        }
        // create function object f : X -> Z
        f.Dependent(X, Z);
        if( global_option["optimize"] )
            f.optimize(optimize_options);
        // skip comparison operators
        f.compare_change_count(0);
        // evaluate and return gradient using reverse mode
        z = f.Forward(0, x);
        dz = f.Reverse(1, w);
    }
    else
    {   // get a next matrix
        CppAD::uniform_01(n, x);
        for(j = 0; j < n; j++)
            X[j] = x[j];
        // declare independent variables
        Independent(X, abort_op_index, record_compare);
        // do computations
        if( ! global_option["atomic"] )
            mat_sum_sq(size, X, Y, Z);
        else
        {   for(j = 0; j < n; j++)
            {   ax[j] = X[j];
                ax[j+n] = X[j];
            }
            // Y = X * X
            // BUG FIX: the atomic_four mat_mul call needs the call_id
            // returned by set(...), which records the matrix dimensions
            // (as done in the retape branch above). The previous code
            // called atom_mul(ax, ay) without ever calling set, so no
            // dimension information corresponded to the call.
            size_t call_id = atom_mul.set(size, size, size);
            atom_mul(call_id, ax, ay);
            Z[0] = 0.;
            for(j = 0; j < n; j++)
                Z[0] += ay[j];
        }
        // create function object f : X -> Z
        f.Dependent(X, Z);
        if( global_option["optimize"] )
            f.optimize(optimize_options);
        // skip comparison operators
        f.compare_change_count(0);
        while(repeat--)
        {   // get a next matrix (tape is reused; only Forward/Reverse run)
            CppAD::uniform_01(n, x);
            // evaluate and return gradient using reverse mode
            z = f.Forward(0, x);
            dz = f.Reverse(1, w);
        }
    }
    size_t thread = CppAD::thread_alloc::thread_num();
    global_cppad_thread_alloc_inuse = CppAD::thread_alloc::inuse(thread);
    // --------------------------------------------------------------------
    // Free temporary work space (any future atomic_mat_mul constructors
    // would create new temporary work space.)
    CppAD::user_atomic<double>::clear();
    // --------------------------------------------------------------------
    return true;
}
Input File: speed/cppad/mat_mul.cpp