Adolc Speed: Matrix Multiplication

adolc_mat_mul.cpp

@(@\newcommand{\W}[1]{ \; #1 \; } \newcommand{\R}[1]{ {\rm #1} } \newcommand{\B}[1]{ {\bf #1} } \newcommand{\D}[2]{ \frac{\partial #1}{\partial #2} } \newcommand{\DD}[3]{ \frac{\partial^2 #1}{\partial #2 \partial #3} } \newcommand{\Dpow}[2]{ \frac{\partial^{#1}}{\partial {#2}^{#1}} } \newcommand{\dpow}[2]{ \frac{ {\rm d}^{#1}}{{\rm d}\, {#2}^{#1}} }@)@This is cppad-20221105 documentation. Here is a link to its current documentation . Adolc Speed: Matrix Multiplication
Specifications
See link_mat_mul .

Implementation

// suppress conversion warnings before other includes
# include <cppad/wno_conversion.hpp>
//
# include <adolc/adolc.h>
# include <cppad/utility/vector.hpp>
# include <cppad/speed/mat_sum_sq.hpp>
# include <cppad/speed/uniform_01.hpp>
# include <cppad/utility/vector.hpp>

// list of possible options
# include <map>
extern std::map<std::string, bool> global_option;

bool link_mat_mul(
    size_t                           size     ,
    size_t                           repeat   ,
    CppAD::vector<double>&           x        ,
    CppAD::vector<double>&           z        ,
    CppAD::vector<double>&           dz       )
{
    // speed test global option values
    if( global_option["memory"] || global_option["atomic"] || global_option["optimize"] )
        return false;
    // -----------------------------------------------------
    // setup
    typedef adouble    ADScalar;
    typedef ADScalar*  ADVector;

    int tag  = 0;         // tape identifier
    int m    = 1;         // number of dependent variables
    int n    = size*size; // number of independent variables
    double f;             // function value
    int j;                // temporary index

    // set up for thread_alloc memory allocator (fast and checks for leaks)
    using CppAD::thread_alloc; // the allocator
    size_t capacity;           // capacity of an allocation

    // AD domain space vector
    ADVector X = thread_alloc::create_array<ADScalar>(size_t(n), capacity);

    // Product matrix
    ADVector Y = thread_alloc::create_array<ADScalar>(size_t(n), capacity);

    // AD range space vector
    ADVector Z = thread_alloc::create_array<ADScalar>(size_t(m), capacity);

    // vector with matrix value
    double* mat = thread_alloc::create_array<double>(size_t(n), capacity);

    // vector of reverse mode weights
    double* u  = thread_alloc::create_array<double>(size_t(m), capacity);
    u[0] = 1.;

    // gradient
    double* grad = thread_alloc::create_array<double>(size_t(n), capacity);

    // ----------------------------------------------------------------------
    if( ! global_option["onetape"] ) while(repeat--)
    {   // choose a matrix
        CppAD::uniform_01(n, mat);

        // declare independent variables
        int keep = 1; // keep forward mode results
        trace_on(tag, keep);
        for(j = 0; j < n; j++)
            X[j] <<= mat[j];

        // do computations
        CppAD::mat_sum_sq(size, X, Y, Z);

        // create function object f : X -> Z
        Z[0] >>= f;
        trace_off();

        // evaluate and return gradient using reverse mode
        fos_reverse(tag, m, n, u, grad);
    }
    else
    {   // choose a matrix
        CppAD::uniform_01(n, mat);

        // declare independent variables
        int keep = 0; // do not keep forward mode results
        trace_on(tag, keep);
        for(j = 0; j < n; j++)
            X[j] <<= mat[j];

        // do computations
        CppAD::mat_sum_sq(size, X, Y, Z);

        // create function object f : X -> Z
        Z[0] >>= f;
        trace_off();

        while(repeat--)
        {   // choose a matrix
            CppAD::uniform_01(n, mat);

            // evaluate the determinant at the new matrix value
            keep = 1; // keep this forward mode result
            zos_forward(tag, m, n, keep, mat, &f);

            // evaluate and return gradient using reverse mode
            fos_reverse(tag, m, n, u, grad);
        }
    }
    // return function, matrix, and gradient
    z[0] = f;
    for(j = 0; j < n; j++)
    {   x[j]  = mat[j];
        dz[j] = grad[j];
    }

    // tear down
    thread_alloc::delete_array(X);
    thread_alloc::delete_array(Y);
    thread_alloc::delete_array(Z);
    thread_alloc::delete_array(mat);
    thread_alloc::delete_array(u);
    thread_alloc::delete_array(grad);

    return true;
}

Input File: speed/adolc/mat_mul.cpp