Tan and Tanh as User Atomic Operations: Example and Test

atomic_three_tangent.cpp

@(@\newcommand{\W}[1]{ \; #1 \; } \newcommand{\R}[1]{ {\rm #1} } \newcommand{\B}[1]{ {\bf #1} } \newcommand{\D}[2]{ \frac{\partial #1}{\partial #2} } \newcommand{\DD}[3]{ \frac{\partial^2 #1}{\partial #2 \partial #3} } \newcommand{\Dpow}[2]{ \frac{\partial^{#1}}{\partial {#2}^{#1}} } \newcommand{\dpow}[2]{ \frac{ {\rm d}^{#1}}{{\rm d}\, {#2}^{#1}} }@)@This is cppad-20221105 documentation. Here is a link to its current documentation . Tan and Tanh as User Atomic Operations: Example and Test
Discussion
The code below uses the tan_forward and tan_reverse to implement the tangent and hyperbolic tangent functions as atomic function operations. It also uses AD<float>, while most atomic examples use AD<double>.

Start Class Definition

# include <cppad/cppad.hpp>
namespace { // Begin empty namespace
using CppAD::vector;
//
class atomic_tangent : public CppAD::atomic_three<float> {

Constructor

private:
    const bool hyperbolic_; // is this hyperbolic tangent
public:
    // constructor
    atomic_tangent(const char* name, bool hyperbolic)
    : CppAD::atomic_three<float>(name),
    hyperbolic_(hyperbolic)
    { }
private:

for_type

    // calculate type_y
    bool for_type(
        const vector<float>&                parameter_x ,
        const vector<CppAD::ad_type_enum>&  type_x      ,
        vector<CppAD::ad_type_enum>&        type_y      ) override
    {   assert( parameter_x.size() == type_x.size() );
        bool ok = type_x.size() == 1; // n
        ok     &= type_y.size() == 2; // m
        if( ! ok )
            return false;
        type_y[0] = type_x[0];
        type_y[1] = type_x[0];
        return true;
    }

forward

    // forward mode routine called by CppAD
    bool forward(
        const vector<float>&               parameter_x ,
        const vector<CppAD::ad_type_enum>& type_x      ,
        size_t                             need_y      ,
        size_t                             p           ,
        size_t                             q           ,
        const vector<float>&               tx          ,
        vector<float>&                     tzy         ) override
    {   size_t q1 = q + 1;
# ifndef NDEBUG
        size_t n  = tx.size()  / q1;
        size_t m  = tzy.size() / q1;
# endif
        assert( type_x.size() == n );
        assert( n == 1 );
        assert( m == 2 );
        assert( p <= q );
        size_t j, k;

        if( p == 0 )
        {   // z^{(0)} = tan( x^{(0)} ) or tanh( x^{(0)} )
            if( hyperbolic_ )
                tzy[0] = float( tanh( tx[0] ) );
            else
                tzy[0] = float( tan( tx[0] ) );

            // y^{(0)} = z^{(0)} * z^{(0)}
            tzy[q1 + 0] = tzy[0] * tzy[0];

            p++;
        }
        for(j = p; j <= q; j++)
        {   float j_inv = 1.f / float(j);
            if( hyperbolic_ )
                j_inv = - j_inv;

            // z^{(j)} = x^{(j)} +- sum_{k=1}^j k x^{(k)} y^{(j-k)} / j
            tzy[j] = tx[j];
            for(k = 1; k <= j; k++)
                tzy[j] += tx[k] * tzy[q1 + j-k] * float(k) * j_inv;

            // y^{(j)} = sum_{k=0}^j z^{(k)} z^{(j-k)}
            tzy[q1 + j] = 0.;
            for(k = 0; k <= j; k++)
                tzy[q1 + j] += tzy[k] * tzy[j-k];
        }

        // All orders are implemented and there are no possible errors
        return true;
    }

reverse

    // reverse mode routine called by CppAD
    bool reverse(
        const vector<float>&               parameter_x ,
        const vector<CppAD::ad_type_enum>& type_x      ,
        size_t                             q           ,
        const vector<float>&               tx          ,
        const vector<float>&               tzy         ,
        vector<float>&                     px          ,
        const vector<float>&               pzy         ) override
    {   size_t q1 = q + 1;
# ifndef NDEBUG
        size_t n  = tx.size()  / q1;
        size_t m  = tzy.size() / q1;
# endif
        assert( px.size()  == n * q1 );
        assert( pzy.size() == m * q1 );
        assert( n == 1 );
        assert( m == 2 );

        size_t j, k;

        // copy because partials w.r.t. y and z need to change
        vector<float> qzy = pzy;

        // initialize accumultion of reverse mode partials
        for(k = 0; k < q1; k++)
            px[k] = 0.;

        // eliminate positive orders
        for(j = q; j > 0; j--)
        {   float j_inv = 1.f / float(j);
            if( hyperbolic_ )
                j_inv = - j_inv;

            // H_{x^{(k)}} += delta(j-k) +- H_{z^{(j)} y^{(j-k)} * k / j
            px[j] += qzy[j];
            for(k = 1; k <= j; k++)
                px[k] += qzy[j] * tzy[q1 + j-k] * float(k) * j_inv;

            // H_{y^{j-k)} += +- H_{z^{(j)} x^{(k)} * k / j
            for(k = 1; k <= j; k++)
                qzy[q1 + j-k] += qzy[j] * tx[k] * float(k) * j_inv;

            // H_{z^{(k)}} += H_{y^{(j-1)}} * z^{(j-k-1)} * 2.
            for(k = 0; k < j; k++)
                qzy[k] += qzy[q1 + j-1] * tzy[j-k-1] * 2.f;
        }

        // eliminate order zero
        if( hyperbolic_ )
            px[0] += qzy[0] * (1.f - tzy[q1 + 0]);
        else
            px[0] += qzy[0] * (1.f + tzy[q1 + 0]);

        return true;
    }

jac_sparsity

    // Jacobian sparsity routine called by CppAD
    bool jac_sparsity(
        const vector<float>&                parameter_x ,
        const vector<CppAD::ad_type_enum>&  type_x      ,
        bool                                dependency  ,
        const vector<bool>&                 select_x    ,
        const vector<bool>&                 select_y    ,
        CppAD::sparse_rc< vector<size_t> >& pattern_out ) override
    {
        size_t n = select_x.size();
        size_t m = select_y.size();
        assert( parameter_x.size() == n );
        assert( n == 1 );
        assert( m == 2 );

        // number of non-zeros in sparsity pattern
        size_t nnz = 0;
        if( select_x[0] )
        {   if( select_y[0] )
                ++nnz;
            if( select_y[1] )
                ++nnz;
        }

        // sparsity pattern
        pattern_out.resize(m, n, nnz);
        size_t k = 0;
        if( select_x[0] )
        {   if( select_y[0] )
                pattern_out.set(k++, 0, 0);
            if( select_y[1] )
                pattern_out.set(k++, 1, 0);
        }
        assert( k == nnz );

        return true;
    }

hes_sparsity

    // Hessian sparsity routine called by CppAD
    bool hes_sparsity(
        const vector<float>&                parameter_x ,
        const vector<CppAD::ad_type_enum>&  type_x      ,
        const vector<bool>&                 select_x    ,
        const vector<bool>&                 select_y    ,
        CppAD::sparse_rc< vector<size_t> >& pattern_out ) override
    {
        assert( parameter_x.size() == select_x.size() );
        assert( select_y.size() == 2 );
        size_t n = select_x.size();
        assert( n == 1 );

        // number of non-zeros in sparsity pattern
        size_t nnz = 0;
        if( select_x[0] & (select_y[0] | select_y[1]) )
            nnz = 1;
        // sparsity pattern
        pattern_out.resize(n, n, nnz);
        if( select_x[0] & (select_y[0] | select_y[1]) )
            pattern_out.set(0, 0, 0);

        return true;
    }

End Class Definition


}; // End of atomic_tangent class
}  // End empty namespace

Use Atomic Function

bool tangent(void)
{   bool ok = true;
    using CppAD::AD;
    using CppAD::NearEqual;
    float eps = 10.f * CppAD::numeric_limits<float>::epsilon();

Constructor


    // --------------------------------------------------------------------
    // Creater a tan and tanh object
    atomic_tangent my_tan("my_tan", false), my_tanh("my_tanh", true);

Recording

    // domain space vector
    size_t n  = 1;
    float  x0 = 0.5;
    CppAD::vector< AD<float> > ax(n);
    ax[0]     = x0;

    // declare independent variables and start tape recording
    CppAD::Independent(ax);

    // range space vector
    size_t m = 3;
    CppAD::vector< AD<float> > av(m);

    // temporary vector for computations
    // (my_tan and my_tanh computes tan or tanh and its square)
    CppAD::vector< AD<float> > au(2);

    // call atomic tan function and store tan(x) in f[0], ignore tan(x)^2
    my_tan(ax, au);
    av[0] = au[0];

    // call atomic tanh function and store tanh(x) in f[1], ignore tanh(x)^2
    my_tanh(ax, au);
    av[1] = au[0];

    // put a constant in f[2] = tanh(1.),  for sparsity pattern testing
    CppAD::vector< AD<float> > one(1);
    one[0] = 1.;
    my_tanh(one, au);
    av[2] = au[0];

    // create f: x -> v and stop tape recording
    CppAD::ADFun<float> f;
    f.Dependent(ax, av);

forward

    // check function value
    float tan = std::tan(x0);
    ok &= NearEqual(av[0] , tan,  eps, eps);
    float tanh = std::tanh(x0);
    ok &= NearEqual(av[1] , tanh,  eps, eps);

    // check zero order forward
    CppAD::vector<float> x(n), v(m);
    x[0] = x0;
    v    = f.Forward(0, x);
    ok &= NearEqual(v[0] , tan,  eps, eps);
    ok &= NearEqual(v[1] , tanh,  eps, eps);

    // tan'(x)   = 1 + tan(x)  * tan(x)
    // tanh'(x)  = 1 - tanh(x) * tanh(x)
    float tanp  = 1.f + tan * tan;
    float tanhp = 1.f - tanh * tanh;

    // compute first partial of f w.r.t. x[0] using forward mode
    CppAD::vector<float> dx(n), dv(m);
    dx[0] = 1.;
    dv    = f.Forward(1, dx);
    ok   &= NearEqual(dv[0] , tanp,   eps, eps);
    ok   &= NearEqual(dv[1] , tanhp,  eps, eps);
    ok   &= NearEqual(dv[2] , 0.f,    eps, eps);

    // tan''(x)   = 2 *  tan(x) * tan'(x)
    // tanh''(x)  = - 2 * tanh(x) * tanh'(x)
    // Note that second order Taylor coefficient for u half the
    // corresponding second derivative.
    float two    = 2;
    float tanpp  =   two * tan * tanp;
    float tanhpp = - two * tanh * tanhp;

    // compute second partial of f w.r.t. x[0] using forward mode
    CppAD::vector<float> ddx(n), ddv(m);
    ddx[0] = 0.;
    ddv    = f.Forward(2, ddx);
    ok   &= NearEqual(two * ddv[0], tanpp, eps, eps);
    ok   &= NearEqual(two * ddv[1], tanhpp, eps, eps);
    ok   &= NearEqual(two * ddv[2], 0.f, eps, eps);

reverse

    // compute derivative of tan - tanh using reverse mode
    CppAD::vector<float> w(m), dw(n);
    w[0]  = 1.;
    w[1]  = 1.;
    w[2]  = 0.;
    dw    = f.Reverse(1, w);
    ok   &= NearEqual(dw[0], w[0]*tanp + w[1]*tanhp, eps, eps);

    // compute second derivative of tan - tanh using reverse mode
    CppAD::vector<float> ddw(2);
    ddw   = f.Reverse(2, w);
    ok   &= NearEqual(ddw[0], w[0]*tanp  + w[1]*tanhp , eps, eps);
    ok   &= NearEqual(ddw[1], w[0]*tanpp + w[1]*tanhpp, eps, eps);

for_jac_sparsity

    // forward mode Jacobian sparstiy pattern
    CppAD::sparse_rc< CPPAD_TESTVECTOR(size_t) > pattern_in, pattern_out;
    pattern_in.resize(1, 1, 1);
    pattern_in.set(0, 0, 0);
    bool transpose     = false;
    bool dependency    = false;
    bool internal_bool = false;
    f.for_jac_sparsity(
        pattern_in, transpose, dependency, internal_bool, pattern_out
    );
    // (0, 0) and (1, 0) are in sparsity pattern
    ok &= pattern_out.nnz() == 2;
    ok &= pattern_out.row()[0] == 0;
    ok &= pattern_out.col()[0] == 0;
    ok &= pattern_out.row()[1] == 1;
    ok &= pattern_out.col()[1] == 0;

rev_sparse_hes

    // Hesian sparsity (using previous for_jac_sparsity call)
    CPPAD_TESTVECTOR(bool) select_y(m);
    select_y[0] = true;
    select_y[1] = false;
    select_y[2] = false;
    f.rev_hes_sparsity(
        select_y, transpose, internal_bool, pattern_out
    );
    ok &= pattern_out.nnz() == 1;
    ok &= pattern_out.row()[0] == 0;
    ok &= pattern_out.col()[0] == 0;

Large x Values

    // check tanh results for a large value of x
    x[0]  = std::numeric_limits<float>::max() / two;
    v     = f.Forward(0, x);
    tanh  = 1.;
    ok   &= NearEqual(v[1], tanh, eps, eps);
    dv    = f.Forward(1, dx);
    tanhp = 0.;
    ok   &= NearEqual(dv[1], tanhp, eps, eps);

    return ok;
}

Input File: example/atomic_three/tangent.cpp