@(@\newcommand{\W}[1]{ \; #1 \; }
\newcommand{\R}[1]{ {\rm #1} }
\newcommand{\B}[1]{ {\bf #1} }
\newcommand{\D}[2]{ \frac{\partial #1}{\partial #2} }
\newcommand{\DD}[3]{ \frac{\partial^2 #1}{\partial #2 \partial #3} }
\newcommand{\Dpow}[2]{ \frac{\partial^{#1}}{\partial {#2}^{#1}} }
\newcommand{\dpow}[2]{ \frac{ {\rm d}^{#1}}{{\rm d}\, {#2}^{#1}} }@)@This is the cppad-20221105 documentation. A link to the
current documentation is available online.
atomic_two Eigen Matrix Inversion Class
Purpose
Construct an atomic operation that computes the matrix inverse
@(@
R = A^{-1}
@)@
for any positive integer @(@
p
@)@
and invertible matrix @(@
A \in \B{R}^{p \times p}
@)@.
Matrix Dimensions
This example puts the matrix dimension @(@
p
@)@
in the atomic function arguments,
instead of the constructor
,
so it can be different for different calls to the atomic function.
Forward
The zero order forward mode Taylor coefficient is given by
@[@
R_0 = A_0^{-1}
@]@
For @(@
k = 1 , \ldots
@)@,
the k-th order Taylor coefficient of @(@
A R
@)@ is given by
@[@
0 = \sum_{\ell=0}^k A_\ell R_{k-\ell}
@]@
Solving for @(@
R_k
@)@ in terms of the coefficients
for @(@
A
@)@ and the lower order coefficients for @(@
R
@)@ we have
@[@
R_k = - R_0 \left( \sum_{\ell=1}^k A_\ell R_{k-\ell} \right)
@]@
Furthermore, once we have @(@
R_k
@)@ we can compute the sum using
@[@
A_0 R_k = - \left( \sum_{\ell=1}^k A_\ell R_{k-\ell} \right)
@]@
Product of Three Matrices
Suppose @(@
\bar{E}
@)@ is the derivative of the
scalar value function @(@
s(E)
@)@ with respect to @(@
E
@)@; i.e.,
@[@
\bar{E}_{i,j} = \frac{ \partial s } { \partial E_{i,j} }
@]@
Also suppose that @(@
t
@)@ is a scalar valued argument and
@[@
E(t) = B(t) C(t) D(t)
@]@
It follows that
@[@
E'(t) = B'(t) C(t) D(t) + B(t) C'(t) D(t) + B(t) C(t) D'(t)
@]@
@[@
(s \circ E)'(t)
=
\R{tr} [ \bar{E}^\R{T} E'(t) ]
@]@
@[@
=
\R{tr} [ \bar{E}^\R{T} B'(t) C(t) D(t) ] +
\R{tr} [ \bar{E}^\R{T} B(t) C'(t) D(t) ] +
\R{tr} [ \bar{E}^\R{T} B(t) C(t) D'(t) ]
@]@
@[@
=
\R{tr} [ C(t) D(t) \bar{E}^\R{T} B'(t) ] +
\R{tr} [ D(t) \bar{E}^\R{T} B(t) C'(t) ] +
\R{tr} [ \bar{E}^\R{T} B(t) C(t) D'(t) ]
@]@
@[@
\bar{B} = \bar{E} (C D)^\R{T} \W{,}
\bar{C} = B^\R{T} \bar{E} D^\R{T} \W{,}
\bar{D} = (B C)^\R{T} \bar{E}
@]@
Reverse
For @(@
k > 0
@)@, reverse mode
eliminates @(@
R_k
@)@ and expresses the function values
@(@
s
@)@ in terms of the coefficients of @(@
A
@)@
and the lower order coefficients of @(@
R
@)@.
The effect on @(@
\bar{R}_0
@)@
(of eliminating @(@
R_k
@)@) is
@[@
\bar{R}_0
= \bar{R}_0 - \bar{R}_k \left( \sum_{\ell=1}^k A_\ell R_{k-\ell} \right)^\R{T}
= \bar{R}_0 + \bar{R}_k ( A_0 R_k )^\R{T}
@]@
For @(@
\ell = 1 , \ldots , k
@)@,
the effect on @(@
\bar{R}_{k-\ell}
@)@ and @(@
A_\ell
@)@
(of eliminating @(@
R_k
@)@) is
@[@
\bar{A}_\ell = \bar{A}_\ell - R_0^\R{T} \bar{R}_k R_{k-\ell}^\R{T}
@]@
@[@
\bar{R}_{k-\ell} = \bar{R}_{k-\ell} - ( R_0 A_\ell )^\R{T} \bar{R}_k
@]@
We note that
@[@
R_0 '(t) A_0 (t) + R_0 (t) A_0 '(t) = 0
@]@
@[@
R_0 '(t) = - R_0 (t) A_0 '(t) R_0 (t)
@]@
The reverse mode formula that eliminates @(@
R_0
@)@ is
@[@
\bar{A}_0
= \bar{A}_0 - R_0^\R{T} \bar{R}_0 R_0^\R{T}
@]@
namespace { // BEGIN_EMPTY_NAMESPACEtemplate <class Base>
class atomic_eigen_mat_inv : public CppAD::atomic_base<Base> {
public:
// -----------------------------------------------------------// type of elements during calculation of derivativestypedef Base scalar;
// type of elements during tapingtypedef CppAD::AD<scalar> ad_scalar;
// type of matrix during calculation of derivativestypedef Eigen::Matrix<
scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> matrix;
// type of matrix during tapingtypedef Eigen::Matrix<
ad_scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor > ad_matrix;
// use atomic operation to invert an AD matrix
// Tape the atomic matrix-inverse operation: returns arg^{-1} as an AD matrix.
// arg must be square; its dimension nr is packed as the first atomic
// argument so that it can be different for different calls.
ad_matrix op(const ad_matrix& arg)
{  size_t nr = size_t( arg.rows() );
   size_t ny = nr * nr;   // number of elements in the matrix
   size_t nx = 1 + ny;    // the dimension nr followed by the matrix elements
   assert( nr == size_t( arg.cols() ) );
   // -------------------------------------------------------------------
   // packed version of arg
   CPPAD_TESTVECTOR(ad_scalar) packed_arg(nx);
   packed_arg[0] = ad_scalar( nr );
   for(size_t i = 0; i < ny; i++)
      packed_arg[1 + i] = arg.data()[i];
   // -------------------------------------------------------------------
   // packed version of result = arg^{-1}.
   // This is an atomic_base function call that CppAD uses to
   // store the atomic operation on the tape.
   CPPAD_TESTVECTOR(ad_scalar) packed_result(ny);
   (*this)(packed_arg, packed_result);
   // -------------------------------------------------------------------
   // unpack result matrix
   ad_matrix result(nr, nr);
   for(size_t i = 0; i < ny; i++)
      result.data()[i] = packed_result[i];
   return result;
}
private:
// -------------------------------------------------------------// one forward mode vector of matrices for argument and result
CppAD::vector<matrix> f_arg_, f_result_;
// one reverse mode vector of matrices for argument and result
CppAD::vector<matrix> r_arg_, r_result_;
// -------------------------------------------------------------
// forward mode routine called by CppADvirtual bool forward(
// lowest order Taylor coefficient we are evaluating
size_t p ,
// highest order Taylor coefficient we are evaluating
size_t q ,
// which components of x are variablesconst CppAD::vector<bool>& vx ,
// which components of y are variables
CppAD::vector<bool>& vy ,
// tx [ j * (q+1) + k ] is x_j^kconst CppAD::vector<scalar>& tx ,
// ty [ i * (q+1) + k ] is y_i^k
CppAD::vector<scalar>& ty
)
{ size_t n_order = q + 1;
size_t nr = size_t( CppAD::Integer( tx[ 0 * n_order + 0 ] ) );
size_t ny = nr * nr;
# ifndef NDEBUG
size_t nx = 1 + ny;
# endifassert( vx.size() == 0 || nx == vx.size() );
assert( vx.size() == 0 || ny == vy.size() );
assert( nx * n_order == tx.size() );
assert( ny * n_order == ty.size() );
//// -------------------------------------------------------------------// make sure f_arg_ and f_result_ are large enoughassert( f_arg_.size() == f_result_.size() );
if( f_arg_.size() < n_order )
{ f_arg_.resize(n_order);
f_result_.resize(n_order);
//for(size_t k = 0; k < n_order; k++)
{ f_arg_[k].resize( long(nr), long(nr) );
f_result_[k].resize( long(nr), long(nr) );
}
}
// -------------------------------------------------------------------// unpack tx into f_arg_for(size_t k = 0; k < n_order; k++)
{ // unpack arg values for this orderfor(size_t i = 0; i < ny; i++)
f_arg_[k].data()[i] = tx[ (1 + i) * n_order + k ];
}
// -------------------------------------------------------------------// result for each order// (we could avoid recalculting f_result_[k] for k=0,...,p-1)//
f_result_[0] = f_arg_[0].inverse();
for(size_t k = 1; k < n_order; k++)
{ // initialize sum
matrix f_sum = matrix::Zero( long(nr), long(nr) );
// compute sumfor(size_t ell = 1; ell <= k; ell++)
f_sum -= f_arg_[ell] * f_result_[k-ell];
// result_[k] = arg_[0]^{-1} * sum_
f_result_[k] = f_result_[0] * f_sum;
}
// -------------------------------------------------------------------// pack result_ into tyfor(size_t k = 0; k < n_order; k++)
{ for(size_t i = 0; i < ny; i++)
ty[ i * n_order + k ] = f_result_[k].data()[i];
}
// -------------------------------------------------------------------// check if we are computing vyif( vx.size() == 0 )
returntrue;
// ------------------------------------------------------------------// This is a very dumb algorithm that over estimates which// elements of the inverse are variables (which is not efficient).
bool var = false;
for(size_t i = 0; i < ny; i++)
var |= vx[1 + i];
for(size_t i = 0; i < ny; i++)
vy[i] = var;
returntrue;
}