'''
sparseUtil provides useful sparse matrix operations for incremental
construction of large sparse matrices from its block elements. Class
:class:`csc_matrixPlus` derives from ``sparse.csc_matrix`` and makes it
possible to set elements outside the matrix dimensions by resizing
the matrix whenever necessary. Class :class:`csr_matrixPlus` does
the same thing with ``sparse.csr_matrix``.
Function :py:func:`sparseConcat` concatenates two sparse matrices
regardless of their dimension alignments. Fills with zeros where necessary.
'''
# Python 3 does not have long, only int
try:
long
except NameError:
long = int
from scipy import sparse
import numpy as np
[docs]class csc_matrixPlus(sparse.csc_matrix):
def __init__(self, arg1, shape=None, dtype=None,
copy=False, fromMatrix=None):
sparse.csc_matrix.__init__(self, arg1, shape, dtype, copy)
if fromMatrix:
self.__dict__.update(fromMatrix.__dict__)
from cylp.py.modeling import CyLPExpr as CyLPExpr
self.CyLPExpr = CyLPExpr
self.rowScaleFactor = self.colScaleFactor = None
[docs] def __setitem__(self, location, val):
'''
Set the item in row ``i`` and column ``j`` to ``val``.
Increases matrix's size if necessary
**Usage**
>>> from cylp.py.utils.sparseUtil import csc_matrixPlus
>>> import numpy as np
>>> indptr = np.array([0, 2, 3, 6])
>>> indices = np.array([0, 2, 2, 0, 1, 2])
>>> data = np.array([1, 2, 3, 4, 5, 6])
>>> s = csc_matrixPlus((data, indices, indptr), shape=(3, 3))
>>> s[2, 5] = 11
>>> s.todense()
matrix([[ 1, 0, 4, 0, 0, 0],
[ 0, 0, 5, 0, 0, 0],
[ 2, 3, 6, 0, 0, 11]])
'''
iRow, iCol = location
if not isinstance(val, (int, long, float)):
return sparse.csc_matrix.__setitem__(self, (iRow, iCol), val)
nCols = self.shape[1]
#update shape if nec.
if iRow >= self.shape[0]:
self._shape = (iRow + 1, self.shape[1])
if iCol < nCols:
for i in range(self.indptr[iCol], self.indptr[iCol + 1]):
if self.indices[i] == iRow:
self.data[i] = val
return
#If we reach here it means that index does NOT exist
for i in range(iCol + 1, nCols + 1):
self.indptr[i] += 1
indexOfElement = self.indptr[iCol + 1] - 1
#If indices is empty
if indexOfElement == 0:
self.indices = np.array([iRow], dtype=np.int32)
self.data = np.array([val])
else:
self.indices = np.concatenate((self.indices[:indexOfElement],
np.array([iRow], dtype=np.int32),
self.indices[indexOfElement:]), axis=0)
self.data = np.concatenate((self.data[:indexOfElement],
np.array([val]),
self.data[indexOfElement:]), axis=0)
else:
#We don't have enough columns, increase dimension 1
self.addColumns(iCol - nCols + 1)
self.indptr[iCol + 1] += 1
self.indices = np.concatenate((self.indices, np.array([iRow],
dtype=np.int32)), axis=0)
self.data = np.concatenate((self.data, np.array([val],
dtype=np.int32)), axis=0)
self._shape = (self._shape[0], iCol + 1)
[docs] def addColumns(self, nCol):
'''
Add ``nCol`` columns to the matrix
**Usage**
>>> from cylp.py.utils.sparseUtil import csc_matrixPlus
>>> s = csc_matrixPlus.getMatrixForTest()
>>> s.shape
(3, 3)
>>> s.addColumns(3)
>>> s.shape
(3, 6)
'''
nElement = len(self.data)
a = np.array(nCol * [nElement], dtype=np.int32)
self.indptr = np.concatenate((self.indptr, a), axis=0)
self._shape = (self._shape[0], self.shape[1] + nCol)
def __getitem__(self, key):
ret = sparse.csc_matrix.__getitem__(self, key)
if isinstance(ret, (int, long, float)):
return ret
# This seems to cause some potential problems when the result is 1x1
# It should really be returned as an int/float in that case, but
# this prevents it and causes behavior to be different than
# sparse.csr_matrix
return csc_matrixPlus(ret)
def row_scale(self, scaleFactor=None):
data = self.data
m = self.tocoo()
irow, jcol = m.row, m.col
if scaleFactor is not None:
data /= scaleFactor[irow]
self.rowScaleFactor = scaleFactor
return scaleFactor
rowScaleFactor = np.zeros(self.shape[0], dtype=np.float)
for k in range(len(data)):
row = irow[k]
val = abs(data[k])
rowScaleFactor[row] = max(rowScaleFactor[row], val)
rowScaleFactor[rowScaleFactor == 0.0] = 1.0
data /= rowScaleFactor[irow]
self.rowScaleFactor = rowScaleFactor
return rowScaleFactor
def col_scale(self, scaleFactor=None):
data = self.data
m = self.tocoo()
irow, jcol = m.row, m.col
if scaleFactor is not None:
data /= scaleFactor[jcol]
self.colScaleFactor = scaleFactor
return scaleFactor
colScaleFactor = np.zeros(self.shape[1], dtype=np.float)
for k in range(len(data)):
col = jcol[k]
val = abs(data[k])
colScaleFactor[col] = max(colScaleFactor[col], val)
colScaleFactor[colScaleFactor == 0.0] = 1.0
data /= colScaleFactor[jcol]
self.colScaleFactor = colScaleFactor
return colScaleFactor
def col_unscale(self, scaleFactor=None):
if scaleFactor is None:
scaleFactor = self.colScaleFactor
if scaleFactor is not None:
jcol = self.tocoo().col
self.data *= scaleFactor[jcol]
def row_unscale(self, scaleFactor=None):
if scaleFactor is None:
scaleFactor = self.rowScaleFactor
if scaleFactor is not None:
irow = self.tocoo().row
self.data *= scaleFactor[irow]
@property
def T(self):
return csr_matrixPlus(sparse.csc_matrix.transpose(self))
def __le__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csc_matrix.__le__(self, other)
def __ge__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csc_matrix.__ge__(self, other)
def __mul__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csc_matrix.__mul__(self, other)
def __rmul__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csc_matrix.__rmul__(self, other)
def __add__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csc_matrix.__add__(self, other)
def __radd__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csc_matrix.__radd__(self, other)
def __rsub__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csc_matrix.__rsub__(self, other)
def __sub__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csc_matrix.__sub__(self, other)
@staticmethod
def getMatrixForTest():
from cylp.py.utils.sparseUtil import csr_matrixPlus
import numpy as np
indptr = np.array([0, 2, 3, 6])
indices = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
s = csc_matrixPlus((data, indices, indptr), shape=(3, 3))
return s
[docs]class csr_matrixPlus(sparse.csr_matrix):
def __init__(self, arg1, shape=None, dtype=None,
copy=False, fromMatrix=None):
sparse.csr_matrix.__init__(self, arg1, shape, dtype, copy)
if fromMatrix:
self.__dict__.update(fromMatrix.__dict__)
from cylp.py.modeling import CyLPExpr as CyLPExpr
self.CyLPExpr = CyLPExpr
self.rowScaleFactor = self.colScaleFactor = None
[docs] def __setitem__(self, location, val):
'''
Sets the item in row ``i`` and col ``j`` to ``val``.
Increases matrix's ``shape[1]`` if necessary
**Usage**
>>> from cylp.py.utils.sparseUtil import csr_matrixPlus
>>> import numpy as np
>>> indptr = np.array([0, 2, 3, 6])
>>> indices = np.array([0, 2, 2, 0, 1, 2])
>>> data = np.array([1, 2, 3, 4, 5, 6])
>>> s = csr_matrixPlus((data, indices, indptr), shape=(3, 3))
>>> s[5,2] = 11
>>> print(s.todense())
[[ 1 0 2]
[ 0 0 3]
[ 4 5 6]
[ 0 0 0]
[ 0 0 0]
[ 0 0 11]]
'''
iRow, iCol = location
if not isinstance(val, (int, long, float)):
return sparse.csr_matrix.__setitem__(self, (iRow, iCol), val)
nRows = self.shape[0]
nCols = self.shape[1]
l = self.tolil()
if iCol >= nCols:
l._shape = (nRows, iCol + 1)
nCols = iCol + 1
if iRow >= nRows:
l._shape = (iRow + 1, nCols)
nRowsToAdd = iRow + 1 - nRows
l_temp = sparse.lil_matrix((nRowsToAdd, 1))
l.data = np.concatenate((l.data, l_temp.data))
l.rows = np.concatenate((l.rows, l_temp.rows))
l[iRow, iCol] = val
s = csr_matrixPlus(l)
self._nnz = s.nnz
self._shape = s._shape
self.indices = s.indices
self.indptr = s.indptr
self.data = s.data
self.has_sorted_indices = s.has_sorted_indices
return
nRows = self.shape[0]
if iCol >= self.shape[1]:
self._shape = (self.shape[0], iCol + 1)
if iRow < nRows:
for i in range(self.indptr[iRow], self.indptr[iRow + 1]):
if self.indices[i] == iCol:
self.data[i] = val
return
#if we reach here it means that index does NOT exist
for i in range(iRow + 1, nRows + 1):
self.indptr[i] += 1
indexOfElement = self.indptr[iRow + 1] - 1
# If indices is empty
if indexOfElement == 0:
self.indices = np.array([iCol], dtype=np.int32)
self.data = np.array([val])
else:
self.indices = np.concatenate((self.indices[:indexOfElement],
np.array([iCol], dtype=np.int32),
self.indices[indexOfElement:]), axis=0)
self.data = np.concatenate((self.data[:indexOfElement],
np.array([val]),
self.data[indexOfElement:]), axis=0)
else:
#We don't have enough columns, increase dimension 1
self.addRows(iRow - nRows + 1)
self.indptr[iRow + 1] += 1
self.indices = np.concatenate((self.indices, np.array([iCol],
dtype=np.int32)), axis=0)
self.data = np.concatenate((self.data, np.array([val],
dtype=np.int32)), axis=0)
self._shape = (iRow + 1, self._shape[1])
[docs] def addRows(self, nRow):
'''
Add ``nRow`` rows to the matrix
**Usage**
>>> from cylp.py.utils.sparseUtil import csr_matrixPlus
>>> s = csr_matrixPlus.getMatrixForTest()
>>> s.shape
(3, 3)
>>> s.addRows(2)
>>> s.shape
(5, 3)
'''
nElement = len(self.data)
a = np.array(nRow * [nElement], dtype=np.int32)
self.indptr = np.concatenate((self.indptr, a), axis=0)
self._shape = (self._shape[0] + nRow, self.shape[1])
def __getitem__(self, key):
ret = sparse.csr_matrix.__getitem__(self, key)
if isinstance(ret, (int, long, float)):
return ret
# This seems to cause some potential problems when the result is 1x1
# It should really be returned as an int/float in that case, but
# this prevents it and causes behavior to be different than
# sparse.csr_matrix
return csr_matrixPlus(ret)
def row_scale(self, scaleFactor=None):
data = self.data
m = self.tocoo()
irow, jcol = m.row, m.col
if scaleFactor is not None:
data /= scaleFactor[irow]
self.rowScaleFactor = scaleFactor
return scaleFactor
rowScaleFactor = np.zeros(self.shape[0], dtype=np.float)
for k in range(len(data)):
row = irow[k]
val = abs(data[k])
rowScaleFactor[row] = max(rowScaleFactor[row], val)
rowScaleFactor[rowScaleFactor == 0.0] = 1.0
data /= rowScaleFactor[irow]
self.rowScaleFactor = rowScaleFactor
return rowScaleFactor
def col_scale(self, scaleFactor=None):
data = self.data
m = self.tocoo()
irow, jcol = m.row, m.col
if scaleFactor is not None:
data /= scaleFactor[jcol]
self.colScaleFactor = scaleFactor
return scaleFactor
colScaleFactor = np.zeros(self.shape[1], dtype=np.float)
for k in range(len(data)):
col = jcol[k]
val = abs(data[k])
colScaleFactor[col] = max(colScaleFactor[col], val)
colScaleFactor[colScaleFactor == 0.0] = 1.0
data /= colScaleFactor[jcol]
self.colScaleFactor = colScaleFactor
return colScaleFactor
def col_unscale(self, scaleFactor=None):
if scaleFactor is None:
scaleFactor = self.colScaleFactor
if scaleFactor is not None:
jcol = self.tocoo().col
self.data *= scaleFactor[jcol]
def row_unscale(self, scaleFactor=None):
if scaleFactor is None:
scaleFactor = self.rowScaleFactor
if scaleFactor is not None:
irow = self.tocoo().row
self.data *= scaleFactor[irow]
@property
def T(self):
return csc_matrixPlus(sparse.csr_matrix.transpose(self))
def __le__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csr_matrix.__le__(self, other)
def __ge__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csr_matrix.__ge__(self, other)
def __mul__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csr_matrix.__mul__(self, other)
def __rmul__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csr_matrix.__rmul__(self, other)
def __add__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csr_matrix.__add__(self, other)
def __radd__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csr_matrix.__radd__(self, other)
def __rsub__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csr_matrix.__rsub__(self, other)
def __sub__(self, other):
if isinstance(other, self.CyLPExpr):
return NotImplemented
return sparse.csr_matrix.__sub__(self, other)
@staticmethod
def getMatrixForTest():
from cylp.py.utils.sparseUtil import csr_matrixPlus
import numpy as np
indptr = np.array([0, 2, 3, 6])
indices = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
return csr_matrixPlus((data, indices, indptr), shape=(3, 3))
[docs]def sparseConcat(a, b, how, v_offset=0, h_offset=0):
'''
Concatenate two sparse matrices, ``a`` and ``b``, horizontally if
``how = 'h'``, and vertically if ``how = 'v'``.
Add zero rows and columns if dimensions don't align.
``v_offset`` specifies how to align ``b`` along side ``a``. The
value of ``v_offset`` will be added to each row index of ``b``.
``v_offset=-1`` means that we want the greatest possible offset without
changeing the dimensions.
``h_offset`` is a similar argument but to specify horizontal offset.
**Usage**
>>> from scipy import sparse
>>> from cylp.py.utils.sparseUtil import sparseConcat, csc_matrixPlus
>>> s1 = csc_matrixPlus.getMatrixForTest()
>>> s2 = sparse.lil_matrix([[1,0,2],[0,5,0]])
>>> sparseConcat(s1, s2, 'v').todense()
matrix([[1, 0, 4],
[0, 0, 5],
[2, 3, 6],
[1, 0, 2],
[0, 5, 0]])
>>> sparseConcat(s1, s2, 'h').todense()
matrix([[1, 0, 4, 1, 0, 2],
[0, 0, 5, 0, 5, 0],
[2, 3, 6, 0, 0, 0]])
If ``a = None`` then return ``b``. This makes possible an incremental
construction of large sparse matrices from scratch without the hassle
of the initial value check.
>>> s3 = None
>>> ((sparseConcat(s3, s1, 'h').todense() == s1.todense()).all() and
... (sparseConcat(s3, s1, 'v').todense() == s1.todense()).all())
True
'''
if a is None:
if b is None:
return None
return csr_matrixPlus(b)
if b is None:
return csr_matrixPlus(a)
assert(h_offset >= -1 and v_offset >= -1)
a = sparse.coo_matrix(a)
b = sparse.coo_matrix(b)
if how == 'h':
if v_offset == -1:
assert(a.shape[0] > b.shape[0])
v_offset = a.shape[0] - b.shape[0]
assert(h_offset >= 0)
row = np.concatenate((a.row, b.row + v_offset), axis=0)
col = np.concatenate((a.col, b.col + (a.shape[1] + h_offset)), axis=0)
data = np.concatenate((a.data, b.data), axis=0)
nRows = max(a.shape[0], b.shape[0] + v_offset)
nCols = a.shape[1] + b.shape[1] + h_offset
a = csr_matrixPlus((data, (row, col)),
shape=(nRows, nCols))
elif how == 'v':
if h_offset == -1:
assert(a.shape[1] > b.shape[1])
h_offset = a.shape[1] - b.shape[1]
assert(v_offset >= 0)
row = np.concatenate((a.row, b.row + (a.shape[0] + v_offset)), axis=0)
col = np.concatenate((a.col, b.col + h_offset), axis=0)
data = np.concatenate((a.data, b.data), axis=0)
nCols = max(a.shape[1], b.shape[1] + h_offset)
nRows = a.shape[0] + b.shape[0] + v_offset
a = csr_matrixPlus((data, (row, col)),
shape=(nRows, nCols))
return a
def I(n):
'''
Return a sparse identity matrix of size *n*
'''
if n <= 0:
return None
return csc_matrixPlus(sparse.eye(n, n))