Sparse Matrix

9 functions for working with sparse matrices in CSC (Compressed Sparse Column) format. Sparse storage is essential for single-cell RNA-seq count matrices, where more than 90% of values are zero.

sparse_matrix

Create a sparse matrix from COO triplets (row, column, value).

sparse_matrix(entries, nrows, ncols) -> sparse
| Parameter | Type | Description |
|---|---|---|
| entries | list<[row, col, val]> | Triplets of (row_idx, col_idx, value) |
| nrows | int | Number of rows |
| ncols | int | Number of columns |
# 30,000 genes x 10,000 cells, mostly zeros
let entries = [
  [0, 5, 3.0],     # gene 0, cell 5 = 3 counts
  [0, 12, 1.0],    # gene 0, cell 12 = 1 count
  [142, 5, 7.0],   # gene 142, cell 5 = 7 counts
]
let mat = sparse_matrix(entries, 30000, 10000)

to_dense / to_sparse

Convert between sparse and dense matrix representations.

to_dense(sparse) -> matrix
to_sparse(matrix) -> sparse
let dense = to_dense(mat)   # full matrix (caution: memory!)
let sp = to_sparse(matrix([[1, 0, 0], [0, 0, 2], [0, 3, 0]]))
println(nnz(sp))   # 3

Caution: calling to_dense on a large scRNA-seq matrix (30k x 10k) allocates ~2.4 GB (3 x 10^8 values, assuming 8 bytes per value). Prefer sparse operations whenever possible.

nnz

Count of non-zero entries in the sparse matrix.

nnz(sparse) -> int
let mat = sparse_matrix([[0, 0, 5.0], [1, 2, 3.0]], 10, 10)
nnz(mat)   # 2

# Sparsity ratio
let sparsity = 1.0 - float(nnz(mat)) / float(10 * 10)
println("Sparsity:", round(sparsity * 100, 1), "%")   # Sparsity: 98.0%

sparse_get

Get a single value from a sparse matrix by row and column index.

sparse_get(sparse, row, col) -> float
let mat = sparse_matrix([[0, 0, 5.0], [1, 2, 3.0]], 10, 10)
sparse_get(mat, 0, 0)   # 5.0
sparse_get(mat, 0, 1)   # 0.0  (not stored = zero)

normalize_sparse

Normalize each column (cell) so that its values sum to a target value. If target is omitted it defaults to 10,000, i.e. counts per 10,000, a CPM-style library-size normalization.

normalize_sparse(mat, target?) -> sparse
let raw_counts = sparse_matrix(entries, 30000, 10000)
let normalized = normalize_sparse(raw_counts, 10000)   # library-size normalization

sparse_row_sums / sparse_col_sums

Compute row or column sums of a sparse matrix efficiently.

sparse_row_sums(sparse) -> list
sparse_col_sums(sparse) -> list
let mat = sparse_matrix([[0, 0, 5.0], [0, 1, 3.0], [1, 0, 2.0]], 3, 3)
sparse_row_sums(mat)   # [8.0, 2.0, 0.0]
sparse_col_sums(mat)   # [7.0, 3.0, 0.0]