Table Operations
32 dplyr-inspired functions for columnar data. Tables are BioLang's primary data structure for CSV files, variant calls, and expression matrices. All operations are immutable and pipeable.
table
Create a table from a column map. Each key is a column name and each value is a list of that column's values; all lists must have the same length.
table(columns) -> table
let t = table({
"gene": ["BRCA1", "TP53", "EGFR", "KRAS"],
"log2fc": [2.3, -1.5, 3.1, -0.8],
"pvalue": [0.001, 0.05, 0.0001, 0.3]
})
csv / write_csv
Read a CSV/TSV file into a table or write a table to file.
csv(path, opts?) -> table
write_csv(tbl, path, opts?) -> string
| Option | Type | Default | Description |
|---|---|---|---|
| separator | string | "," | Field delimiter |
| header | bool | true | First row is header |
| comment | string | nil | Comment prefix to skip lines |
| skip | int | 0 | Lines to skip before header |
# Read CSV
let de = csv("deseq2_results.csv")
# Read TSV (e.g., VCF or BED)
let bed = csv("regions.bed", {separator: "\t", header: false})
# Write filtered results
de |> filter(|row| row.padj < 0.05)
|> write_csv("significant.csv")
select
Select a subset of columns by name.
select(tbl, columns...) -> table
de |> select("gene", "log2fc", "padj")
filter (table)
Keep only the rows for which the predicate returns true. The callback receives each row as a map.
filter(tbl, fn) -> table
de |> filter(|row| row.padj < 0.05 and abs(row.log2fc) > 1.0)
arrange
Sort table rows by one or more columns. Prefix a column name with "-" to sort by that column in descending order.
arrange(tbl, columns...) -> table
de |> arrange("padj") # ascending by p-value
de |> arrange("-log2fc") # descending by fold change
de |> arrange("chrom", "start") # multi-column sort
group_by / summarize
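Group rows and compute aggregate statistics per group. In the example below, the summarize callback receives each group's key and its rows, and returns a map that becomes one row of the result.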
group_by(tbl, columns...) -> grouped_table
summarize(grouped, aggregations) -> table
let counts = csv("gene_counts.csv")
counts
|> group_by("sample")
|> summarize(|key, rows| {
sample: key,
total_reads: sum(col(rows, "count")),
num_genes: len(unique(col(rows, "gene"))),
mean_count: mean(col(rows, "count"))
})
mutate
Add or modify a column using a row-wise function.
mutate(tbl, name, fn) -> table
de |> mutate("neg_log10p", |row| -log10(row.padj))
|> mutate("significant", |row| row.padj < 0.05 and abs(row.log2fc) > 1)
|> mutate("direction", |row| if row.log2fc > 0 { "up" } else { "down" })
Joins
SQL-style table joins. The on parameter specifies the join key column(s).
left_join(a, b, on) -> table
inner_join(a, b, on) -> table
right_join(a, b, on) -> table
outer_join(a, b, on) -> table
semi_join(a, b, on) -> table # rows in a that have a match in b
anti_join(a, b, on) -> table # rows in a that have NO match in b
cross_join(a, b) -> table # cartesian product
let expression = csv("expression.csv") # gene, log2fc, padj
let annotation = csv("annotation.csv") # gene, chrom, start, end, biotype
# Annotate DE results with genomic coordinates
let annotated = left_join(expression, annotation, "gene")
# Keep only protein-coding genes with significant changes
annotated
|> filter(|row| row.biotype == "protein_coding" and row.padj < 0.05)
# Anti-join: find genes in expression but NOT in annotation
let missing = anti_join(expression, annotation, "gene")
pivot_wider / pivot_longer
Reshape between long and wide formats.
pivot_wider(tbl, names_from, values_from) -> table
pivot_longer(tbl, cols, names_to, values_to) -> table
# Long to wide: one column per sample
let long_counts = csv("counts_long.csv") # gene, sample, count
let wide = pivot_wider(long_counts, "sample", "count")
# gene | sample_A | sample_B | sample_C
# Wide to long for plotting
let wide_expr = csv("expression_matrix.csv")
let long = pivot_longer(wide_expr, ["S1", "S2", "S3"], "sample", "expression")
Window Functions
Ordered operations computed within groups or the full table.
window(tbl, name, fn, opts?) -> table
row_number(tbl, name, order_by) -> table
rank(tbl, name, order_by) -> table
lead(tbl, name, col, n?) -> table
lag(tbl, name, col, n?) -> table
# Rank genes by p-value per chromosome
de |> group_by("chrom")
|> rank("rank", "padj")
# Compute running difference
de |> arrange("padj")
|> lag("prev_padj", "padj", 1)
|> mutate("delta", |row| row.padj - (row.prev_padj ?? 0))
Utility Functions
| Function | Signature | Description |
|---|---|---|
| rename | rename(tbl, old, new) -> table | Rename a column |
| distinct | distinct(tbl, cols?) -> table | Remove duplicate rows |
| head | head(tbl, n?) -> table | First n rows (default 10) |
| tail | tail(tbl, n?) -> table | Last n rows (default 10) |
| nrow | nrow(tbl) -> int | Number of rows |
| ncol | ncol(tbl) -> int | Number of columns |
| colnames | colnames(tbl) -> list | List of column names |
| to_records | to_records(tbl) -> list | Convert to list of row maps |
let de = csv("results.csv")
println("Rows:", nrow(de), "Cols:", ncol(de))
println("Columns:", colnames(de))
de |> head(5) # preview first 5 rows
where
Alias for filter with a more SQL-like feel. Keeps only the rows for which the condition is true.
# Filter rows where score exceeds threshold
data |> where(|r| r.score > 30)
# Equivalent to filter
data |> filter(|r| r.score > 30)
# Chain multiple where clauses
variants
|> where(|r| r.quality > 20)
|> where(|r| r.af < 0.01)
|> where(|r| r.impact == "HIGH")
case_when
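Conditional value mapping — returns the value paired with the first condition that matches. Takes alternating arguments: a condition lambda followed by the value to return when that condition is true. Use a final `|_| true` condition to supply a default.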
# Classify variants by allele frequency
data |> mutate("impact", |r| case_when(
|_| r.af > 0.05, "common",
|_| r.af > 0.01, "low_freq",
|_| true, "rare"
))
# Assign risk levels
patients |> mutate("risk", |r| case_when(
|_| r.score > 90, "critical",
|_| r.score > 60, "high",
|_| r.score > 30, "moderate",
|_| true, "low"
))