Intermediate ~25 minutes

Visualization

BioLang has built-in plotting functions that produce SVG output for publication-quality figures. This tutorial covers general-purpose plots; biology-specific visualizations such as volcano plots, MA plots, and heatmaps; genome track rendering; and saving results to SVG files.

What you will learn

  • General-purpose scatter and line plots with plot()
  • Histograms of value distributions with histogram()
  • Volcano plots for differential expression with volcano()
  • MA plots for expression data with ma_plot()
  • Heatmaps of matrices and tables with heatmap()
  • Genome track visualization with genome_track()
  • Saving any plot to SVG files with save_svg()
Run this tutorial: Download visualization.bl and run it with bl run examples/tutorials/visualization.bl

Step 1 — General-Purpose Plotting with plot()

The plot() builtin is your go-to for scatter and line plots. Pass it a data table and an options record to control axes, colors, and labels.

# scatter.bl — scatter and line plots
#
# Builds three small in-memory tables and draws each one with plot():
# a basic scatter, a line plot (mode: "line"), and a scatter grouped by a
# column (color_by). Each result is bound to a variable; this script does
# not write anything to disk.

# Build a simple data table
# Each column is a list; both columns here hold 10 values.
let data = table({
  dose:     [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
  response: [2.1, 4.3, 5.8, 8.1, 9.9, 12.2, 13.8, 16.1, 18.0, 20.2],
})

# Basic scatter plot
# x and y name columns of `data`. No mode is given, so this relies on
# plot()'s default mode (presumably scatter — confirm against the plot() reference).
let p = plot(data, {
  x:      "dose",
  y:      "response",
  title:  "Dose-Response Relationship",
  xlabel: "Dose (mg)",
  ylabel: "Response",
})

# Line plot — set the mode option
# Time points are unevenly spaced (0, 1, 2, 4, 8, 12, 24 hours).
let time_data = table({
  hour:       [0, 1, 2, 4, 8, 12, 24],
  expression: [1.0, 1.2, 2.5, 5.1, 8.3, 6.2, 3.1],
})

# mode: "line" selects the line rendering; color sets a single hex color.
let p2 = plot(time_data, {
  x:      "hour",
  y:      "expression",
  mode:   "line",
  title:  "Expression Over Time",
  xlabel: "Time (hours)",
  ylabel: "Relative Expression",
  color:  "#8b5cf6",
})

# Scatter with color grouping
# `treatment` is a string column with two distinct values ("ctrl", "treat"),
# four rows each.
let expr = table({
  gene_a:    [3.2, 5.1, 7.0, 2.8, 6.4, 8.1, 4.5, 7.3],
  gene_b:    [4.1, 6.0, 8.2, 3.5, 7.1, 9.0, 5.2, 8.1],
  treatment: ["ctrl", "ctrl", "ctrl", "ctrl", "treat", "treat", "treat", "treat"],
})

# color_by names the grouping column instead of passing a fixed `color`.
let p3 = plot(expr, {
  x:        "gene_a",
  y:        "gene_b",
  color_by: "treatment",
  title:    "Gene A vs Gene B by Treatment",
  xlabel:   "Gene A (CPM)",
  ylabel:   "Gene B (CPM)",
})

Step 2 — Histograms with histogram()

Use histogram() to visualize the distribution of a list of numeric values. Control the number of bins and appearance through the options record.

# histogram.bl — distribution plots
#
# Draws two histograms from plain lists of numbers. Unlike plot(), the
# first argument to histogram() here is a list, not a table.

# GC content values from a FASTQ analysis
# 30 hard-coded fractions between 0.38 and 0.56 (illustrative data).
let gc_values = [
  0.38, 0.42, 0.45, 0.47, 0.49, 0.50, 0.51, 0.52, 0.48, 0.44,
  0.41, 0.53, 0.55, 0.46, 0.43, 0.50, 0.49, 0.47, 0.51, 0.39,
  0.56, 0.44, 0.48, 0.52, 0.46, 0.41, 0.50, 0.53, 0.47, 0.45,
]

# Basic histogram
# bins sets the number of bins; the remaining options are labels and color.
let h = histogram(gc_values, {
  bins:   15,
  title:  "GC Content Distribution",
  xlabel: "GC Content",
  ylabel: "Frequency",
  color:  "#8b5cf6",
})

# Quality scores histogram
# 20 integer scores in the range 28–40.
let quality_scores = [28, 30, 32, 35, 36, 37, 38, 34, 33, 31,
                      29, 36, 37, 35, 38, 39, 40, 33, 32, 30]

# Same call shape as above with fewer bins and a different color.
let h2 = histogram(quality_scores, {
  bins:   10,
  title:  "Read Quality Score Distribution",
  xlabel: "Phred Quality Score",
  ylabel: "Count",
  color:  "#22d3ee",
})

Step 3 — Volcano Plots with volcano()

Volcano plots are essential for visualizing differential expression results. The volcano() builtin takes a table of DE results and highlights significant genes based on fold-change and p-value thresholds.

# volcano.bl — differential expression visualization
#
# Builds a 15-gene table of simulated differential-expression results and
# draws two volcano plots from it: one with explicit colors, one with
# stricter thresholds and a list of highlighted genes.

# Simulated DE results table
# Three parallel columns of 15 entries each: gene symbol, log2 fold change,
# and p-value.
let de = table({
  gene:   ["TP53", "BRCA1", "MYC", "EGFR", "KRAS",
           "ACTB", "GAPDH", "RPS6", "CDH1", "VEGFA",
           "PTEN", "RB1", "APC", "NRAS", "RAF1"],
  log2fc: [2.5, -3.1, 4.2, 1.8, -0.3,
           0.1, -0.05, 0.2, -2.8, 3.5,
           -1.9, 0.4, -0.1, 0.6, 0.15],
  pvalue: [1e-12, 5e-10, 1e-15, 2e-6, 0.45,
           0.82, 0.91, 0.55, 3e-8, 1e-11,
           5e-5, 0.12, 0.73, 0.08, 0.65],
})

# Basic volcano plot
# x, y and label name columns of `de`. fc_threshold and p_threshold are the
# fold-change and p-value cutoffs used to mark genes as significant.
# NOTE(review): y is the raw p-value column while ylabel reads
# "-log10(p-value)" — presumably volcano() applies the -log10 transform
# itself; confirm against the volcano() reference.
# colors supplies one hex color per key (up / down / ns — presumably
# up-regulated, down-regulated and not significant).
let v = volcano(de, {
  x:             "log2fc",
  y:             "pvalue",
  label:         "gene",
  fc_threshold:  1.0,
  p_threshold:   0.05,
  title:         "Treated vs Control",
  xlabel:        "log2 Fold Change",
  ylabel:        "-log10(p-value)",
  colors: {
    up:   "#22d3ee",
    down: "#8b5cf6",
    ns:   "#475569",
  },
})

# Highlight specific genes of interest
# highlight lists values from the `gene` column. Both thresholds are
# stricter than in the first plot (1.5 vs 1.0, 1e-4 vs 0.05); no colors
# record is passed, so the builtin's default colors apply.
let v2 = volcano(de, {
  x:          "log2fc",
  y:          "pvalue",
  label:      "gene",
  highlight:  ["TP53", "MYC", "VEGFA"],
  fc_threshold: 1.5,
  p_threshold:  1e-4,
  title:      "Key Oncogenes Highlighted",
})

Step 4 — MA Plots with ma_plot()

MA plots show the relationship between average expression (A) and log fold change (M). They are complementary to volcano plots for QC of differential expression analyses.

# ma.bl — MA plot for differential expression
#
# Builds a 10-gene table and draws two MA plots: average expression on the
# x axis against log2 fold change on the y axis, first without and then
# with significance coloring.

# Four parallel columns of 10 entries each. mean_expr is the "A" axis and
# log2fc the "M" axis; pvalue is only used by the second plot.
let de = table({
  gene:      ["TP53", "BRCA1", "MYC", "EGFR", "KRAS",
              "ACTB", "GAPDH", "RPS6", "CDH1", "VEGFA"],
  log2fc:    [2.5, -3.1, 4.2, 1.8, -0.3,
              0.1, -0.05, 0.2, -2.8, 3.5],
  mean_expr: [8.2, 6.5, 10.1, 7.8, 9.3,
              12.5, 13.1, 11.8, 5.2, 7.0],
  pvalue:    [1e-12, 5e-10, 1e-15, 2e-6, 0.45,
              0.82, 0.91, 0.55, 3e-8, 1e-11],
})

# Basic MA plot
# Only the two axis columns and the labels are given; no p-value column.
let m = ma_plot(de, {
  x:      "mean_expr",
  y:      "log2fc",
  title:  "MA Plot — Treated vs Control",
  xlabel: "Average Expression (log2 CPM)",
  ylabel: "log2 Fold Change",
})

# MA plot with significance coloring
# pvalue names the p-value column and p_threshold sets the cutoff; colors
# supplies one hex color per key (sig / ns — presumably significant and
# not significant).
let m2 = ma_plot(de, {
  x:           "mean_expr",
  y:           "log2fc",
  label:       "gene",
  pvalue:      "pvalue",
  p_threshold: 0.05,
  title:       "MA Plot with Significance",
  colors: {
    sig: "#8b5cf6",
    ns:  "#475569",
  },
})

Step 5 — Heatmaps with heatmap()

Heatmaps are ideal for visualizing expression matrices, correlation tables, and any 2D numeric data. Pass a table and specify which columns to render.

# heatmap.bl — expression heatmap
#
# Builds a 6-gene × 6-sample expression table and renders it twice with
# heatmap(): once as-is, once with row and column clustering enabled.

# Expression matrix: rows = genes, columns = samples
# `gene` holds the row names; the six numeric columns are three control and
# three treated samples.
let expr = table({
  gene:    ["TP53", "BRCA1", "MYC", "EGFR", "KRAS", "VEGFA"],
  ctrl_1:  [5.2, 3.1, 8.5, 4.2, 7.1, 2.8],
  ctrl_2:  [5.5, 3.4, 8.2, 4.0, 7.3, 3.0],
  ctrl_3:  [5.0, 3.0, 8.8, 4.5, 6.9, 2.6],
  treat_1: [8.1, 1.2, 12.5, 6.8, 7.0, 6.5],
  treat_2: [7.8, 1.5, 12.1, 7.0, 7.2, 6.2],
  treat_3: [8.5, 1.0, 13.0, 6.5, 6.8, 7.0],
})

# The numeric columns to render, in display order; shared by both calls below.
let sample_cols = ["ctrl_1", "ctrl_2", "ctrl_3", "treat_1", "treat_2", "treat_3"]

# Basic heatmap
# row_labels names the label column; col_labels lists the value columns.
let h = heatmap(expr, {
  row_labels:   "gene",
  col_labels:   sample_cols,
  color_scale:  "blue_white_red",
  title:        "Gene Expression Heatmap",
})

# Heatmap with clustering and z-score normalization
# cluster_rows / cluster_cols turn on clustering along each axis, and
# color_range fixes the color scale to [-3, 3].
# NOTE(review): no normalization option is passed in this call and `expr`
# holds raw values (1.0–13.0), so the z-scoring claimed here and in the
# title is not visible in this script — confirm whether heatmap() scales
# rows itself or whether the table must be z-scored beforehand.
let h2 = heatmap(expr, {
  row_labels:    "gene",
  col_labels:    sample_cols,
  cluster_rows:  true,
  cluster_cols:  true,
  color_scale:   "blue_white_red",
  color_range:   [-3, 3],
  title:         "Top DE Genes (z-scored)",
})

Step 6 — Genome Tracks with genome_track()

The genome_track() builtin visualizes genomic intervals such as gene annotations, peaks, or coverage regions along a chromosome coordinate axis.

# tracks.bl — genome track visualization
#
# Draws three genome tracks from interval tables: plain peaks, peaks
# colored by score, and stranded gene annotations. In every call the
# option values are column names of the table being drawn.

# Genomic intervals as a table
# Five non-overlapping intervals on chr1, each with a name and a numeric score.
let peaks = table({
  chrom:  ["chr1", "chr1", "chr1", "chr1", "chr1"],
  start:  [1000, 5000, 12000, 18000, 25000],
  end:    [2500, 7500, 14000, 19500, 28000],
  name:   ["peak_1", "peak_2", "peak_3", "peak_4", "peak_5"],
  score:  [45.2, 82.1, 33.5, 91.0, 55.8],
})

# Basic genome track
# Maps the chrom/start/end/label roles onto the table's columns.
let t = genome_track(peaks, {
  chrom:  "chrom",
  start:  "start",
  end:    "end",
  label:  "name",
  title:  "ChIP-seq Peaks — chr1",
})

# Genome track with score-based coloring
# Same mapping as above, plus the score column, which is also used as the
# color_by column.
let t2 = genome_track(peaks, {
  chrom:    "chrom",
  start:    "start",
  end:      "end",
  label:    "name",
  score:    "score",
  color_by: "score",
  title:    "ChIP-seq Peak Scores",
})

# Gene annotations
# Three intervals on chr17, all on the "-" strand.
# NOTE(review): these intervals overlap — the first (7661779–7687538) fully
# contains the second, and the third starts before the first ends — so they
# read as illustrative coordinates rather than real exon boundaries; verify
# before reusing them as annotation data.
let genes = table({
  chrom:  ["chr17", "chr17", "chr17"],
  start:  [7661779, 7676594, 7687490],
  end:    [7687538, 7687538, 7688012],
  name:   ["TP53_exon1", "TP53_exon2", "TP53_exon3"],
  strand: ["-", "-", "-"],
})

# strand names the column holding the strand symbol for each interval.
let t3 = genome_track(genes, {
  chrom:  "chrom",
  start:  "start",
  end:    "end",
  label:  "name",
  strand: "strand",
  title:  "TP53 Gene Structure",
})

Step 7 — Saving Plots with save_svg()

Every plot builtin returns a plot object. Use save_svg() to write any plot to an SVG file on disk.

# save.bl — persisting plots to SVG files
#
# Shows the two ways this tutorial calls save_svg(): with the plot as an
# explicit first argument, and with the plot piped in via |>. Writes three
# files under results/.
# NOTE(review): assumes the results/ directory already exists — confirm
# whether save_svg() creates missing directories.

let data = table({
  x: [1, 2, 3, 4, 5],
  y: [2.1, 4.0, 6.2, 7.9, 10.1],
})

# Create a plot and save it
# Two-argument form: the plot object, then the output path.
let p = plot(data, { x: "x", y: "y", title: "My Plot" })
save_svg(p, "results/my_plot.svg")

# Pipe a plot directly into save_svg
# The histogram is never bound to a name; the pipe supplies it to
# save_svg(), which is given only the output path.
let values = [28, 30, 32, 35, 36, 37, 38, 34, 33, 31]
histogram(values, { bins: 8, title: "Quality Scores" })
  |> save_svg("results/quality_hist.svg")

# Save a volcano plot
# Minimal three-gene table; only the column mappings are passed, so the
# thresholds and colors are whatever volcano() uses by default.
let de = table({
  gene:   ["TP53", "MYC", "ACTB"],
  log2fc: [2.5, 4.2, 0.1],
  pvalue: [1e-12, 1e-15, 0.82],
})

volcano(de, { x: "log2fc", y: "pvalue", label: "gene" })
  |> save_svg("results/volcano.svg")

print("All plots saved to results/")

Step 8 — Complete Workflow

Here is a full example that loads data, creates multiple visualizations, and saves them all to SVG files.

# requires: results/de_results.csv, data/expression_matrix.csv and data/expression.csv in working directory
# workflow.bl — complete visualization pipeline
#
# Inputs (columns this script references):
#   results/de_results.csv      — gene, log2fc, pvalue, mean_expr, chrom, start, end
#   data/expression_matrix.csv  — gene, ctrl_1..ctrl_3, treat_1..treat_3
#   data/expression.csv         — ctrl_mean, treat_mean
# Outputs: six SVG files under figures/.
# NOTE(review): assumes figures/ already exists — confirm whether
# save_svg() creates missing directories.

# Load differential expression results
let de = csv("results/de_results.csv")

# 1. Volcano plot of DE results
# Genes are marked significant using the fold-change and p-value cutoffs below.
let v = volcano(de, {
  x:             "log2fc",
  y:             "pvalue",
  label:         "gene",
  fc_threshold:  1.0,
  p_threshold:   0.05,
  title:         "Differential Expression",
  colors: { up: "#22d3ee", down: "#8b5cf6", ns: "#475569" },
})
save_svg(v, "figures/volcano.svg")

# 2. MA plot of the same results
# Uses the same p-value cutoff as the volcano plot.
let m = ma_plot(de, {
  x:           "mean_expr",
  y:           "log2fc",
  pvalue:      "pvalue",
  p_threshold: 0.05,
  title:       "MA Plot",
})
save_svg(m, "figures/ma_plot.svg")

# 3. Histogram of fold-change distribution
# col() pulls one column out of the table as a list, which is the input
# shape histogram() is given throughout this tutorial.
let fc_values = col(de, "log2fc")
histogram(fc_values, {
  bins:   30,
  title:  "log2 Fold Change Distribution",
  xlabel: "log2FC",
  ylabel: "Count",
}) |> save_svg("figures/fc_distribution.svg")

# 4. Heatmap of top significant genes
# Keep rows with pvalue < 0.01, order them by pvalue (presumably ascending,
# so the smallest p-values come first — confirm), and take at most 20.
let top_genes = de
  |> filter(|r| r.pvalue < 0.01)
  |> arrange("pvalue")
  |> head(20)

# Gene names of the top hits, extracted once so the filter below does not
# rebuild this list for every row of the expression matrix.
let top_gene_names = col(top_genes, "gene")

let expr_matrix = csv("data/expression_matrix.csv")
  |> filter(|r| r.gene in top_gene_names)

let samples = ["ctrl_1", "ctrl_2", "ctrl_3", "treat_1", "treat_2", "treat_3"]

heatmap(expr_matrix, {
  row_labels:   "gene",
  col_labels:   samples,
  cluster_rows: true,
  color_scale:  "blue_white_red",
  title:        "Top 20 DE Genes",
}) |> save_svg("figures/heatmap.svg")

# 5. Genome track of significant regions
# Reuses the DE table directly, so it must carry chrom/start/end columns
# alongside the statistics. The cutoff here (1e-8) is much stricter than
# the 0.05 used for the plots above.
let regions = de
  |> filter(|r| r.pvalue < 1e-8)

genome_track(regions, {
  chrom: "chrom",
  start: "start",
  end:   "end",
  label: "gene",
  score: "log2fc",
  title: "Genome-wide Significant Loci",
}) |> save_svg("figures/genome_track.svg")

# 6. General scatter plot of expression correlation
# Loaded from a third input file, separate from the two used above.
let scatter_data = csv("data/expression.csv")
plot(scatter_data, {
  x:      "ctrl_mean",
  y:      "treat_mean",
  title:  "Control vs Treatment Expression",
  xlabel: "Control (log2 CPM)",
  ylabel: "Treatment (log2 CPM)",
  color:  "#8b5cf6",
}) |> save_svg("figures/expression_scatter.svg")

print("All 6 figures saved to figures/")

Next Steps

Ready for advanced topics? Try the Multi-species Comparative Genomics tutorial.