Style Guide

This guide covers BioLang's conventions for comments, naming, formatting, and best practices. Following these conventions produces consistent, readable code across the community.

Comments

BioLang uses # for line comments. There are no block comments.

# This is a line comment

let x = 42  # Inline comment after code

# Multi-line comments use consecutive # lines.
# Each line starts with # and a single space.
# Keep comments concise and meaningful.

# Avoid obvious comments like:
# let count = 0  # set count to zero   (BAD — states the obvious)

# Good comments explain WHY, not WHAT:
# Keep only reads with MAPQ >= 30 because the aligner reports
# unreliable mappings below this threshold for short reads.
let hq_reads = reads |> filter(|r| r.mapq >= 30)

Documentation Comments

# Doc comments use ## and appear before declarations.
# They are extracted by the documentation generator.

## Compute the GC content of a DNA sequence.
##
## Returns the fraction of bases that are G or C,
## as a Float between 0.0 and 1.0.
##
## Example:
##   let gc = gc_ratio(dna"ATCGATCG")  # 0.5
pub fn gc_ratio(seq: DNA) -> Float {
  gc_content(seq)
}

## A genomic variant with position and quality information.
pub struct Variant {
  ## Chromosome name (e.g. "chr1")
  chrom: String,
  ## 1-based genomic position
  pos: Int,
  ## Reference allele
  ref_allele: String,
  ## Alternate allele
  alt_allele: String,
  ## Phred-scaled quality score
  qual: Float
}

Naming Conventions

| Item | Convention | Example |
|---|---|---|
| Variables | snake_case | read_count, gc_content |
| Functions | snake_case | filter_reads, compute_stats |
| Structs | PascalCase | GenomicInterval, AlignmentConfig |
| Enums | PascalCase | Strand, VariantType |
| Enum variants | PascalCase | Strand.Plus, VariantType.Snv |
| Traits | PascalCase | Sequenceable, Display |
| Constants | SCREAMING_SNAKE | MAX_QUALITY, DEFAULT_MAPQ |
| Modules | snake_case | bio/io, quality_control |
| Type parameters | Single uppercase | T, K, V |

Naming Guidelines

# Good names — descriptive and consistent
let filtered_reads = reads |> filter(|r| r.mapq >= 30)
let gene_expression = csv("expression.csv")
let variant_count = len(variants)
fn compute_gc_content(seq: DNA) -> Float { ... }

# Avoid abbreviations unless universally understood
let fltrd_rds = ...       # BAD — unclear abbreviation
let filtered_reads = ...  # GOOD

# Accepted abbreviations in bioinformatics
let gc = gc_content(seq)    # GC content
let mapq = read.mapq        # Mapping quality
let snv = variant           # Single nucleotide variant
let bam = read_bam(path)    # BAM format
let vcf = read_vcf(path)    # VCF format

Formatting

Indentation

Use 2 spaces for indentation. Never use tabs.

# 2-space indentation
fn process_sample(sample) {
  let reads = read_fastq(sample.path)
  let filtered = reads
    |> filter(|r| mean_phred(r.quality) >= 30)
    |> filter(|r| r.length >= 50)

  if len(filtered) > 0 {
    let results = analyze(filtered)
    write_results(results, sample.output)
  } else {
    print(f"Warning: no reads passed QC for {sample.id}")
  }
}

Line Length

Keep lines under 100 characters. Break long expressions at operators, commas, or after opening delimiters:

# Break pipe chains — one stage per line
let result = data
  |> filter(|r| r.quality >= threshold)
  |> map(|r| transform(r))
  |> arrange(desc("score"))

# Break long function calls and struct literals at argument or field boundaries
let config = AlignmentConfig {
  reference: "hg38.fa",
  min_mapq: 30,
  max_mismatch: 3,
  paired_only: true
}

# Break long conditions
if variant.qual >= min_quality
  && variant.depth >= min_depth
  && variant.allele_frequency >= min_af {
  accept(variant)
}

Blank Lines

# One blank line between top-level declarations
fn load_data(path) {
  csv(path)
}

fn process(data) {
  data |> filter(|r| r.score > 0)
}

# No blank line between closely related statements
let reads = read_fastq(path)
let filtered = reads |> filter(|r| r.quality >= 30)
let count = len(filtered)

# One blank line to separate logical blocks within a function
fn analyze(sample) {
  # Load and validate
  let data = csv(sample.path)
  let validated = validate(data)

  # Filter and transform
  let processed = validated
    |> filter(|r| r.score > 0)
    |> mutate(normalized = |r| r.score / max_score)

  # Output
  write_csv(processed, sample.output)
  print(f"Done: {sample.id}")
}

Best Practices

Prefer Pipes Over Nesting

# BAD — deeply nested, hard to read
let result = arrange(filter(select(data, "gene", "score"), |r| r.score > 0.5), desc("score"))

# GOOD — pipe chain, easy to follow
let result = data
  |> select("gene", "score")
  |> filter(|r| r.score > 0.5)
  |> arrange(desc("score"))

Prefer Functional Style

# BAD — unnecessary loop accumulation
let total = 0
for x in values {
  total = total + x
}

# GOOD — functional reduction
let total = values |> sum()

# OK — loop when it is genuinely clearer
let retries = 0
let result = None
while retries < 3 && result == None {
  result = try { fetch(url) } catch _ { None }
  retries = retries + 1
}

Use Meaningful Variable Names in Lambdas

# BAD — cryptic parameter names
let x = data |> filter(|a| a.b > 0.5) |> map(|c| c.d + c.e)

# GOOD — descriptive names
let significant = data
  |> filter(|gene| gene.p_value < 0.05)
  |> map(|gene| gene.log2fc + gene.base_mean)

# OK — single letter for obvious cases
let lengths = sequences |> map(|s| len(s))
let evens = numbers |> filter(|n| n % 2 == 0)

Handle Errors Explicitly

# BAD — ignoring potential errors
let data = csv("input.csv") |> unwrap()

# GOOD — handle the error case
let data = match csv("input.csv") {
  Ok(d) => d,
  Err(e) => {
    print(f"Failed to read input: {e.message}")
    exit(1)
  }
}

# GOOD — propagate with context
let data = csv("input.csv")
  |> context("loading sample data")?

Prefer Descriptive Function Signatures

# BAD — no types, unclear purpose
fn process(a, b, c) { ... }

# GOOD — typed, self-documenting
fn filter_variants(
  variants: Table,
  min_quality: Float = 30.0,
  min_depth: Int = 10,
  impact_filter: List[String] = ["HIGH", "MODERATE"]
) -> Table {
  variants
    |> filter(|v| v.qual >= min_quality)
    |> filter(|v| v.depth >= min_depth)
    |> filter(|v| v.impact in impact_filter)
}

Group Related Imports

# Standard library imports first
import { csv, write_csv } from "io"
import { mean, stdev } from "stats"

# Bio-specific imports
import { read_fastq, stream_bam } from "bio/io"
import { gc_content, reverse_complement } from "bio/seq"
import { align } from "bio/align"

# Local imports last
import { PipelineConfig } from "./config.bl"
import { run_qc } from "./lib/qc.bl"

Anti-Patterns

# Anti-pattern: stringly typed data
let status = "running"   # Use an enum instead

# Better:
enum JobStatus { Pending, Running, Completed, Failed }
let status = JobStatus.Running

# Anti-pattern: magic numbers
let filtered = reads |> filter(|r| r.mapq >= 30)  # Why 30?

# Better: named constant with comment
const MIN_MAPQ = 30  # MAPQ 30 = 99.9% mapping confidence
let filtered = reads |> filter(|r| r.mapq >= MIN_MAPQ)

# Anti-pattern: overly long pipe chains without explanation
let result = data |> f() |> g() |> h() |> i() |> j() |> k() |> l()

# Better: break into named stages
let cleaned = data |> f() |> g()            # Preprocessing
let analyzed = cleaned |> h() |> i()        # Analysis
let result = analyzed |> j() |> k() |> l()  # Formatting