Style Guide
This guide covers BioLang's conventions for comments, naming, formatting, and best practices. Following these conventions produces consistent, readable code across the community.
Comments
BioLang uses # for line comments. There are no block comments.
# This is a line comment
let x = 42 # Inline comment after code
# Multi-line comments use consecutive # lines.
# Each line starts with # and a single space.
# Keep comments concise and meaningful.
# Avoid obvious comments like:
# let count = 0 # set count to zero (BAD — states the obvious)
# Good comments explain WHY, not WHAT:
# Drop reads with MAPQ < 30 because the aligner reports
# unreliable mappings below this threshold for short reads.
let hq_reads = reads |> filter(|r| r.mapq >= 30)
Documentation Comments
# Doc comments use ## and appear before declarations.
# They are extracted by the documentation generator.
## Compute the GC content of a DNA sequence.
##
## Returns the fraction of bases that are G or C,
## as a Float between 0.0 and 1.0.
##
## Example:
## let gc = gc_ratio(dna"ATCGATCG") # 0.5
pub fn gc_ratio(seq: DNA) -> Float {
gc_content(seq)
}
## A genomic variant with position and quality information.
pub struct Variant {
## Chromosome name (e.g. "chr1")
chrom: String,
## 1-based genomic position
pos: Int,
## Reference allele
ref_allele: String,
## Alternate allele
alt_allele: String,
## Phred-scaled quality score
qual: Float
}
Naming Conventions
| Item | Convention | Example |
|---|---|---|
| Variables | snake_case | read_count, gc_content |
| Functions | snake_case | filter_reads, compute_stats |
| Structs | PascalCase | GenomicInterval, AlignmentConfig |
| Enums | PascalCase | Strand, VariantType |
| Enum variants | PascalCase | Strand.Plus, VariantType.Snv |
| Traits | PascalCase | Sequenceable, Display |
| Constants | SCREAMING_SNAKE_CASE | MAX_QUALITY, DEFAULT_MAPQ |
| Modules | snake_case | bio/io, quality_control |
| Type parameters | Single uppercase letter | T, K, V |
Naming Guidelines
# Good names — descriptive and consistent
let filtered_reads = reads |> filter(|r| r.mapq >= 30)
let gene_expression = csv("expression.csv")
let variant_count = len(variants)
fn compute_gc_content(seq: DNA) -> Float { ... }
# Avoid abbreviations unless universally understood
let fltrd_rds = ... # BAD — unclear abbreviation
let filtered_reads = ... # GOOD
# Accepted abbreviations in bioinformatics
let gc = gc_content(seq) # GC content
let mapq = read.mapq # Mapping quality
let snv = variant # Single nucleotide variant
let bam = read_bam(path) # BAM format
let vcf = read_vcf(path) # VCF format
Formatting
Indentation
Use 2 spaces for indentation. Never use tabs.
# 2-space indentation
fn process_sample(sample) {
  let reads = read_fastq(sample.path)
  let filtered = reads
    |> filter(|r| mean_phred(r.quality) >= 30)
    |> filter(|r| r.length >= 50)
  if len(filtered) > 0 {
    let results = analyze(filtered)
    write_results(results, sample.output)
  } else {
    print(f"Warning: no reads passed QC for {sample.id}")
  }
}
Line Length
Keep lines under 100 characters. Break long expressions at operators, commas, or after opening delimiters:
# Break pipe chains — one stage per line
let result = data
|> filter(|r| r.quality >= threshold)
|> map(|r| transform(r))
|> arrange(desc("score"))
# Break long function calls and struct literals at argument or field boundaries
let config = AlignmentConfig {
reference: "hg38.fa",
min_mapq: 30,
max_mismatch: 3,
paired_only: true
}
# Break long conditions
if variant.qual >= min_quality
&& variant.depth >= min_depth
&& variant.allele_frequency >= min_af {
accept(variant)
}
Blank Lines
# One blank line between top-level declarations
fn load_data(path) {
  csv(path)
}

fn process(data) {
  data |> filter(|r| r.score > 0)
}
# No blank line between closely related statements
let reads = read_fastq(path)
let filtered = reads |> filter(|r| r.quality >= 30)
let count = len(filtered)
# One blank line to separate logical blocks within a function
fn analyze(sample) {
  # Load and validate
  let data = csv(sample.path)
  let validated = validate(data)

  # Filter and transform
  let processed = validated
    |> filter(|r| r.score > 0)
    |> mutate(normalized = |r| r.score / max_score)

  # Output
  write_csv(processed, sample.output)
  print(f"Done: {sample.id}")
}
Best Practices
Prefer Pipes Over Nesting
# BAD — deeply nested, hard to read
let result = arrange(filter(select(data, "gene", "score"), |r| r.score > 0.5), desc("score"))
# GOOD — pipe chain, easy to follow
let result = data
|> select("gene", "score")
|> filter(|r| r.score > 0.5)
|> arrange(desc("score"))
Prefer Functional Style
# BAD — unnecessary loop accumulation
let total = 0
for x in values {
total = total + x
}
# GOOD — functional reduction
let total = values |> sum()
# OK — loop when it is genuinely clearer
let retries = 0
let result = None
while retries < 3 && result == None {
result = try { fetch(url) } catch _ { None }
retries = retries + 1
}
Use Meaningful Variable Names in Lambdas
# BAD — cryptic parameter names
let x = data |> filter(|a| a.b > 0.5) |> map(|c| c.d + c.e)
# GOOD — descriptive names
let significant = data
|> filter(|gene| gene.p_value < 0.05)
|> map(|gene| gene.log2fc + gene.base_mean)
# OK — single letter for obvious cases
let lengths = sequences |> map(|s| len(s))
let evens = numbers |> filter(|n| n % 2 == 0)
Handle Errors Explicitly
# BAD — ignoring potential errors
let data = csv("input.csv") |> unwrap()
# GOOD — handle the error case
let data = match csv("input.csv") {
Ok(d) => d,
Err(e) => {
print(f"Failed to read input: {e.message}")
exit(1)
}
}
# GOOD — propagate with context
let data = csv("input.csv")
|> context("loading sample data")?
Prefer Descriptive Function Signatures
# BAD — no types, unclear purpose
fn process(a, b, c) { ... }
# GOOD — typed, self-documenting
fn filter_variants(
variants: Table,
min_quality: Float = 30.0,
min_depth: Int = 10,
impact_filter: List[String] = ["HIGH", "MODERATE"]
) -> Table {
variants
|> filter(|v| v.qual >= min_quality)
|> filter(|v| v.depth >= min_depth)
|> filter(|v| v.impact in impact_filter)
}
Group Related Imports
# Standard library imports first
import { csv, write_csv } from "io"
import { mean, stdev } from "stats"
# Bio-specific imports
import { read_fastq, stream_bam } from "bio/io"
import { gc_content, reverse_complement } from "bio/seq"
import { align } from "bio/align"
# Local imports last
import { PipelineConfig } from "./config.bl"
import { run_qc } from "./lib/qc.bl"
Anti-Patterns
# Anti-pattern: stringly typed data
let status = "running" # Use an enum instead
# Better:
enum JobStatus { Pending, Running, Completed, Failed }
let status = JobStatus.Running
# Anti-pattern: magic numbers
let filtered = reads |> filter(|r| r.mapq >= 30) # Why 30?
# Better: named constant with comment
const MIN_MAPQ = 30 # MAPQ 30 = 99.9% mapping confidence
let filtered = reads |> filter(|r| r.mapq >= MIN_MAPQ)
# Anti-pattern: overly long pipe chains without explanation
let result = data |> f() |> g() |> h() |> i() |> j() |> k() |> l()
# Better: break into named stages
let cleaned = data |> f() |> g() # Preprocessing
let analyzed = cleaned |> h() |> i() # Analysis
let result = analyzed |> j() |> k() |> l() # Formatting