BioContainers
BioLang integrates with BioContainers to run any bioinformatics tool in an isolated container environment. This provides reproducibility, avoids installation headaches, and ensures version-pinned tool execution.
Running Tools in Containers
Basic container execution
# Run a single tool from a BioContainers image.
# Call shape: container_run(image, command, options?)
let demo_image = "quay.io/biocontainers/samtools:1.19--h50ea8bc_0"
let demo_command = "samtools flagstat /data/aligned.bam"

# The optional third argument is a record; here its `mount` field names the
# local ./data directory.
let result = container_run(demo_image, demo_command, { mount: "./data" })

# The tool's captured standard output is read from the returned value.
print(result.stdout)
Common bioinformatics tools in containers
# samtools: print alignment summary statistics for a BAM file
let flagstat_cmd = "samtools flagstat /data/aligned.bam"
let stats = container_run("quay.io/biocontainers/samtools:1.19--h50ea8bc_0", flagstat_cmd)
print(stats.stdout)

# bwa-mem2: paired-end alignment; the command itself redirects stdout to a SAM file
let align_cmd = "bwa-mem2 mem -t 8 /data/GRCh38.fa /data/sample_R1.fq.gz /data/sample_R2.fq.gz > /data/aligned.sam"
container_run("quay.io/biocontainers/bwa-mem2:2.2.1--he513fc3_0", align_cmd)

# GATK: call variants with HaplotypeCaller, emitting a GVCF
let call_cmd = "gatk HaplotypeCaller -R /data/GRCh38.fa -I /data/aligned.bam -O /data/variants.g.vcf.gz --emit-ref-confidence GVCF"
container_run("quay.io/biocontainers/gatk:4.4.0.0--py36hdfd78af_0", call_cmd)
Managing Container Images
Pulling and caching images
# Check for a container runtime first, so the pull is only attempted when
# one was actually detected.
if container_available() {
  print("Container runtime is ready")
  # Pull an image before use (cached locally)
  container_pull("quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0")
} else {
  print("No container runtime found (Docker/Podman/Singularity)")
}

# List cached tool images, one per line
let images = tool_list()
for img in images {
  print("#{img}")
}
Searching BioContainers registry
# Search the registry by keyword and print name/description for the
# entries selected by take(results, 5)
let results = tool_search("samtools")
let top_hits = take(results, 5)
for hit in top_hits {
  print("#{hit.name} - #{hit.description}")
}

# Look up one tool by name and print its name, description and image
let info = tool_info("bwa-mem2")
let summary = "#{info.name}: #{info.description}"
print(summary)
print("Image: #{info.image}")
Multi-Tool Pipelines
Chaining container tools
# Run a full alignment pipeline using containers.
# Steps: FastQC -> fastp trimming -> bwa-mem2 alignment -> samtools sort -> samtools index.
let sample = "sample_001"
let ref = "refs/GRCh38.fa"
let r1 = "fastq/#{sample}_R1.fq.gz"
let r2 = "fastq/#{sample}_R2.fq.gz"
let out_dir = "results/#{sample}"
mkdir(out_dir)

# Output directory as referenced inside the commands (every path below is under /data)
let work = "/data/#{out_dir}"
# samtools is used by two steps, so its image is named once to keep the tags in sync
let samtools_image = "quay.io/biocontainers/samtools:1.19--h50ea8bc_0"

# Step 1: Quality control
print("Running FastQC...")
container_run(
  "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0",
  "fastqc /data/#{r1} /data/#{r2} -o #{work}"
)

# Step 2: Trimming
print("Trimming adapters...")
container_run(
  "quay.io/biocontainers/fastp:0.23.4--hadf994f_2",
  "fastp -i /data/#{r1} -I /data/#{r2} -o #{work}/trimmed_R1.fq.gz -O #{work}/trimmed_R2.fq.gz --json #{work}/fastp.json --thread 4"
)

# Step 3: Alignment (the command redirects stdout into aligned.sam)
print("Aligning reads...")
container_run(
  "quay.io/biocontainers/bwa-mem2:2.2.1--he513fc3_0",
  "bwa-mem2 mem -t 8 /data/#{ref} #{work}/trimmed_R1.fq.gz #{work}/trimmed_R2.fq.gz > #{work}/aligned.sam"
)

# Step 4: SAM to sorted BAM
print("Sorting BAM...")
container_run(
  samtools_image,
  "samtools sort -@ 4 -o #{work}/sorted.bam #{work}/aligned.sam"
)

# Step 5: Index (announced like every other step)
print("Indexing BAM...")
container_run(
  samtools_image,
  "samtools index #{work}/sorted.bam"
)

print("Pipeline complete for #{sample}")
Custom Containers
Using custom Docker images
# Any Docker/OCI image can be used, not only BioContainers images.
# The options record is assembled from named parts: a mount directory and
# a record of environment variables.
let pipeline_env = { "THREADS": "8", "REFERENCE": "/data/refs/genome.fa" }
let pipeline_opts = { mount: "./data", env: pipeline_env }
container_run(
  "my-registry.com/custom-pipeline:v2.1",
  "run_analysis.sh /data/input /data/output",
  pipeline_opts
)

# container_run auto-detects the runtime (Docker, Podman, Singularity, Apptainer).
# NOTE(review): library:// looks like a Singularity/Apptainer image URI, so this
# call presumably needs one of those two runtimes — confirm.
container_run("library://user/collection/tool:1.0", "tool --input /data/file.bam")
Building a container from BioLang
# Generate a Dockerfile for a custom tool set.
# NOTE(review): the pinned package versions must exist in the apt repositories
# of the base image, otherwise the build fails — verify them against ubuntu:22.04.
let dockerfile = "
FROM ubuntu:22.04
RUN apt-get update && apt-get install -y \\
samtools=1.19-1 \\
bcftools=1.19-1 \\
bedtools=2.31.0
WORKDIR /data
"
write_text(dockerfile, "Dockerfile")

# Build the image and report the outcome in both cases
let build = shell("docker build -t my-biotools:latest .")
if build.exit_code == 0 {
  print("Custom container built successfully")
} else {
  # Do not let a failed build pass silently; surface the exit code
  print("Custom container build failed (exit code #{build.exit_code})")
}
Resource Management
Container resource limits
# Both variants below run the same GATK command from the same image
let gatk_image = "quay.io/biocontainers/gatk:4.4.0.0--py36hdfd78af_0"
let gatk_cmd = "gatk HaplotypeCaller -R /data/ref.fa -I /data/in.bam -O /data/out.vcf"

# With resource limits: call docker through shell and pass the flags directly
# (--memory, --cpus, volume mount of the current directory, working directory)
shell("docker run --rm --memory=16g --cpus=4 -v $(pwd):/data -w /data #{gatk_image} #{gatk_cmd}")

# Without limits: container_run is enough for the simpler case
container_run(gatk_image, gatk_cmd)
Cleaning up
# Report how many tool images are currently cached
let cached = tool_list()
let cached_count = len(cached)
print("Cached images: #{cached_count}")

# Remove one specific cached image by shelling out to docker
let stale_image = "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0"
shell("docker rmi #{stale_image}")

# Prune unused images and print what the command wrote to stdout
let result = shell("docker image prune -f")
print("Cleanup: #{result.stdout}")