Python/R Interop
BioLang interoperates with Python and R through subprocess execution, file-based data exchange, and JSON communication. This lets you leverage existing libraries like scanpy, DESeq2, or ggplot2 from BioLang scripts.
Calling Python
Running a Python script
# Run a Python script and capture output.
# The value returned by shell() is read below through its exit_code and
# stdout fields.
let result = shell("python3 analyze.py --input data.csv")
print("Exit code: #{result.exit_code}")
print("Output: #{result.stdout}")
Inline Python with data exchange
# Prepare data in BioLang, process in Python, read results back.
# Both tables are re-written as TSV because the Python script below reads
# them with sep='\t'.
let counts = read_csv("data/counts.csv")
write_tsv(counts, "_temp_counts.tsv")
let metadata = read_csv("data/sample_sheet.csv")
write_tsv(metadata, "_temp_metadata.tsv")
# Run DESeq2-like analysis via Python
# Write Python script to a temp file and run it.
# The script text is kept flush-left: it is written to disk verbatim and
# Python is sensitive to leading whitespace.
let py_script = "
import pandas as pd
from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats
counts = pd.read_csv('_temp_counts.tsv', sep='\\t', index_col=0)
metadata = pd.read_csv('_temp_metadata.tsv', sep='\\t', index_col=0)
dds = DeseqDataSet(counts=counts, metadata=metadata, design_factors='condition')
dds.deseq2()
stat_res = DeseqStats(dds, contrast=['condition', 'treated', 'control'])
stat_res.summary()
stat_res.results_df.to_csv('_temp_deseq_results.tsv', sep='\\t')
"
write_text(py_script, "_temp_deseq.py")
let result = shell("python3 _temp_deseq.py")
if result.exit_code != 0 {
  print("Python error: #{result.stderr}")
  # Remove the files this script created before bailing out; otherwise a
  # failed run leaves them behind, because exit(1) skips the cleanup at the
  # end. The results file is not removed here since the Python script may
  # have failed before writing it.
  remove("_temp_counts.tsv")
  remove("_temp_metadata.tsv")
  remove("_temp_deseq.py")
  exit(1)
}
# Read results back into BioLang
let de_results = tsv("_temp_deseq_results.tsv")
# Keep genes with adjusted p-value below 0.05 and |log2 fold change| above 1.
let significant = de_results
  |> filter(|r| float(r["padj"]) < 0.05)
  |> filter(|r| abs(float(r["log2FoldChange"])) > 1.0)
print("Significant DE genes: #{len(significant)}")
write_tsv(significant, "significant_genes.tsv")
# Clean up temp files
remove("_temp_counts.tsv")
remove("_temp_metadata.tsv")
remove("_temp_deseq_results.tsv")
remove("_temp_deseq.py")
Python via JSON exchange
# Pass structured data to Python via JSON.
# The parameters are serialized to _params.json and its path is handed to
# normalize.py on the command line.
let params = {
input_file: "counts.csv",
min_counts: 10,
normalization: "TPM",
output_file: "normalized.csv"
}
write_json(params, "_params.json")
let result = shell("python3 normalize.py --params _params.json")
if result.exit_code == 0 {
# NOTE(review): the result file name is not passed to the script; this
# assumes normalize.py writes a summary with n_genes and n_samples keys to
# _params_result.json - confirm against the script.
let output = read_json("_params_result.json")
print("Normalized #{output["n_genes"]} genes across #{output["n_samples"]} samples")
} else {
print("Normalization failed: #{result.stderr}")
}
Calling R
Running an R script
# Run R script for statistical analysis.
# input.csv and output.csv are passed to analysis.R as positional arguments.
let result = shell("Rscript analysis.R input.csv output.csv")
# Stop with a non-zero status if R failed, printing what it wrote to stderr.
if result.exit_code != 0 {
print("R error: #{result.stderr}")
exit(1)
}
# Load the output file back into BioLang and report how many rows it has.
let output = read_csv("output.csv")
print("R analysis produced #{len(output)} results")
Inline R for DESeq2
# Prepare count matrix and run DESeq2 in R
let counts = read_csv("data/counts.csv")
write_csv(counts, "_deseq_input.csv")
# The R script hard-codes the design as three 'control' samples followed by
# three 'treated' samples, so the count matrix is expected to have six sample
# columns in that order. The first CSV column is used as row names.
let r_script = "
library(DESeq2)
counts <- read.csv('_deseq_input.csv', row.names=1)
coldata <- data.frame(
condition = factor(c(rep('control', 3), rep('treated', 3)))
)
dds <- DESeqDataSetFromMatrix(countData=counts, colData=coldata, design=~condition)
dds <- DESeq(dds)
res <- results(dds, contrast=c('condition', 'treated', 'control'))
write.csv(as.data.frame(res), '_deseq_output.csv')
"
write_text(r_script, "_deseq.R")
let result = shell("Rscript _deseq.R")
if result.exit_code == 0 {
  let de = read_csv("_deseq_output.csv")
  # Keep genes whose adjusted p-value is below 0.05.
  let sig = de |> filter(|r| float(r["padj"]) < 0.05)
  print("DESeq2 found #{len(sig)} significant genes")
  write_csv(sig, "deseq2_significant.csv")
} else {
  # Report the failure instead of finishing silently, as the other
  # examples on this page do.
  print("R error: #{result.stderr}")
}
# Clean up temp files
remove("_deseq.R")
remove("_deseq_input.csv")
remove("_deseq_output.csv")
R for plotting
# Generate publication-quality plots with ggplot2
let data = tsv("pca_coordinates.tsv")
write_tsv(data, "_plot_data.tsv")
# The R script is single-quoted so that it can use double quotes internally.
# It expects pc1, pc2 and population columns in the plot data.
let r_plot = '
library(ggplot2)
data <- read.delim("_plot_data.tsv")
p <- ggplot(data, aes(x=pc1, y=pc2, color=population)) +
geom_point(size=2, alpha=0.7) +
theme_minimal() +
labs(x="PC1", y="PC2", title="Population Structure") +
scale_color_brewer(palette="Set2")
ggsave("pca_plot.png", p, width=8, height=6, dpi=300)
cat("Plot saved")
'
write_text(r_plot, "_plot.R")
let result = shell("Rscript _plot.R")
# Only announce success when Rscript actually succeeded; otherwise show
# what R wrote to stderr.
if result.exit_code == 0 {
  print("PCA plot saved to pca_plot.png")
} else {
  print("R error: #{result.stderr}")
}
# Clean up temp files
remove("_plot.R")
remove("_plot_data.tsv")
Bidirectional Pipes
Streaming data to external tools
# Write data then process with an external tool.
# The reads are copied to a temp FASTQ file, summarized with
# `seqkit stats -T`, and the tool's stdout is printed as-is.
write_fastq(read_fastq("data/reads.fastq"), "_temp.fq")
let result = shell("seqkit stats -T _temp.fq")
print(result.stdout)
# Clean up the temp file
remove("_temp.fq")
Using samtools via pipe
# Use samtools for operations BioLang does not natively support
let flagstat = shell("samtools flagstat aligned.bam")
print(flagstat.stdout)
# Filter BED then pass to samtools.
# Only regions longer than 100 bases are kept.
let bed_regions = read_bed("data/regions.bed")
  |> filter(|r| r.end - r.start > 100)
write_bed(bed_regions, "_filtered.bed")
# Extract alignments overlapping the filtered regions into targeted.bam.
let result = shell("samtools view -L _filtered.bed -b aligned.bam -o targeted.bam")
# Surface a samtools failure rather than silently ignoring the result.
if result.exit_code != 0 {
  print("samtools error: #{result.stderr}")
}
remove("_filtered.bed")
Data Format Exchange
AnnData exchange with Python
# Exchange data with scanpy via CSV/JSON
# Export from scanpy: adata.obs.to_csv("obs.csv")
let obs = read_csv("obs.csv")
# Do something in BioLang: count how many cells fall in each leiden cluster
let cluster_sizes = frequencies(map(obs, |row| row["leiden"]))
print("Cluster sizes: #{to_string(cluster_sizes)}")
# Compute custom scores: per-cell ratio of n_genes to total_counts
let scores = map(obs, |cell| {
float(cell["n_genes"]) / float(cell["total_counts"])
})
write_json(scores, "custom_scores.json")
# Import back in scanpy: scores = json.load(open("custom_scores.json"))
CSV/TSV for large datasets
# Use CSV/TSV for data exchange with Python/R
let data = read_csv("data/expression.csv")
# Write as TSV for efficient exchange
write_tsv(data, "data.tsv")
# Python can read it:
# pd.read_csv("data.tsv", sep="\t")
# Read TSV back and report the number of rows loaded
let loaded = tsv("data.tsv")
print("Loaded #{len(loaded)} rows from TSV")
Best Practices
Temporary file management
# Use a temp directory for clean interop.
# `data` is assumed to be a table loaded earlier in the script.
let tmp = "_tmp_interop"
mkdir(tmp)
try {
  let input_path = "#{tmp}/input.tsv"
  let output_path = "#{tmp}/output.tsv"
  write_tsv(data, input_path)
  let run = shell("python3 process.py #{input_path} #{output_path}")
  # Check the exit status so a failed Python run is reported with its own
  # stderr instead of surfacing later as a confusing read error.
  if run.exit_code == 0 {
    let result = tsv(output_path)
    write_tsv(result, "final_output.tsv")
  } else {
    print("Python error: #{run.stderr}")
  }
} catch e {
  print("Error: #{to_string(e)}")
}
# Clean up
# NOTE(review): tmp may still contain input.tsv/output.tsv at this point -
# confirm that remove() deletes a non-empty directory.
remove(tmp)