Clinical Variants
Clinical variant analysis requires careful filtering, annotation matching, and classification. BioLang streamlines these workflows with built-in ClinVar integration and ACMG-compatible classification helpers.
ClinVar Queries
Query ClinVar for a gene
# requires: internet connection
# Find ClinVar records for BRCA1 whose clinical significance is pathogenic.
let ids = ncbi_search("clinvar", "BRCA1[gene] AND pathogenic[clinsig]")
print(f"Pathogenic BRCA1 variant IDs: {len(ids)}")
# Request summaries for just the first ten IDs.
let summaries = ncbi_summary(take(ids, 10), "clinvar")
for summary in summaries {
    print(f" UID {summary.uid}")
}
Batch ClinVar annotation
# requires: internet connection
# Look up each record of a patient VCF in ClinVar and keep the records that match.
let vcf = read_vcf("patient_variants.vcf")
# One ClinVar search is issued per VCF record.
let annotated = map(vcf, |rec| {
    let query = f"{rec.chrom}:{rec.pos}[chrpos] AND {rec.alt}[alt]"
    let hits = ncbi_search("clinvar", query)
    {
        chrom: rec.chrom,
        pos: rec.pos,
        ref: rec.ref,
        alt: rec.alt,
        gene: rec.info["gene"],
        # true when the search returned at least one ID
        clinvar_match: len(hits) > 0,
        # first returned ID, or the placeholder "none" when there were no hits
        clinvar_id: first(hits) ?? "none"
    }
})
let with_clinvar = filter(annotated, |row| { row.clinvar_match })
print(f"Variants with ClinVar match: {len(with_clinvar)}")
write_tsv(with_clinvar, "clinvar_annotated.tsv")
Pathogenicity Filtering
Multi-tier variant filtering
# Clinical-grade variant filtering pipeline
# Splits the PASS records of an annotated VCF into three review tiers and
# writes each tier to its own VCF file.
let vcf = read_vcf("annotated.vcf.gz")

# Shared predicate: every tier starts from records whose FILTER value is PASS.
let is_pass = |v| { v.filter == "PASS" }

# Tier 1: Known pathogenic
# CLNSIG must mention "Pathogenic" without "Conflicting", and CLNREVSTAT must
# report provided criteria without "conflicting". Missing INFO keys count as "".
let tier1 = vcf
    |> filter(is_pass)
    |> filter(|v| {
        let sig = v.info["CLNSIG"] ?? ""
        contains(sig, "Pathogenic") && !contains(sig, "Conflicting")
    })
    |> filter(|v| {
        let revstat = v.info["CLNREVSTAT"] ?? ""
        contains(revstat, "criteria_provided") && !contains(revstat, "conflicting")
    })

# Tier 2: Likely pathogenic
# Only the CLNSIG value is checked here; no review-status filter is applied.
let tier2 = vcf
    |> filter(is_pass)
    |> filter(|v| {
        let sig = v.info["CLNSIG"] ?? ""
        contains(sig, "Likely_pathogenic")
    })

# Tier 3: Rare variants in disease genes
# The panel file holds one gene symbol per line. Blank lines (for example the
# one produced by a trailing newline) are dropped so that an empty string can
# never count as a panel gene.
let disease_genes = read_text("disease_gene_panel.txt")
    |> split("\n")
    |> filter(|g| { g != "" })
    |> collect
let tier3 = vcf
    |> filter(is_pass)
    |> filter(|v| { contains(disease_genes, v.info["gene"] ?? "") })
    # a missing gnomAD_AF is treated as frequency 0, i.e. as rare
    |> filter(|v| { (v.info["gnomAD_AF"] ?? "0") |> float < 0.001 })
    |> filter(|v| {
        let impact = v.info["IMPACT"] ?? ""
        impact == "HIGH" || impact == "MODERATE"
    })

print(f"Tier 1 (known pathogenic): {tier1 |> len}")
print(f"Tier 2 (likely pathogenic): {tier2 |> len}")
print(f"Tier 3 (rare, high impact): {tier3 |> len}")
tier1 |> write_vcf("tier1_pathogenic.vcf")
tier2 |> write_vcf("tier2_likely_pathogenic.vcf")
tier3 |> write_vcf("tier3_candidate.vcf")
Population frequency filtering
# Filter variants by population allele frequency
# Keeps PASS records whose overall and population-maximum gnomAD frequencies
# are both below 1%, then writes them to rare_variants.vcf.
let vcf = read_vcf("patient.vcf.gz")
let is_pass = |v| { v.filter == "PASS" }

# Counted as a named value so the print below interpolates a plain identifier
# instead of a closure containing its own quoted string.
let pass_count = vcf |> filter(is_pass) |> len

# Remove common variants across all gnomAD populations
let rare = vcf
    |> filter(is_pass)
    |> filter(|v| {
        # a missing frequency annotation is read as "0", i.e. treated as rare
        let af_all = (v.info["gnomAD_AF"] ?? "0") |> float
        let af_max = (v.info["gnomAD_AF_popmax"] ?? "0") |> float
        af_all < 0.01 && af_max < 0.01
    })

print(f"Total PASS variants: {pass_count}")
print(f"After frequency filter: {rare |> len}")
rare |> write_vcf("rare_variants.vcf")
ACMG Classification
Applying ACMG criteria
# Simplified ACMG variant classification

# Gene lists are loaded once, outside the closure, so classifying many
# variants does not re-read the files for every variant. Each file holds one
# gene symbol per line; blank lines (e.g. from a trailing newline) are dropped.
let lof_genes = read_text("lof_genes.txt")
    |> split("\n")
    |> filter(|g| { g != "" })
    |> collect
let trunc_only = read_text("truncating_only_genes.txt")
    |> split("\n")
    |> filter(|g| { g != "" })
    |> collect

# classify_variant(v) -> { variant, evidence, classification }
#   v              record; the fields read are consequence, gene,
#                  clinvar_aa_match, gnomad_af, cadd_phred and revel
#   evidence       list of the criterion codes that were met, in the order
#                  PVS1, PS1, PM2, PP3, BP1
#   classification "Pathogenic", "Likely pathogenic", "Likely benign" or "VUS"
# "Benign" is never assigned: this simplified rule set evaluates no stand-alone
# or strong benign criteria, and a variant that meets no criteria at all is of
# uncertain significance rather than benign.
let classify_variant = |v| {
    # PVS1: Null variant in a gene where LOF is a known mechanism
    let pvs1 = contains(["nonsense", "frameshift", "splice"], v.consequence) && contains(lof_genes, v.gene)
    # PS1: Same amino acid change as established pathogenic
    # NOTE(review): assumes clinvar_aa_match is a boolean, not the text "true" - confirm
    let ps1 = v.clinvar_aa_match == true
    # PM2: Absent from controls (or extremely rare); a missing frequency counts as 0
    let pm2 = (v.gnomad_af ?? 0.0) < 0.0001
    # PP3: Computational evidence supports deleterious (both scores must agree)
    let pp3 = (v.cadd_phred ?? 0.0) > 25.0 && (v.revel ?? 0.0) > 0.7
    # BP1: Missense in a gene where only truncating variants cause disease
    let bp1 = v.consequence == "missense" && contains(trunc_only, v.gene)

    # Keep the codes of the criteria that were met. Building the list by
    # filtering a table needs no in-place list mutation.
    let evidence = [
        { code: "PVS1", met: pvs1 },
        { code: "PS1", met: ps1 },
        { code: "PM2", met: pm2 },
        { code: "PP3", met: pp3 },
        { code: "BP1", met: bp1 }
    ] |> filter(|c| { c.met }) |> map(|c| { c.code })

    # Classify based on evidence combination
    let has_pvs = evidence |> any(|e| starts_with(e, "PVS"))
    let has_ps = evidence |> any(|e| starts_with(e, "PS"))
    let n_pm = evidence |> filter(|e| starts_with(e, "PM")) |> len
    let n_bp = evidence |> filter(|e| starts_with(e, "BP")) |> len
    let classification = if has_pvs && has_ps { "Pathogenic" }
        else if has_pvs && n_pm >= 1 { "Likely pathogenic" }
        else if n_bp >= 2 { "Likely benign" }
        else { "VUS" }
    { variant: v, evidence: evidence, classification: classification }
}
# Classify every row of the annotated table, print a per-class tally, and
# save the classified rows.
let variants = tsv("annotated_variants.tsv")
let classified = map(variants, classify_variant)
for label in ["Pathogenic", "Likely pathogenic", "VUS", "Likely benign", "Benign"] {
    let in_class = filter(classified, |c| { c.classification == label })
    let n = len(in_class)
    print(f"{label}: {n}")
}
write_csv(classified, "acmg_classifications.csv")
Clinical Report Generation
Variant summary report
# Generate a clinical variant summary
# Builds one report record from the tier 1 / tier 2 tables and the sample
# sheet, writes it to clinical_report.csv, and prints the finding count.
let tier1 = tsv("tier1_pathogenic.tsv")
let tier2 = tsv("tier2_likely_pathogenic.tsv")
# Only the first row of the sample sheet is used.
let patient = first(read_csv("data/sample_sheet.csv"))

# Renders a table row as "chrom:pos ref>alt".
let variant_label = |row| {
    row["chrom"] + ":" + str(row["pos"]) + " " + row["ref"] + ">" + row["alt"]
}

# Tier 1 entries carry the full set of report columns.
let tier1_rows = map(tier1, |row| { {
    gene: row["gene"],
    variant: variant_label(row),
    hgvs: row["hgvs_c"],
    protein: row["hgvs_p"],
    significance: row["significance"],
    condition: row["condition"],
    zygosity: row["zygosity"],
    inheritance: row["inheritance"]
}})

# Tier 2 entries carry only gene, variant label and significance.
let tier2_rows = map(tier2, |row| { {
    gene: row["gene"],
    variant: variant_label(row),
    significance: row["significance"]
}})

let report = {
    patient_id: patient["id"],
    analysis_date: now(),
    reference_genome: "GRCh38",
    # the patient VCF is read here only to count its records
    total_variants_analyzed: len(read_vcf("patient.vcf.gz")),
    reportable_findings: len(tier1) + len(tier2),
    tier1_variants: tier1_rows,
    tier2_variants: tier2_rows
}
write_csv(report, "clinical_report.csv")
print(f"Report generated: {report.reportable_findings} reportable findings")
Pharmacogenomics lookup
# Check pharmacogenomic variants
# For every site in the PGx reference table, looks for a patient VCF record at
# the same chromosome and position and reports the sites whose genotype is not
# the "ref/ref" placeholder.
let pgx_variants = tsv("pgx_reference.tsv") # star alleles and rsIDs
let patient_vcf = read_vcf("patient.vcf.gz")
let pgx_hits = pgx_variants |> map(|pgx| {
    # First patient record at this site, if there is one.
    let patient_call = patient_vcf
        |> filter(|v| { v.chrom == pgx["chrom"] && v.pos == int(pgx["pos"]) })
        |> first
    # With no record at the site the genotype list defaults to empty, so
    # `first` always receives a list.
    let genotypes = patient_call?.genotypes ?? []
    {
        gene: pgx["gene"],
        star_allele: pgx["star_allele"],
        rsid: pgx["rsid"],
        drug: pgx["drug"],
        # "ref/ref" is the placeholder for "no genotype found at this site"
        genotype: first(genotypes) ?? "ref/ref",
        phenotype: pgx["phenotype"]
    }
})
print("Pharmacogenomic Report:")
# NOTE(review): a record present at the site with a homozygous-reference call
# is still listed below unless its genotype is spelled exactly "ref/ref" -
# confirm how genotypes are encoded.
let reportable = filter(pgx_hits, |h| { h.genotype != "ref/ref" })
for hit in reportable {
    print(f" {hit.gene} {hit.star_allele}: {hit.genotype}")
    print(f" Drug: {hit.drug} - {hit.phenotype}")
}