Clinical Variants

Clinical variant analysis requires careful filtering, annotation matching, and classification. BioLang streamlines these workflows with built-in ClinVar integration and ACMG-compatible classification helpers.

ClinVar Queries

Query ClinVar for a gene

# requires: internet connection
# Look up the ClinVar records for BRCA1 that are flagged pathogenic
let brca1_ids = ncbi_search("clinvar", "BRCA1[gene] AND pathogenic[clinsig]")

print(f"Pathogenic BRCA1 variant IDs: {len(brca1_ids)}")
# Pull summary records for only the first ten hits and list their UIDs
let top_ids = brca1_ids |> take(10)
let records = ncbi_summary(top_ids, "clinvar")
for rec in records {
  print(f"  UID {rec.uid}")
}

Batch ClinVar annotation

# requires: internet connection
# Annotate every record of a VCF with the outcome of a ClinVar lookup

# Build the annotation record for a single variant: its coordinates and
# alleles, the "gene" INFO entry, whether the ClinVar search returned anything,
# and the first returned ID (or "none" when there is no hit).
# NOTE(review): each variant triggers its own ncbi_search call — confirm that
# the request volume is acceptable for the size of the input VCF.
# NOTE(review): confirm the "chrom:pos[chrpos]" and "[alt]" query terms against
# the ClinVar search-field reference.
let annotate = |v| {
  let hits = ncbi_search("clinvar", f"{v.chrom}:{v.pos}[chrpos] AND {v.alt}[alt]")
  {
    chrom: v.chrom,
    pos: v.pos,
    ref: v.ref,
    alt: v.alt,
    gene: v.info["gene"],
    clinvar_match: len(hits) > 0,
    clinvar_id: first(hits) ?? "none"
  }
}

let vcf = read_vcf("patient_variants.vcf")
let annotated = map(vcf, annotate)

# Keep only the variants with a ClinVar hit, report how many, and save them
let with_clinvar = filter(annotated, |row| { row.clinvar_match })
print(f"Variants with ClinVar match: {len(with_clinvar)}")
write_tsv(with_clinvar, "clinvar_annotated.tsv")

Pathogenicity Filtering

Multi-tier variant filtering

# Clinical-grade variant filtering pipeline
let vcf = read_vcf("annotated.vcf.gz")

# Every tier starts from the variants whose FILTER value is PASS, so that
# subset is computed once and shared instead of being re-filtered per tier.
let passing = vcf |> filter(|v| { v.filter == "PASS" })

# Tier 1: Known pathogenic
# CLNSIG must mention "Pathogenic" without "Conflicting", and CLNREVSTAT must
# mention "criteria_provided" without "conflicting". A missing INFO entry is
# read as an empty string, so variants lacking either annotation are dropped.
let tier1 = passing
  |> filter(|v| {
    let sig = v.info["CLNSIG"] ?? ""
    (sig |> contains("Pathogenic")) && !(sig |> contains("Conflicting"))
  })
  |> filter(|v| {
    let revstat = v.info["CLNREVSTAT"] ?? ""
    (revstat |> contains("criteria_provided")) && !(revstat |> contains("conflicting"))
  })

# Tier 2: Likely pathogenic
# Selected on CLNSIG alone; no review-status check is applied to this tier.
# NOTE(review): a combined value such as "Pathogenic/Likely_pathogenic" would
# satisfy both the tier 1 and tier 2 tests — confirm whether the tiers are
# meant to be disjoint.
let tier2 = passing
  |> filter(|v| {
    let sig = v.info["CLNSIG"] ?? ""
    sig |> contains("Likely_pathogenic")
  })

# Tier 3: Rare variants in disease genes
# The panel file is split on newlines, i.e. it is expected to hold one gene
# symbol per line. A missing gnomAD_AF is read as "0" and so counts as rare.
let disease_genes = read_text("disease_gene_panel.txt") |> split("\n") |> collect
let tier3 = passing
  |> filter(|v| { disease_genes |> contains(v.info["gene"]) })
  |> filter(|v| { (v.info["gnomAD_AF"] ?? "0") |> float < 0.001 })
  |> filter(|v| { v.info["IMPACT"] == "HIGH" || v.info["IMPACT"] == "MODERATE" })

print(f"Tier 1 (known pathogenic):  {tier1 |> len}")
print(f"Tier 2 (likely pathogenic): {tier2 |> len}")
print(f"Tier 3 (rare, high impact): {tier3 |> len}")

tier1 |> write_vcf("tier1_pathogenic.vcf")
tier2 |> write_vcf("tier2_likely_pathogenic.vcf")
tier3 |> write_vcf("tier3_candidate.vcf")

Population frequency filtering

# Filter variants by population allele frequency
let vcf = read_vcf("patient.vcf.gz")

# PASS subset, computed once: it feeds both the frequency filter and the
# summary count (previously the same filter ran a second time inside the
# f-string, which also nested quoted literals in the interpolation).
let passing = vcf |> filter(|v| { v.filter == "PASS" })
let n_passing = passing |> len

# Remove common variants across all gnomAD populations
# A variant is kept only when both gnomAD_AF and gnomAD_AF_popmax are below
# 0.01; a missing annotation is read as "0" and therefore counts as rare.
let rare = passing
  |> filter(|v| {
    let af_all = (v.info["gnomAD_AF"] ?? "0") |> float
    let af_max = (v.info["gnomAD_AF_popmax"] ?? "0") |> float
    af_all < 0.01 && af_max < 0.01
  })

print(f"Total PASS variants: {n_passing}")
print(f"After frequency filter: {rare |> len}")
rare |> write_vcf("rare_variants.vcf")

ACMG Classification

Applying ACMG criteria

# Simplified ACMG variant classification

# Gene lists consulted by the criteria below. They are loaded once here rather
# than inside classify_variant, which would otherwise re-read both files for
# every variant. Each file is split on newlines, i.e. it is expected to hold
# one gene symbol per line.
let lof_genes = read_text("lof_genes.txt") |> split("\n") |> collect
let trunc_only = read_text("truncating_only_genes.txt") |> split("\n") |> collect

# Classify one annotated variant.
#
# Reads from v: consequence, gene, clinvar_aa_match, gnomad_af, cadd_phred, revel.
# Missing gnomad_af, cadd_phred and revel values default to 0.0.
#
# Returns a record with:
#   variant        - the input record, unchanged
#   evidence       - list of the criterion codes that were met, in the order
#                    PVS1, PS1, PM2, PP3, BP1
#   classification - one of "Pathogenic", "Likely pathogenic", "Likely benign",
#                    "Benign", "VUS"
let classify_variant = |v| {
  # PVS1: Null variant in a gene where LOF is a known mechanism
  let pvs1 = contains(["nonsense", "frameshift", "splice"], v.consequence) && contains(lof_genes, v.gene)

  # PS1: Same amino acid change as established pathogenic
  let ps1 = v.clinvar_aa_match == true

  # PM2: Absent from controls (or extremely rare)
  let pm2 = (v.gnomad_af ?? 0.0) < 0.0001

  # PP3: Computational evidence supports deleterious
  let pp3 = (v.cadd_phred ?? 0.0) > 25.0 && (v.revel ?? 0.0) > 0.7

  # BP1: Missense in gene where only truncating cause disease
  let bp1 = v.consequence == "missense" && contains(trunc_only, v.gene)

  # Collect the codes of the criteria that were met. Building the list with
  # filter/map keeps the result independent of whether push() mutates its
  # argument in place or returns a new list.
  let evidence = [
    { code: "PVS1", met: pvs1 },
    { code: "PS1", met: ps1 },
    { code: "PM2", met: pm2 },
    { code: "PP3", met: pp3 },
    { code: "BP1", met: bp1 }
  ]
    |> filter(|c| { c.met })
    |> map(|c| { c.code })

  # Classify based on evidence combination
  let has_pvs = evidence |> any(|e| starts_with(e, "PVS"))
  let has_ps = evidence |> any(|e| starts_with(e, "PS"))
  let n_pm = evidence |> filter(|e| starts_with(e, "PM")) |> len
  let n_bp = evidence |> filter(|e| starts_with(e, "BP")) |> len

  # NOTE(review): BP1 is the only benign criterion evaluated above, so n_bp is
  # at most 1 and the "Likely benign" branch cannot be reached as written.
  # NOTE(review): a variant with no evidence at all is labelled "Benign" here;
  # confirm this default is intended rather than "VUS".
  let classification = if has_pvs && has_ps { "Pathogenic" }
    else if has_pvs && n_pm >= 1 { "Likely pathogenic" }
    else if n_bp >= 2 { "Likely benign" }
    else if len(evidence) == 0 { "Benign" }
    else { "VUS" }
  { variant: v, evidence: evidence, classification: classification }
}

# Classify every row of the annotated variant table
let variants = tsv("annotated_variants.tsv")
let classified = map(variants, classify_variant)

# Print one "<class>: <count>" line per class, in this fixed order
let class_order = ["Pathogenic", "Likely pathogenic", "VUS", "Likely benign", "Benign"]
for label in class_order {
  let count = len(filter(classified, |c| { c.classification == label }))
  print(f"{label}: {count}")
}

# Save the full classification records
write_csv(classified, "acmg_classifications.csv")

Clinical Report Generation

Variant summary report

# Generate a clinical variant summary
let tier1 = tsv("tier1_pathogenic.tsv")
let tier2 = tsv("tier2_likely_pathogenic.tsv")
# The report describes the first row of the sample sheet
let patient = read_csv("data/sample_sheet.csv") |> first

# Render a table row as "<chrom>:<pos> <ref>><alt>"
let describe_variant = |row| {
  row["chrom"] + ":" + str(row["pos"]) + " " + row["ref"] + ">" + row["alt"]
}

# Assemble the report: patient and run metadata, the total number of records in
# the patient VCF, the combined tier 1 + tier 2 count, and a per-variant
# listing for each tier (tier 1 carries the fuller set of columns).
let report = {
  patient_id: patient["id"],
  analysis_date: now(),
  reference_genome: "GRCh38",
  total_variants_analyzed: len(read_vcf("patient.vcf.gz")),
  reportable_findings: len(tier1) + len(tier2),
  tier1_variants: map(tier1, |row| { {
    gene: row["gene"],
    variant: describe_variant(row),
    hgvs: row["hgvs_c"],
    protein: row["hgvs_p"],
    significance: row["significance"],
    condition: row["condition"],
    zygosity: row["zygosity"],
    inheritance: row["inheritance"]
  }}),
  tier2_variants: map(tier2, |row| { {
    gene: row["gene"],
    variant: describe_variant(row),
    significance: row["significance"]
  }})
}

# NOTE(review): report is a nested record (two of its fields are lists of
# records) — confirm that write_csv serialises this shape as intended.
write_csv(report, "clinical_report.csv")
print(f"Report generated: {report.reportable_findings} reportable findings")

Pharmacogenomics lookup

# Check pharmacogenomic variants
let pgx_variants = tsv("pgx_reference.tsv")  # star alleles and rsIDs
let patient_vcf = read_vcf("patient.vcf.gz")

# Look up one reference entry in the patient's calls. The first VCF record at
# the same chromosome and position supplies the genotype; when there is no
# such record, or it yields no genotype, the entry is reported as "ref/ref".
let lookup_entry = |pgx| {
  let same_site = |v| { v.chrom == pgx["chrom"] && v.pos == int(pgx["pos"]) }
  let found = first(filter(patient_vcf, same_site))

  {
    gene: pgx["gene"],
    star_allele: pgx["star_allele"],
    rsid: pgx["rsid"],
    drug: pgx["drug"],
    genotype: found?.genotypes |> first ?? "ref/ref",
    phenotype: pgx["phenotype"]
  }
}

let pgx_hits = map(pgx_variants, lookup_entry)

# Report only the entries whose genotype differs from the "ref/ref" fallback
print("Pharmacogenomic Report:")
let carried = filter(pgx_hits, |h| { h.genotype != "ref/ref" })
for entry in carried {
  print(f"  {entry.gene} {entry.star_allele}: {entry.genotype}")
  print(f"    Drug: {entry.drug} - {entry.phenotype}")
}