Ensembl

The Ensembl client provides access to the Ensembl REST API for gene annotation, variant effect prediction (VEP), and sequence retrieval across hundreds of species. No API key is required. The Ensembl rate limit of 15 requests per second is handled automatically by the client.

ensembl_gene

Look up gene information by Ensembl gene ID. Takes one argument: an Ensembl gene ID string (e.g. "ENSG00000012048"). Returns a record with the fields id, symbol, description, species, biotype, start, end, strand, and chromosome.

# Look up by Ensembl gene ID
# ensembl_gene takes the gene ID string and returns a single record;
# each print below reads one field of that record.
let gene = ensembl_gene("ENSG00000012048")
print(gene.id)          # => "ENSG00000012048"
print(gene.symbol)      # => "BRCA1"
print(gene.biotype)     # => "protein_coding"
print(gene.chromosome)  # => "17"
print(gene.start)       # => 43044295
print(gene.end)         # => 43170245
print(gene.strand)      # => -1
print(gene.description) # free-text description; expected output not shown here
print(gene.species)     # => "homo_sapiens"

ensembl_symbol

Look up gene information by species and gene symbol. Takes two positional arguments: a species name (e.g. "human", "mouse", "zebrafish") and a gene symbol (e.g. "BRCA1"). Returns a record with the same fields as ensembl_gene.

# Look up by gene symbol
# Arguments are positional: species first, then the gene symbol.
let gene = ensembl_symbol("human", "BRCA1")
print(gene.id)          # => "ENSG00000012048"
print(gene.symbol)      # => "BRCA1"
print(gene.chromosome)  # => "17"
print(gene.biotype)     # => "protein_coding"

# Other species
# The symbol is written "Brca1" for mouse and "brca1" for zebrafish here.
# NOTE(review): confirm whether symbol matching is case-sensitive.
let mouse_gene = ensembl_symbol("mouse", "Brca1")
print(mouse_gene.id)
print(mouse_gene.species)

let zebrafish = ensembl_symbol("zebrafish", "brca1")
print(zebrafish.id)

ensembl_vep

Predict the functional consequences of a variant using the Variant Effect Predictor (VEP). Takes one argument: an HGVS notation string. Returns a list of records, each with the fields allele_string, most_severe_consequence, and transcript_consequences. The transcript_consequences field is itself a list of records with the fields gene_id, transcript_id, impact, and consequences.

# VEP consequence prediction using HGVS notation
# Genomic HGVS: chromosome 17, position 7675088, C>T substitution.
let results = ensembl_vep("17:g.7675088C>T")
# ensembl_vep returns a list, so walk every result record.
results |> each(|r| {
  print("Alleles: " + r.allele_string)
  print("Most severe: " + r.most_severe_consequence)

  # Each result carries its own list of per-transcript consequences.
  r.transcript_consequences |> each(|tc| {
    print(tc.gene_id + " / " + tc.transcript_id)
    print("  Impact: " + tc.impact)
    # consequences is passed as a separate argument rather than concatenated
    print("  Consequences: ", tc.consequences)
  })
})

# Transcript-level HGVS notation
# HGVS insertions need two flanking positions (c.N_MinsX), and a repeated base
# is written as a duplication. The BRCA1 variant known by the legacy name
# "5382insC" is c.5266dup in HGVS, so that form is used here.
results = ensembl_vep("ENST00000357654.9:c.5266dup")
results |> each(|r| {
  print(r.most_severe_consequence)
})

ensembl_sequence

Retrieve a sequence by Ensembl ID. Takes one required argument (an Ensembl ID) and one optional argument (the sequence type: "genomic", "cds", or "protein"), which defaults to "genomic" if omitted. Returns a record with the fields id, seq, and molecule.

# Get genomic sequence (default type)
# Only the ID is passed, so the type falls back to "genomic".
let result = ensembl_sequence("ENSG00000012048")
print(result.id)
print(result.seq |> len())    # Full genomic span including introns
print(result.molecule)

# Get CDS (coding sequence) — pass type as second argument
# The ID used here is a transcript ID (ENST...), not the gene ID used above.
let cds = ensembl_sequence("ENST00000357654", "cds")
print(cds.seq |> len())
# gc_content() is not described in this section; presumably the GC fraction of the sequence.
print(cds.seq |> gc_content())

# Get protein sequence
# The ID used here is a protein ID (ENSP...).
let prot = ensembl_sequence("ENSP00000350283", "protein")
print(prot.seq |> len())
# molecular_weight() is not described in this section; confirm its units before reporting the value.
print(prot.seq |> molecular_weight())

Practical Example: Gene Annotation Pipeline

# Annotate a list of genes using Ensembl
let gene_symbols = ["BRCA1", "TP53", "EGFR", "KRAS", "PIK3CA"]

# Build one summary record per symbol: resolve the symbol to a gene record,
# fetch a coding sequence for it, then collect the fields of interest.
let report = gene_symbols |> map(|name| {
  let gene = ensembl_symbol("human", name)
  # NOTE(review): gene.id is a gene ID, whereas the standalone "cds" example
  # passes a transcript ID. Confirm that ensembl_sequence accepts a gene ID
  # with type "cds" (and which transcript it selects) before relying on this.
  let cds = ensembl_sequence(gene.id, "cds")

  {
    symbol: name,
    ensembl_id: gene.id,
    chromosome: gene.chromosome,
    biotype: gene.biotype,
    cds_length: cds.seq |> len(),
    gc_content: cds.seq |> gc_content()
  }
})

# Print the report as a table, then write the same table to a CSV file.
report |> to_table() |> print()
report |> to_table() |> write_csv("gene_annotations.csv")

# VEP analysis for known variants
# All three variants use GRCh38 coordinates, matching the BRCA1 coordinates
# shown in the ensembl_gene example: chr17 (TP53), chr7 (EGFR), chr12 (KRAS).
# Mixing in GRCh37 positions would point VEP at the wrong reference base.
let hgvs_list = ["17:g.7675088C>T", "7:g.55191822T>G", "12:g.25245350C>T"]
let vep_report = hgvs_list |> map(|hgvs| {
  let results = ensembl_vep(hgvs)
  # Only the first result record is summarised for each variant.
  # Named top_hit so the local does not shadow the first() function.
  let top_hit = results |> first()
  {
    variant: hgvs,
    consequence: top_hit.most_severe_consequence,
    alleles: top_hit.allele_string
  }
})
vep_report |> to_table() |> print()