NCBI
The NCBI client provides access to the National Center for Biotechnology Information
databases via E-utilities and the newer Datasets API. It covers gene search, sequence
retrieval, literature queries, taxonomy lookups, and BLAST submissions. Set the
NCBI_API_KEY environment variable for higher rate limits (10 req/s
instead of 3 req/s).
ncbi_search
Search any NCBI database (gene, pubmed, nucleotide, protein, sra, etc.) and return a list of matching record IDs:
# ncbi_search(db, term, [max_results])
# Search the gene database — returns list of ID strings
let results = ncbi_search("gene", "BRCA1")
print(results)
# => ["672"]
# Search PubMed for articles
let articles = ncbi_search("pubmed", "CRISPR AND cancer", 10)
articles |> print()
# Search the nucleotide database
let seqs = ncbi_search("nucleotide", "human mitochondria complete genome", 5)
print(seqs)
# Search SRA for sequencing runs
let runs = ncbi_search("sra", "RNA-seq human liver", 20)
print(runs)
# Advanced query syntax (Entrez query language)
results = ncbi_search("gene", "BRCA1[Gene] AND Homo sapiens[Organism]")
ncbi_fetch
Fetch full records from NCBI by ID. The first argument is the ID (or list of IDs), the second is the database, and the optional third is the return type:
# ncbi_fetch(ids, db, [rettype]) — raw E-utilities efetch
# Returns text in the requested format
# Fetch a nucleotide sequence record in FASTA format
let text = ncbi_fetch("NC_000017.11", "nucleotide", "fasta")
print(text)
# Fetch multiple gene summaries
let ids = ["672", "7157", "1956"] # BRCA1, TP53, EGFR
text = ncbi_fetch(ids, "gene")
# Fetch PubMed abstracts
let abstracts = ncbi_fetch(["33257863", "32015508"], "pubmed", "abstract")
ncbi_gene / ncbi_pubmed / ncbi_sequence
Convenience functions that wrap ncbi_search (ncbi_gene, ncbi_pubmed) and ncbi_fetch (ncbi_sequence) for common use cases:
# ncbi_gene(term, [max]) — search the gene database
let gene = ncbi_gene("BRCA1")
print(gene)
# ncbi_pubmed(term, [max]) — search PubMed
let pubs = ncbi_pubmed("CRISPR AND cancer", 10)
print(pubs)
# ncbi_sequence(id) — fetch a sequence as FASTA text
let seq = ncbi_sequence("NC_000017.11")
print(seq)
Practical Examples
Gene Research Pipeline
# Research a gene: combine gene, literature, and sequence data from NCBI
let gene_name = "TP53"
# 1. Get gene info
let gene = ncbi_gene(gene_name)
print("Gene:", gene)
# 2. Find recent publications
let pubs = ncbi_pubmed("{gene_name} AND cancer AND 2024[PDAT]", 10)
print("\nRecent publications:", pubs)
# 3. Get the reference sequence (NC_000017.11 is human chromosome 17, where TP53 is located)
let seq = ncbi_sequence("NC_000017.11")
print("\nSequence:", seq |> len(), "characters")
Batch Gene Lookup
# Look up multiple genes
let genes = ["BRCA1", "BRCA2", "TP53", "EGFR", "KRAS", "PIK3CA", "PTEN", "MYC"]
let summary = genes
|> map(|name| {
let result = ncbi_search("gene", name)
{
symbol: name,
ids: result,
count: len(result)
}
})
|> to_table()
summary |> print()
summary |> write_csv("gene_summary.csv")
SRA Data Discovery
# Find RNA-seq datasets for a tissue type
let runs = ncbi_search("sra", "RNA-seq AND human AND liver AND Illumina", 50)
print(runs)