Collections
BioLang provides three core collection types: Lists (ordered sequences), Maps (key-value associations), and Sets (unique unordered elements). All collections are immutable by default and support functional operations via pipes.
Lists
Lists are ordered, indexable sequences of values. They are the most commonly used collection in BioLang:
# Create lists with bracket syntax
let numbers = [1, 2, 3, 4, 5]
let genes = ["BRCA1", "TP53", "EGFR", "KRAS"]
let mixed = [42, "hello", true, dna"ATCG"]
let empty: List[Int] = []
# Nested lists
let matrix = [
[1, 2, 3],
[4, 5, 6],
[7, 8, 9]
]
Indexing and Slicing
let items = ["a", "b", "c", "d", "e"]
let first = items[0] # "a"
let last = items[-1] # "e"
let slice = items[1..3] # ["b", "c"]
let tail = items[2..] # ["c", "d", "e"]
let head = items[..3] # ["a", "b", "c"]
# Safe indexing returns Option
let maybe = slice(items, 10, 11) |> first() # None (no panic)
List Operations
let a = [1, 2, 3]
let b = [4, 5, 6]
# Concatenation
let combined = a + b # [1, 2, 3, 4, 5, 6]
let with_item = push(a, 4) # [1, 2, 3, 4]
let prepended = concat([0], a) # [0, 1, 2, 3]
# Information
let length = len(a) # 3
let has_two = 2 in a # true
let idx = find_index(a, |x| x == 3) # 2
# Modification (returns new list)
let reversed = a |> reverse() # [3, 2, 1]
let sorted = a |> sort() # [1, 2, 3]
let deduped = [1, 2, 2, 3] |> unique() # [1, 2, 3]
let flat = [[1, 2], [3, 4]] |> flatten() # [1, 2, 3, 4]
Functional Operations on Lists
let scores = [85, 92, 78, 95, 88]
# map — transform each element
let percentages = scores |> map(|s| s / 100.0)
# filter — keep elements matching a predicate
let high = scores |> filter(|s| s >= 90) # [92, 95]
# reduce — combine all elements into one value
let total = scores |> reduce(|acc, s| acc + s) # 438
# find — first matching element
let first_high = scores |> find(|s| s >= 90) # Some(92)
# any / all — boolean tests
let has_perfect = scores |> any(|s| s == 100) # false
let all_passing = scores |> all(|s| s >= 60) # true
# zip — combine two lists element-wise
let names = ["Alice", "Bob", "Carol"]
let ages = [30, 25, 35]
let pairs = zip(names, ages) # [("Alice", 30), ("Bob", 25), ("Carol", 35)]
# map and flatten
let codons = sequences |> map(|s| kmers(s, 3)) |> flatten()
List Comprehensions
# Generate a list with a comprehension
let squares = [x * x for x in 1..=10]
# [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
# Comprehension with filter
let even_squares = [x * x for x in 1..=10 if x % 2 == 0]
# [4, 16, 36, 64, 100]
# Nested comprehension
let pairs = [(i, j) for i in 0..3 for j in 0..3 if i != j]
# [(0,1), (0,2), (1,0), (1,2), (2,0), (2,1)]
# Comprehension with bio data
let gc_ratios = [gc_content(seq) for seq in sequences if len(seq) >= 100]
Maps
Maps are unordered key-value collections. Keys must be hashable types (String, Int, Bool, DNA, RNA, Protein):
# Create maps with brace syntax
let gene_ids = {
"BRCA1": 672,
"TP53": 7157,
"EGFR": 1956,
"KRAS": 3845
}
let empty_map: Map[String, Int] = {}
# Access values
let id = gene_ids["BRCA1"] # 672
let maybe_id = has_key(gene_ids, "MYC") # false (safe check)
Map Operations
let m = {"a": 1, "b": 2, "c": 3}
# Query
let has_a = "a" in m # true
let size = len(m) # 3
let all_keys = keys(m) # ["a", "b", "c"]
let all_vals = values(m) # [1, 2, 3]
let pairs = zip(keys(m), values(m)) # [("a", 1), ("b", 2), ("c", 3)]
# Modification (returns new map)
let merged = merge(m, {"c": 99, "d": 4}) # {"a": 1, "b": 2, "c": 99, "d": 4}
# Iteration over maps
for (k, v) in m {
print(f"{k} = {v}")
}
Building Maps
# From a list of pairs
let pairs = [("BRCA1", 672), ("TP53", 7157)]
let m = into(pairs, "Map")
# Group a list into a map
let reads_by_chrom = reads |> group_by(|r| r.chrom)
# Map[String, List[Read]]
# Count occurrences
let base_counts = base_counts(sequence)
# {"A": 42, "T": 38, "C": 31, "G": 29}
# Map comprehension
let squared = {k: v * v for (k, v) in scores}
Sets
Sets are unordered collections of unique elements. BioLang uses brace syntax with values only (no colons) to create sets. Because empty braces {} denote an empty map, an empty set is created with set() instead:
# Create sets
let sample_ids = {"S001", "S002", "S003"}
let chromosomes = {"chr1", "chr2", "chr3", "chrX"}
let empty_set: Set[String] = set()
# From a list (deduplicates)
let uniq = unique([1, 2, 2, 3, 3, 3]) # [1, 2, 3]
let as_set = set([1, 2, 2, 3]) # {1, 2, 3}
Set Operations
let a = {"BRCA1", "TP53", "EGFR", "KRAS"}
let b = {"TP53", "KRAS", "MYC", "BRAF"}
# Standard set operations
let common = intersection(a, b) # {"TP53", "KRAS"}
let all_genes = union(a, b) # {"BRCA1", "TP53", "EGFR", "KRAS", "MYC", "BRAF"}
let only_a = difference(a, b) # {"BRCA1", "EGFR"}
# Membership
let has_tp53 = "TP53" in a # true
# Size
let count = len(a) # 4
Sets in Bioinformatics
# Find overlapping genes between two experiments
let experiment_1 = csv("degs_treated.csv")
|> select("gene")
|> collect()
|> unique()
let experiment_2 = csv("degs_control.csv")
|> select("gene")
|> collect()
|> unique()
let shared = intersection(set(experiment_1), set(experiment_2))
print(f"Shared DEGs: {len(shared)}")
# Venn diagram data
let only_treated = difference(set(experiment_1), set(experiment_2))
let only_control = difference(set(experiment_2), set(experiment_1))
print(f"Only treated: {len(only_treated)}")
print(f"Only control: {len(only_control)}")
Iteration Patterns
# enumerate — get index and value
for (i, gene) in enumerate(genes) {
print(f"{i + 1}. {gene}")
}
# chunked iteration
for batch in reads |> chunk(1000) {
process_batch(batch)
}
# windowed iteration
for w in signal |> window(3) {
let avg = mean(w)
print(f"Moving average: {avg}")
}
# pairwise iteration over two lists with zip
for (name, score) in zip(names, scores) {
print(f"{name}: {score}")
}
# take / drop
let first_ten = items |> take(10)
let after_header = lines |> drop(1)
Sorting and Ordering
# Sort with default ordering
let sorted_nums = [3, 1, 4, 1, 5] |> sort() # [1, 1, 3, 4, 5]
let sorted_desc = [3, 1, 4, 1, 5] |> sort() |> reverse() # [5, 4, 3, 1, 1]
# Sort by key
let by_length = genes |> sort_by(|g| len(g))
let by_score = results |> sort_by(|r| r.score) |> reverse()
# min / max
let best = max(scores) # 95
let worst = min(scores) # 78
let longest = sort_by(sequences, |s| len(s)) |> last()
Converting Between Collections
# Deduplicate a list
let unique_genes = unique(gene_list)
# Sort a collection
let sorted_genes = sort(gene_list)
# Lists to Table (one list per named column)
let t = table(
gene = gene_list,
score = score_list
)
# Table column to List
let genes = variants |> select("gene") |> collect()