String Operations
18 functions for string manipulation, searching, and formatting. Strings are immutable: no function modifies its input, and functions that transform a string return a new string.
upper / lower
Convert string to uppercase or lowercase.
upper(s) -> string
lower(s) -> string
upper("atcg") # "ATCG"
lower("BRCA1") # "brca1"
# Normalize sequence input
let seq = "atcGATcg"
let normalized = upper(seq) # "ATCGATCG"
trim / trim_left / trim_right
Remove whitespace from both ends, start, or end of a string.
trim(s) -> string
trim_left(s) -> string
trim_right(s) -> string
trim(" ATCG ") # "ATCG"
trim_left(" ATCG ") # "ATCG "
trim_right(" ATCG ") # " ATCG"
# Clean FASTA header
let header = ">NM_001256799.3 Homo sapiens BRCA1 \n"
trim(header) # ">NM_001256799.3 Homo sapiens BRCA1"
split
Split a string by a delimiter into a list of substrings.
split(s, delimiter) -> list
| Parameter | Type | Description |
|---|---|---|
| s | string | String to split |
| delimiter | string | Delimiter (empty string splits into characters) |
split("A,T,C,G", ",") # ["A", "T", "C", "G"]
split("ATCG", "") # ["A", "T", "C", "G"]
split("chr1:1000-2000", ":") # ["chr1", "1000-2000"]
# Parse SAM fields
let sam_line = "read1\t0\tchr1\t100\t60\t50M\t*\t0\t0\tATCG\tFFFF"
let fields = split(sam_line, "\t")
println("RNAME:", fields[2]) # RNAME: chr1
join
Join a list of strings with a separator.
join(list, separator) -> string
join(["A", "T", "C", "G"], "") # "ATCG"
join(["chr1", "1000", "2000"], ":") # "chr1:1000:2000"
join(["BRCA1", "TP53", "EGFR"], ", ") # "BRCA1, TP53, EGFR"
replace
Replace all occurrences of a substring.
replace(s, old, new) -> string
replace("ATCGATCG", "T", "U") # "AUCGAUCG" (DNA to RNA)
replace("chr1:1000-2000", "chr", "") # "1:1000-2000"
starts_with / ends_with / contains
Test for substring presence at start, end, or anywhere.
starts_with(s, prefix) -> bool
ends_with(s, suffix) -> bool
contains(s, substring) -> bool
starts_with("NM_001256", "NM_") # true
ends_with("sample.fastq.gz", ".gz") # true
contains("BRCA1 DNA repair", "repair") # true
# Filter FASTA headers
let headers = [">NM_001", ">XM_002", ">NM_003"]
filter(headers, |h| starts_with(h, ">NM_"))
# [">NM_001", ">NM_003"]
repeat
Repeat a string n times.
repeat(s, n) -> string
repeat("AT", 4) # "ATATATAT"
repeat("-", 40) # "----------------------------------------"
pad_left / pad_right
Pad a string to a target length with a fill character.
pad_left(s, width, fill?) -> string
pad_right(s, width, fill?) -> string
pad_left("42", 5, "0") # "00042"
pad_right("BRCA1", 10) # "BRCA1     " (5 trailing spaces)
# Align output columns
let genes = ["TP53", "BRCA1", "EGFR"]
let pvals = [0.001, 0.05, 0.003]
zip(genes, pvals) |> map(|pair| {
pad_right(pair[0], 8) + str(pair[1])
}) |> map(println)
char_at / substr / index_of
Positional access into strings. Indices are 0-based; index_of returns nil when the substring is not found.
char_at(s, index) -> string
substr(s, start, length?) -> string
index_of(s, substr) -> int | nil
char_at("ATCG", 2) # "C"
substr("ATCGATCG", 2, 4) # "CGAT"
substr("ATCGATCG", 4) # "ATCG" (to end)
index_of("ATCGATCG", "GAT") # 3
index_of("ATCGATCG", "XYZ") # nil
# Extract codon
let seq = "ATGCGATCGTAA"
let codon_start = 3
let codon = substr(seq, codon_start, 3) # "CGA"
format
Format a string with {} placeholders, replaced in order by arguments, or with named {key} placeholders filled from a map.
format(template, args...) -> string
format("chr{}:{}-{}", 1, 1000, 2000)
# "chr1:1000-2000"
format("{} has {} reads ({}% mapped)", "sample1", 1500000, 95.3)
# "sample1 has 1500000 reads (95.3% mapped)"
# Named placeholders with maps
format("Gene: {name}, p-value: {pval}", {"name": "BRCA1", "pval": 0.001})
# "Gene: BRCA1, p-value: 0.001"