Knowledge Graphs

BioLang provides 15 builtins for building and querying graph data structures. Use them to model protein-protein interactions, gene regulatory networks, metabolic pathways, and ontology hierarchies as first-class graphs. Both directed and undirected graphs are supported, with arbitrary node and edge attributes.

Creating Graphs

graph(directed?) Bool? → Graph

Creates a new graph. Call with no arguments for an undirected graph, or pass true for a directed graph. Directed graphs distinguish edge direction — useful for regulatory networks and signaling cascades.

# Undirected graph (e.g., protein-protein interactions)
let ppi = graph()

# Directed graph (e.g., gene regulatory network)
let grn = graph(true)

Adding Nodes and Edges

add_node(g, id, attrs?) Graph, String, Record? → Graph

Adds a node with the given identifier and optional attribute record. If the node already exists, its attributes are updated. Returns the graph for chaining.

let g = graph()
  |> add_node("TP53", {type: "tumor_suppressor", chrom: "chr17"})
  |> add_node("MDM2", {type: "oncogene", chrom: "chr12"})
  |> add_node("BRCA1", {type: "tumor_suppressor", chrom: "chr17"})

add_edge(g, from, to, attrs?) Graph, String, String, Record? → Graph

Adds an edge between two nodes with optional attributes. If either node does not yet exist, it is automatically created with empty attributes. Returns the graph for chaining.

let g = graph()
  |> add_edge("TP53", "MDM2", {score: 0.99, source: "STRING"})
  |> add_edge("TP53", "BRCA1", {score: 0.95, source: "STRING"})
  |> add_edge("BRCA1", "BARD1", {score: 0.97, source: "STRING"})

# Nodes TP53, MDM2, BRCA1, and BARD1 are created automatically

Querying

nodes(g) Graph → List

Returns a list of all node identifiers in the graph.

let ids = nodes(g)
# ["TP53", "MDM2", "BRCA1", "BARD1"]

edges(g) Graph → List

Returns a list of all edges as records with from, to, and any attached attributes.

let all_edges = edges(g)
all_edges |> each(|e| print(e.from + " -- " + e.to + " score=" + str(e.score)))

has_node(g, id) Graph, String → Bool

Returns true if the graph contains a node with the given identifier.

has_node(g, "TP53")    # true
has_node(g, "EGFR")    # false

has_edge(g, from, to) Graph, String, String → Bool

Returns true if an edge exists between the two nodes. In undirected graphs, argument order does not matter. In directed graphs, it checks specifically for an edge pointing from the `from` node to the `to` node.

has_edge(g, "TP53", "MDM2")   # true
has_edge(g, "MDM2", "BARD1")  # false

neighbors(g, id) Graph, String → List

Returns a list of node identifiers directly connected to the given node. In directed graphs, returns outgoing neighbors only.

let tp53_partners = neighbors(g, "TP53")
# ["MDM2", "BRCA1"]

print(str(len(tp53_partners)) + " interaction partners for TP53")

degree(g, id) Graph, String → Int

Returns the number of edges connected to the given node. In directed graphs, counts both incoming and outgoing edges.

let d = degree(g, "TP53")
# 2 (connected to MDM2 and BRCA1)

shortest_path(g, from, to) Graph, String, String → List | Nil

Finds the shortest path (the one with the fewest edges) between two nodes using breadth-first search. Returns a list of node identifiers forming the path, including both endpoints, or nil if no path exists.

let path = shortest_path(g, "MDM2", "BARD1")
# ["MDM2", "TP53", "BRCA1", "BARD1"]

if path != nil then
  print("Path length: " + str(len(path) - 1) + " edges")
else
  print("No path found")
end

Graph Analysis

connected_components(g) Graph → List

Returns a list of connected components, where each component is a list of node identifiers. Useful for identifying separate clusters in interaction networks.

# Build a network with two disconnected clusters
let g = graph()
  |> add_edge("TP53", "MDM2", {})
  |> add_edge("TP53", "BRCA1", {})
  |> add_edge("KRAS", "BRAF", {})
  |> add_edge("BRAF", "MEK1", {})

let components = connected_components(g)
# [["TP53", "MDM2", "BRCA1"], ["KRAS", "BRAF", "MEK1"]]

print(str(len(components)) + " connected components")

subgraph(g, node_ids) Graph, List → Graph

Extracts a subgraph containing only the specified nodes and the edges that connect those nodes to one another. Node and edge attributes are preserved.

# Extract the p53 neighborhood
let tp53_subnet = subgraph(g, ["TP53", "MDM2", "BRCA1"])

print(str(len(nodes(tp53_subnet))) + " nodes in subgraph")
print(str(len(edges(tp53_subnet))) + " edges in subgraph")

node_attr(g, id) Graph, String → Record | Nil

Returns the attribute record attached to a node, or nil if the node does not exist.

let attrs = node_attr(g, "TP53")
# {type: "tumor_suppressor", chrom: "chr17"}

print(attrs.type)   # "tumor_suppressor"

Removing Nodes and Edges

remove_node(g, id) Graph, String → Graph

Removes a node and all edges connected to it. Returns the graph for chaining.

let g = g |> remove_node("MDM2")

has_node(g, "MDM2")          # false
has_edge(g, "TP53", "MDM2")  # false (edge removed too)

remove_edge(g, from, to) Graph, String, String → Graph

Removes the edge between two nodes. The nodes themselves remain in the graph. Returns the graph for chaining.

let g = g |> remove_edge("TP53", "BRCA1")

has_node(g, "TP53")           # true (node still exists)
has_edge(g, "TP53", "BRCA1")  # false (edge removed)

Directed vs Undirected

The choice between directed and undirected affects edge lookup, neighbor queries, and path traversal. Undirected graphs treat edges as bidirectional; directed graphs respect edge direction.

# Undirected: protein-protein interactions are symmetric
let ppi = graph()
  |> add_edge("TP53", "MDM2", {})

has_edge(ppi, "TP53", "MDM2")  # true
has_edge(ppi, "MDM2", "TP53")  # true  (symmetric)
neighbors(ppi, "MDM2")         # ["TP53"]

# Directed: transcription factor regulates target gene
let grn = graph(true)
  |> add_edge("TP53", "CDKN1A", {type: "activates"})
  |> add_edge("TP53", "BAX", {type: "activates"})
  |> add_edge("MYC", "TP53", {type: "represses"})

has_edge(grn, "TP53", "CDKN1A")  # true
has_edge(grn, "CDKN1A", "TP53")  # false (directed)
neighbors(grn, "TP53")           # ["CDKN1A", "BAX"] (outgoing only)
degree(grn, "TP53")              # 3 (2 outgoing + 1 incoming)

Real-World Example: STRING Network Analysis

Build a protein-protein interaction network from the STRING database, identify hub genes by degree centrality, and check connectivity between genes of interest.

# requires: internet connection (STRING API)
# Query STRING for TP53 interaction partners (9606 is the NCBI taxonomy ID for human)
let interactions = string_network(["TP53"], 9606)

# Build graph from STRING results — each record has {protein_a, protein_b, score}
let ppi = graph()
interactions |> each(|row| {
  ppi = add_edge(ppi, row.protein_a, row.protein_b, {
    score: row.score,
    source: "STRING"
  })
})

print(str(len(nodes(ppi))) + " proteins in network")
print(str(len(edges(ppi))) + " interactions")

# Find hub genes (highest degree)
let hub_genes = nodes(ppi)
  |> map(|n| {gene: n, degree: degree(ppi, n)})
  |> sort_by(|x| 0 - x.degree)
  |> take(10)

print("Top 10 hub genes:")
hub_genes |> each(|h| print("  " + h.gene + ": " + str(h.degree) + " interactions"))

# Check connectivity between two genes
let path = shortest_path(ppi, "MDM2", "BRCA1")
if path != nil then
  print("Path from MDM2 to BRCA1: " + join(path, " -> "))
  print("Distance: " + str(len(path) - 1) + " edges")
else
  print("MDM2 and BRCA1 are not connected")
end

# Identify network clusters
let components = connected_components(ppi)
print(str(len(components)) + " connected components")
components
  |> filter(|c| len(c) >= 3)
  |> each(|c| print("  Cluster (" + str(len(c)) + " genes): " + join(take(c, 5), ", ")))

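# Extract subnetwork of tumor suppressors
# Note: nodes auto-created by add_edge start with empty attributes, so annotate them
# with add_node(ppi, id, {type: ...}) first; otherwise no node has a type to match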
let ts_genes = nodes(ppi)
  |> filter(|n| {
    let a = node_attr(ppi, n)
    a != nil and a.type == "tumor_suppressor"
  })
let ts_subnet = subgraph(ppi, ts_genes)
print(str(len(nodes(ts_subnet))) + " tumor suppressors, " + str(len(edges(ts_subnet))) + " interactions")

Builtin Reference

| Builtin | Returns | Description |
|---|---|---|
| graph(directed?) | Graph | Create a new graph; pass true for directed |
| add_node(g, id, attrs?) | Graph | Add or update a node with optional attributes |
| add_edge(g, from, to, attrs?) | Graph | Add an edge; auto-creates missing nodes |
| nodes(g) | List | List all node identifiers |
| edges(g) | List | List all edges as records (from, to, attrs) |
| has_node(g, id) | Bool | Check if a node exists |
| has_edge(g, from, to) | Bool | Check if an edge exists between two nodes |
| neighbors(g, id) | List | List directly connected node identifiers |
| degree(g, id) | Int | Count edges connected to a node |
| shortest_path(g, from, to) | List \| Nil | BFS shortest path; nil if unreachable |
| connected_components(g) | List | List of node-id lists per component |
| subgraph(g, node_ids) | Graph | Extract subgraph with given nodes and their edges |
| node_attr(g, id) | Record \| Nil | Get attribute record for a node |
| remove_node(g, id) | Graph | Remove a node and all its edges |
| remove_edge(g, from, to) | Graph | Remove an edge; nodes remain |