Grammar

This page provides the formal grammar of BioLang in an EBNF-like notation, along with tokenization rules. This is the authoritative reference for the language syntax.

Notation

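Symbol   Meaning
=        Definition
|        Alternative
( )      Grouping
[ ]      Optional (zero or one)
{ }      Repetition (zero or more)
" "      Terminal string
' '      Terminal string (used when the terminal itself contains a double quote)
..       Character range between two terminals (e.g. "a".."z")
(* *)    Comment
;        End of rule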

Program Structure

program        = { statement } ;

statement      = import_stmt
               | let_stmt
               | assign_stmt
               | fn_decl
               | struct_decl
               | enum_decl
               | trait_decl
               | impl_block
               | type_alias
               | for_stmt
               | while_stmt
               | return_stmt
               | break_stmt
               | continue_stmt
               | expression
               ;

terminator     = NEWLINE | EOF ;

Declarations

import_stmt    = "import" "{" ident_list "}" "from" STRING_LITERAL
               | "import" STRING_LITERAL [ "as" IDENT ]
               | "import" "*" "from" STRING_LITERAL
               | "pub" "import" "{" ident_list "}" "from" STRING_LITERAL
               ;

ident_list     = IDENT { "," IDENT } ;

let_stmt       = "let" [ "mut" ] IDENT [ ":" type_expr ] "=" expression ;

assign_stmt    = IDENT "=" expression ;

fn_decl        = [ "pub" ] "fn" IDENT [ type_params ] "(" [ param_list ] ")"
                 [ "->" type_expr ] block ;

param_list     = param { "," param } ;

param          = IDENT [ ":" type_expr ] [ "=" expression ] ;

type_params    = "[" IDENT { "," IDENT } "]" ;

struct_decl    = [ "pub" ] "struct" IDENT [ type_params ] "{"
                 { field_decl } "}" ;

field_decl     = [ DOC_COMMENT ]
                 IDENT ":" type_expr [ "=" expression ] ;

enum_decl      = [ "pub" ] "enum" IDENT [ type_params ] "{"
                 variant { "," variant } [ "," ] "}" ;

variant        = IDENT [ "(" type_list ")" ] ;

type_list      = type_expr { "," type_expr } ;

trait_decl     = [ "pub" ] "trait" IDENT [ type_params ] "{"
                 { trait_method } "}" ;

trait_method   = "fn" IDENT "(" [ param_list ] ")" [ "->" type_expr ]
                 [ block ] ;

impl_block     = "impl" [ type_params ] IDENT [ "for" IDENT ] "{"
                 { fn_decl } "}" ;

type_alias     = "type" IDENT [ type_params ] "=" type_expr ;

Type Expressions

type_expr      = simple_type
               | generic_type
               | fn_type
               | tuple_type
               ;

simple_type    = "Int" | "Float" | "String" | "Bool"
               | "DNA" | "RNA" | "Protein" | "Interval"
               | "Table" | "Matrix" | "Stream"
               | IDENT
               ;

generic_type   = IDENT "[" type_list "]" ;
               (* e.g., List[Int], Map[String, Float], Option[Gene] *)

fn_type        = "fn" "(" [ type_list ] ")" "->" type_expr ;

tuple_type     = "(" type_expr "," type_list ")" ;

Expressions

expression     = pipe_expr ;

pipe_expr      = or_expr { "|>" or_expr } ;

or_expr        = and_expr { "||" and_expr } ;

and_expr       = equality_expr { "&&" equality_expr } ;

equality_expr  = comparison_expr { ( "==" | "!=" ) comparison_expr } ;

comparison_expr = range_expr { ( "<" | "<=" | ">" | ">=" | "in" ) range_expr } ;

range_expr     = add_expr [ ( ".." | "..=" ) add_expr ] ;

add_expr       = mul_expr { ( "+" | "-" ) mul_expr } ;

mul_expr       = unary_expr { ( "*" | "/" | "%" ) unary_expr } ;

unary_expr     = ( "-" | "!" ) unary_expr
               | power_expr
               ;

power_expr     = postfix_expr [ "**" unary_expr ] ;

postfix_expr   = primary { postfix_op } ;

postfix_op     = "(" [ arg_list ] ")"      (* function call *)
               | "[" expression "]"        (* index *)
               | "[" expression ".." [ expression ] "]"  (* slice *)
               | "." IDENT                 (* field access *)
               | "?"                       (* error propagation *)
               ;

arg_list       = arg { "," arg } ;

arg            = [ IDENT "=" ] expression ;  (* positional or named *)

primary        = INT_LITERAL
               | FLOAT_LITERAL
               | STRING_LITERAL
               | BOOL_LITERAL
               | DNA_LITERAL
               | RNA_LITERAL
               | PROTEIN_LITERAL
               | IDENT
               | lambda_expr
               | fn_expr
               | if_expr
               | match_expr
               | try_expr
               | list_literal
               | map_literal
               | set_literal
               | comprehension
               | "(" expression ")"
               | block
               ;

Compound Expressions

lambda_expr    = "|" [ param_list ] "|" ( expression | block ) ;

fn_expr        = "fn" "(" [ param_list ] ")" [ "->" type_expr ]
                 ( expression | block ) ;

if_expr        = "if" expression block
                 { "else" "if" expression block }
                 [ "else" block ] ;

match_expr     = "match" expression "{"
                 { match_arm }
                 "}" ;

match_arm      = pattern [ "if" expression ] "=>" ( expression | block ) "," ;

try_expr       = "try" block { "catch" [ IDENT [ "as" IDENT ] ] block } ;

block          = "{" { statement } [ expression ] "}" ;

comprehension  = "[" expression "for" IDENT "in" expression
                 { "for" IDENT "in" expression }
                 [ "if" expression ] "]"
               | "{" expression ":" expression "for" "(" IDENT "," IDENT ")"
                 "in" expression [ "if" expression ] "}"
               ;

Patterns

pattern        = literal_pattern
               | ident_pattern
               | wildcard_pattern
               | tuple_pattern
               | list_pattern
               | struct_pattern
               | enum_pattern
               | or_pattern
               ;

literal_pattern = INT_LITERAL | FLOAT_LITERAL | STRING_LITERAL
                | BOOL_LITERAL | DNA_LITERAL | RNA_LITERAL
                | range_pattern
                ;

range_pattern  = INT_LITERAL ( ".." | "..=" ) INT_LITERAL ;

ident_pattern  = IDENT ;

wildcard_pattern = "_" ;

tuple_pattern  = "(" pattern { "," pattern } ")" ;

list_pattern   = "[" "]"
               | "[" pattern { "," pattern } [ "," "..." IDENT ] "]"
               ;

struct_pattern = IDENT "{" field_pattern { "," field_pattern }
                 [ "," ".." ] "}" ;

field_pattern  = IDENT [ ":" pattern ] ;

enum_pattern   = IDENT [ "." IDENT ] [ "(" pattern { "," pattern } ")" ] ;

or_pattern     = pattern "|" pattern ;

Control Flow Statements

for_stmt       = "for" ( IDENT | tuple_pattern ) "in" expression block ;

while_stmt     = "while" expression block ;

return_stmt    = "return" [ expression ] ;

break_stmt     = "break" [ expression ] ;

continue_stmt  = "continue" ;

Tokenization

The lexer produces a stream of tokens from source text. BioLang's tokenizer has several notable features: newline-as-terminator with automatic suppression, bio literal prefixes, and the pipe operator.

Keywords

keyword        = "let" | "mut" | "fn" | "if" | "else" | "for" | "while"
               | "in" | "match" | "return" | "break" | "continue"
               | "import" | "from" | "as" | "pub" | "struct" | "enum"
               | "trait" | "impl" | "type" | "try" | "catch"
               | "true" | "false" | "None"
               ;

Operators and Punctuation

operator       = "|>" | "=>" | "->" | ".." | "..=" | "..."
               | "+" | "-" | "*" | "/" | "%" | "**"
               | "==" | "!=" | "<" | "<=" | ">" | ">="
               | "&&" | "||" | "!"
               | "=" | "|" | "?" | "." | ","
               ;

delimiter      = "(" | ")" | "[" | "]" | "{" | "}" | ":" | ";" ;

Literals

INT_LITERAL    = DIGIT { DIGIT | "_" }
               | "0x" HEX_DIGIT { HEX_DIGIT | "_" }
               | "0b" BIN_DIGIT { BIN_DIGIT | "_" }
               | "0o" OCT_DIGIT { OCT_DIGIT | "_" }
               ;

FLOAT_LITERAL  = DIGIT { DIGIT } "." DIGIT { DIGIT } [ EXPONENT ]
               | DIGIT { DIGIT } EXPONENT
               ;

EXPONENT       = ( "e" | "E" ) [ "+" | "-" ] DIGIT { DIGIT } ;

STRING_LITERAL = '"' { CHAR | ESCAPE | INTERPOLATION } '"'
               | 'r"' { CHAR } '"'
               ;

INTERPOLATION  = "{" expression "}" ;

ESCAPE         = "\" ( "n" | "t" | "r" | "\" | '"' | "{" | "0" ) ;

DNA_LITERAL    = 'dna"' { DNA_CHAR } '"' ;
DNA_CHAR       = "A" | "T" | "C" | "G" | "N"
               | "a" | "t" | "c" | "g" | "n" ;

RNA_LITERAL    = 'rna"' { RNA_CHAR } '"' ;
RNA_CHAR       = "A" | "U" | "C" | "G" | "N"
               | "a" | "u" | "c" | "g" | "n" ;

PROTEIN_LITERAL = 'protein"' { AMINO_CHAR } '"' ;
AMINO_CHAR     = "A" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "K" | "L"
               | "M" | "N" | "P" | "Q" | "R" | "S" | "T" | "V" | "W" | "Y"
               | "*"
               ;

BOOL_LITERAL   = "true" | "false" ;

Identifiers

IDENT          = ( ALPHA | "_" ) { ALPHA | DIGIT | "_" } ;

ALPHA          = "a".."z" | "A".."Z" ;
DIGIT          = "0".."9" ;
HEX_DIGIT      = DIGIT | "a".."f" | "A".."F" ;
BIN_DIGIT      = "0" | "1" ;
OCT_DIGIT      = "0".."7" ;

Newline Suppression

Newlines act as statement terminators. However, the lexer suppresses newline tokens when they appear after certain tokens, allowing expressions to span multiple lines naturally:

(* Newlines are suppressed after: *)
suppress_after = "|>" | "=>" | "->" | "," | "(" | "[" | "{"
               | "+" | "-" | "*" | "/" | "%" | "**"
               | "==" | "!=" | "<" | "<=" | ">" | ">="
               | "&&" | "||" | "=" | "|" | "."
               ;

Comments and Whitespace

COMMENT        = "#" { any character except NEWLINE } NEWLINE ;
DOC_COMMENT    = "##" { any character except NEWLINE } NEWLINE ;
WHITESPACE     = " " | "\t" ;      (* Spaces and tabs are ignored *)
NEWLINE        = "\n" | "\r\n" ;   (* Statement terminator *)

Operator Precedence

Operators listed from lowest to highest precedence:

Precedence     Operator       Associativity     Description
1 (lowest)     |>             Left              Pipe
2              ||             Left              Logical OR
3              &&             Left              Logical AND
4              == !=          Left              Equality
5              < <= > >= in   Left              Comparison
6              .. ..=         None              Range
7              + -            Left              Addition
8              * / %          Left              Multiplication
9              - !            Right (prefix)    Unary
10             **             Right             Exponentiation
11 (highest)   () [] . ?      Left              Postfix