Grammar
This page provides the formal grammar of BioLang in an EBNF-like notation, along with tokenization rules. This is the authoritative reference for the language syntax.
Notation
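| Symbol | Meaning |
|---|---|
| = | Definition |
| \| | Alternative |
| ( ) | Grouping |
| [ ] | Optional (zero or one) |
| { } | Repetition (zero or more) |
| " " | Terminal string |
| ' ' | Terminal string that itself contains a double quote |
| "a".."z" | Inclusive character range |
| (* *) | Comment |
| ; | End of rule |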
Program Structure
program = { statement } ;
statement = import_stmt
| let_stmt
| assign_stmt
| fn_decl
| struct_decl
| enum_decl
| trait_decl
| impl_block
| type_alias
| for_stmt
| while_stmt
| return_stmt
| break_stmt
| continue_stmt
| expression
;
terminator = NEWLINE | EOF ; (* ends a statement; see "Newline Suppression" for when NEWLINE is not emitted *)
Declarations
import_stmt = "import" "{" ident_list "}" "from" STRING
| "import" STRING [ "as" IDENT ]
| "import" "*" "from" STRING
| "pub" "import" "{" ident_list "}" "from" STRING
;
ident_list = IDENT { "," IDENT } ;
let_stmt = "let" [ "mut" ] IDENT [ ":" type_expr ] "=" expression ;
assign_stmt = IDENT "=" expression ;
fn_decl = [ "pub" ] "fn" IDENT [ type_params ] "(" [ param_list ] ")"
[ "->" type_expr ] block ;
param_list = param { "," param } ;
param = IDENT [ ":" type_expr ] [ "=" expression ] ;
type_params = "[" IDENT { "," IDENT } "]" ;
struct_decl = [ "pub" ] "struct" IDENT [ type_params ] "{"
{ field_decl } "}" ;
field_decl = [ DOC_COMMENT ]
IDENT ":" type_expr [ "=" expression ] ;
enum_decl = [ "pub" ] "enum" IDENT [ type_params ] "{"
variant { "," variant } [ "," ] "}" ;
variant = IDENT [ "(" type_list ")" ] ;
type_list = type_expr { "," type_expr } ;
trait_decl = [ "pub" ] "trait" IDENT [ type_params ] "{"
{ trait_method } "}" ;
trait_method = "fn" IDENT "(" [ param_list ] ")" [ "->" type_expr ]
[ block ] ;
impl_block = "impl" [ type_params ] IDENT [ "for" IDENT ] "{"
{ fn_decl } "}" ;
type_alias = "type" IDENT [ type_params ] "=" type_expr ;
Type Expressions
type_expr = simple_type
| generic_type
| fn_type
| tuple_type
;
simple_type = "Int" | "Float" | "String" | "Bool"
| "DNA" | "RNA" | "Protein" | "Interval"
| "Table" | "Matrix" | "Stream"
| IDENT
;
generic_type = IDENT "[" type_list "]" ;
(* e.g., List[Int], Map[String, Float], Option[Gene] *)
fn_type = "fn" "(" [ type_list ] ")" "->" type_expr ;
tuple_type = "(" type_expr "," type_list ")" ;
Expressions
expression = pipe_expr ;
pipe_expr = or_expr { "|>" or_expr } ;
or_expr = and_expr { "||" and_expr } ;
and_expr = equality_expr { "&&" equality_expr } ;
equality_expr = comparison_expr { ( "==" | "!=" ) comparison_expr } ;
comparison_expr = range_expr { ( "<" | "<=" | ">" | ">=" | "in" ) range_expr } ;
range_expr = add_expr [ ( ".." | "..=" ) add_expr ] ;
add_expr = mul_expr { ( "+" | "-" ) mul_expr } ;
mul_expr = unary_expr { ( "*" | "/" | "%" ) unary_expr } ;
unary_expr = ( "-" | "!" ) unary_expr
| power_expr
;
power_expr = postfix_expr [ "**" unary_expr ] ;
postfix_expr = primary { postfix_op } ;
postfix_op = "(" [ arg_list ] ")" (* function call *)
| "[" expression "]" (* index *)
| "[" expression ".." [ expression ] "]" (* slice *)
| "." IDENT (* field access *)
| "?" (* error propagation *)
;
arg_list = arg { "," arg } ;
arg = [ IDENT "=" ] expression ; (* positional or named *)
primary = INT_LITERAL
| FLOAT_LITERAL
| STRING_LITERAL
| BOOL_LITERAL
| DNA_LITERAL
| RNA_LITERAL
| PROTEIN_LITERAL
| IDENT
| lambda_expr
| fn_expr
| if_expr
| match_expr
| try_expr
| list_literal
| map_literal
| set_literal
| comprehension
| "(" expression ")"
| block
;
Compound Expressions
lambda_expr = "|" [ param_list ] "|" ( expression | block ) ;
fn_expr = "fn" "(" [ param_list ] ")" [ "->" type_expr ]
( expression | block ) ;
if_expr = "if" expression block
{ "else" "if" expression block }
[ "else" block ] ;
match_expr = "match" expression "{"
{ match_arm }
"}" ;
match_arm = pattern [ "if" expression ] "=>" ( expression | block ) "," ;
try_expr = "try" block { "catch" [ IDENT [ "as" IDENT ] ] block } ;
block = "{" { statement } [ expression ] "}" ;
comprehension = "[" expression "for" IDENT "in" expression
{ "for" IDENT "in" expression }
[ "if" expression ] "]"
| "{" expression ":" expression "for" "(" IDENT "," IDENT ")"
"in" expression [ "if" expression ] "}"
;
Patterns
pattern = literal_pattern
| ident_pattern
| wildcard_pattern
| tuple_pattern
| list_pattern
| struct_pattern
| enum_pattern
| or_pattern
;
literal_pattern = INT_LITERAL | FLOAT_LITERAL | STRING_LITERAL
| BOOL_LITERAL | DNA_LITERAL | RNA_LITERAL
| range_pattern
;
range_pattern = INT_LITERAL ( ".." | "..=" ) INT_LITERAL ;
ident_pattern = IDENT ;
wildcard_pattern = "_" ;
tuple_pattern = "(" pattern { "," pattern } ")" ;
list_pattern = "[" "]"
| "[" pattern { "," pattern } [ "," "..." IDENT ] "]"
;
struct_pattern = IDENT "{" field_pattern { "," field_pattern }
[ "," ".." ] "}" ;
field_pattern = IDENT [ ":" pattern ] ;
enum_pattern = IDENT [ "." IDENT ] [ "(" pattern { "," pattern } ")" ] ;
or_pattern = pattern "|" pattern ;
Control Flow Statements
for_stmt = "for" ( IDENT | tuple_pattern ) "in" expression block ;
while_stmt = "while" expression block ;
return_stmt = "return" [ expression ] ;
break_stmt = "break" [ expression ] ;
continue_stmt = "continue" ;
Tokenization
The lexer produces a stream of tokens from source text. BioLang's tokenizer has several notable features: newline-as-terminator with automatic suppression, bio literal prefixes, and the pipe operator.
Keywords
keyword = "let" | "mut" | "fn" | "if" | "else" | "for" | "while"
| "in" | "match" | "return" | "break" | "continue"
| "import" | "from" | "as" | "pub" | "struct" | "enum"
| "trait" | "impl" | "type" | "try" | "catch"
| "true" | "false" | "None"
;
Operators and Punctuation
operator = "|>" | "=>" | "->" | ".." | "..=" | "..."
| "+" | "-" | "*" | "/" | "%" | "**"
| "==" | "!=" | "<" | "<=" | ">" | ">="
| "&&" | "||" | "!"
| "=" | "|" | "?" | "." | ","
;
delimiter = "(" | ")" | "[" | "]" | "{" | "}" | ":" | ";" ;
Literals
INT_LITERAL = DIGIT { DIGIT | "_" }
| "0x" HEX_DIGIT { HEX_DIGIT | "_" }
| "0b" BIN_DIGIT { BIN_DIGIT | "_" }
| "0o" OCT_DIGIT { OCT_DIGIT | "_" }
;
FLOAT_LITERAL = DIGIT { DIGIT } "." DIGIT { DIGIT } [ EXPONENT ]
| DIGIT { DIGIT } EXPONENT
;
EXPONENT = ( "e" | "E" ) [ "+" | "-" ] DIGIT { DIGIT } ;
STRING_LITERAL = '"' { CHAR | ESCAPE | INTERPOLATION } '"'
| 'r"' { CHAR } '"'
;
INTERPOLATION = "{" expression "}" ;
ESCAPE = "\" ( "n" | "t" | "r" | "\" | '"' | "{" | "0" ) ;
DNA_LITERAL = 'dna"' { DNA_CHAR } '"' ;
DNA_CHAR = "A" | "T" | "C" | "G" | "N"
| "a" | "t" | "c" | "g" | "n" ;
RNA_LITERAL = 'rna"' { RNA_CHAR } '"' ;
RNA_CHAR = "A" | "U" | "C" | "G" | "N"
| "a" | "u" | "c" | "g" | "n" ;
PROTEIN_LITERAL = 'protein"' { AMINO_CHAR } '"' ;
AMINO_CHAR = "A" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "K" | "L"
| "M" | "N" | "P" | "Q" | "R" | "S" | "T" | "V" | "W" | "Y"
| "*"
;
BOOL_LITERAL = "true" | "false" ;
Identifiers
IDENT = ( ALPHA | "_" ) { ALPHA | DIGIT | "_" } ;
ALPHA = "a".."z" | "A".."Z" ;
DIGIT = "0".."9" ;
HEX_DIGIT = DIGIT | "a".."f" | "A".."F" ;
BIN_DIGIT = "0" | "1" ;
OCT_DIGIT = "0".."7" ;
Newline Suppression
Newlines act as statement terminators. However, the lexer suppresses newline tokens when they appear after certain tokens, allowing expressions to span multiple lines naturally:
(* Newlines are suppressed after: *)
suppress_after = "|>" | "=>" | "->" | "," | "(" | "[" | "{"
| "+" | "-" | "*" | "/" | "%" | "**"
| "==" | "!=" | "<" | "<=" | ">" | ">="
| "&&" | "||" | "=" | "|" | "."
;
Comments and Whitespace
COMMENT = "#" { any character except NEWLINE } NEWLINE ;
DOC_COMMENT = "##" { any character except NEWLINE } NEWLINE ;
WHITESPACE = " " | "\t" ; (* Spaces and tabs are ignored *)
NEWLINE = "\n" | "\r\n" ; (* Statement terminator *)
Operator Precedence
Operators listed from lowest to highest precedence:
| Precedence | Operator | Associativity | Description |
|---|---|---|---|
| 1 (lowest) | \|> | Left | Pipe |
| 2 | \|\| | Left | Logical OR |
| 3 | && | Left | Logical AND |
| 4 | == != | Left | Equality |
| 5 | < <= > >= in | Left | Comparison |
| 6 | .. ..= | None | Range |
| 7 | + - | Left | Addition |
| 8 | * / % | Left | Multiplication |
| 9 | - ! | Right (prefix) | Unary |
| 10 | ** | Right | Exponentiation |
| 11 (highest) | () [] . ? | Left | Postfix |