This module provides tagless-final interfaces for the syntax of Python.
Tagless-final style, also known as object algebra in OOP contexts, is a programming techinque of embedding domain-specific languages in a typed functional host language. Reading the associated literature to get a deeper understanding of the concept is definitely encouraged. But we will also include a short explanation here in order to facilitate basic understandings of the APIs.
Let's consider a small DSL containing both boolean and integer values as our running example. Conventional wisdom suggests the following patterns to construct and process the syntax of a language:
(* Syntax of DSL *)
type value = Int of int | Bool of bool
type expression =
| Constant of value
| Add of expression * expression
| IsEqual of expression * expression
(* An evaluator of the DSL, as an example of a downstream processing logic of the DSL's
syntax. *)
let rec eval = function
| Constant value -> value
| Add (lhs, rhs) -> (
match (eval lhs, eval rhs) with
| Int lhs, Int rhs -> Int (lhs + rhs)
| _ -> failwith "Runtime type error")
| IsEqual (lhs, rhs) -> (
match (eval lhs, eval rhs) with
| Int lhs, Int rhs -> Bool (lhs = rhs)
| Bool lhs, Bool rhs -> Bool (lhs = rhs)
| _ -> failwith "Runtime type error")
(* The program constructs an explicit AST for the DSL, then process it. *)
let construct_ast () =
IsEqual
( Constant (Bool true),
IsEqual (Add (Constant (Int 2), Constant (Int 3)), Constant (Int 4)) )
let _ =
let syntax_tree = construct_ast () in
eval syntax_tree
Note how the program first construct an explicit structure syntax_tree
first, and then send this structure to downstream processing logic so it can get pattern-matched on. If the same logic gets re-written in the tagless-final style, it would roughly look like this:
(* This definition is still needed, as the result type of [eval]. *)
type value = Int of int | Bool of bool
(* Syntax of DSL. Note the lack of concrete variant/record definitions -- syntax are 100%
defined as a collection of free-functions. *)
type 'e expression = {
constant : value -> 'e;
add : 'e -> 'e -> 'e;
is_equal : 'e -> 'e -> 'e;
}
(* An evaluator of the DSL. Note the lack of explicit pattern match on the syntax tree (which
is why this style is named "tagless"). *)
let eval : value expression =
let constant v = v in
let add lhs rhs =
match (lhs, rhs) with
| Int lhs, Int rhs -> Int (lhs + rhs)
| _ -> failwith "Runtime type error"
in
let is_equal lhs rhs =
match (lhs, rhs) with
| Int lhs, Int rhs -> Bool (lhs = rhs)
| Bool lhs, Bool rhs -> Bool (lhs = rhs)
| _ -> failwith "Runtime type error"
in
{ constant; add; is_equal }
(* The program does not constructs an explicit AST for the DSL. Processing of the DSL is
performed exactly when the DSL gets constructed. *)
let construct_and_eval { constant; add; is_equal } =
is_equal (constant (Bool true))
(is_equal (add (constant (Int 2)) (constant (Int 3))) (constant (Int 4)))
let _ = construct_and_eval eval
Note how the intermediate step of constructing an explicit syntax_tree
structure is completely eliminated -- we specify the downstream processing logic on top of the syntatical structure of the DSL directly as free functions, and invoke them immediately when the corrsponding DSL structure is constructed. In other words, the processing logic somehow gets "dependency injected" into the construction logic.
This seemlying unintuitive style of programming actually provides two main benefits over the conventional alternative:
- It solves the expression problem. That is, both extending the syntax of the DSL and adding new operations of the DSL can be done in a type-safe way that does not requires the developer to modify pre-existing logic. In contrast, in non-tagless-final approaches, if the syntax tree is represented as ADT then adding new operations would require modifying old logic, and if the syntax tree is represented as class hierarchy then adding new syntax variant would require modifying old logic.
- It offers more flexibility than the tranditional ADT approach in the sense that one can easily add stronger type-level constraints to the tagless-final interfaces. For example, we can tweak the definition of
expression
in our DSL to syntactically rule out terms that cannot statically type check:
(* The intention here is to use the first parameter as a phantom type, i.e. the actual
definition of this type is going to be [type (_, 'a) typed = 'a] *)
type ('phantom, 'a) typed
type 'v value = { intv : int -> (int, 'v) typed; boolv : bool -> (bool, 'v) typed }
type ('e, 'v) expression = {
constant : 't. ('t, 'v) typed -> ('v, 'e) typed;
(* Note how we only allow adding terms with [int] types. *)
add : (int, 'e) typed -> (int, 'e) typed -> (int, 'e) typed;
(* Note how we only allow comparing terms with the same type. *)
is_equal : 't. ('t, 'e) typed -> ('t, 'e) typed -> (bool, 'e) typed;
}
Doing the same thing with the non-tagless-final style requires advanced language features such as GADT, which is not always available in the host language. Besides, GADT often has its own set of nuisances even in a host language that supports it. Granted, such a flexibility is not actively exploited by this library. But the point that tagless-final style allows us to pursue this potential direction without too much pain still remains.
The main type this module offers, TaglessFinal.t
, works just like the expression
type appeared in the above example: it is nothing more than a collection of free functions on which downstream syntax processors can be defined. Once it is constructed with
TaglessFinal.make
, one can then pass it onto various paring APIs in
Parser.TaglessFinal
, which serves simliar purpose as the construct_and_eval
function in our running example.
Keeping the structure of TaglessFinal.t
fully authentic to Python's official ast
module is an explicit design goal for this library. Even if Python's own ast
representation may exhibit some obvious issues or inconsistencies, it is not the job of this module to fix them, for both philosophical and techinical reasons. If certain design of ast
is undesirable, the recommended way to address those is to write downstream conversion logic.
A position is a pair of integers representing line number and column numbers.
A location is a pair of Position.t
representing a range for a given token.
This module provide a type that represents Python identifiers.
This module provides a type that represents Python constant value.
module ExpressionContext : sig ... end
This module provides a type that represents Python expression contexts.
This module provides a type that represents Python boolean operators.
This module provides a type that represents Python numerical binary operators.
This module provides a type that represents Python unary operators.
This module provides a type that represents Python comparison operators.
This module provides a type that represents Python comprehension structures.
This module provides a type that represents keyword arguments at each callsite.
This module provides a type that represents single parameter at each function definition site.
This module provides a type that represents a parameter list at each function definition site.
This module provides a type that represents a Python expression.
This module provides a type that represents a Python with
item.
This module provides a type that represents a Python import
item.
This module provides a type that represents a Python exception handler.
This module provides a type that represents a branch of the Python match
statement. See PEP 622.
This module provides a type that represents a pattern for a given match
branch. See PEP 622.
This module provides a type that represents a Python statement.
This module provides a type that represents a Python type ignore item.
This module provides a type that represents a Python module.
This module provides a type that represents a Python function type signature. See
Parser.TaglessFinal.parse_function_type
.
type ('argument, 'arguments, 'binary_operator, 'boolean_operator, 'comparison_operator, 'comprehension, 'constant, 'exception_handler, 'expression, 'expression_context, 'function_type, 'identifier, 'import_alias, 'keyword, 'location, 'match_case, 'module_, 'pattern, 'position, 'statement, 'type_ignore, 'unary_operator, 'with_item)
t =
private {
argument : ('expression, 'identifier, 'location, 'argument) Argument.t;
arguments : ('argument, 'expression, 'arguments) Arguments.t;
binary_operator : 'binary_operator BinaryOperator.t;
boolean_operator : 'boolean_operator BooleanOperator.t;
comparison_operator : 'comparison_operator ComparisonOperator.t;
comprehension : ('expression, 'comprehension) Comprehension.t;
constant : 'constant Constant.t;
exception_handler : ('expression,
'identifier,
'location,
'statement,
'exception_handler)
ExceptionHandler.t;
expression : ('arguments,
'binary_operator,
'boolean_operator,
'comparison_operator,
'comprehension,
'constant,
'expression_context,
'identifier,
'keyword,
'location,
'unary_operator,
'expression)
Expression.t;
expression_context : 'expression_context ExpressionContext.t;
function_type : ('expression, 'function_type) FunctionType.t;
identifier : 'identifier Identifier.t;
import_alias : ('identifier, 'location, 'import_alias) ImportAlias.t;
keyword : ('expression, 'identifier, 'location, 'keyword) Keyword.t;
location : ('position, 'location) Location.t;
match_case : ('expression, 'pattern, 'statement, 'match_case) MatchCase.t;
module_ : ('statement, 'type_ignore, 'module_) Module.t;
pattern : ('constant, 'expression, 'identifier, 'location, 'pattern) Pattern.t;
position : 'position Position.t;
statement : ('import_alias,
'arguments,
'binary_operator,
'exception_handler,
'expression,
'identifier,
'keyword,
'location,
'match_case,
'with_item,
'statement)
Statement.t;
type_ignore : 'type_ignore TypeIgnore.t;
unary_operator : 'unary_operator UnaryOperator.t;
with_item : ('expression, 'with_item) WithItem.t;
}
val make :
argument:('a, 'b, 'c, 'd) Argument.t ->
arguments:('d, 'a, 'e) Arguments.t ->
binary_operator:'f BinaryOperator.t ->
boolean_operator:'g BooleanOperator.t ->
comparison_operator:'h ComparisonOperator.t ->
comprehension:('a, 'i) Comprehension.t ->
constant:'j Constant.t ->
exception_handler:('a, 'b, 'c, 'k, 'l) ExceptionHandler.t ->
expression:('e, 'f, 'g, 'h, 'i, 'j, 'm, 'b, 'n, 'c, 'o, 'a) Expression.t ->
expression_context:'m ExpressionContext.t ->
function_type:('a, 'p) FunctionType.t ->
identifier:'b Identifier.t ->
import_alias:('b, 'c, 'q) ImportAlias.t ->
keyword:('a, 'b, 'c, 'n) Keyword.t ->
location:('r, 'c) Location.t ->
match_case:('a, 's, 'k, 't) MatchCase.t ->
module_:('k, 'u, 'v) Module.t ->
pattern:('j, 'a, 'b, 'c, 's) Pattern.t ->
position:'r Position.t ->
statement:('q, 'e, 'f, 'l, 'a, 'b, 'n, 'c, 't, 'w, 'k) Statement.t ->
type_ignore:'u TypeIgnore.t ->
unary_operator:'o UnaryOperator.t ->
with_item:('a, 'w) WithItem.t ->
unit ->
('d,
'e,
'f,
'g,
'h,
'i,
'j,
'l,
'a,
'm,
'p,
'b,
'q,
'n,
'c,
't,
'v,
's,
'r,
'k,
'u,
'o,
'w)
t