(** OCaml reinforcement: tree structures
*
* Goal: implement an interpreter for a small imperative programming language
*)
(* algebraic datatype example: binary tree *)
(* a binary tree is either a leaf, or a node with two subtrees,
each of these cases is defined by a constructor (Leaf or Node),
the constructor Node applies to two already defined binary trees *)
(* this declaration defines a new type, whose values are defined
with the declared constructors *)
type binary_tree =
| Leaf
| Node of binary_tree * binary_tree
(* the binary tree
@
/ \
. @
/ \
. .
is represented in caml by the following expression *)
let t = Node(Leaf,
Node(Leaf, Leaf))
(* definition of a function on trees by case on the shape, ie on the constructor *)
let rec size t = match t with
| Leaf -> 1
| Node(t1, t2) -> 1 + size t1 + size t2
let rec height = function
| Leaf -> 0
| Node(t1, t2) -> 1 + max (height t1) (height t2)
(**
Version 1. Imperative language with arithmetic, mutable variables,
branching and loops.
*)
(* enumeration of a list of supported binary operations *)
type bop = Add | Mul | Sub | And (* | Div | Eq | Lt ... *)
(* a tree-like type for expressions *)
type expr =
| Cst of int
| Var of string
| Binop of bop * expr * expr
(* a tree-like type for instructions *)
type instr =
| Set of string * expr
| If of expr * instr * instr
| While of expr * instr
(* we need not only single instructions, but also sequences of instructions *)
(* choice here: add a sequence constructor to instructions, so that the type
'instr' cover both single instructions and sequences *)
| Seq of instr * instr
(* possible alternative: keep the type 'instr' for single instructions only,
and use 'instr list' when meaning a sequence (for instance: inside if and
while instructions) *)
(**
Version 1A.
Evaluating an expression depends on an abstraction of the memory, that
provides values for variables. We need a data structure associating
variable names (called keys) to values.
In version 1A, we use an immutable data structure based on balanced
binary search trees.
*)
(* the standard library provides a 'functor' Map.Make, that is a module that
can be parameterized by another module; here we instantiate Map.Make on the
module String to get environments whose keys are strings, and call Env the
obtained module *)
module Env = Map.Make(String)
(* this provides a type 'a Env.t of environements with keys of type string
and values of type 'a, as well as functions
Env.find: string -> 'a Env.t -> 'a
to retrieve the value for a given key or
Env.add: string -> 'a -> 'a Env.t -> 'a Env.t
to add a new key/value pair to an environment *)
(* since environments are immutable, they are not modified by add;
instead add returns a new environment that contains the additional entry *)
(* remark that this does not imply a copy of the structure, since immutable
structures are shared internally *)
(* auxiliary function for evaluating a binary operation, once the values v1
and v2 of its operands have already been computed *)
let eval_bop bop v1 v2 = match bop with
| Add -> v1 + v2
| Mul -> v1 * v2
| Sub -> v1 - v2
| And -> assert false (* this case should not happen, since the conjunction
will have a special treatment *)
(* evaluation of an expression, under some environement *)
(* we use only one type of values (int), for both integers and booleans;
as in C, true is represented by 1 and false by 0 *)
(* eval: expr -> int Env.t -> int *)
let rec eval e env = match e with
| Cst n -> n
(* the value of a variable is given by the environement *)
| Var x -> Env.find x env
(* evaluation of a strict binary operation implies evaluating both operands,
and then applying the corresponding operator; an exception is &&, which is
a lazy operator: the right operand is evaluated only if the left one is
not enough to decide the result *)
(* the two following patterns (And, and other binary operators) intersect,
in the case of a value that matches both patterns, the first matching
branch is selected, hence we write the special case And before the
general case bop *)
| Binop(And, e1, e2) -> if eval e1 env <> 0 then eval e2 env else 0
| Binop(bop, e1, e2) -> eval_bop bop (eval e1 env) (eval e2 env)
(* execution of an instruction or sequence, under some environment *)
(* instructions are likely to update the memory; since the environment is
immutable, our execution function returns a new updated environment instead *)
(* exec: instr -> int Env.t -> int Env.t *)
let rec exec i env = match i with
(* case of a variable assignment: returns an environment updated with
the new value *)
| Set(x, e) -> let v = eval e env in
Env.add x v env
(* seq: execution starts with i1, then goes on with i2 in the updated environment *)
| Seq(i1, i2) -> let env' = exec i1 env in
exec i2 env'
(* alternative, compact notation:
env |> exec i1 |> exec i2 *)
(* if: only one branch is executed, depending on the value of e *)
| If(e, i1, i2) -> if eval e env <> 0 then
exec i1 env
else
exec i2 env
(* while: if the condition is positive, execute the body, and then the
loop again in the modified environment; otherwise nothing happens, and
the result is the unmodified environment *)
| While(e, i') ->
if eval e env <> 0 then
env |> exec i' |> exec i
else
env
(**
Version 1B.
The same functions, using a mutable hashtable.
*)
type env = (string, int) Hashtbl.t
let exec i env =
(* inside the main function exec, we define two local functions eval and exec,
with implicit access to the shared, mutable environment env *)
(* eval: expr -> int *)
let rec eval e = match e with
| Cst n -> n
| Var x -> Hashtbl.find env x
| Binop(And, e1, e2) -> if eval e1 <> 0 then eval e2 else 0
| Binop(bop, e1, e2) -> eval_bop bop (eval e1) (eval e2)
in
(* exec: instr -> unit *)
let rec exec i = match i with
| Set(x, e) -> let v = eval e in
Hashtbl.replace env x v
(* the sequence does not need to transfer the new environment anymore,
since execution of i1 just mutates the shared table env *)
| Seq(i1, i2) -> exec i1; exec i2
| If(e, i1, i2) ->
if eval e <> 0 then
exec i1
else
exec i2
| While(e, i') ->
if eval e <> 0 then
(exec i'; exec i)
in
exec i
(* wish list: local variable, other types of data, functions, data structures *)
(** Version 2. Basic imperative language with functions. *)
(* new constructors are needed, we define new versions of expr and instr *)
type expr =
| Cst of int
| Var of string
| Binop of bop * expr * expr
(* function call, defined by a function name and a list of parameters *)
| Call of string * expr list
type instr =
| Set of string * expr
| If of expr * instr * instr
| While of expr * instr
| Seq of instr * instr
(* terminates the function and returns a value *)
| Return of expr
(* function definition *)
(* data structure with several fields, similar to "structs" (called "record" in caml),
each field is defined by an identifier and a type *)
type function_def = {
name: string;
code: instr;
params: string list;
(* locals: (string * int) list; *)
}
(* definition of a function int square(int x) { return x*x; } *)
let f = { name="square"; params=["x"]; code=Return(Binop(Mul, Var "x", Var "x")) }
(* a program is given by a list of function definitions, as well as a set of
(initialized) global variables *)
type prog = {
globals: (string * int) list;
functions: function_def list;
}
(* here, we assume one of the functions of the list is called "main" *)
(* user-defined exception, carrying a value of type int *)
exception Return of int
(* it will serve to terminate execution of a function when reaching a return instruction *)
(* main function for executing a program, given a list of arguments for its "main" function *)
let exec_prog p args =
(* create an initialize a global environment, with the given values for global variables *)
let env = Hashtbl.create 32 in
List.iter (fun (x, v) -> Hashtbl.add env x v) p.globals;
(* evaluation of most expressions is as before *)
let rec eval e = match e with
| Cst n -> n
| Var x -> Hashtbl.find env x
| Binop(And, e1, e2) -> if eval e1 <> 0 then eval e2 else 0
| Binop(bop, e1, e2) -> eval_bop bop (eval e1) (eval e2)
(* new: evaluation of a function call, given a function name and a list of parameters *)
| Call(fname, args) ->
(* first step: evaluate the parameters *)
(* second step: perform the call *)
exec_call fname (List.map eval args)
(* evaluation of most instructions is as before *)
and exec_instr = function
| Set(x, e) -> let v = eval e in
Hashtbl.replace env x v
| Seq(i1, i2) -> exec_instr i1; exec_instr i2
| If(e, i1, i2) -> if eval e <> 0 then exec_instr i1 else exec_instr i2
| While(e, i') as i -> if eval e <> 0 then (exec_instr i'; exec_instr i)
(* new: return (of the value given by an expression) *)
| Return e -> let v = eval e in
(* after evaluating the expression, terminates execution with
the custom defined exception; this skips the execution of any
remaining instr *)
raise (Return v)
(* execution of a function call, given a function name and a list of values for
its parameters *)
(* exec_call: string -> expr list -> int *)
and exec_call fname args =
(* first step: find the function definition with the called name *)
let f = List.find (fun f -> f.name = fname) p.functions in
(* second step: bind formal parameters and arguments *)
List.iter2 (Hashtbl.add env) f.params args;
(* List.iter2 (fun x a -> Hashtbl.add env x a) f.params args; *)
(* remark: Hashtbl.add adds a new binding, that shadows the previous binding
with the same key if there is any, but does not destroy it; advantage:
when the new binding is removed, the old one becomes accessible again *)
(* then exec the code *)
let v =
(* if the execution reaches a return instruction, it raises an exception
that we catch here, to return the value 'v' carried by the exception;
otherwise, exec_instr f.code terminates "normally" and we just return 0
(we must return something of the required type, or fail explicitly) *)
try exec_instr f.code; 0
with Return v -> v
in
(* finally remove the local bindings, letting the old ones resurface *)
List.iter (Hashtbl.remove env) f.params;
v
in
exec_call "main" args