More Lexical Analysis for Tiger?
So I decided to play some more with just ocamllex
and write a lexer for the toy language
from the guide I am following. It's definitely
cool, to say the least.
(* scanner for a toy language *)
(* Copyright 2019 Justin Baum *)
(* credits to this tutorial:
https://courses.softlab.ntua.gr/compilers/2015a/ocamllex-tutorial.pdf
*)
{
open Printf
}
(* A single decimal digit. *)
let digit = ['0'-'9']
(* A single lowercase ASCII letter. *)
let lower = ['a'-'z']
(* Identifiers: one lowercase letter followed by any number of lowercase
   letters or digits. *)
let id = lower (lower | digit)*
(* [toy_lang lexbuf] scans [lexbuf] until end of input and prints one line
   per recognised token on standard output. It returns unit at end of file.

   ocamllex always takes the longest match, and among matches of equal
   length the clause that is listed first. The keyword clause therefore has
   to come before the [id] clause, and the catch-all clause is kept last. *)
rule toy_lang = parse
  | digit+ as inum
      { (* int_of_string raises Failure when the literal does not fit in an
           int. *)
        printf "integer: %s (%d)\n" inum (int_of_string inum);
        toy_lang lexbuf }
  | digit+ '.' digit* as fnum
      { printf "float: %s (%f)\n" fnum (float_of_string fnum);
        toy_lang lexbuf }
  (* [as] binds more loosely than [|], so [word] names the whole alternation.
     Note that "else" is not in this list and is lexed as an identifier. *)
  | "if" | "then" | "begin" | "end" | "let" | "in" | "function" as word
      { printf "keyword: %s\n" word;
        toy_lang lexbuf }
  | id as text
      { printf "identifier %s\n" text;
        toy_lang lexbuf }
  | '+' | '-' | '*' | '/' as op
      { printf "operator %c\n" op;
        toy_lang lexbuf }
  | ":="
      { printf "assignment operator\n";
        toy_lang lexbuf }
  (* Single-line brace comments. The closing brace is excluded from the body
     so that the comment ends at the first closing brace; otherwise the
     longest-match rule would swallow everything between two comments that
     share a line. *)
  | '{' [^ '\n' '}']* '}'
      { toy_lang lexbuf }
  (* Whitespace, including carriage returns from CRLF line endings. *)
  | [' ' '\t' '\r' '\n']
      { toy_lang lexbuf }
  | eof
      { () }
  (* Catch-all for any single character no other clause accepts. *)
  | _ as c
      { printf "Unrecognized character: %c\n" c;
        toy_lang lexbuf }
{
(* Entry point: lex the file named by the first command-line argument, or
   standard input when no argument is given, printing each token as it is
   recognised. The input channel is closed once lexing finishes, including
   when the lexer raises. *)
let main () =
  let cin =
    if Array.length Sys.argv > 1
    then open_in Sys.argv.(1)
    else stdin
  in
  let lexbuf = Lexing.from_channel cin in
  match toy_lang lexbuf with
  | () -> close_in cin
  | exception e ->
    (* Do not let a failure to close mask the original error. *)
    close_in_noerr cin;
    raise e

(* [let ()] rather than [let _] so the compiler checks the result is unit.
   Printexc.print reports any uncaught exception before re-raising it. *)
let () = Printexc.print main ()
}
When I run it on this sample program,
let anidentifier := 124.0 + 5
if this
then that
else well
begin
let c :=
if andidentifier
then anotheridentifier
else that
function
end
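it gives us a working lexer! (Note that `else` comes out as an identifier, because the lexer above has no `else` keyword.) Here is the output: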
keyword: let
identifier anidentifier
assignment operator
float: 124.0 (124.000000)
operator +
integer: 5 (5)
keyword: if
identifier this
keyword: then
identifier that
identifier else
identifier well
keyword: begin
keyword: let
identifier c
assignment operator
keyword: if
identifier andidentifier
keyword: then
identifier anotheridentifier
identifier else
identifier that
keyword: function
keyword: end
To say the least, this has been a good amount of fun.
Written on June 17, 2019 20:40 UTC-4