More Lexical Analysis for Tiger?

So I decided to play some more with just ocamllex and write a scanner for the toy language from the guide I am following. It’s definitely cool, to say the least.

(* scanner for a toy language *)
(* Copyright 2019 Justin Baum *)
(* credits to this tutorial:
https://courses.softlab.ntua.gr/compilers/2015a/ocamllex-tutorial.pdf
*)
{
  open Printf
}

(* A single decimal digit. *)
let digit = ['0'-'9']
(* A single lowercase ASCII letter. *)
let lower = ['a'-'z']
(* An identifier: a lowercase letter followed by any number of lowercase
   letters or digits. *)
let id = lower (lower | digit)*

(* [toy_lang lexbuf] scans [lexbuf] until end of input, printing one line per
   token on standard output, and returns unit.  Whitespace and one-line brace
   comments are skipped silently.

   ocamllex always takes the longest match and, when two rules match the same
   length, the rule written first.  The keyword rule therefore has to stay
   above the identifier rule.  Note that "else" is not in the keyword list, so
   it is reported as an identifier. *)
rule toy_lang = parse
  | digit+ as inum
    { (* A literal that does not fit in a native int is reported rather than
         letting int_of_string raise Failure and abort the whole scan. *)
      (match int_of_string_opt inum with
       | Some n -> printf "integer: %s (%d)\n" inum n
       | None -> printf "integer: %s (out of range)\n" inum);
      toy_lang lexbuf
    }
  | digit+ '.' digit* as fnum
    { printf "float: %s (%f)\n" fnum (float_of_string fnum);
      toy_lang lexbuf
    }
  | ("if" | "then" | "begin" | "end" | "let" | "in" | "function") as word
    { printf "keyword: %s\n" word;
      toy_lang lexbuf
    }
  | id as text
    { printf "identifier %s\n" text;
      toy_lang lexbuf
    }
  | ['+' '-' '*' '/'] as op
    { printf "operator %c\n" op;
      toy_lang lexbuf
    }
  | ":="
    { printf "assignment operator\n";
      toy_lang lexbuf
    }
  (* One-line comment in braces.  The body excludes the closing brace so the
     comment ends at the first one; otherwise two comments on the same line
     would swallow everything written between them. *)
  | '{' [^ '}' '\n']* '}'
    { toy_lang lexbuf }
  | [' ' '\t' '\n']
    { toy_lang lexbuf }
  | eof { () }
  (* Catch-all, kept last: any single character no other rule accepts. *)
  | _ as c
    { printf "Unrecognized character: %c\n" c;
      toy_lang lexbuf
    }

{
(* Entry point: scan the file named by the first command-line argument, or
   standard input when no argument is given, printing each token found. *)
let main () =
  let scan ic = toy_lang (Lexing.from_channel ic) in
  if Array.length Sys.argv > 1 then begin
    let ic = open_in Sys.argv.(1) in
    (* Close the file we opened on both the normal and the error path. *)
    match scan ic with
    | () -> close_in ic
    | exception e -> close_in_noerr ic; raise e
  end else
    scan stdin

(* Printexc.print reports any uncaught exception on stderr and re-raises it. *)
let () = Printexc.print main ()
}

When I run it on this sample program,

let anidentifier := 124.0 + 5

if this
then that
else well

begin
let c :=
if andidentifier
then anotheridentifier
else that

function 
end

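it gives us a working lexer! (Note that `else` shows up as an identifier, because it is not in the scanner's keyword list.)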

keyword: let
identifier anidentifier
assignment operator
float: 124.0 (124.000000)
operator +
integer: 5 (5)
keyword: if
identifier this
keyword: then
identifier that
identifier else
identifier well
keyword: begin
keyword: let
identifier c
assignment operator
keyword: if
identifier andidentifier
keyword: then
identifier anotheridentifier
identifier else
identifier that
keyword: function
keyword: end

To say the least, this has been a good amount of fun.

Written on June 17, 2019 20:40 UTC-4