構文木・レキサ・パーサ

構文木は教科書7ページに書いてあります。非常にシンプルです。Haskellのような複雑な言語も構文糖衣を取り除くと,同様の言語まで単純化されます。そんなバカなと思う人は,GHCのソースのghc/compiler/coreSyn/CoreSyn.lhsを読んでみて下さい。
レキサ/パーサの実装は他の言語と何も変わりません。ocamllex/ocamlyaccで実装しています。

  • syntax.ml
type variable = string
type binaryop = Add|Sub|Mul|Div|Lt|Le|Eq|Ne|Ge|Gt|And|Or
type definition = variable * expression
and alternative = int * variable list * expression
and expression =
      VarE of variable
    | NumE of int
    | PackE of int * int
    | AppE of expression * expression
    | InfixE of binaryop * expression * expression
    | LetE of definition list * expression
    | LetrecE of definition list * expression
    | CaseE of expression * alternative list
    | LambdaE of variable list * expression

type supercombinator = variable list * expression
  • lexer.mll

リテラルは整数だけです。

{
open Parser
}

let space  = [' ' '\t' '\n' '\r']
let letter = ['A'-'Z' 'a'-'z' '_']
let digit  = ['0'-'9']

rule lex = parse
    space +
    { lex lexbuf }
    | '#' [^ '\n']* (* comment *)
    { lex lexbuf }
    | letter (letter | digit)*
    { match Lexing.lexeme lexbuf with
          "let" -> Tlet
        | "letrec" -> Tletrec
        | "in" -> Tin
        | "case" -> Tcase
        | "of" -> Tof
        | "Pack" -> Tpack
        | s -> Tvar s }
    | (['0'-'9']* as num) { Tint (int_of_string num) }
    | '{' { Tlbrace }
    | '}' { Trbrace }
    | '(' { Tlparen }
    | ')' { Trparen }
    | '\\' { Tbackslash }
    | ';' { Tsemi }
    | '=' { Tequal }
    | '+' { Tplus }
    | '-' { Tminus }
    | '*' { Tmul }
    | '/' { Tdiv }
    | '<' { Tless }
    | "<=" { Tlessequal }
    | "==" { Tequalequal }
    | "~=" { Tnotequal }
    | ">=" { Tgreaterequal }
    | '>' { Tgreater }
    | '&' { Tand }
    | '|' { Tor }
    | "->" { Tarrow }
    | ',' { Tcomma }
    | '.' { Tdot }
    | eof { Teof }
  • parser.mly

教科書7ページをそのままyaccで書きなおすだけです。

%{
open Syntax
%}

%token <string> Tvar
%token <int> Tint
%token Tlet Tletrec Tin Tcase Tof Tpack Tarrow Tlbrace Trbrace Tlparen Trparen
    Tbackslash Tsemi Tequal Tplus Tminus Tmul Tdiv Tless Tlessequal Tequalequal
    Tnotequal Tgreaterequal Tgreater Tand Tor Tcomma Tdot Teof

%right    Tor
%right    Tand
%nonassoc Tminus
%right    Tplus
%nonassoc Tdiv
%right    Tmul
%left     prec_app

%start program
%type <Syntax.supercombinator list> program

%%

program:
    sc Teof            { [$1] }
    | sc Tsemi program { $1::$3 }
;
sc: // super combinator
    vars Tequal expr { ($1, $3) }
;
expr:
      expr aexpr %prec prec_app { AppE($1,$2) }
    | expr binop expr           { InfixE($2,$1,$3) }
    | Tlet defns Tin expr       { LetE($2,$4) }
    | Tletrec defns Tin expr    { LetrecE($2,$4) }
    | Tcase expr Tof alts       { CaseE($2,$4) }
    | Tbackslash vars Tdot expr { LambdaE($2,$4) }
    | aexpr                     { $1 }
;
aexpr: // atomic expression
      Tvar      { VarE($1) }
    | Tint      { NumE($1) }
    | Tpack Tlbrace Tint Tcomma Tint Trbrace { PackE($3,$5) }
    | Tlparen expr Trparen { $2 }
;
defns: // definitions
      defn              { [$1] }
    | defn Tsemi defns  { $1::$3 }
;
defn: // definition
    Tvar Tequal expr { ($1,$3) }
;
alts: // alternatives
      alt            { [$1] }
    | alt Tsemi alts { $1::$3 }
;
alt: // alternative
    Tless Tint Tgreater vars Tarrow expr { ($2,$4,$6) }
;
binop:
      Tplus         { Add }
    | Tminus        { Sub }
    | Tmul          { Mul }
    | Tdiv          { Div }
    | Tless         { Lt }
    | Tlessequal    { Le }
    | Tequalequal   { Eq }
    | Tnotequal     { Ne }
    | Tgreaterequal { Ge }
    | Tgreater      { Gt }
    | Tand          { And }
    | Tor           { Or }
;
vars:
      Tvar      { [$1] }
    | Tvar vars { $1::$2 }
;
%%
  • ビルド・実行

今のところ、パースしてそれで終わりです。来週はG-machineを作ります。

% git clone git://github.com/nineties/puref.git
% cd puref
% make
% ./puref
main = let x = 1 in x + 2
[Ctrl-D]