-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlexer.mll
More file actions
161 lines (154 loc) · 5.06 KB
/
lexer.mll
File metadata and controls
161 lines (154 loc) · 5.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
{
open Parser
exception Eof
exception Reader_error of string
let string_of_char c =
String.make 1 c
let string_reverse s =
let maxind = (String.length s) - 1 in
for i = 0 to maxind/2 do
let c = s.[i] in
s.[i] <- s.[maxind-i];
s.[maxind-i] <- c
done;
s
}
rule token = parse
(* The ordering of the rules here is significant: e.g. -1 is an
integer but 1- should be a symbol *)
['-']? ['0'-'9']+ as lexeme { INT(int_of_string lexeme) }
| '#' { readermacro lexbuf }
| ['+' '-']? ['0'-'9']* '.'? ['0'-'9']+ ( 'e' ['+' '-']? ['0'-'9']+ )?
as lexeme { FLOAT(float_of_string lexeme) }
| ['A'-'Z' 'a'-'z' '-' '/' '*' '+' '0'-'9' ':' '&' '>'
'<' '=' '!' '?' '$' '@']+ as lexeme { SYMBOL(lexeme) }
| '"' { STRING(string lexbuf) }
| [ '[' '(' ] { LPAREN }
| [ ']' ')' ] { RPAREN }
| '\'' { QUOTE }
| '.' { DOT }
| ';' [^'\n']* '\n' { token lexbuf } (* skip comments *)
| [' ' '\t' '\n'] { token lexbuf } (* skip whitespace *)
| eof { raise Eof }
and string = parse
'\\' { let c = escaped lexbuf in c^(string lexbuf) }
| [^ '"' '\\']+ as lexeme { lexeme^(string lexbuf) }
| '"' { "" }
| eof { raise Eof }
and escaped = parse
(* TODO: escape sequences *)
'^' (['A'-'Z'] as ctrlchar)
{ string_of_char (char_of_int ((int_of_char ctrlchar)-64)) }
| '0' (['0'-'9']['0'-'9'] as digits)
{ string_of_char (char_of_int (int_of_string digits)) }
| _ as lexeme { string_of_char lexeme }
| eof { raise Eof }
(* TODO: actual Lisp-defined reader macros *)
and readermacro = parse
'<' { raise (Reader_error "invalid reader macro") }
| ['x''X'] (['-' '+'] as sign)? (['0'-'9' 'a'-'f' 'A'-'F']+ as digits)
{
match sign with
Some '-' -> INT(-(int_of_string ("0x"^digits)))
| Some '+' | None -> INT(int_of_string ("0x"^digits))
| Some _ -> raise (Lisp.Impossible "minus sign changed into something else in the middle of #X")
}
| ['o''O'] (['-' '+'] as sign)? (['0'-'7']+ as digits)
{
match sign with
Some '-' -> INT(-(int_of_string ("0o"^digits)))
| Some '+' | None -> INT(int_of_string ("0o"^digits))
| Some _ -> raise (Lisp.Impossible "minus sign changed into something else in the middle of #O")
}
| ['b''B'] (['-' '+'] as sign)? (['0' '1']+ as digits)
{
match sign with
Some '-' -> INT(-(int_of_string ("0b"^digits)))
| Some '+' | None -> INT(int_of_string ("0b"^digits))
| Some _ -> raise (Lisp.Impossible "minus sign changed into something else in the middle of #B")
}
| (['0'-'9']+ as radix) ['r''R'] (['-''+'] as sign)? (['0'-'9' 'A'-'Z' 'a'-'z']+ as digits)
{
let radix = int_of_string radix in
let parse string =
let dpow a exp =
match exp with
0 -> 1
| 1 -> a
| e when e >= 2 ->
begin
let result = ref a in
for i = 2 to exp do
result := !result * a
done;
!result
end
| e -> raise (Lisp.Impossible ("exponent `"^(string_of_int e)^"' slipped through guarded clause in #R"))
in
let index = ref 0 in
let value = ref 0 in
String.iter (fun c ->
let digit =
match c with
'0'..'9' -> (int_of_char c) - (int_of_char '0')
| 'A'..'Z' -> (int_of_char c) - (int_of_char 'A') + 10
| _ -> raise (Lisp.Impossible "invalid digit in #R")
in
let placeval = dpow radix !index in
if digit >= radix then
raise (Reader_error "digit greater than allowed radix in #R")
else begin
value := !value + digit*placeval;
incr index
end
) (String.uppercase (string_reverse string));
!value
in
let value = parse digits in
match sign with
Some '-' -> INT(-value)
| Some '+' | None -> INT(value)
| Some _ -> raise (Lisp.Impossible "minus sign changed into something else in the middle of #R reader macro")
}
| '\\' (_ as char) { INT(int_of_char char) }
| '\'' { FUNQUOTE }
| '.'
{
(* read-time evaluation *)
if (not (Hashtbl.mem Eval.variables "*read-eval*")) || (Lisp.truep (Hashtbl.find Eval.variables "*read-eval*")) then
LISPVAL(Eval.eval (Parser.main token lexbuf))
else
raise (Reader_error "Readtime evaluation disabled (*read-eval* nil)")
}
| '+'
{
(* TODO: *features* *)
let condition = Parser.main token lexbuf in
let expression = Parser.main token lexbuf in
let result = Eval.eval condition in
if Lisp.truep result then
LISPVAL(expression)
else
token lexbuf
}
| '-'
{
let condition = Parser.main token lexbuf in
let expression = Parser.main token lexbuf in
let result = Eval.eval condition in
if not (Lisp.truep result) then
LISPVAL(expression)
else
token lexbuf
}
| '|' { long_comment lexbuf; token lexbuf }
| _ { raise (Reader_error "invalid reader macro") }
and long_comment = shortest
'|' '#' { () }
| '#' '|' { long_comment lexbuf; long_comment lexbuf }
| [^'|' '#']+ { long_comment lexbuf }
| '|' _ { long_comment lexbuf }
| '#' _ { long_comment lexbuf }
{
(* trailer *)
}