Source file parse_js.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
open! Stdlib
module Lexer : sig
type t
val of_file : string -> t
val of_channel : in_channel -> t
val of_string : ?pos:Lexing.position -> ?filename:string -> string -> t
val curr_pos : t -> Lexing.position
val token : t -> Js_token.t * (Lexing.position * Lexing.position)
val lex_as_regexp : t -> Js_token.t * (Lexing.position * Lexing.position)
val dummy_pos : Lexing.position
end = struct
type t =
{ l : Sedlexing.lexbuf
; mutable env : Flow_lexer.Lex_env.t
}
let dummy_pos = { Lexing.pos_fname = ""; pos_lnum = 0; pos_cnum = 0; pos_bol = 0 }
let zero_pos = { Lexing.pos_fname = ""; pos_lnum = 1; pos_cnum = 0; pos_bol = 0 }
let create l = { l; env = Flow_lexer.Lex_env.create l }
let of_file file : t =
let ic = open_in file in
let lexbuf = Sedlexing.Utf8.from_channel ic in
Sedlexing.set_filename lexbuf file;
create lexbuf
let of_channel ci : t = create (Sedlexing.Utf8.from_channel ci)
let of_string ?(pos = zero_pos) ?filename s =
let l = Sedlexing.Utf8.from_string s in
let pos =
match filename with
| None -> pos
| Some pos_fname -> { pos with pos_fname }
in
Sedlexing.set_position l pos;
Option.iter filename ~f:(Sedlexing.set_filename l);
create l
let curr_pos lexbuf = snd (Sedlexing.lexing_positions lexbuf.l)
let report_errors res =
match Flow_lexer.Lex_result.errors res with
| [] -> ()
| l ->
List.iter l ~f:(fun (loc, e) ->
let loc =
match loc.Flow_lexer.Loc.source with
| None ->
Printf.sprintf
"%d:%d"
loc.start.pos_lnum
(loc.start.pos_cnum - loc.start.pos_bol)
| Some f ->
Printf.sprintf
"%s:%d:%d"
f
loc.start.pos_lnum
(loc.start.pos_cnum - loc.start.pos_bol)
in
Printf.eprintf
"Lexer error: %s: %s\n"
loc
(Flow_lexer.Parse_error.to_string e))
let token (t : t) =
let env, res = Flow_lexer.lex t.env in
t.env <- env;
let tok = Flow_lexer.Lex_result.token res in
let pos = Flow_lexer.Lex_result.loc res in
report_errors res;
tok, pos
let lex_as_regexp (t : t) =
Sedlexing.rollback t.l;
let env, res = Flow_lexer.regexp t.env in
t.env <- env;
let tok = Flow_lexer.Lex_result.token res in
let pos = Flow_lexer.Lex_result.loc res in
report_errors res;
tok, pos
end
exception Parsing_error of Parse_info.t
let matching_token (o : Js_token.t) (c : Js_token.t) =
match o, c with
| T_LPAREN, T_RPAREN | T_LBRACKET, T_RBRACKET | T_LCURLY, T_RCURLY -> true
| _ -> false
module Tokens : sig
type elt = Js_token.t * (Lexing.position * Lexing.position)
type +'a t
val add : elt -> 'a -> 'a t -> 'a t
val last : 'a t -> elt option
val last' : 'a t -> (elt * 'a t * 'a) option
val empty : 'a t
val all : 'a t -> (Js_token.t * Parse_info.t) list
end = struct
type elt = Js_token.t * (Lexing.position * Lexing.position)
type 'a t = (elt * 'a) list
let empty = []
let add elt data t = (elt, data) :: t
let rec last = function
| [] -> None
| (((Js_token.TComment _ | TCommentLineDirective _), _), _) :: l -> last l
| (x, _) :: _ -> Some x
let rec last' = function
| [] -> None
| (((Js_token.TComment _ | TCommentLineDirective _), _), _) :: l -> last' l
| (x, data) :: l -> Some (x, l, data)
let all t_rev = List.rev_map t_rev ~f:(fun ((t, (p, _)), _) -> t, Parse_info.t_of_pos p)
end
let parse_aux the_parser (lexbuf : Lexer.t) =
let init = the_parser (Lexer.curr_pos lexbuf) in
let fol prev (_, (c, _)) =
match Tokens.last prev with
| None -> true
| Some (_, (_, p)) -> c.Lexing.pos_lnum <> p.Lexing.pos_lnum
in
let rec loop_error prev checkpoint =
let module I = Js_parser.MenhirInterpreter in
match checkpoint with
| I.InputNeeded _env ->
let checkpoint =
I.offer checkpoint (Js_token.T_EOF, Lexer.curr_pos lexbuf, Lexer.curr_pos lexbuf)
in
loop_error prev checkpoint
| I.Shifting _ | I.AboutToReduce _ -> loop_error prev (I.resume checkpoint)
| I.Accepted _ -> assert false
| I.Rejected -> `Error prev
| I.HandlingError _ -> loop_error prev (I.resume checkpoint)
in
let parse_annot s =
match String.drop_prefix ~prefix:"//" s with
| None -> None
| Some s -> (
let buf = Lexing.from_string s in
try
match Annot_parser.annot Annot_lexer.main buf with
| `Requires l -> Some (`Requires l)
| `Provides (n, k, ka) -> Some (`Provides (n, k, ka))
| `Version l -> Some (`Version l)
| `Weakdef -> Some `Weakdef
| `Always -> Some `Always
| `If name -> Some (`If name)
| `Ifnot name -> Some (`Ifnot name)
| `Alias name -> Some (`Alias name)
with
| Not_found -> None
| _ -> None)
in
let rec loop prev buffer checkpoint =
let module I = Js_parser.MenhirInterpreter in
match checkpoint with
| I.InputNeeded _env ->
let token, buffer, prev =
match Tokens.last prev with
| Some ((Js_token.T_EOF, _) as last) -> last, buffer, prev
| _ ->
let read_tok buffer lexbuf =
match buffer with
| [] -> buffer, Lexer.token lexbuf
| x :: xs -> xs, x
in
let rec read_one prev buffer (lexbuf : Lexer.t) =
let buffer, t = read_tok buffer lexbuf in
match t with
| (TCommentLineDirective _, _) as t ->
let prev = Tokens.add t checkpoint prev in
read_one prev buffer lexbuf
| (TComment s, loc) as t ->
if fol prev t
then
match parse_annot s with
| None ->
let prev = Tokens.add t checkpoint prev in
read_one prev buffer lexbuf
| Some annot ->
let t = Js_token.TAnnot (s, annot), loc in
t, buffer, prev
else
let prev = Tokens.add t checkpoint prev in
read_one prev buffer lexbuf
| t -> t, buffer, prev
in
let t, buffer, prev = read_one prev buffer lexbuf in
let (t, pos), buffer =
match Tokens.last prev, t with
| ( Some ((T_RETURN | T_CONTINUE | T_BREAK | T_THROW | T_YIELD), _)
, (((T_SEMICOLON | T_VIRTUAL_SEMICOLON), _) as t) ) -> t, buffer
| Some ((T_RETURN | T_CONTINUE | T_BREAK | T_THROW | T_YIELD), _), t
when fol prev t ->
let buffer = t :: buffer in
(T_VIRTUAL_SEMICOLON, (Lexer.dummy_pos, Lexer.dummy_pos)), buffer
| _, ((T_DECR, pos) as tok) when not (fol prev tok) ->
(Js_token.T_DECR_NB, pos), buffer
| _, ((T_INCR, pos) as tok) when not (fol prev tok) ->
(Js_token.T_INCR_NB, pos), buffer
| _, ((((T_DIV | T_DIV_ASSIGN) as t), ((start_pos, _) as _pos)) as tok)
-> (
if I.acceptable checkpoint t start_pos
then tok, buffer
else
match buffer with
| [] -> Lexer.lex_as_regexp lexbuf, buffer
| _ ->
tok, buffer)
| _, t -> t, buffer
in
(t, pos), buffer, prev
in
let t, (pos_start, pos_stop) = token in
let prev = Tokens.add token checkpoint prev in
let checkpoint = I.offer checkpoint (t, pos_start, pos_stop) in
loop prev buffer checkpoint
| I.Shifting _ | I.AboutToReduce _ -> loop prev buffer (I.resume checkpoint)
| I.Accepted v -> `Ok (v, prev)
| I.Rejected -> `Error prev
| I.HandlingError _env -> (
let to_ident (t, loc) =
let name = Js_token.to_string t in
Js_token.T_IDENTIFIER (Stdlib.Utf8_string.of_string_exn name, name), loc
in
let rec rewind stack buffer prev =
match Tokens.last' prev with
| None -> None
| Some (((tok, loc) as tok'), prev, checkpoint) -> (
match tok, stack with
| (T_RPAREN | T_RCURLY | T_RBRACKET), _ ->
let buffer = tok' :: buffer in
let stack = tok :: stack in
rewind stack buffer prev
| ((T_LPAREN | T_LCURLY | T_LBRACKET) as o), c :: stack -> (
if not (matching_token o c)
then None
else
match stack with
| [] -> Some (loc, prev, buffer, checkpoint)
| _ ->
let buffer = tok' :: buffer in
rewind stack buffer prev)
| _, stack ->
let buffer = tok' :: buffer in
rewind stack buffer prev)
in
let end_of_do_whle prev =
match rewind [ T_RPAREN ] [] prev with
| None -> false
| Some (_, prev, _, _) -> (
match Tokens.last' prev with
| None -> false
| Some ((T_WHILE, _), prev, _checkpoint) -> (
match Tokens.last' prev with
| None -> false
| Some ((T_SEMICOLON, _), prev, _checkpoint) -> (
match Tokens.last' prev with
| None -> false
| Some ((T_DO, _), _, _) -> true
| Some (_, _, _) -> false)
| Some ((T_RCURLY, _), prev, _checkpoint) -> (
match rewind [ T_RCURLY ] [] prev with
| None -> false
| Some (_, prev, _, _) -> (
match Tokens.last' prev with
| None -> false
| Some ((T_DO, _), _, _) -> true
| Some (_, _, _) -> false))
| Some (_, _, _) -> false)
| Some (_, _, _) -> false)
in
let kind =
match Tokens.last' prev with
| None | Some ((T_VIRTUAL_SEMICOLON, _), _, _) -> `None
| Some ((((T_YIELD | T_AWAIT), _) as tok), rest, checkpoint)
when I.acceptable checkpoint (fst (to_ident tok)) Lexer.dummy_pos ->
`Replace (to_ident tok, rest, checkpoint)
| Some (((T_RCURLY, _) as tok), rest, checkpoint)
when I.acceptable checkpoint Js_token.T_VIRTUAL_SEMICOLON Lexer.dummy_pos ->
`Semi_colon (tok, rest, checkpoint)
| Some (((T_EOF, _) as tok), rest, checkpoint)
when I.acceptable checkpoint Js_token.T_VIRTUAL_SEMICOLON Lexer.dummy_pos ->
`Semi_colon (tok, rest, checkpoint)
| Some (((T_ARROW, _) as tok), prev, checkpoint) when not (fol prev tok) ->
`Arrow (tok, prev, checkpoint)
| Some (last, rest, checkpoint) -> (
match Tokens.last' rest with
| Some ((T_VIRTUAL_SEMICOLON, _), _, _) -> `None
| (Some _ | None)
when fol rest last
&& I.acceptable
checkpoint
Js_token.T_VIRTUAL_SEMICOLON
Lexer.dummy_pos -> `Semi_colon (last, rest, checkpoint)
| Some ((T_RPAREN, _), rest, _)
when end_of_do_whle rest
&& I.acceptable
checkpoint
Js_token.T_VIRTUAL_SEMICOLON
Lexer.dummy_pos -> `Semi_colon (last, rest, checkpoint)
| _ -> `None)
in
let drop_annot_or_error () =
match Tokens.last' prev with
| Some ((TAnnot (s, _), pos), prev, checkpoint) ->
let t = Js_token.TComment s, pos in
let prev = Tokens.add t checkpoint prev in
loop prev buffer checkpoint
| _ -> loop_error prev (I.resume checkpoint)
in
match kind with
| `None -> drop_annot_or_error ()
| `Arrow (tok, prev, _checkpoint) -> (
let buffer = tok :: buffer in
let err () = loop_error prev (I.resume checkpoint) in
match Tokens.last' prev with
| Some (((T_RPAREN, _) as tok), prev, _) -> (
let buffer = tok :: buffer in
match rewind [ T_RPAREN ] buffer prev with
| None -> err ()
| Some (loc, prev, buffer, checkpoint) ->
let buffer = (Js_token.T_LPAREN_ARROW, loc) :: buffer in
loop prev buffer checkpoint)
| Some _ | None -> err ())
| `Replace (t, prev, checkpoint) ->
let checkpoint =
let t, pos = t in
I.offer checkpoint (t, fst pos, snd pos)
in
let prev = Tokens.add t checkpoint prev in
loop prev buffer checkpoint
| `Semi_colon (tok, prev, checkpoint) ->
let buffer = tok :: buffer in
let t = Js_token.T_VIRTUAL_SEMICOLON, (Lexer.dummy_pos, Lexer.dummy_pos) in
let checkpoint =
let t, pos = t in
I.offer checkpoint (t, fst pos, snd pos)
in
let prev = Tokens.add t checkpoint prev in
loop prev buffer checkpoint)
in
match loop Tokens.empty [] init with
| `Ok x -> x
| `Error toks ->
let rec pi last =
match Tokens.last' last with
| None -> Parse_info.zero
| Some ((_, (p, _)), rest, _) ->
if Poly.(p = Lexer.dummy_pos) then pi rest else Parse_info.t_of_pos p
in
raise (Parsing_error (pi toks))
let fail_early =
object
inherit Js_traverse.iter
method early_error p = raise (Parsing_error p.loc)
end
let check_program p =
List.iter p ~f:(function
| `Annot _ -> ()
| `Item p -> fail_early#program [ p ])
let parse' lex =
let p, toks = parse_aux Js_parser.Incremental.program lex in
check_program p;
let groups =
List.group p ~f:(fun a pred ->
match pred, a with
| `Item _, `Annot _ -> false
| `Annot _, `Annot _ -> true
| `Item _, `Item _ -> true
| `Annot _, `Item _ -> true)
in
let p =
List.map groups ~f:(fun g ->
List.partition_map g ~f:(function
| `Annot a -> `Fst a
| `Item i -> `Snd i))
in
p, Tokens.all toks
let parse lex =
let p, _ = parse_aux Js_parser.Incremental.program lex in
check_program p;
List.filter_map p ~f:(function
| `Item i -> Some i
| `Annot _ -> None)
let parse_expr lex =
let expr, _ = parse_aux Js_parser.Incremental.standalone_expression lex in
fail_early#expression expr;
expr