package lambdapi

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file lpLexer.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
(** Lexer for Lambdapi syntax, using Sedlex, a Utf8 lexer generator. *)

open Lplib
open Sedlexing
open Common open Pos

let remove_first : Sedlexing.lexbuf -> string = fun lb ->
  Utf8.sub_lexeme lb 1 (lexeme_length lb - 1)

let remove_last : Sedlexing.lexbuf -> string = fun lb ->
  Utf8.sub_lexeme lb 0 (lexeme_length lb - 1)

let remove_ends : Sedlexing.lexbuf -> string = fun lb ->
  Utf8.sub_lexeme lb 1 (lexeme_length lb - 2)

exception SyntaxError of strloc

let syntax_error : Lexing.position * Lexing.position -> string -> 'a =
  fun pos msg -> raise (SyntaxError (Pos.make_pos pos msg))

let fail : Sedlexing.lexbuf -> string -> 'a = fun lb msg ->
  syntax_error (Sedlexing.lexing_positions lb) msg

let invalid_character : Sedlexing.lexbuf -> 'a = fun lb ->
  fail lb "Invalid character"

(** Tokens. *)
type token =
  (* end of file *)
  | EOF

  (* keywords in alphabetical order *)
  | ABORT
  | ADMIT
  | ADMITTED
  | APPLY
  | AS
  | ASSERT of bool (* true for "assertnot" *)
  | ASSOCIATIVE
  | ASSUME
  | BEGIN
  | BUILTIN
  | COERCE_RULE
  | COMMUTATIVE
  | COMPUTE
  | CONSTANT
  | DEBUG
  | END
  | FAIL
  | FLAG
  | GENERALIZE
  | HAVE
  | IN
  | INDUCTION
  | INDUCTIVE
  | INFIX
  | INJECTIVE
  | LET
  | NOTATION
  | OPAQUE
  | OPEN
  | POSTFIX
  | PREFIX
  | PRINT
  | PRIVATE
  | PROOFTERM
  | PROTECTED
  | PROVER
  | PROVER_TIMEOUT
  | QUANTIFIER
  | REFINE
  | REFLEXIVITY
  | REMOVE
  | REQUIRE
  | REWRITE
  | RULE
  | SEARCH
  | SEQUENTIAL
  | SET
  | SIMPLIFY
  | SOLVE
  | SYMBOL
  | SYMMETRY
  | TRY
  | TYPE_QUERY
  | TYPE_TERM
  | UNIF_RULE
  | VERBOSE
  | WHY3
  | WITH

  (* other tokens *)
  | DEBUG_FLAGS of (bool * string)
      (* Tuple constructor (with parens) required by Menhir. *)
  | INT of string
  | FLOAT of string
  | SIDE of Pratter.associativity
  | STRINGLIT of string
  | SWITCH of bool

  (* symbols *)
  | ARROW
  | ASSIGN
  | BACKQUOTE
  | COMMA
  | COLON
  | DOT
  | EQUIV
  | HOOK_ARROW
  | LAMBDA
  | L_CU_BRACKET
  | L_PAREN
  | L_SQ_BRACKET
  | PI
  | R_CU_BRACKET
  | R_PAREN
  | R_SQ_BRACKET
  | SEMICOLON
  | TURNSTILE
  | UNDERSCORE
  | VBAR

  (* identifiers *)
  | UID of string
  | UID_EXPL of string
  | UID_META of int
  | UID_PATT of string
  | QID of Path.t (* in reverse order *)
  | QID_EXPL of Path.t (* in reverse order *)

(** Some regexp definitions. *)
let space = [%sedlex.regexp? Chars " \t\n\r"]
let digit = [%sedlex.regexp? '0' .. '9']
let nat = [%sedlex.regexp? Plus digit]
let int = [%sedlex.regexp? nat | '-', nat]
let float = [%sedlex.regexp? int, '.', Plus digit]
let oneline_comment = [%sedlex.regexp? "//", Star (Compl ('\n' | '\r'))]
let string = [%sedlex.regexp? '"', Star (Compl '"'), '"']

(** Identifiers.

   There are two kinds of identifiers: regular identifiers and escaped
   identifiers of the form ["{|...|}"].

   Modulo those surrounding brackets, escaped identifiers allow to use as
   identifiers keywords or filenames that are not regular identifiers.

   An escaped identifier denoting a filename or directory is unescaped before
   accessing to it. Hence, the module ["{|a b|}"] refers to the file ["a b"].

   Identifiers need to be normalized so that an escaped identifier, once
   unescaped, is not regular. To this end, every identifier of the form
   ["{|s|}"] with s regular, is understood as ["s"] (function
   [remove_useless_escape] below).

   Finally, identifiers must not be empty, so that we can use the empty string
   for the path of ghost signatures. *)

(** Unqualified regular identifiers are any non-empty sequence of characters
   not among: *)
let forbidden_letter = [%sedlex.regexp? Chars " ,;\r\t\n(){}[]:.`\"@$|/"]
let regid = [%sedlex.regexp? '/' | Plus (Compl forbidden_letter)]

let is_regid : string -> bool = fun s ->
  let lb = Utf8.from_string s in
  match%sedlex lb with
  | regid, eof -> true
  | _ -> false

(** Unqualified escaped identifiers are any non-empty sequence of characters
    (except "|}") between "{|" and "|}". *)
let notbars = [%sedlex.regexp? Star (Compl '|')]
let escid = [%sedlex.regexp?
    "{|", notbars, '|', Star ('|' | Compl (Chars "|}"), notbars, '|'), '}']

(** [escape s] converts a string [s] into an escaped identifier if it is not
   regular. We do not check whether [s] contains ["|}"]. FIXME? *)
let escape s = if is_regid s then s else Escape.escape s

(** [remove_useless_escape s] replaces escaped regular identifiers by their
   unescape form. *)
let remove_useless_escape : string -> string = fun s ->
  let s' = Escape.unescape s in if is_regid s' then s' else s

(** Lexer. *)
let rec token lb =
  match%sedlex lb with

  (* end of file *)
  | eof -> EOF

  (* spaces *)
  | space -> token lb

  (* comments *)
  | oneline_comment -> token lb
  | "/*" -> comment token 0 lb

  (* keywords *)
  | "abort" -> ABORT
  | "admit" -> ADMIT
  | "admitted" -> ADMITTED
  | "apply" -> APPLY
  | "as" -> AS
  | "assert" -> ASSERT false
  | "assertnot" -> ASSERT true
  | "associative" -> ASSOCIATIVE
  | "assume" -> ASSUME
  | "begin" -> BEGIN
  | "builtin" -> BUILTIN
  | "coerce_rule" -> COERCE_RULE
  | "commutative" -> COMMUTATIVE
  | "compute" -> COMPUTE
  | "constant" -> CONSTANT
  | "debug" -> DEBUG
  | "end" -> END
  | "fail" -> FAIL
  | "flag" -> FLAG
  | "generalize" -> GENERALIZE
  | "have" -> HAVE
  | "in" -> IN
  | "induction" -> INDUCTION
  | "inductive" -> INDUCTIVE
  | "infix" -> INFIX
  | "injective" -> INJECTIVE
  | "left" -> SIDE(Pratter.Left)
  | "let" -> LET
  | "notation" -> NOTATION
  | "off" -> SWITCH(false)
  | "on" -> SWITCH(true)
  | "opaque" -> OPAQUE
  | "open" -> OPEN
  | "postfix" -> POSTFIX
  | "prefix" -> PREFIX
  | "print" -> PRINT
  | "private" -> PRIVATE
  | "proofterm" -> PROOFTERM
  | "protected" -> PROTECTED
  | "prover" -> PROVER
  | "prover_timeout" -> PROVER_TIMEOUT
  | "quantifier" -> QUANTIFIER
  | "refine" -> REFINE
  | "reflexivity" -> REFLEXIVITY
  | "remove" -> REMOVE
  | "require" -> REQUIRE
  | "rewrite" -> REWRITE
  | "right" -> SIDE(Pratter.Right)
  | "rule" -> RULE
  | "search" -> SEARCH
  | "sequential" -> SEQUENTIAL
  | "set" -> SET
  | "simplify" -> SIMPLIFY
  | "solve" -> SOLVE
  | "symbol" -> SYMBOL
  | "symmetry" -> SYMMETRY
  | "try" -> TRY
  | "type" -> TYPE_QUERY
  | "TYPE" -> TYPE_TERM
  | "unif_rule" -> UNIF_RULE
  | "verbose" -> VERBOSE
  | "why3" -> WHY3
  | "with" -> WITH

  (* other tokens *)
  | '+', Plus lowercase -> DEBUG_FLAGS(true, remove_first lb)
  | '-', Plus lowercase -> DEBUG_FLAGS(false, remove_first lb)
  | int -> INT(Utf8.lexeme lb)
  | float -> FLOAT(Utf8.lexeme lb)
  | string -> STRINGLIT(Utf8.sub_lexeme lb 1 (lexeme_length lb - 2))

  (* symbols *)
  | 0x2254 (* ≔ *) -> ASSIGN
  | 0x2192 (* → *) -> ARROW
  | '`' -> BACKQUOTE
  | ',' -> COMMA
  | ':' -> COLON
  | '.' -> DOT
  | 0x2261 (* ≡ *) -> EQUIV
  | 0x21aa (* ↪ *) -> HOOK_ARROW
  | 0x03bb (* λ *) -> LAMBDA
  | '{' -> L_CU_BRACKET
  | '(' -> L_PAREN
  | '[' -> L_SQ_BRACKET
  | 0x03a0 (* Π *) -> PI
  | '}' -> R_CU_BRACKET
  | ')' -> R_PAREN
  | ']' -> R_SQ_BRACKET
  | ';' -> SEMICOLON
  | 0x22a2 (* ⊢ *) -> TURNSTILE
  | '|' -> VBAR
  | '_' -> UNDERSCORE

  (* identifiers *)
  | regid -> UID(Utf8.lexeme lb)
  | escid -> UID(remove_useless_escape(Utf8.lexeme lb))
  | '@', regid -> UID_EXPL(remove_first lb)
  | '@', escid -> UID_EXPL(remove_useless_escape(remove_first lb))
  | '?', nat -> UID_META(int_of_string(remove_first lb))
  | '$', regid -> UID_PATT(remove_first lb)
  | '$', escid -> UID_PATT(remove_useless_escape(remove_first lb))
  | '$', nat -> UID_PATT(remove_first lb)

  | regid, '.' -> qid false [remove_last lb] lb
  | escid, '.' -> qid false [remove_useless_escape(remove_last lb)] lb
  | '@', regid, '.' -> qid true [remove_ends lb] lb
  | '@', escid, '.' -> qid true [remove_useless_escape(remove_ends lb)] lb

  (* invalid character *)
  | _ -> invalid_character lb

and qid expl ids lb =
  match%sedlex lb with
  | oneline_comment -> qid expl ids lb
  | "/*" -> comment (qid expl ids) 0 lb
  | regid, '.' -> qid expl (remove_last lb :: ids) lb
  | escid, '.' -> qid expl (remove_useless_escape(remove_last lb) :: ids) lb
  | regid ->
    if expl then QID_EXPL(Utf8.lexeme lb :: ids)
    else QID(Utf8.lexeme lb :: ids)
  | escid ->
    if expl then QID_EXPL(remove_useless_escape (Utf8.lexeme lb) :: ids)
    else QID(remove_useless_escape (Utf8.lexeme lb) :: ids)
  | _ ->
    fail lb ("Invalid identifier: \""
             ^ String.concat "." (List.rev (Utf8.lexeme lb :: ids)) ^ "\".")

and comment next i lb =
  match%sedlex lb with
  | eof -> fail lb "Unterminated comment."
  | "*/" -> if i=0 then next lb else comment next (i-1) lb
  | "/*" -> comment next (i+1) lb
  | any -> comment next i lb
  | _ -> invalid_character lb

(** [token buf] is a lexing function on buffer [buf] that can be passed to
    a parser. *)
let token : lexbuf -> unit -> token * Lexing.position * Lexing.position =
  fun lb () -> try with_tokenizer token lb () with
  | Sedlexing.MalFormed -> fail lb "Not Utf8 encoded file"
  | Sedlexing.InvalidCodepoint k ->
      fail lb ("Invalid Utf8 code point " ^ string_of_int k)

let token =
  let r = ref (EOF, Lexing.dummy_pos, Lexing.dummy_pos) in fun lb () ->
  Debug.(record_time Lexing (fun () -> r := token lb ())); !r
OCaml

Innovation. Community. Security.