Source file tyxml_ppx.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
open Tyxml_syntax
open Ppxlib.Asttypes
open Ppxlib.Parsetree
type lang = Common.lang = Html | Svg
let lang_of_ns loc ns =
if ns = Markup.Ns.html || ns = "" then Common.Html
else if ns = Markup.Ns.svg then Common.Svg
else Common.error loc "Unknown namespace %s" ns
module Loc = struct
(** 0-width locations do not show in the toplevel. We expand them to
one-width.
*)
let one_width ?(ghost=false) pos =
{ Location.loc_ghost = ghost ;
loc_start = pos ;
loc_end = {pos with pos_cnum = pos.pos_cnum + 1}
}
(** Given a list of input strings for Markup.ml, evaluates to a function that
converts Markup.ml locations of characters within these strings to their
OCaml locations. *)
let make_location_map located_strings =
let starting_a_string = ref (fun _ -> ()) in
let source =
let strings = ref located_strings in
let offset = ref 0 in
let rec next_byte () = match !strings with
| [] -> None
| (s, loc)::rest ->
if !offset = 0 then !starting_a_string loc;
if !offset < String.length s then begin
offset := !offset + 1;
Some (s.[!offset - 1])
end
else begin
offset := 0;
strings := rest;
next_byte ()
end
in
Markup.fn next_byte
in
let location_map =
let preprocessed_input_stream, get_markupml_location =
source
|> Markup.Encoding.decode Markup.Encoding.utf_8
|> Markup.preprocess_input_stream
in
let location_map = ref [] in
starting_a_string := begin fun ocaml_position ->
location_map :=
(get_markupml_location (), ocaml_position)::!location_map
end;
Markup.drain preprocessed_input_stream;
List.rev !location_map
in
fun given_markup_location ->
let rec bounded_maximum best = function
| [] -> best
| ((noted_markup_location, _) as loc)::rest ->
if Markup.compare_locations
noted_markup_location given_markup_location > 0 then best
else bounded_maximum (Some loc) rest
in
let preceding_markup_location, preceding_ocaml_position =
match bounded_maximum None location_map with
| Some loc -> loc
| None -> assert false
in
let line, column = given_markup_location in
let line', column' = preceding_markup_location in
let ocaml_position =
let open Lexing in
if line = line' then
{preceding_ocaml_position with
pos_cnum = preceding_ocaml_position.pos_cnum + column - column'}
else
{preceding_ocaml_position with
pos_lnum = preceding_ocaml_position.pos_lnum + line - line';
pos_bol = 0;
pos_cnum = column - 1}
in
one_width ocaml_position
end
(** Antiquotations
We replace antiquotations expressions by a dummy token "(tyxmlX)".
We store a table token to expression to retrieve them after parsing.
*)
module Antiquot = struct
let fmt_id = Printf.sprintf "(tyxml%i)"
let regex_id = Re.(seq [ str "(tyxml" ; rep digit ; char ')' ])
let re_id = Re.compile regex_id
let make_id =
let r = ref 0 in
fun () -> incr r ; fmt_id !r
module H = Hashtbl.Make(struct
type t = string
let hash = Hashtbl.hash
let equal (x:string) y = x = y
end)
let tbl = H.create 17
let create expr =
let s = make_id () in
H.add tbl s expr ;
s
let get loc s =
if H.mem tbl s then H.find tbl s
else
Common.error loc
"Internal error: This expression placeholder is not registered"
let contains loc s = match Re.exec_opt re_id s with
| None -> `No
| Some g ->
let (i,j) = Re.Group.offset g 0 in
let is_whole = i = 0 && j = String.length s in
if is_whole
then `Whole (get loc s)
else `Yes (get loc @@ Re.Group.get g 0)
let assert_no_antiquot ~loc kind (_namespace,s) =
match contains loc s with
| `No -> ()
| `Yes e | `Whole e ->
Common.error e.pexp_loc
"OCaml expressions are not accepted as %s names" kind
end
(** Building block to rebuild the output with expressions intertwined. *)
(** Walk the text list to replace placeholders by OCaml expressions when
appropriate. Use {!make_txt} on the rest. *)
let make_text ~loc ~lang ss =
let buf = Buffer.create 17 in
let push_txt buf l =
let s = Buffer.contents buf in
Buffer.clear buf ;
if s = "" then l else Common.value (Common.txt ~loc ~lang s) :: l
in
let rec aux ~loc res = function
| [] -> push_txt buf res
| `Text s :: t ->
Buffer.add_string buf s ;
aux ~loc res t
| `Delim g :: t ->
let e = Antiquot.get loc @@ Re.Group.get g 0 in
aux ~loc (Common.antiquot e :: push_txt buf res) t
in
aux ~loc [] @@ Re.split_full Antiquot.re_id @@ String.concat "" ss
let replace_attribute ~loc ((ns,attr_name),value) =
let attr = (lang_of_ns loc ns, attr_name) in
Antiquot.assert_no_antiquot ~loc "attribute" attr ;
match Antiquot.contains loc value with
| `No -> (attr, Common.value value)
| `Whole e -> (attr, Common.antiquot e)
| `Yes _ ->
Common.error loc
"Mixing literals and OCaml expressions is not supported in attribute values"
(** Processing *)
(** Takes the ast and transforms it into a Markup.ml char stream.
The payload [expr] is either a single token, or an application (that is, a list).
A token is either a string or an antiquotation. Antiquotations are replaced
by placeholder strings (see {!Antiquot}).
Each token is equipped with a starting (but no ending) position.
*)
let ast_to_stream expressions =
let strings =
expressions |> List.map @@ fun expr ->
match expr.pexp_desc with
| Pexp_constant (Pconst_string (s, loc, _)) ->
(s, loc.loc_start)
| _ ->
(Antiquot.create expr, expr.pexp_loc.loc_start)
in
let source =
let items = ref strings in
let offset = ref 0 in
let rec next_byte () = match !items with
| [] -> None
| (s, _)::rest ->
if !offset < String.length s then begin
offset := !offset + 1;
Some (s.[!offset - 1])
end
else begin
offset := 0;
items := rest;
next_byte ()
end
in
Markup.fn next_byte
in
source, Loc.make_location_map strings
let context_of_lang = function
| Common.Svg -> Some (`Fragment "svg")
| Html -> None
(** Given the payload of a [%html ...] or [%svg ...] expression,
converts it to a TyXML expression representing the markup
contained therein. *)
let markup_to_expr lang loc expr =
let context = context_of_lang lang in
let input_stream, adjust_location = ast_to_stream expr in
let report loc error =
match error with
| `Bad_content _ -> ()
| _ ->
let loc = adjust_location loc in
let message =
Markup.Error.to_string error |> String.capitalize_ascii
in
Common.error loc "%s" message
in
let parser =
Markup.parse_html
?context
~encoding:Markup.Encoding.utf_8
~report
input_stream
in
let signals = Markup.signals parser in
let get_loc () = adjust_location @@ Markup.location parser in
let rec assemble lang children =
match Markup.next signals with
| None | Some `End_element -> List.rev children
| Some (`Text ss) ->
let loc = get_loc () in
let node = make_text ~loc ~lang ss in
assemble lang (node @ children)
| Some (`Start_element ((ns, elt_name), attributes)) ->
let newlang = lang_of_ns loc ns in
let name = (newlang, elt_name) in
let loc = get_loc () in
let sub_children = assemble newlang [] in
Antiquot.assert_no_antiquot ~loc "element" name ;
let attributes = List.map (replace_attribute ~loc) attributes in
let node =
Element.parse
~parent_lang:lang ~loc ~name ~attributes sub_children
in
assemble lang (Common.Val node :: children)
| Some (`Comment s) ->
let loc = get_loc () in
let node = Common.value @@ Element.comment ~loc ~lang s in
assemble lang (node :: children)
| Some (`Xml _ | `Doctype _ | `PI _) ->
assemble lang children
in
let l =
Element_content.filter_surrounding_whitespace @@
assemble lang []
in
match l with
| [ Val x | Antiquot x ] -> x
| l -> Common.list_wrap_value lang loc l
let markup_to_expr_with_implementation lang modname loc expr =
match modname with
| Some modname ->
let current_modname = Common.implementation lang in
Common.set_implementation lang modname ;
let res = markup_to_expr lang loc expr in
Common.set_implementation lang current_modname ;
res
| _ ->
markup_to_expr lang loc expr
let is_capitalized s =
if String.length s < 0 then false
else match s.[0] with
| 'A'..'Z' -> true
| _ -> false
(** Extract and verify the modname in the annotation [%html.Bar.Baz .. ]. *)
let get_modname = function
| None -> None
| Some {txt = longident ; loc} ->
let l = Longident.flatten_exn longident in
let s = String.concat "." l in
if l = [] then None
else if not (List.for_all is_capitalized l) then
Common.error loc "This identifier is not a module name"
else Some s
let application_to_list expr =
match expr.pexp_desc with
| Pexp_apply (f, arguments) -> f::(List.map snd arguments)
| _ -> [expr]
let markup_cases ~lang ~modname cases =
let f ({pc_rhs} as case) =
let loc = pc_rhs.pexp_loc in
let pc_rhs =
markup_to_expr_with_implementation lang modname loc @@
application_to_list pc_rhs
in {case with pc_rhs}
in
List.map f cases
let rec markup_function ~lang ~modname e =
let loc = e.pexp_loc in
match e.pexp_desc with
| Pexp_fun (label,def,pat,content) ->
let content = markup_function ~lang ~modname content in
{e with pexp_desc = Pexp_fun (label,def,pat,content)}
| Pexp_function cases ->
let cases = markup_cases ~lang ~modname cases in
{e with pexp_desc = Pexp_function cases}
| _ ->
markup_to_expr_with_implementation lang modname loc @@
application_to_list e
let markup_bindings ~lang ~modname l =
let f ({pvb_expr} as b) =
let pvb_expr = markup_function ~lang ~modname pvb_expr in
{b with pvb_expr}
in
List.map f l
let expand_expr ~lang ~loc:_ ~path:_ ~arg e _ =
let modname = get_modname arg in
match e.pexp_desc with
| Pexp_let (recflag, bindings, next) ->
let bindings = markup_bindings ~lang ~modname bindings in
{e with pexp_desc = Pexp_let (recflag, bindings, next)}
| _ ->
markup_to_expr_with_implementation lang modname e.pexp_loc @@
application_to_list e
let expand_str_item ~lang ~loc:_ ~path:_ ~arg recflag value_bindings =
let bindings =
markup_bindings ~lang ~modname:(get_modname arg) value_bindings
in
Ppxlib.Ast_helper.Str.value recflag bindings