package sexplib0

  1. Overview
  2. Docs

Source file sexp.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
[@@@ocaml.warning "-3"]

(* blit_string doesn't exist in [StdLabels.Bytes]...  *)
let bytes_blit_string ~src ~src_pos ~dst ~dst_pos ~len =
  Bytes.blit_string src src_pos dst dst_pos len
;;

open StdLabels
open Format

(** Type of S-expressions *)
type t =
  | Atom of string
  | List of t list

let sexp_of_t t = t
let t_of_sexp t = t

let rec compare_list a b =
  match a, b with
  | [], [] -> 0
  | [], _ -> -1
  | _, [] -> 1
  | x :: xs, y :: ys ->
    let res = compare x y in
    if res <> 0 then res else compare_list xs ys

and compare a b =
  if a == b
  then 0
  else (
    match a, b with
    | Atom a, Atom b -> String.compare a b
    | Atom _, _ -> -1
    | _, Atom _ -> 1
    | List a, List b -> compare_list a b)
;;

let equal a b = compare a b = 0

exception Not_found_s of t
exception Of_sexp_error of exn * t

module Printing = struct
  (* Default indentation level for human-readable conversions *)

  let default_indent = ref 1

  (* Escaping of strings used as atoms in S-expressions *)

  let must_escape str =
    let len = String.length str in
    len = 0
    ||
    let rec loop str ix =
      match str.[ix] with
      | '"' | '(' | ')' | ';' | '\\' -> true
      | '|' ->
        ix > 0
        &&
        let next = ix - 1 in
        Char.equal str.[next] '#' || loop str next
      | '#' ->
        ix > 0
        &&
        let next = ix - 1 in
        Char.equal str.[next] '|' || loop str next
      | '\000' .. '\032' | '\127' .. '\255' -> true
      | _ -> ix > 0 && loop str (ix - 1)
    in
    loop str (len - 1)
  ;;

  let escaped s =
    let n = ref 0 in
    for i = 0 to String.length s - 1 do
      n
      := !n
         +
         match String.unsafe_get s i with
         | '\"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2
         | ' ' .. '~' -> 1
         | _ -> 4
    done;
    if !n = String.length s
    then s
    else (
      let s' = Bytes.create !n in
      n := 0;
      for i = 0 to String.length s - 1 do
        (match String.unsafe_get s i with
         | ('\"' | '\\') as c ->
           Bytes.unsafe_set s' !n '\\';
           incr n;
           Bytes.unsafe_set s' !n c
         | '\n' ->
           Bytes.unsafe_set s' !n '\\';
           incr n;
           Bytes.unsafe_set s' !n 'n'
         | '\t' ->
           Bytes.unsafe_set s' !n '\\';
           incr n;
           Bytes.unsafe_set s' !n 't'
         | '\r' ->
           Bytes.unsafe_set s' !n '\\';
           incr n;
           Bytes.unsafe_set s' !n 'r'
         | '\b' ->
           Bytes.unsafe_set s' !n '\\';
           incr n;
           Bytes.unsafe_set s' !n 'b'
         | ' ' .. '~' as c -> Bytes.unsafe_set s' !n c
         | c ->
           let a = Char.code c in
           Bytes.unsafe_set s' !n '\\';
           incr n;
           Bytes.unsafe_set s' !n (Char.chr (48 + (a / 100)));
           incr n;
           Bytes.unsafe_set s' !n (Char.chr (48 + (a / 10 mod 10)));
           incr n;
           Bytes.unsafe_set s' !n (Char.chr (48 + (a mod 10))));
        incr n
      done;
      Bytes.unsafe_to_string s')
  ;;

  let esc_str str =
    let estr = escaped str in
    let elen = String.length estr in
    let res = Bytes.create (elen + 2) in
    bytes_blit_string ~src:estr ~src_pos:0 ~dst:res ~dst_pos:1 ~len:elen;
    Bytes.unsafe_set res 0 '"';
    Bytes.unsafe_set res (elen + 1) '"';
    Bytes.unsafe_to_string res
  ;;

  let index_of_newline str start =
    try Some (String.index_from str start '\n') with
    | Not_found -> None
  ;;

  let get_substring str index end_pos_opt =
    let end_pos =
      match end_pos_opt with
      | None -> String.length str
      | Some end_pos -> end_pos
    in
    String.sub str ~pos:index ~len:(end_pos - index)
  ;;

  let is_one_line str =
    match index_of_newline str 0 with
    | None -> true
    | Some index -> index + 1 = String.length str
  ;;

  let pp_hum_maybe_esc_str ppf str =
    if not (must_escape str)
    then pp_print_string ppf str
    else if is_one_line str
    then pp_print_string ppf (esc_str str)
    else (
      let rec loop index =
        let next_newline = index_of_newline str index in
        let next_line = get_substring str index next_newline in
        pp_print_string ppf (escaped next_line);
        match next_newline with
        | None -> ()
        | Some newline_index ->
          pp_print_string ppf "\\";
          pp_force_newline ppf ();
          pp_print_string ppf "\\n";
          loop (newline_index + 1)
      in
      pp_open_box ppf 0;
      (* the leading space is to line up the lines *)
      pp_print_string ppf " \"";
      loop 0;
      pp_print_string ppf "\"";
      pp_close_box ppf ())
  ;;

  let mach_maybe_esc_str str = if must_escape str then esc_str str else str

  (* Output of S-expressions to formatters *)

  let rec pp_hum_indent indent ppf = function
    | Atom str -> pp_hum_maybe_esc_str ppf str
    | List (h :: t) ->
      pp_open_box ppf indent;
      pp_print_string ppf "(";
      pp_hum_indent indent ppf h;
      pp_hum_rest indent ppf t
    | List [] -> pp_print_string ppf "()"

  and pp_hum_rest indent ppf = function
    | h :: t ->
      pp_print_space ppf ();
      pp_hum_indent indent ppf h;
      pp_hum_rest indent ppf t
    | [] ->
      pp_print_string ppf ")";
      pp_close_box ppf ()
  ;;

  let rec pp_mach_internal may_need_space ppf = function
    | Atom str ->
      let str' = mach_maybe_esc_str str in
      let new_may_need_space = str' == str in
      if may_need_space && new_may_need_space then pp_print_string ppf " ";
      pp_print_string ppf str';
      new_may_need_space
    | List (h :: t) ->
      pp_print_string ppf "(";
      let may_need_space = pp_mach_internal false ppf h in
      pp_mach_rest may_need_space ppf t;
      false
    | List [] ->
      pp_print_string ppf "()";
      false

  and pp_mach_rest may_need_space ppf = function
    | h :: t ->
      let may_need_space = pp_mach_internal may_need_space ppf h in
      pp_mach_rest may_need_space ppf t
    | [] -> pp_print_string ppf ")"
  ;;

  let pp_hum ppf sexp = pp_hum_indent !default_indent ppf sexp
  let pp_mach ppf sexp = ignore (pp_mach_internal false ppf sexp)
  let pp = pp_mach

  (* Sexp size *)

  let rec size_loop ((v, c) as acc) = function
    | Atom str -> v + 1, c + String.length str
    | List lst -> List.fold_left lst ~init:acc ~f:size_loop
  ;;

  let size sexp = size_loop (0, 0) sexp

  (* Buffer conversions *)

  let to_buffer_hum ~buf ?(indent = !default_indent) sexp =
    let ppf = Format.formatter_of_buffer buf in
    Format.fprintf ppf "%a@?" (pp_hum_indent indent) sexp
  ;;

  let to_buffer_mach ~buf sexp =
    let rec loop may_need_space = function
      | Atom str ->
        let str' = mach_maybe_esc_str str in
        let new_may_need_space = str' == str in
        if may_need_space && new_may_need_space then Buffer.add_char buf ' ';
        Buffer.add_string buf str';
        new_may_need_space
      | List (h :: t) ->
        Buffer.add_char buf '(';
        let may_need_space = loop false h in
        loop_rest may_need_space t;
        false
      | List [] ->
        Buffer.add_string buf "()";
        false
    and loop_rest may_need_space = function
      | h :: t ->
        let may_need_space = loop may_need_space h in
        loop_rest may_need_space t
      | [] -> Buffer.add_char buf ')'
    in
    ignore (loop false sexp)
  ;;

  let to_buffer = to_buffer_mach

  let to_buffer_gen ~buf ~add_char ~add_string sexp =
    let rec loop may_need_space = function
      | Atom str ->
        let str' = mach_maybe_esc_str str in
        let new_may_need_space = str' == str in
        if may_need_space && new_may_need_space then add_char buf ' ';
        add_string buf str';
        new_may_need_space
      | List (h :: t) ->
        add_char buf '(';
        let may_need_space = loop false h in
        loop_rest may_need_space t;
        false
      | List [] ->
        add_string buf "()";
        false
    and loop_rest may_need_space = function
      | h :: t ->
        let may_need_space = loop may_need_space h in
        loop_rest may_need_space t
      | [] -> add_char buf ')'
    in
    ignore (loop false sexp)
  ;;

  (* The maximum size of a thing on the minor heap is 256 words.
     Previously, this size of the returned buffer here was 4096 bytes, which
     caused the Buffer to be allocated on the *major* heap every time.

     According to a simple benchmark by Ron, we can improve performance for
     small s-expressions by a factor of ~4 if we only allocate 1024 bytes
     (128 words + some small overhead) worth of buffer initially.  And one
     can argue that if it's free to allocate strings smaller than 256 words,
     large s-expressions requiring larger expensive buffers won't notice
     the extra two doublings from 1024 bytes to 2048 and 4096. And especially
     performance-sensitive applications to always pass in a larger buffer to
     use. *)
  let buffer () = Buffer.create 1024

  (* String conversions *)

  let to_string_hum ?indent = function
    | Atom str
      when match index_of_newline str 0 with
        | None -> true
        | Some _ -> false -> mach_maybe_esc_str str
    | sexp ->
      let buf = buffer () in
      to_buffer_hum ?indent sexp ~buf;
      Buffer.contents buf
  ;;

  let to_string_mach = function
    | Atom str -> mach_maybe_esc_str str
    | sexp ->
      let buf = buffer () in
      to_buffer_mach sexp ~buf;
      Buffer.contents buf
  ;;

  let to_string = to_string_mach
end

include Printing

let of_float_style : [ `Underscores | `No_underscores ] ref = ref `No_underscores
let of_int_style : [ `Underscores | `No_underscores ] ref = ref `No_underscores

module Private = struct
  include Printing
end

let message name fields =
  let rec conv_fields = function
    | [] -> []
    | (fname, fsexp) :: rest ->
      (match fname with
       | "" -> fsexp :: conv_fields rest
       | _ -> List [ Atom fname; fsexp ] :: conv_fields rest)
  in
  List (Atom name :: conv_fields fields)
;;
OCaml

Innovation. Community. Security.