Source file misc_.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
(** {1 Minimal support for Unicode characters in identifiers} *)
include Misc
module Color = struct
include Color
external isatty : out_channel -> bool = "caml_sys_isatty"
let should_enable_color () =
let term = try Sys.getenv "TERM" with Not_found -> "" in
term <> "dumb"
&& term <> ""
&& isatty stderr
let default_setting = Auto
let enabled = ref true
end
module Error_style = struct
include Error_style
let default_setting = Contextual
end
module Style = struct
type color =
| Black
| Red
| Green
| Yellow
| Blue
| Magenta
| Cyan
| White
type style =
| FG of color
| BG of color
| Bold
| Reset
let ansi_of_color = function
| Black -> "0"
| Red -> "1"
| Green -> "2"
| Yellow -> "3"
| Blue -> "4"
| Magenta -> "5"
| Cyan -> "6"
| White -> "7"
let code_of_style = function
| FG c -> "3" ^ ansi_of_color c
| BG c -> "4" ^ ansi_of_color c
| Bold -> "1"
| Reset -> "0"
let ansi_of_style_l l =
let s = match l with
| [] -> code_of_style Reset
| [s] -> code_of_style s
| _ -> String.concat ";" (List.map code_of_style l)
in
"\x1b[" ^ s ^ "m"
type Format.stag += Style of style list
type tag_style ={
ansi: style list;
text_open:string;
text_close:string
}
type styles = {
error: tag_style;
warning: tag_style;
loc: tag_style;
hint: tag_style;
inline_code: tag_style;
}
let no_markup stl = { ansi = stl; text_close = ""; text_open = "" }
let default_styles = {
warning = no_markup [Bold; FG Magenta];
error = no_markup [Bold; FG Red];
loc = no_markup [Bold];
hint = no_markup [Bold; FG Blue];
inline_code= { ansi=[Bold]; text_open = {|"|}; text_close = {|"|} }
}
let cur_styles = ref default_styles
let get_styles () = !cur_styles
let set_styles s = cur_styles := s
let style_of_tag s = match s with
| Format.String_tag "error" -> (!cur_styles).error
| Format.String_tag "warning" ->(!cur_styles).warning
| Format.String_tag "loc" -> (!cur_styles).loc
| Format.String_tag "hint" -> (!cur_styles).hint
| Format.String_tag "inline_code" -> (!cur_styles).inline_code
| Style s -> no_markup s
| _ -> raise Not_found
let as_inline_code printer ppf x =
let open Format_doc in
pp_open_stag ppf (Format.String_tag "inline_code");
printer ppf x;
pp_close_stag ppf ()
let inline_code ppf s = as_inline_code Format_doc.pp_print_string ppf s
let mark_open_tag ~or_else s =
try
let style = style_of_tag s in
if !Color.enabled then ansi_of_style_l style.ansi else style.text_open
with Not_found -> or_else s
let mark_close_tag ~or_else s =
try
let style = style_of_tag s in
if !Color.enabled then ansi_of_style_l [Reset] else style.text_close
with Not_found -> or_else s
let set_tag_handling ppf =
let open Format in
let functions = pp_get_formatter_stag_functions ppf () in
let functions' = {functions with
mark_open_stag=(mark_open_tag ~or_else:functions.mark_open_stag);
mark_close_stag=(mark_close_tag ~or_else:functions.mark_close_stag);
} in
pp_set_mark_tags ppf true;
pp_set_formatter_stag_functions ppf functions';
()
let setup =
let first = ref true in
let formatter_l =
[Format.std_formatter; Format.err_formatter; Format.str_formatter]
in
let enable_color = function
| Color.Auto -> Color.should_enable_color ()
| Color.Always -> true
| Color.Never -> false
in
fun o ->
if !first then (
first := false;
Format.set_mark_tags true;
List.iter set_tag_handling formatter_l;
Color.enabled := (match o with
| Some s -> enable_color s
| None -> enable_color Color.default_setting)
);
()
end
module Utf8_lexeme = struct
type t = string
type case = Upper of Uchar.t | Lower of Uchar.t
let known_chars : (Uchar.t, case) Hashtbl.t = Hashtbl.create 32
let _ =
List.iter
(fun (upper, lower) ->
let upper = Uchar.of_int upper and lower = Uchar.of_int lower in
Hashtbl.add known_chars upper (Upper lower);
Hashtbl.add known_chars lower (Lower upper))
[
(0xc0, 0xe0); (0xc1, 0xe1);
(0xc2, 0xe2); (0xc3, 0xe3);
(0xc4, 0xe4); (0xc5, 0xe5);
(0xc6, 0xe6); (0xc7, 0xe7);
(0xc8, 0xe8); (0xc9, 0xe9);
(0xca, 0xea); (0xcb, 0xeb);
(0xcc, 0xec); (0xcd, 0xed);
(0xce, 0xee); (0xcf, 0xef);
(0xd0, 0xf0); (0xd1, 0xf1);
(0xd2, 0xf2); (0xd3, 0xf3);
(0xd4, 0xf4); (0xd5, 0xf5);
(0xd6, 0xf6); (0xd8, 0xf8);
(0xd9, 0xf9); (0xda, 0xfa);
(0xdb, 0xfb); (0xdc, 0xfc);
(0xdd, 0xfd); (0xde, 0xfe);
(0x160, 0x161); (0x17d, 0x17e);
(0x152, 0x153); (0x178, 0xff);
(0x1e9e, 0xdf);
]
let known_pairs : (Uchar.t * Uchar.t, Uchar.t) Hashtbl.t = Hashtbl.create 32
let _ =
List.iter
(fun (c1, n2, n) ->
Hashtbl.add known_pairs
(Uchar.of_char c1, Uchar.of_int n2) (Uchar.of_int n))
[
('A', 0x300, 0xc0); ('A', 0x301, 0xc1);
('A', 0x302, 0xc2); ('A', 0x303, 0xc3);
('A', 0x308, 0xc4); ('A', 0x30a, 0xc5);
('C', 0x327, 0xc7); ('E', 0x300, 0xc8);
('E', 0x301, 0xc9); ('E', 0x302, 0xca);
('E', 0x308, 0xcb); ('I', 0x300, 0xcc);
('I', 0x301, 0xcd); ('I', 0x302, 0xce);
('I', 0x308, 0xcf); ('N', 0x303, 0xd1);
('O', 0x300, 0xd2); ('O', 0x301, 0xd3);
('O', 0x302, 0xd4); ('O', 0x303, 0xd5);
('O', 0x308, 0xd6);
('U', 0x300, 0xd9); ('U', 0x301, 0xda);
('U', 0x302, 0xdb); ('U', 0x308, 0xdc);
('Y', 0x301, 0xdd); ('Y', 0x308, 0x178);
('S', 0x30c, 0x160); ('Z', 0x30c, 0x17d);
('a', 0x300, 0xe0); ('a', 0x301, 0xe1);
('a', 0x302, 0xe2); ('a', 0x303, 0xe3);
('a', 0x308, 0xe4); ('a', 0x30a, 0xe5);
('c', 0x327, 0xe7); ('e', 0x300, 0xe8);
('e', 0x301, 0xe9); ('e', 0x302, 0xea);
('e', 0x308, 0xeb); ('i', 0x300, 0xec);
('i', 0x301, 0xed); ('i', 0x302, 0xee);
('i', 0x308, 0xef); ('n', 0x303, 0xf1);
('o', 0x300, 0xf2); ('o', 0x301, 0xf3);
('o', 0x302, 0xf4); ('o', 0x303, 0xf5);
('o', 0x308, 0xf6);
('u', 0x300, 0xf9); ('u', 0x301, 0xfa);
('u', 0x302, 0xfb); ('u', 0x308, 0xfc);
('y', 0x301, 0xfd); ('y', 0x308, 0xff);
('s', 0x30c, 0x161); ('z', 0x30c, 0x17e);
]
let normalize_generic ~keep_ascii transform s =
let rec norm check buf prev i =
if i >= String.length s then begin
Buffer.add_utf_8_uchar buf (transform prev)
end else begin
let d = String.get_utf_8_uchar s i in
let u = Uchar.utf_decode_uchar d in
check d u;
let i' = i + Uchar.utf_decode_length d in
match Hashtbl.find_opt known_pairs (prev, u) with
| Some u' ->
norm check buf u' i'
| None ->
Buffer.add_utf_8_uchar buf (transform prev);
norm check buf u i'
end in
let ascii_limit = 128 in
if s = ""
|| keep_ascii && String.for_all (fun x -> Char.code x < ascii_limit) s
then Ok s
else
let buf = Buffer.create (String.length s) in
let valid = ref true in
let check d u =
valid := !valid && Uchar.utf_decode_is_valid d && u <> Uchar.rep
in
let d = String.get_utf_8_uchar s 0 in
let u = Uchar.utf_decode_uchar d in
check d u;
norm check buf u (Uchar.utf_decode_length d);
let contents = Buffer.contents buf in
if !valid then
Ok contents
else
Error contents
let normalize s =
normalize_generic ~keep_ascii:true (fun u -> u) s
let uchar_is_uppercase u =
let c = Uchar.to_int u in
if c < 0x80 then c >= 65 && c <= 90 else
match Hashtbl.find_opt known_chars u with
| Some(Upper _) -> true
| _ -> false
let uchar_lowercase u =
let c = Uchar.to_int u in
if c < 0x80 then
if c >= 65 && c <= 90 then Uchar.of_int (c + 32) else u
else
match Hashtbl.find_opt known_chars u with
| Some(Upper u') -> u'
| _ -> u
let uchar_uppercase u =
let c = Uchar.to_int u in
if c < 0x80 then
if c >= 97 && c <= 122 then Uchar.of_int (c - 32) else u
else
match Hashtbl.find_opt known_chars u with
| Some(Lower u') -> u'
| _ -> u
let capitalize s =
let first = ref true in
normalize_generic ~keep_ascii:false
(fun u -> if !first then (first := false; uchar_uppercase u) else u)
s
let uncapitalize s =
let first = ref true in
normalize_generic ~keep_ascii:false
(fun u -> if !first then (first := false; uchar_lowercase u) else u)
s
let is_capitalized s =
s <> "" &&
uchar_is_uppercase (Uchar.utf_decode_uchar (String.get_utf_8_uchar s 0))
let uchar_valid_in_identifier ~with_dot u =
let c = Uchar.to_int u in
if c < 0x80 then
c >= 97 && c <= 122
|| c >= 65 && c <= 90
|| c >= 48 && c <= 57
|| c = 95
|| c = 39
|| (with_dot && c = 46)
else
Hashtbl.mem known_chars u
let uchar_not_identifier_start u =
let c = Uchar.to_int u in
c >= 48 && c <= 57
|| c = 39
type validation_result =
| Valid
| Invalid_character of Uchar.t (** Character not allowed *)
| Invalid_beginning of Uchar.t (** Character not allowed as first char *)
let validate_identifier ?(with_dot=false) s =
let rec check i =
if i >= String.length s then Valid else begin
let d = String.get_utf_8_uchar s i in
let u = Uchar.utf_decode_uchar d in
let i' = i + Uchar.utf_decode_length d in
if not (uchar_valid_in_identifier ~with_dot u) then
Invalid_character u
else if i = 0 && uchar_not_identifier_start u then
Invalid_beginning u
else
check i'
end
in check 0
let is_valid_identifier s =
validate_identifier s = Valid
let starts_like_a_valid_identifier s =
s <> "" &&
(let u = Uchar.utf_decode_uchar (String.get_utf_8_uchar s 0) in
uchar_valid_in_identifier ~with_dot:false u
&& not (uchar_not_identifier_start u))
let is_lowercase s =
let rec is_lowercase_at len s n =
if n >= len then true
else
let d = String.get_utf_8_uchar s n in
let u = Uchar.utf_decode_uchar d in
(uchar_valid_in_identifier ~with_dot:false u)
&& not (uchar_is_uppercase u)
&& is_lowercase_at len s (n+Uchar.utf_decode_length d)
in
is_lowercase_at (String.length s) s 0
end