Source file cset.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
module List = struct end
open Import
type c = int
let to_int x = x
let of_int x = x
let to_char t = Char.chr t
let of_char c = Char.code c
type t = (c * c) list
let equal = List.equal ~eq:(fun (x, y) (x', y') -> Int.equal x x' && Int.equal y y')
let rec union l l' =
match l, l' with
| _, [] -> l
| [], _ -> l'
| (c1, c2) :: r, (c1', c2') :: r' ->
if c2 + 1 < c1'
then (c1, c2) :: union r l'
else if c2' + 1 < c1
then (c1', c2') :: union l r'
else if c2 < c2'
then union r ((min c1 c1', c2') :: r')
else union ((min c1 c1', c2) :: r) r'
;;
let rec inter l l' =
match l, l' with
| _, [] -> []
| [], _ -> []
| (c1, c2) :: r, (c1', c2') :: r' ->
if c2 < c1'
then inter r l'
else if c2' < c1
then inter l r'
else if c2 < c2'
then (max c1 c1', c2) :: inter r l'
else (max c1 c1', c2') :: inter l r'
;;
let rec diff l l' =
match l, l' with
| _, [] -> l
| [], _ -> []
| (c1, c2) :: r, (c1', c2') :: r' ->
if c2 < c1'
then (c1, c2) :: diff r l'
else if c2' < c1
then diff l r'
else (
let r'' = if c2' < c2 then (c2' + 1, c2) :: r else r in
if c1 < c1' then (c1, c1' - 1) :: diff r'' r' else diff r'' r')
;;
let single c = [ c, c ]
let add c l = union (single c) l
let seq c c' = if c <= c' then [ c, c' ] else [ c', c ]
let rec offset o l =
match l with
| [] -> []
| (c1, c2) :: r -> (c1 + o, c2 + o) :: offset o r
;;
let empty = []
let cany = [ 0, 255 ]
let union_all : t list -> t = List.fold_left ~init:empty ~f:union
let intersect_all : t list -> t = List.fold_left ~init:cany ~f:inter
let rec mem (c : int) s =
match s with
| [] -> false
| (c1, c2) :: rem -> if c <= c2 then c >= c1 else mem c rem
;;
type hash = int
let rec hash_rec = function
| [] -> 0
| (i, j) :: r -> i + (13 * j) + (257 * hash_rec r)
;;
let hash l = hash_rec l land 0x3FFFFFFF
let print_one ch (c1, c2) =
if Int.equal c1 c2 then Format.fprintf ch "%d" c1 else Format.fprintf ch "%d-%d" c1 c2
;;
let pp = Fmt.list print_one
let rec iter t ~f =
match t with
| [] -> ()
| (x, y) :: xs ->
f x y;
iter xs ~f
;;
let one_char = function
| [ (i, j) ] when Int.equal i j -> Some i
| _ -> None
;;
module CSetMap = Map.Make (struct
type t = int * (int * int) list
let compare (i, u) (j, v) =
let c = compare i j in
if c <> 0 then c else compare u v
;;
end)
let fold_right t ~init ~f = List.fold_right ~f t ~init
let csingle c = single (Char.code c)
let is_empty = function
| [] -> true
| _ -> false
;;
let rec prepend s x l =
match s, l with
| [], _ -> l
| _r, [] -> []
| (_c, c') :: r, ([ (d, _d') ], _x') :: _r' when c' < d -> prepend r x l
| (c, c') :: r, ([ (d, d') ], x') :: r' ->
if c <= d
then
if c' < d'
then ([ d, c' ], x @ x') :: prepend r x (([ c' + 1, d' ], x') :: r')
else ([ d, d' ], x @ x') :: prepend s x r'
else if c > d'
then ([ d, d' ], x') :: prepend s x r'
else ([ d, c - 1 ], x') :: prepend s x (([ c, d' ], x') :: r')
| _ -> assert false
;;
let pick = function
| [] -> invalid_arg "Re_cset.pick"
| (x, _) :: _ -> x
;;
let cseq c c' = seq (of_char c) (of_char c')
let rg = cseq
let char = csingle
let upper = union_all [ cseq 'A' 'Z'; cseq '\192' '\214'; cseq '\216' '\222' ]
let clower = offset 32 upper
let cdigit = cseq '0' '9'
let ascii = cseq '\000' '\127'
let cadd c s = add (of_char c) s
let space = add (of_char ' ') (cseq '\009' '\013')
let xdigit = union_all [ cdigit; cseq 'a' 'f'; cseq 'A' 'F' ]
let calpha =
List.fold_right
~f:cadd
[ '\170'; '\181'; '\186'; '\223'; '\255' ]
~init:(union clower upper)
;;
let calnum = union calpha cdigit
let case_insens s =
union_all [ s; offset 32 (inter s upper); offset (-32) (inter s clower) ]
;;
let cword = cadd '_' calnum
let notnl = diff cany (csingle '\n')
let nl = csingle '\n'
let set str =
let s = ref empty in
for i = 0 to String.length str - 1 do
s := union (csingle str.[i]) !s
done;
!s
;;
let blank = set "\t "
let lower = union_all [ rg 'a' 'z'; char '\181'; rg '\223' '\246'; rg '\248' '\255' ]
let alpha = union_all [ lower; upper; char '\170'; char '\186' ]
let alnum = union_all [ alpha; cdigit ]
let wordc = union_all [ alnum; char '_' ]
let cntrl = union_all [ rg '\000' '\031'; rg '\127' '\159' ]
let graph = union_all [ rg '\033' '\126'; rg '\160' '\255' ]
let print = union_all [ rg '\032' '\126'; rg '\160' '\255' ]
let punct =
union_all
[ rg '\033' '\047'
; rg '\058' '\064'
; rg '\091' '\096'
; rg '\123' '\126'
; rg '\160' '\169'
; rg '\171' '\180'
; rg '\182' '\185'
; rg '\187' '\191'
; char '\215'
; char '\247'
]
;;