Source file string_monoid.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
open Core
(* The three kinds of chunk a string monoid can hold at its leaves, together
   with per-chunk length, blitting, output and comparison helpers. *)
module Underlying = struct
  type t =
    | String of String.t
    | Bigstring of Bigstring.t
    | Char of char

  (* Number of characters in the chunk; a [Char] always has length 1. *)
  let length chunk =
    match chunk with
    | Char _ -> 1
    | String s -> String.length s
    | Bigstring bs -> Bigstring.length bs
  ;;

  (* Copies [src_len] characters of [src], starting at [src_pos], into the bytes
     [dst] at [dst_pos]. [src_len] defaults to the full length of [src]. A
     [Char] source accepts only the ranges [(0, 1)], [(0, 0)] and [(1, 0)];
     any other [(src_pos, src_len)] raises [Invalid_argument]. *)
  let blit_bytes ?(src_pos = 0) ~src ?src_len:(len = length src) ~dst ?(dst_pos = 0) () =
    match src with
    | Char c ->
      if src_pos = 0 && len = 1
      then Bytes.set dst dst_pos c
      else if len = 0 && (src_pos = 0 || src_pos = 1)
      then ()
      else invalid_arg "index out of bounds"
    | String src -> Bytes.From_string.blit ~src ~src_pos ~len ~dst ~dst_pos
    | Bigstring src -> Bigstring.To_bytes.blit ~src ~src_pos ~len ~dst ~dst_pos
  ;;

  (* Same contract as [blit_bytes], but the destination is a bigstring. *)
  let blit_bigstring
    ?(src_pos = 0)
    ~src
    ?src_len:(len = length src)
    ~dst
    ?(dst_pos = 0)
    ()
    =
    match src with
    | Char c ->
      if src_pos = 0 && len = 1
      then dst.{dst_pos} <- c
      else if len = 0 && (src_pos = 0 || src_pos = 1)
      then ()
      else invalid_arg "index out of bounds"
    | String src -> Bigstring.From_string.blit ~src ~src_pos ~len ~dst ~dst_pos
    | Bigstring src -> Bigstring.blit ~src ~src_pos ~len ~dst ~dst_pos
  ;;

  (* Appends the chunk's characters to [bigbuffer]. *)
  let output_bigbuffer ~bigbuffer chunk =
    match chunk with
    | Char c -> Bigbuffer.add_char bigbuffer c
    | String s -> Bigbuffer.add_string bigbuffer s
    | Bigstring bs -> Bigbuffer.add_bigstring bigbuffer bs
  ;;

  (* [true] iff the [len] characters of [t] starting at [pos] are exactly the
     characters of [string]. [len] defaults to everything from [pos] to the end
     of [t]. A range that falls outside [t], or whose length differs from that
     of [string], yields [false] rather than raising. *)
  let is_substr_string ?(pos = 0) ?len t ~string =
    let total = length t in
    let len = Option.value len ~default:(total - pos) in
    len = Substring.length string
    && pos >= 0
    && pos + len <= total
    &&
    let char_at =
      match t with
      | Char c -> fun (_ : int) -> c
      | String s -> String.get s
      | Bigstring bs -> Bigstring.get bs
    in
    let rec agree_from i =
      i >= len
      || (Char.equal (char_at (pos + i)) (Substring.get string i) && agree_from (i + 1))
    in
    agree_from 0
  ;;

  (* [true] iff the window of [t] given by [pos]/[len] is at least as long as
     [prefix] and starts with it. *)
  let is_substr_prefix ?(pos = 0) ?len t ~prefix =
    let available = Option.value len ~default:(length t - pos) in
    let wanted = Substring.length prefix in
    available >= wanted && is_substr_string ~pos ~len:wanted t ~string:prefix
  ;;

  (* [true] iff the window of [t] given by [pos]/[len] is at least as long as
     [suffix] and ends with it. *)
  let is_substr_suffix ?(pos = 0) ?len t ~suffix =
    let available = Option.value len ~default:(length t - pos) in
    let wanted = Substring.length suffix in
    available >= wanted
    && is_substr_string ~pos:(pos + available - wanted) ~len:wanted t ~string:suffix
  ;;

  (* [true] iff [substring] occurs somewhere inside the window of [t] given by
     [pos]/[len]. Every candidate offset is tried in increasing order. *)
  let is_substr_substring ?(pos = 0) ?len t ~substring =
    let available = Option.value len ~default:(length t - pos) in
    let wanted = Substring.length substring in
    let rec found_at_or_after offset =
      offset + wanted <= available
      && (is_substr_string ~pos:(pos + offset) ~len:wanted t ~string:substring
          || found_at_or_after (offset + 1))
    in
    found_at_or_after 0
  ;;
end
(* A string represented as a tree of chunks.
   - [List (len, children)]: the children in left-to-right order; [len] is the
     value [length] reports for the node, i.e. the total number of characters.
   - [Leaf chunk]: a single [Underlying.t] chunk. *)
type t =
| List of (int * t list)
| Leaf of Underlying.t
(* The empty string: a [List] node of length zero with no children. *)
let empty : t = List (0, [])
(* Wraps [s] in a leaf; the empty string maps to [empty] so that no zero-length
   leaf is created. *)
let of_string s =
  match String.length s with
  | 0 -> empty
  | _ -> Leaf (Underlying.String s)
;;
(* Wraps [bs] in a leaf (without copying it); a zero-length bigstring maps to
   [empty]. *)
let of_bigstring bs =
  match Bigstring.length bs with
  | 0 -> empty
  | _ -> Leaf (Underlying.Bigstring bs)
;;
(* A one-character string holding [c]. *)
let of_char (c : char) : t = Leaf (Underlying.Char c)
(* The one-character string consisting of a newline. *)
let nl : t = Leaf (Underlying.Char '\n')
(* Total number of characters in [t]. For a [List] node this is the length
   stored in the node, so no traversal is needed. *)
let length t =
  match t with
  | Leaf chunk -> Underlying.length chunk
  | List (total, _) -> total
;;
(* [true] iff [t] contains no characters. *)
let is_empty t = Int.equal (length t) 0
(**
   [plus a b] is the concatenation of [a] followed by [b].

   The operation is not associative on the tree representation itself, but it
   is associative on the quotient by the equivalence relation
   x ~ y == (to_string x) = (to_string y)
*)
let plus a b =
  match a, b with
  (* Appending or prepending a zero-length list returns the other operand. *)
  | _, List (0, _) -> a
  | List (0, _), _ -> b
  | List (a_len, _), _ -> List (a_len + length b, [ a; b ])
  (* A leaf in front of a list is consed onto the list's children. *)
  | Leaf a_chunk, List (b_len, b_children) ->
    List (Underlying.length a_chunk + b_len, a :: b_children)
  | Leaf a_chunk, Leaf b_chunk ->
    List (Underlying.length a_chunk + Underlying.length b_chunk, [ a; b ])
;;
(* Concatenates [ts] in order, inserting [sep] (default: [empty]) between
   consecutive elements. An empty list gives [empty]. *)
let concat ?(sep = empty) ts =
  match ts with
  | [] -> empty
  | first :: rest ->
    let prepend_with_sep t acc = plus sep (plus t acc) in
    plus first (List.fold_right rest ~init:empty ~f:prepend_with_sep)
;;
(* Like [concat], but the separator and the elements are first converted with
   [of_underlying]. *)
let concat_underlying ~of_underlying ?sep strs =
  let sep = Option.map sep ~f:of_underlying in
  let converted = List.map strs ~f:of_underlying in
  concat ?sep converted
;;
(* [concat] for plain strings: joins [strs] with the optional string [sep]. *)
let concat_string ?sep strs = concat_underlying ~of_underlying:of_string ?sep strs
(* The type of a function that copies one [Underlying.t] chunk into some
   destination already captured by the function. [blit] below calls it with
   [~src], [~dst_pos] and [()] only, leaving [?src_pos] and [?src_len] at
   their defaults. *)
type blitter =
?src_pos:int -> src:Underlying.t -> ?src_len:int -> ?dst_pos:int -> unit -> unit
(* Copies every leaf of [t], left to right, by calling [dst_blit] with the
   destination offset at which that leaf starts (offsets start at 0). While
   walking a [List] node, asserts that the lengths of its children add up to
   the length stored in the node. *)
let blit ~(dst_blit : blitter) t =
  let rec go offset node =
    match node with
    | Leaf src -> dst_blit ~src ~dst_pos:offset ()
    | List (expected_len, children) ->
      let cursor = ref offset in
      List.iter children ~f:(fun child ->
        go !cursor child;
        cursor := !cursor + length child);
      assert (!cursor - offset = expected_len)
  in
  go 0 t
;;
(* Flattens [t] into a freshly allocated string of [length t] characters. *)
let to_string t =
  let buf = Bytes.create (length t) in
  let dst_blit = Underlying.blit_bytes ~dst:buf in
  blit ~dst_blit t;
  (* [buf] is local and is not touched again after this point. *)
  Bytes.unsafe_to_string ~no_mutation_while_string_reachable:buf
;;
(* Flattens [t] into a freshly created bigstring of [length t] characters. *)
let to_bigstring t =
  let buf = Bigstring.create (length t) in
  let dst_blit = Underlying.blit_bigstring ~dst:buf in
  blit ~dst_blit t;
  buf
;;
(* Calls [dst_output] on every leaf chunk of [t], in left-to-right order. *)
let output ~dst_output t =
  let rec go = function
    | Leaf chunk -> dst_output chunk
    | List (_, children) -> List.iter children ~f:go
  in
  go t
;;
(* Appends the contents of [t] to [bigbuffer], one leaf chunk at a time. *)
let output_bigbuffer t bigbuffer =
  let dst_output = Underlying.output_bigbuffer ~bigbuffer in
  output ~dst_output t
;;
(* Folds [f] over the leaf chunks of [t] in left-to-right order, threading the
   accumulator that starts at [init]. *)
let rec fold t ~init ~f =
  match t with
  | Leaf chunk -> f init chunk [@nontail]
  | List (_, children) ->
    List.fold children ~init ~f:(fun acc child -> fold child ~init:acc ~f [@nontail]) [@nontail]
;;
(* Applies [f] to each leaf chunk of [t] in left-to-right order. *)
let iter t ~f = fold t ~init:() ~f:(fun () chunk -> f chunk)
(* [true] iff the characters of [t] are exactly the characters of [string].
   The comparison walks the tree and never flattens [t]. *)
let rec is_substr_string t ~string =
  (is_empty t && Substring.is_empty string)
  || (length t = Substring.length string
      &&
      match t with
      | Leaf chunk -> Underlying.is_substr_string chunk ~string
      | List (len, children) ->
        assert (len = Substring.length string);
        (* Compare each child with the slice of [string] it lines up with,
           stopping at the first child that differs. *)
        let rec children_match pos = function
          | [] -> true
          | child :: rest ->
            let child_len = length child in
            is_substr_string child ~string:(Substring.sub string ~pos ~len:child_len)
            && children_match (pos + child_len) rest
        in
        children_match 0 children)
;;
(* Splits [substring] at [pos]: the first component holds its first [pos]
   characters, the second everything from [pos] onwards. *)
let substring_split substring ~pos =
  let tail = Substring.sub substring ~pos in
  let head = Substring.sub substring ~len:pos in
  head, tail
;;
(* [true] iff [t] ends with the characters of [suffix]. An empty [suffix]
   always matches. *)
let rec is_substr_suffix t ~suffix =
  let wanted = Substring.length suffix in
  wanted = 0
  || (length t >= wanted
      &&
      match t with
      | Leaf chunk -> Underlying.is_substr_suffix chunk ~suffix
      | List (_, []) -> Substring.is_empty suffix
      | List (_, [ only ]) -> is_substr_suffix only ~suffix
      | List (len, first :: rest) ->
        let rest_len = len - length first in
        let rest = List (rest_len, rest) in
        if rest_len >= wanted
        then (* The suffix fits entirely inside the remaining children. *)
          is_substr_suffix rest ~suffix
        else (
          (* The suffix straddles the boundary: its head must end [first] and
             its tail must be exactly the remaining children. *)
          let first_part, rest_part = substring_split suffix ~pos:(wanted - rest_len) in
          is_substr_suffix first ~suffix:first_part
          && is_substr_string rest ~string:rest_part))
;;
(* [true] iff [t] starts with the characters of [prefix]. An empty [prefix]
   always matches. *)
let rec is_substr_prefix t ~prefix =
  Substring.is_empty prefix
  || (length t >= Substring.length prefix
      &&
      match t with
      | Leaf chunk -> Underlying.is_substr_prefix chunk ~prefix
      | List (_, []) -> Substring.is_empty prefix
      | List (len, first :: rest) ->
        let first_len = length first in
        if first_len >= Substring.length prefix
        then (* The prefix fits entirely inside the first child. *)
          is_substr_prefix first ~prefix
        else (
          (* The prefix runs past the first child: its head must be exactly
             [first] and its tail must start the remaining children. *)
          let rest = List (len - first_len, rest) in
          let first_part, rest_part = substring_split prefix ~pos:first_len in
          is_substr_string first ~string:first_part
          && is_substr_prefix rest ~prefix:rest_part))
;;
(* [true] iff the characters of [substring] occur contiguously somewhere in
   [t]. An empty [substring] always matches. *)
let rec is_substr_substring t ~substring =
  let wanted = Substring.length substring in
  Substring.is_empty substring
  || (length t >= wanted
      &&
      match t with
      | Leaf chunk -> Underlying.is_substr_substring chunk ~substring
      | List (_, []) -> Substring.is_empty substring
      | List (_, [ only ]) -> is_substr_substring only ~substring
      | List (len, first :: rest) ->
        let rest = List (len - length first, rest) in
        (* Does a match straddle the boundary, with its first [split]
           characters ending [first] and the remainder starting [rest]? *)
        let straddles_at split =
          let first_part, rest_part = substring_split substring ~pos:split in
          is_substr_suffix first ~suffix:first_part
          && is_substr_prefix rest ~prefix:rest_part
        in
        (* Try every split point from the largest down to 1, then fall back to
           a match that lies wholly within [rest]. *)
        let rec try_splits split =
          if split <= 0
          then is_substr_substring rest ~substring
          else straddles_at split || try_splits (split - 1)
        in
        is_substr_substring first ~substring || try_splits (wanted - 1))
;;
(* [true] iff the contents of [t] equal [string]. *)
let is_string t ~string =
  (* The bytes view is only read through the substring, never written. *)
  let string = Substring.create (Bytes.unsafe_of_string_promise_no_mutation string) in
  is_substr_string t ~string
;;
(* [true] iff the contents of [t] start with [prefix]. *)
let is_prefix t ~prefix =
  (* The bytes view is only read through the substring, never written. *)
  let prefix = Substring.create (Bytes.unsafe_of_string_promise_no_mutation prefix) in
  is_substr_prefix t ~prefix
;;
(* [true] iff the contents of [t] end with [suffix]. *)
let is_suffix t ~suffix =
  (* The bytes view is only read through the substring, never written. *)
  let suffix = Substring.create (Bytes.unsafe_of_string_promise_no_mutation suffix) in
  is_substr_suffix t ~suffix
;;
(* [true] iff [substring] occurs somewhere in the contents of [t]. *)
let is_substring t ~substring =
  (* The bytes view is only read through the substring, never written. *)
  let substring =
    Substring.create (Bytes.unsafe_of_string_promise_no_mutation substring)
  in
  is_substr_substring t ~substring
;;
(* Expect tests for [is_string], [is_prefix], [is_suffix] and [is_substring].
   They all run against one haystack whose text, "hello big world", is spread
   over string, bigstring and char leaves, so matches have to cross leaf
   boundaries. *)
let%test_module _ =
  (module struct
    (* "hello" ^ " " ^ "big" ^ " " ^ "world", built from mixed leaf kinds. *)
    let haystack =
      concat
        ~sep:(of_char ' ')
        [ of_string "hello"; of_bigstring (Bigstring.of_string "big"); of_string "world" ]
    ;;

    let%expect_test "is_string" =
      printf "%b" (is_string haystack ~string:"hello big world");
      [%expect {| true |}];
      printf "%b" (is_string haystack ~string:"hello");
      [%expect {| false |}];
      printf "%b" (is_string haystack ~string:"o big");
      [%expect {| false |}]
    ;;

    let%expect_test "is_prefix" =
      printf "%b" (is_prefix haystack ~prefix:"");
      [%expect {| true |}];
      printf "%b" (is_prefix haystack ~prefix:"h");
      [%expect {| true |}];
      printf "%b" (is_prefix haystack ~prefix:"hello");
      [%expect {| true |}];
      printf "%b" (is_prefix haystack ~prefix:"hello ");
      [%expect {| true |}];
      printf "%b" (is_prefix haystack ~prefix:"hello b");
      [%expect {| true |}];
      printf "%b" (is_prefix haystack ~prefix:"hello big");
      [%expect {| true |}];
      printf "%b" (is_prefix haystack ~prefix:"hello big world");
      [%expect {| true |}];
      printf "%b" (is_prefix haystack ~prefix:"hello big round");
      [%expect {| false |}];
      printf "%b" (is_prefix haystack ~prefix:"hello big world!");
      [%expect {| false |}];
      printf "%b" (is_prefix haystack ~prefix:"b");
      [%expect {| false |}];
      printf "%b" (is_prefix haystack ~prefix:"world");
      [%expect {| false |}];
      printf "%b" (is_prefix haystack ~prefix:"d");
      [%expect {| false |}]
    ;;

    let%expect_test "is_suffix" =
      printf "%b" (is_suffix haystack ~suffix:"");
      [%expect {| true |}];
      printf "%b" (is_suffix haystack ~suffix:"d");
      [%expect {| true |}];
      printf "%b" (is_suffix haystack ~suffix:"world");
      [%expect {| true |}];
      printf "%b" (is_suffix haystack ~suffix:" world");
      [%expect {| true |}];
      printf "%b" (is_suffix haystack ~suffix:"g world");
      [%expect {| true |}];
      printf "%b" (is_suffix haystack ~suffix:"big world");
      [%expect {| true |}];
      printf "%b" (is_suffix haystack ~suffix:"hello big world");
      [%expect {| true |}];
      printf "%b" (is_suffix haystack ~suffix:"round world");
      [%expect {| false |}];
      printf "%b" (is_suffix haystack ~suffix:"hello big world!");
      [%expect {| false |}];
      printf "%b" (is_suffix haystack ~suffix:"hello");
      [%expect {| false |}]
    ;;

    let%expect_test "is_substring" =
      printf "%b" (is_substring haystack ~substring:"");
      [%expect {| true |}];
      printf "%b" (is_substring haystack ~substring:"w");
      [%expect {| true |}];
      printf "%b" (is_substring haystack ~substring:"d");
      [%expect {| true |}];
      printf "%b" (is_substring haystack ~substring:"big");
      [%expect {| true |}];
      printf "%b" (is_substring haystack ~substring:"ell");
      [%expect {| true |}];
      (* Text from neighbouring leaves must not match when the separator that
         sits between them is missing from the needle. *)
      printf "%b" (is_substring haystack ~substring:"hellobig");
      [%expect {| false |}];
      printf "%b" (is_substring haystack ~substring:"o b");
      [%expect {| true |}];
      printf "%b" (is_substring haystack ~substring:"o big w");
      [%expect {| true |}];
      printf "%b" (is_substring haystack ~substring:"hello big world");
      [%expect {| true |}];
      printf "%b" (is_substring haystack ~substring:"hello big world!");
      [%expect {| false |}];
      printf "%b" (is_substring haystack ~substring:"big world");
      [%expect {| true |}];
      printf "%b" (is_substring haystack ~substring:"hello big");
      [%expect {| true |}];
      printf "%b" (is_substring haystack ~substring:"hello big ");
      [%expect {| true |}];
      printf "%b" (is_substring haystack ~substring:"hello big round");
      [%expect {| false |}];
      printf "%b" (is_substring haystack ~substring:"round");
      [%expect {| false |}];
      printf "%b" (is_substring haystack ~substring:"big round");
      [%expect {| false |}]
    ;;
  end)
;;
(* Re-exports the generic [output] traversal under [Private]. *)
module Private = struct
  let output ~dst_output t = output ~dst_output t
end