package octez-libs

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file immutable_chunked_byte_vector.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
(*****************************************************************************)
(*                                                                           *)
(* Open Source License                                                       *)
(* Copyright (c) 2023 TriliTech  <contact@trili.tech>                        *)
(*                                                                           *)
(* Permission is hereby granted, free of charge, to any person obtaining a   *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense,  *)
(* and/or sell copies of the Software, and to permit persons to whom the     *)
(* Software is furnished to do so, subject to the following conditions:      *)
(*                                                                           *)
(* The above copyright notice and this permission notice shall be included   *)
(* in all copies or substantial portions of the Software.                    *)
(*                                                                           *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL   *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING   *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER       *)
(* DEALINGS IN THE SOFTWARE.                                                 *)
(*                                                                           *)
(*****************************************************************************)

module String = StdLabels.String
module Bytes = StdLabels.Bytes

exception Bounds

exception SizeOverflow

let reraise = function
  | Lazy_vector.Bounds -> raise Bounds
  | Lazy_vector.SizeOverflow -> raise SizeOverflow
  | exn -> raise exn

module Chunk = struct
  type t = string

  (** Number of bits in an address for the chunk offset *)
  let offset_bits = 9

  (** Size of a chunk in bytes - with 9 bits of address space the
      chunk is 512 bytes *)
  let size = Int64.shift_left 1L offset_bits

  (** The same size but of type int, for internal usage. *)
  let size_int : int = Int.shift_left 1 offset_bits

  (** Get the chunk index for an address. *)
  let index address = Int64.shift_right address offset_bits

  (** Get the offset within its chunk for a given address. *)
  let offset address = Int64.(logand address (sub size 1L))

  (** Get the address from a page index and an offset. *)
  let address ~index ~offset = Int64.(add (shift_left index offset_bits) offset)

  let alloc () = String.make size_int (Char.chr 0)

  (* Replace [dst] chunk's substring [[dst_pos]..[dst_pos] + [len] - 1]
     with substring of [src] bytes [[src_pos]..[src_pos] + [len] - 1].
     Creates new chunk, leaving original [dst] unchanged. *)
  let blit ~(src : bytes) ~(src_pos : int) ~(dst : t) ~(dst_pos : int64)
      ~(len : int) : t =
    let bytes_len = Bytes.length src in
    if
      len <= 0
      || src_pos + len > bytes_len
      || Int64.add dst_pos (Int64.of_int len) > size
    then raise Bounds
    else if len = size_int then
      (* The whole chunk should be replaced, so we just recreate a new one *)
      Bytes.unsafe_to_string @@ Bytes.sub src ~pos:src_pos ~len
    else
      (* Copy original chunk and replace corresponding substring *)
      let new_chunk_bytes = String.to_bytes dst in
      Bytes.blit
        ~src
        ~src_pos
        ~dst:new_chunk_bytes
        ~dst_pos:(Int64.to_int dst_pos)
        ~len ;
      Bytes.unsafe_to_string new_chunk_bytes

  let of_bytes_sub bytes ~(pos : int) ~(len : int) =
    if len > size_int then raise Bounds
    else if len = size_int then
      (* Just create the whole chunk *)
      let copied_sub = Bytes.sub bytes ~pos ~len in
      Bytes.unsafe_to_string copied_sub
    else blit ~src:bytes ~src_pos:pos ~dst:(alloc ()) ~dst_pos:0L ~len

  let of_bytes bytes =
    of_bytes_sub bytes ~pos:0 ~len:(Int.min (Bytes.length bytes) size_int)

  let to_bytes = String.to_bytes

  let num_needed length =
    if Int64.compare length 0L > 0 then
      (* [pred length] is used to cover the edge cases where [length] is an exact
          multiple of [Chunk.size]. For example [div Chunk.size Chunk.size] is 1
          but would allocate 2 chunks without a [pred] applied to the first
          argument. *)
      Int64.(div (pred length) size |> succ)
    else 0L

  (* Return left and right addresses of a chunk *)
  let inclusive_boundaries chunk_id =
    ( address ~index:chunk_id ~offset:0L,
      address ~index:chunk_id ~offset:(Int64.sub size 1L) )

  (* Sets char at position [offset] to [chr] and returns new chunk,
     leaving original one unchanged *)
  let set_char (s : t) (offset : int64) (chr : char) =
    let copied_bytes = String.to_bytes s in
    Bytes.set copied_bytes (Int64.to_int offset) chr ;
    Bytes.unsafe_to_string copied_bytes

  let encoding =
    let open Tezos_tree_encoding in
    conv of_bytes to_bytes (raw [])
end

module Vector = Lazy_vector.Int64Vector

type t = {length : int64; chunks : Chunk.t Vector.t}

let def_get_chunk _ = Lwt.return (Chunk.alloc ())

let set_chunk (vec : t) (chunk_id : int64) (chunk : Chunk.t) =
  let new_chunks = Vector.set chunk_id chunk vec.chunks in
  {vec with chunks = new_chunks}

let create ?origin ?get_chunk length =
  let chunks =
    Vector.create ?origin ?produce_value:get_chunk (Chunk.num_needed length)
  in
  {length; chunks}

let origin vector = Vector.origin vector.chunks

let grow vector size_delta =
  if size_delta > 0L then
    let new_size = Int64.add vector.length size_delta in
    let new_chunks = Chunk.num_needed new_size in
    let current_chunks = Vector.num_elements vector.chunks in
    let chunk_count_delta = Int64.sub new_chunks current_chunks in
    if Int64.compare chunk_count_delta 0L > 0 then
      (* We cannot make any assumption on the previous value of
         [produce_value]. In particular, it may very well raise an
         error in case of absent value (which is the case when
         growing the chunked byte vector requires to allocate new
         chunks). *)
      {length = new_size; chunks = Vector.grow chunk_count_delta vector.chunks}
    else {vector with length = new_size}
  else vector

let allocate length = grow (create 0L) length

let of_bytes bytes =
  let length = Int64.of_int (Bytes.length bytes) in
  let rec set_chunks (chunk_id : int64) vec =
    let chunk_left, _ = Chunk.inclusive_boundaries chunk_id in
    if chunk_left >= length then vec
    else
      let suffix_len = Int64.to_int @@ Int64.sub length chunk_left in
      let len = Int.min Chunk.size_int suffix_len in
      let chunk =
        Chunk.of_bytes_sub bytes ~pos:(Int64.to_int chunk_left) ~len
      in
      let new_vec = set_chunk vec chunk_id chunk in
      (set_chunks [@tailcall]) (Int64.add chunk_id 1L) new_vec
  in
  set_chunks 0L (allocate length)

let of_string str = of_bytes @@ Bytes.unsafe_of_string str

let length vector = vector.length

let get_chunk index {chunks; _} =
  Lwt.catch
    (fun () -> Vector.get index chunks)
    (function
      | Lazy_vector.Bounds as exn -> reraise exn | _ -> def_get_chunk ())

let load_byte vector address =
  let open Lwt.Syntax in
  if address >= vector.length || address < 0L then raise Bounds ;
  let+ chunk = get_chunk (Chunk.index address) vector in
  let offset = Int64.to_int @@ Chunk.offset address in
  Char.code @@ String.get chunk offset

let load_bytes vector offset length =
  let open Lwt.Syntax in
  let end_offset = Int64.pred @@ Int64.add offset length in
  (* Ensure [offset] and [end_offset] are valid indeces in the vector.

     Once we ensure the vector can be contained in a string, we can safely
     convert everything to int, since the size of the vector is contained in
     a `nativeint`. See {!of_string} comment. *)
  if
    offset < 0L || length < 0L
    || end_offset >= vector.length
    || vector.length > Int64.of_int Sys.max_string_length
  then raise Bounds ;
  let accum = Bytes.make (Int64.to_int length) (Char.chr 0) in
  let rec copy_chunks (vector_pos : int64) =
    if vector_pos > end_offset then Lwt.return_unit
    else
      let chunk_id = Chunk.index vector_pos in
      let left, right = Chunk.inclusive_boundaries chunk_id in
      let* chunk = get_chunk chunk_id vector in
      let l_chunk_offset, r_chunk_offset =
        (* Chunk fully lies in the the suffix of target bytes *)
        if left == vector_pos && right <= end_offset then (0, Chunk.size_int - 1)
        else
          ( Int64.to_int @@ Chunk.offset @@ Int64.max left vector_pos,
            Int64.to_int @@ Chunk.offset @@ Int64.min right end_offset )
      in
      let sub_chunk_len = r_chunk_offset - l_chunk_offset + 1 in
      let accum_pos = Int64.to_int @@ Int64.sub vector_pos offset in
      String.blit
        ~src:chunk
        ~src_pos:l_chunk_offset
        ~dst:accum
        ~dst_pos:accum_pos
        ~len:sub_chunk_len ;
      (copy_chunks [@tailcall])
        (Int64.add vector_pos @@ Int64.of_int sub_chunk_len)
  in
  let+ () = copy_chunks offset in
  accum

let store_byte vector address byte =
  let open Lwt.Syntax in
  if address >= vector.length || address < 0L then raise Bounds ;
  let+ chunk = get_chunk (Chunk.index address) vector in
  let offset = Chunk.offset address in
  let new_chunk = Chunk.set_char chunk offset (Char.chr byte) in
  set_chunk vector (Chunk.index address) new_chunk

let store_bytes vector offset bytes =
  let open Lwt.Syntax in
  let length = Int64.of_int @@ Bytes.length bytes in
  let end_offset = Int64.pred @@ Int64.add offset length in
  if
    offset < 0L
    || end_offset >= vector.length
    || vector.length > Int64.of_int Sys.max_string_length
  then raise Bounds ;

  let rec set_chunks (bytes_pos : int) vec =
    if Int64.of_int bytes_pos >= length then Lwt.return vec
    else
      let offseted_pos = Int64.add offset (Int64.of_int bytes_pos) in
      let chunk_id = Chunk.index offseted_pos in
      let _, chunk_right = Chunk.inclusive_boundaries chunk_id in
      let* chunk = get_chunk chunk_id vec in
      (* [l_range; r_range] in the chunk which has to be rewritten *)
      let l_range = Chunk.offset offseted_pos in
      let r_range =
        if end_offset >= chunk_right then
          (* If vector's right offset exceeds chunk's right offset in the vector,
             the we just replace a suffix of the chunk *)
          Chunk.offset chunk_right
        else
          (* Otherwise we replace a substring of the chunk *)
          Chunk.offset end_offset
      in
      let len = Int64.succ (Int64.sub r_range l_range) |> Int64.to_int in
      let new_chunk =
        Chunk.blit
          ~src:bytes
          ~src_pos:bytes_pos
          ~dst:chunk
          ~dst_pos:l_range
          ~len
      in
      let new_vec = set_chunk vec chunk_id new_chunk in
      (set_chunks [@tailcall]) (bytes_pos + len) new_vec
  in
  set_chunks 0 vector

let to_bytes vector = load_bytes vector 0L vector.length

let to_string vector =
  let open Lwt.Syntax in
  let+ buffer = to_bytes vector in
  Bytes.to_string buffer

let loaded_chunks vector = Vector.loaded_bindings vector.chunks

module Enc_intf = struct
  type nonrec t = t

  type chunk = Chunk.t

  let origin = origin

  let loaded_chunks = loaded_chunks

  let length = length

  let create = create
end

module Encoding = Tezos_tree_encoding.CBV_encoding.Make (Enc_intf)

let encoding = Encoding.cbv Chunk.encoding
OCaml

Innovation. Community. Security.