package irmin-pack
Irmin backend which stores values in a pack file
Install
Dune Dependency
Authors
Maintainers
Sources
irmin-3.6.1.tbz
sha256=11fc2570bdbfd48478c89113fc801084a84a9a2c12d9bf8c64e05ce64ae19bd7
sha512=2171699ca24dec5c9c908a2676b272e034c14eb17f7052a794535e52af0be40be68a689e59c0a640ee244b11703320483f4d0c261542e6242ba23a8f1272b9b0
doc/src/irmin-pack.unix/mapping_file.ml.html
Source file mapping_file.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
(* * Copyright (c) 2018-2022 Tarides <contact@tarides.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) (** The mapping file is a sorted list of intervals which allows the mapping from sparse virtual offsets to compact physical offsets. It is used by the GC to compact the live reachable data into the [prefix] file and delete the dead intervals of data from the disk. The concrete representation is a sorted array of triplets [(virtual_offset, physical_offset, length)]. Given a virtual offset, its physical location can be found by doing a binary search. *) open! Import include Mapping_file_intf module BigArr1 = Bigarray.Array1 type int64_bigarray = (int64, Bigarray.int64_elt, Bigarray.c_layout) Bigarray.Array1.t (* Set to 0 until we find decide what to do about sequential traversal of pack files *) let gap_tolerance = 0 module Int64_mmap : sig type t = private { fn : string; fd : Unix.file_descr; mutable arr : int64_bigarray; } val open_ro : fn:string -> sz:int -> t (** NOTE [open_ ~fn ~sz] can use [sz=-1] to open with size based on the size of the underlying file *) val open_rw : string -> t val close : t -> unit end = struct type t = { fn : string; fd : Unix.file_descr; mutable arr : int64_bigarray } (* NOTE sz=-1 is recognized by [map_file] as "derive from size of file"; if we want a different size (eg because we want the file to grow) we can provide it explicitly *) let open_ro ~fn ~sz = let = false in assert (Sys.file_exists fn); let fd = Unix.(openfile fn [ O_RDONLY ] 0o660) in let arr = let open Bigarray in Unix.map_file fd Int64 c_layout shared [| sz |] |> array1_of_genarray in { fn; fd; arr } let open_rw fn = (* NOTE following mmap is shared *) let = true in assert (Sys.file_exists fn); let fd = Unix.(openfile fn [ O_RDWR ] 0o660) in let arr = let open Bigarray in Unix.map_file fd Int64 c_layout shared [| -1 |] |> array1_of_genarray in { fn; fd; arr } let close t = Unix.close t.fd; (* following tries to make the array unreachable, so GC'able; however, no guarantee that arr actually is unreachable *) t.arr <- Bigarray.(Array1.create Int64 c_layout 0); () end (** The mapping file is created from a decreasing list of [(virtual_offset, 0, length)]. We first need to reverse it such that virtual offsets are in increasing order. *) let rev_inplace (src : int64_bigarray) : unit = let src_sz = BigArr1.dim src in let _ = assert (src_sz >= 3); assert (src_sz mod 3 = 0) in let rec rev i j = if i < j then ( let ioff, ilen = (src.{i}, src.{i + 2}) in let joff, jlen = (src.{j}, src.{j + 2}) in src.{i} <- joff; src.{i + 2} <- jlen; src.{j} <- ioff; src.{j + 2} <- ilen; rev (i + 3) (j - 3)) in rev 0 (src_sz - 3) let int64_endian : int64 -> int64 = fun i -> if Sys.big_endian then ( (* We are currently on a BE platform but the ints are encoded as LE in the file. We've just read a LE int using a BE decoding scheme. Let's fix this. The first step is to set [buf] to contain exactly what is stored on disk. Since the current platform is BE, we've interpreted what was written on disk using a BE decoding scheme. To do the opposite operation we must use a BE encoding scheme, hence [set_int64_be]. Now that [buf] mimics what was on disk, the second step consist of decoding it using a LE function, hence [get_int64_le]. *) let buf = Bytes.create 8 in Bytes.set_int64_be buf 0 i; Bytes.get_int64_le buf 0) else i let conv_int64 i = Int64.to_int (int64_endian i) (** We then replace the [0] component of the triplets with the accumulated length. This yields triplets [(virtual_offset, physical_offset, length)], which will allow us to map virtual offsets to their physical location in the prefix file. *) let set_prefix_offsets src = let src_sz = BigArr1.dim src in let rec go i poff = if i < src_sz then ( src.{i + 1} <- int64_endian poff; let len = int64_endian src.{i + 2} in go (i + 3) (Int64.add poff len)) in go 0 Int64.zero module Make (Io : Io.S) = struct module Io = Io module Errs = Io_errors.Make (Io) module Ao = Append_only_file.Make (Io) (Errs) type t = { arr : int64_bigarray; root : string; generation : int } let open_map ~root ~generation = let path = Irmin_pack.Layout.V4.mapping ~generation ~root in match Io.classify_path path with | `File -> ( let mmap = Int64_mmap.open_ro ~fn:path ~sz:(-1) in let arr = mmap.arr in let len = BigArr1.dim arr in match len > 0 && len mod 3 = 0 with | true -> Int64_mmap.close mmap; Ok { root; generation; arr } | false -> Error (`Corrupted_mapping_file (__FILE__ ^ ": mapping mmap size did not meet size requirements")) ) | _ -> Error `No_such_file_or_directory let create ?report_mapping_size ~root ~generation ~register_entries () = assert (generation > 0); let open Result_syntax in let path = Irmin_pack.Layout.V3.mapping ~generation ~root in let* () = if Sys.word_size <> 64 then Error `Gc_forbidden_on_32bit_platforms else Ok () in (* Unlink residual and ignore errors (typically no such file) *) Io.unlink path |> ignore; (* Create [file] *) let* file = Ao.create_rw ~path ~overwrite:true ~auto_flush_threshold:1_000_000 ~auto_flush_procedure:`Internal in (* Fill and close [file] *) let append_entry ~off ~len = (* Write [off, 0, len] in little-endian encoding for portability. The [0] reserves the space for the future prefix offset. *) let buffer = Bytes.create 24 in Bytes.set_int64_le buffer 0 (Int63.to_int64 off); Bytes.set_int64_le buffer 8 Int64.zero; Bytes.set_int64_le buffer 16 (Int64.of_int len); (* Bytes.unsafe_to_string usage: buffer is uniquely owned; we assume Bytes.set_int64_le returns unique ownership; we give up ownership of buffer in conversion to string. This is safe. *) Ao.append_exn file (Bytes.unsafe_to_string buffer) in (* Check if we can collapse consecutive entries *) let current_entry = ref None in let register_entry ~off ~len = let current = match !current_entry with | None -> (off, len) | Some (off', len') -> if off >= off' then invalid_arg "register_entry: offsets are not strictly decreasing"; let dist = Int63.to_int (Int63.sub off' off) in if dist <= len + gap_tolerance then (off, dist + len') else ( append_entry ~off:off' ~len:len'; (off, len)) in current_entry := Some current in let* () = Errs.catch (fun () -> register_entries ~register_entry; (* Flush pending entry *) match !current_entry with | None -> () | Some (off, len) -> append_entry ~off ~len) in let* () = Ao.flush file in let* () = Ao.close file in (* Reopen [file] but as an mmap *) let file = Int64_mmap.open_rw path in let* () = Errs.catch (fun () -> rev_inplace file.arr; set_prefix_offsets file.arr) in (* Flush and close new mapping [file] *) let* () = Errs.catch (fun () -> Unix.fsync file.fd) in Int64_mmap.close file; let* mapping_size = Io.size_of_path path in Option.iter (fun f -> f mapping_size) report_mapping_size; (* Open created map *) open_map ~root ~generation let entry_count arr = BigArr1.dim arr / 3 let entry_idx i = i * 3 let entry_off arr i = arr.{entry_idx i} |> conv_int64 |> Int63.of_int let entry_poff arr i = arr.{entry_idx i + 1} |> conv_int64 |> Int63.of_int let entry_len arr i = arr.{entry_idx i + 2} |> conv_int64 let iter_exn { arr; _ } f = for i = 0 to entry_count arr - 1 do f ~off:(entry_off arr i) ~len:(entry_len arr i) done let iter t f = Errs.catch (fun () -> iter_exn t f; ()) type entry = { off : int63; poff : int63; len : int } let find_nearest_leq { arr; _ } off = let get arr i = arr.{entry_idx i} |> conv_int64 in match Utils.nearest_leq ~arr ~get ~lo:0 ~hi:(entry_count arr - 1) ~key:(Int63.to_int off) with | `All_gt_key -> None | `Some i -> let off = entry_off arr i in let poff = entry_poff arr i in let len = entry_len arr i in Some { off; poff; len } end
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>