package irmin
Irmin, a distributed database that follows the same design principles as Git
Install
Dune Dependency
Authors
Maintainers
Sources
irmin-3.7.2.tbz
sha256=6aa5ba4c6a0233747d4bd3ab396f2b3a33bb80f9d8ed4a178c83a035033c0e52
sha512=8a5506a57e8569a11c883ca25b3c60ae7a9a03c2da111668193d7b4296434c43f1a310e78f189e4b38c234da790259d2961c6a0ae481282c18bb97f618fccfb6
doc/src/irmin.data/fixed_size_string_set.ml.html
Source file fixed_size_string_set.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
(* * Copyright (c) 2022 Tarides <contact@tarides.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *) type elt = string type t = { elt_length : int; hash_elt : elt -> int; hash_elt_substring : Bigstringaf.t -> off:int -> len:int -> int; empty_slot : elt; mutable data : Bigstringaf.t; mutable data_length : int; mutable slot_count : int; mutable cardinal : int; } type hashset = t module Slot : sig type t val of_elt : hashset -> elt -> t val of_elt_substring : hashset -> src:Bigstringaf.t -> src_off:int -> t val contains : hashset -> t -> elt -> bool val contains_substring : hashset -> t -> src:Bigstringaf.t -> src_off:int -> bool val is_empty : hashset -> t -> bool val get : hashset -> t -> elt val set : hashset -> t -> elt -> unit val set_substring : hashset -> t -> src:Bigstringaf.t -> src_off:int -> unit val next : hashset -> t -> t val iter_all : hashset -> f:(t -> unit) -> unit val to_offset : t -> int end = struct type t = Offset of int [@@ocaml.unboxed] let offset_of_hash h hash = let index = abs hash mod h.slot_count in Offset (index * h.elt_length) let of_elt h elt = offset_of_hash h (h.hash_elt elt) let of_elt_substring h ~src ~src_off = offset_of_hash h (h.hash_elt_substring src ~off:src_off ~len:h.elt_length) let contains h (Offset offset) string = Bigstringaf.memcmp_string h.data offset string 0 h.elt_length = 0 let contains_substring h (Offset offset) ~src ~src_off = Bigstringaf.memcmp h.data offset src src_off h.elt_length = 0 let is_empty h t = contains h t h.empty_slot let get h (Offset offset) = Bigstringaf.substring h.data ~off:offset ~len:h.elt_length let set h (Offset offset) elt = Bigstringaf.blit_from_string elt ~src_off:0 h.data ~dst_off:offset ~len:h.elt_length let set_substring h (Offset offset) ~src ~src_off = Bigstringaf.blit src ~src_off h.data ~dst_off:offset ~len:h.elt_length let next h (Offset offset) = Offset ((offset + h.elt_length) mod h.data_length) let iter_all hashset ~f = assert (hashset.data_length <> 0); f (Offset 0); let rec aux = function | Offset 0 -> () | offset -> f offset; aux (next hashset offset) in aux (next hashset (Offset 0)) let to_offset (Offset n) = n end let empty_all_slots t = Slot.iter_all t ~f:(fun slot -> Slot.set t slot t.empty_slot) module Default = struct let hash : string -> int = Hashtbl.hash let hash_substring t ~off ~len = hash (Bigstringaf.substring t ~off ~len) let null ~elt_length = String.make elt_length '\000' end let create ~elt_length ?(initial_slots = 0) ?hash ?hash_substring ?null () = if elt_length <= 0 then Fmt.invalid_arg "%s.create: element length must be strictly positive" __MODULE__; let empty_slot = match null with Some x -> x | None -> Default.null ~elt_length in let hash_elt, hash_elt_substring = match (hash, hash_substring) with | Some h, Some h' -> (h, h') | None, None -> (Default.hash, Default.hash_substring) | Some _, None | None, Some _ -> Fmt.invalid_arg "%s.create: must pass either both [hash] and [hash_substring] or \ neither" __MODULE__ in let slot_count = let rec aux n = if n >= initial_slots then n else if n * 2 > Sys.max_array_length then n else aux (n * 2) in aux 2 in let data_length = slot_count * elt_length in let data = Bigstringaf.create data_length in let t = { data; data_length; hash_elt; hash_elt_substring; elt_length; empty_slot; slot_count; cardinal = 0; } in empty_all_slots t; t let load_factor t = let slots_available = Bigstringaf.length t.data / t.elt_length in Float.of_int t.cardinal /. Float.of_int slots_available type ok_or_duplicate = [ `Ok | `Duplicate ] let rec unguarded_add t slot elt : ok_or_duplicate = if Slot.is_empty t slot then ( (* Write the element to this slot *) Slot.set t slot elt; `Ok) else if Slot.contains t slot elt then `Duplicate else unguarded_add t (Slot.next t slot) elt let rec unguarded_add_substring t slot ~src ~src_off : ok_or_duplicate = if Slot.is_empty t slot then ( (* Write the element to this slot *) Slot.set_substring t slot ~src ~src_off; `Ok) else if Slot.contains_substring t slot ~src ~src_off then `Duplicate else unguarded_add_substring t (Slot.next t slot) ~src ~src_off let resize t = let old_len = Bigstringaf.length t.data in let old_data = t.data in let new_len = old_len + (t.slot_count / 2 * t.elt_length) in let new_data = Bigstringaf.create new_len in let old_t = { t with data = old_data; data_length = old_len } in t.data <- new_data; t.data_length <- new_len; t.slot_count <- new_len / t.elt_length; empty_all_slots t; Slot.iter_all old_t ~f:(fun old_slot -> if not (Slot.is_empty old_t old_slot) then let src_off = Slot.to_offset old_slot in let new_slot = Slot.of_elt_substring t ~src:old_t.data ~src_off in let result = unguarded_add_substring t new_slot ~src:old_t.data ~src_off in assert (result = `Ok)) (* Resize when the hashset is more than 90% full: *) let max_load_factor = 0.9 let add t elt = if String.length elt <> t.elt_length then Fmt.invalid_arg "%s.add: cannot write string of incorrect size to hashset" __MODULE__; if String.equal elt t.empty_slot then Fmt.invalid_arg "%s.add: cannot write null value to hashset" __MODULE__; if Float.compare (load_factor t) max_load_factor >= 0 then resize t; let slot = Slot.of_elt t elt in let result = unguarded_add t slot elt in if result = `Ok then t.cardinal <- t.cardinal + 1; result let add_exn t elt = match add t elt with | `Ok -> () | `Duplicate -> Fmt.invalid_arg "%s.add_exn: element '%S' already present" __MODULE__ elt let mem t elt = if String.length elt <> t.elt_length then Fmt.invalid_arg "%s.mem: cannot read string of incorrect size from hashset" __MODULE__; if String.equal elt t.empty_slot then Fmt.failwith "%s.mem: cannot read null value from hashset" __MODULE__; let rec probe_loop slot = if Slot.contains t slot elt then true else if Slot.is_empty t slot then false else probe_loop (Slot.next t slot) in probe_loop (Slot.of_elt t elt) let invariant invariant_elt t = let element_count = ref 0 in Slot.iter_all t ~f:(fun slot -> if not (Slot.is_empty t slot) then ( incr element_count; invariant_elt (Slot.get t slot))); assert (t.cardinal = !element_count) (* Using [Obj.reachable_words] directly on values of type [t] will give inaccurate results since bigstrings are allocated on the C heap. As a workaround, we provide a dedicated [reachable_words] function for use in benchmarking this implementation. *) let reachable_words t = let bytes_per_word = Sys.word_size / 8 in (t.data_length / bytes_per_word) + Obj.reachable_words (Obj.repr t)
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>