package grenier
A collection of various algorithms in OCaml
Install
Dune Dependency
Authors
Maintainers
Sources
grenier-0.15.tbz
sha256=dec7f84b9e93d5825f10c7dea84d5a74d7365ede45664ae63c26b5e8045c1c44
sha512=b8aa1569c2e24b89674d1b34de34cd1798896bb6a53aa5a1287f68cee880125e6b687f66ad73da9069a01cc3ece1f0684f48328b099d43529bff736b772c8fd8
doc/src/grenier.hll/hll.ml.html
Source file hll.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
let sqr x = x *. x let first_setbit n = (* Reverse mapping of B(2,6) See http://chessprogramming.wikispaces.com/De+Bruijn+sequence *) let db26 = "\x00\x01\x02\x35\x03\x07\x36\x1b\x04\x26\x29\x08\x22\x37\x30\x1c\x3e\x05\x27\x2e\x2c\x2a\x16\x09\x18\x23\x3b\x38\x31\x12\x1d\x0b\x3f\x34\x06\x1a\x25\x28\x21\x2f\x3d\x2d\x2b\x15\x17\x3a\x11\x0a\x33\x19\x24\x20\x3c\x14\x39\x10\x32\x1f\x13\x0f\x1e\x0e\x0d\x0c" in (* Isolate lsb See http://aggregate.org/MAGIC/#Least%20Significant%201%20Bit *) let n = Int64.logand n (Int64.neg n) in (* Get index in B(2,6) *) let n = Int64.mul n 0x022fdd63cc95386dL in let n = Int64.shift_right_logical n 58 in Char.code db26.[Int64.to_int n] type t = bytes (** Building a new hll *) let validate t = (1 lsl Char.code (Bytes.get t 0) + 1 = Bytes.length t) let estimate_memory ~error = let p = int_of_float (ceil (log (sqr (1.04 /. error)))) in (1 lsl p) let make ~error = assert (0. < error && error < 1.); let p = int_of_float (ceil (log (sqr (1.04 /. error)))) in let t = Bytes.make (1 lsl p + 1) '\000' in Bytes.set t 0 (Char.chr p); assert (validate t); t let clear t = Bytes.fill t 1 (Bytes.length t - 1) '\000'; assert (validate t) (** Adding an element to the hll *) let get_rho w = if w = 0L then 64 else 1 + first_setbit w let add t x = let p = Char.code (Bytes.get t 0) in let m = 1 lsl p in let j = Int64.to_int x land (m - 1) + 1 in let w = Int64.shift_right_logical x p in Bytes.set t j (Char.chr (max (Char.code (Bytes.get t j)) (get_rho w))) (* assert (validate t): micro benchmark shows that validating in an add loop has a 10% overhead, not necessary. *) (** Merging and copying hlls *) let copy t = Bytes.copy t let merge ~into:t t' = let length = Bytes.length t in if length <> Bytes.length t' then invalid_arg "update: counters precision should be equal"; for i = 1 to length - 1 do Bytes.set t i (max (Bytes.get t i) (Bytes.get t' i)) done; assert (validate t) (** Estimating cardinality, HyperLogLog *) let count_nulls t = let nulls = ref 0 in for i = 1 to Bytes.length t - 1 do if Bytes.get t i = '\000' then incr nulls done; !nulls let get_alpha = function | p when not (4 <= p && p <= 16) -> assert false | 4 -> 0.673 | 5 -> 0.697 | 6 -> 0.709 | p -> 0.7213 /. (1.0 +. 1.079 /. float (1 lsl p)) let hll_estimation precision t = let p = Char.code (Bytes.get t 0) in let m = 1 lsl p in let sum = ref 0. in for i = 1 to m do sum := !sum +. 2. ** float (- min (precision-p) (Char.code (Bytes.get t i))) done; get_alpha p *. sqr (float m) /. !sum let linear_counting m nulls = let m = float m and nulls = float nulls in (m *. log (m /. nulls)) let card_hll t = let e = hll_estimation 32 t in let p = Char.code (Bytes.get t 0) in let m = 1 lsl p in if e <= (5.0 /. 2.0) *. float m then ( (* Small range *) match count_nulls t with | 0 -> e | nulls -> linear_counting m nulls ) else if e <= (2.0 ** 32.0) /. 30.0 then ( (* Normal range *) e ) else ( (* Large range *) (-. (2.0 ** 32.0) *. log (1.0 -. e /. (2.0 ** 32.0))) ) (** Estimating cardinality, HyperLogLog++ *) let get_threshold p = Hll_consts.threshold.(p - 4) let get_nearest_neighbors e vec = let distance = Array.mapi (fun idx v -> sqr (e -. v), idx) vec in Array.sort (fun ((a : float),_) (b,_) -> compare a b) distance; Array.init 6 (fun i -> let _, idx = distance.(i) in idx) let estimate_bias e p = let bias_vector = Hll_consts.bias_data.(p - 4) in let nearest_neighbors = get_nearest_neighbors e Hll_consts.raw_estimated_data.(p - 4) in let sum = ref 0. in for i = 0 to Array.length nearest_neighbors - 1 do sum := !sum +. bias_vector.(nearest_neighbors.(i)) done; !sum /. float (Array.length nearest_neighbors) let ep t = let p = Char.code (Bytes.get t 0) in let m = float (1 lsl p) in let e = hll_estimation 64 t in if e <= 5. *. m then e -. estimate_bias e p else e let card_hllpp t = assert (validate t); let p = Char.code (Bytes.get t 0) in let m = (1 lsl p) in match count_nulls t with | 0 -> ep t | nulls -> let h = linear_counting m nulls in if h <= get_threshold p then h else ep t let card = card_hllpp (* Thomas Wang 64-bit integer hashing *) let hash_int64 key = let open Int64 in let (lsr) = shift_right_logical in let (lsl) = shift_left in let not = lognot in let xor = logxor in let key = add (not key) (key lsl 21) in let key = xor key (key lsr 24) in let key = add (add key (key lsl 3)) (key lsl 8) in let key = xor key (key lsr 14) in let key = add (add key (key lsl 2)) (key lsl 4) in let key = xor key (key lsr 28) in let key = add key (key lsl 31) in key let to_string t = assert (1 lsl Char.code (Bytes.get t 0) + 1 = Bytes.length t); Bytes.to_string t let of_string s = let t = Bytes.of_string s in (* t.[0] = 1 lsl length s + 1. Also, it as to be small, so higher bits must be null and could be used to store versioning information in the future. *) if not (validate t) then raise (Invalid_argument "Hll.of_string"); t
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>