package biocaml
The OCaml Bioinformatics Library
Install
Dune Dependency
Authors
Maintainers
Sources
biocaml-0.11.2.tbz
sha256=fae219e66db06f81f3fd7d9e44717ccf2d6d85701adb12004ab4ae6d3359dd2d
sha512=f6abd60dac2e02777be81ce3b5acdc0db23b3fa06731f5b2d0b32e6ecc9305fe64f407bbd95a3a9488b14d0a7ac7c41c73a7e18c329a8f18febfc8fd50eccbc6
doc/src/biocaml.unix/bin_pred.ml.html
Source file bin_pred.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
type confusion_matrix = { tp : int ; tn : int ; fp : int ; fn : int ; } [@@deriving sexp] type curve = (float * confusion_matrix) array [@@deriving sexp] let zero = { tp = 0 ; tn = 0 ; fn = 0 ; fp = 0 } let update accu ~threshold ~score ~label = match Float.(threshold < score), label with | true, true -> { accu with tp = accu.tp + 1 } | true, false -> { accu with fp = accu.fp + 1 } | false, true -> { accu with fn = accu.fn + 1 } | false, false -> { accu with tn = accu.tn + 1 } let confusion_matrix ~scores ~labels ~threshold = if Array.length scores <> Array.length labels then invalid_argf "Bin_pred.confusion_matrix: scores and labels have different lengths (%d and %d)" (Array.length scores) (Array.length labels) () ; Array.fold2_exn scores labels ~init:zero ~f:(fun accu score label -> update accu ~threshold ~score ~label) let positive cm = cm.tp + cm.fn let negative cm = cm.tn + cm.fp let cardinal cm = cm.tp + cm.tn + cm.fp + cm.fn let no_positives { tp ; fn = _ ; fp ; tn = _ } = tp = 0 && fp = 0 let sensitivity cm = float cm.tp /. float (cm.tp + cm.fn) let recall = sensitivity let false_positive_rate cm = float cm.fp /. float (cm.fp + cm.tn) let accuracy cm = float (cm.tp + cm.tn) /. float (cardinal cm) let specificity cm = float cm.tn /. float (cm.fp + cm.tn) let positive_predictive_value cm = float cm.tp /. float (cm.tp + cm.fp) let precision = positive_predictive_value let negative_predictive_value cm = float cm.tn /. float (cm.tn + cm.fn) let false_discovery_rate cm = float cm.fp /. float (cm.fp + cm.tp) let f1_score cm = 2. *. float cm.tp /. float (2 * cm.tp + cm.fp + cm.fn) let performance_curve ~scores ~labels = let n = Array.length scores in if n <> Array.length labels then invalid_argf "Bin_pred.make_curve: scores and labels have different lengths (%d and %d)" n (Array.length labels) () ; let examples = let r = Array.map2_exn scores labels ~f:(fun x y -> x, y) in Array.sort ~compare:(Fn.flip Poly.compare) r ; r in let np = Array.count labels ~f:Fun.id in let nn = Array.count labels ~f:(fun x -> not x) in let initial = { tp = 0 ; tn = nn ; fp = 0 ; fn = np } in let rec loop acc current_threshold current_matrix i = if i = n then List.rev ((current_threshold, current_matrix) :: acc) else let score, label = examples.(i) in let acc = if Float.(score < current_threshold) then (current_threshold, current_matrix) :: acc else acc in let new_matrix = if label then { current_matrix with tp = current_matrix.tp + 1 ; fn = current_matrix.fn - 1 } else { current_matrix with fp = current_matrix.fp + 1 ; tn = current_matrix.tn - 1 } in loop acc score new_matrix (i + 1) in loop [] Float.infinity initial 0 |> Array.of_list let trapez_area x1 x2 y1 y2 = 0.5 *. (y1 +. y2) *. (x2 -. x1) (* Assumes [points] is non empty (which is the case if it has been produced by [performance_curve]) and that points come with decreasing x-coordinates. *) let auc points = let f ((x1,y1), sum) ((x2,y2) as p) = (p, sum +. trapez_area x1 x2 y1 y2) in Array.fold points ~f ~init:(points.(0), 0.) |> snd let roc_curve ~scores ~labels = let matrices = performance_curve ~scores ~labels in let curve = Array.map matrices ~f:(fun (_,m) -> false_positive_rate m, sensitivity m) in let auc = auc curve in curve, auc let average_precision ~precision ~recall = let n = Array.length precision in Array.init (n - 1) ~f:(fun i -> precision.(i + 1) *. (recall.(i + 1) -. recall.(i)) ) |> Array.sum (module Float) ~f:Fn.id let recall_precision_curve ~scores ~labels = let matrices = performance_curve ~scores ~labels in let curve = Array.map matrices ~f:(fun (_,m) -> if no_positives m then 0., 1. else recall m, precision m ) in let recall, precision = Array.unzip curve in curve, average_precision ~recall ~precision let%expect_test "performance curve 1" = let scores = [| 2.1 ; 1.2 ; 5.6 ; 0. |] in let labels = [| true ; false ; true ; false |] in let curve = performance_curve ~scores ~labels in print_endline (Sexp.to_string_hum (sexp_of_curve curve)) ; [%expect " ((INF ((tp 0) (tn 2) (fp 0) (fn 2))) (5.6 ((tp 1) (tn 2) (fp 0) (fn 1))) (2.1 ((tp 2) (tn 2) (fp 0) (fn 0))) (1.2 ((tp 2) (tn 1) (fp 1) (fn 0))) (0 ((tp 2) (tn 0) (fp 2) (fn 0))))"] let%test "rp_curve perfect recognition" = let scores = [| 2.1 ; 1.2 ; 5.6 ; 0. |] in let labels = [| true ; false ; true ; false |] in let _, auc = recall_precision_curve ~scores ~labels in Float.(auc = 1.) let%test "rp_curve against sklearn" = let scores = [| -0.20078869; 0.30423874; 0.20105976; 0.27523711; 0.42593404; -0.15043726; -0.08794601; -0.12733462; 0.22931596; -0.23913518; -0.06386267; -0.14958466; -0.04914839; 0.09898417; 0.0515638 ; -0.1142941 ; 0.16106135; 0.04871897; -0.08258102; -0.26105668; 0.24693291; -0.18029058; -0.38384994; 0.26336904; 0.12585371; -0.03991278; 0.39424539; 0.42411536; -0.4790443 ; -0.30529061; -0.09281931; 0.01213433; -0.20204098; 0.40148935; -0.04536122; 0.12179099; 0.06493837; -0.07007139; 0.0032915 ; -0.39635676; 0.02619439; 0.20018683; 0.065023 ; 0.49589616; -0.28221895; 0.31364573; 0.1906223 ; 0.11549516; 0.03145977; 0.22408591 |] in let labels = [| true; true; true; true; true; false; true; false; true; false; false; true; false; false; true; false; false; false; true; false; true; false; false; true; true; true; true; true; false; false; false; true; false; true; false; true; false; false; false; false; true; true; true; true; false; true; true; false; true; false |] in let curve, _ = recall_precision_curve ~scores ~labels in let recall, precision = Array.unzip curve in let ap = average_precision ~recall ~precision in Float.robustly_compare ap 0.8783170534965226 = 0
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>