Source file bin_pred.ml
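(* Evaluation of binary classifiers: confusion matrices, point metrics
   (sensitivity, precision, F1, ...), ROC and precision-recall curves.
   Assumption: the file is compiled with Core_kernel opened (a newer code
   base would open Core instead); Array.fold2_exn, Fn.flip, invalid_argf and
   the Sexp conversions used below all rely on it. *)
open Core_kernel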
type confusion_matrix = {
  tp : int ;
  tn : int ;
  fp : int ;
  fn : int ;
}
[@@deriving sexp]

type curve = (float * confusion_matrix) array
[@@deriving sexp]
let zero = { tp = 0 ; tn = 0 ; fn = 0 ; fp = 0 }
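(* Classify one example at [threshold] and increment the corresponding cell
   of the confusion matrix; a score strictly greater than the threshold
   counts as a positive prediction. *)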
let update accu ~threshold ~score ~label =
  match Float.(threshold < score), label with
  | true, true -> { accu with tp = accu.tp + 1 }
  | true, false -> { accu with fp = accu.fp + 1 }
  | false, true -> { accu with fn = accu.fn + 1 }
  | false, false -> { accu with tn = accu.tn + 1 }
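(* Confusion matrix of a scored predictor over a whole dataset, at a fixed
   decision threshold. [scores] and [labels] must have the same length. *)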
let confusion_matrix ~scores ~labels ~threshold =
  if Array.length scores <> Array.length labels
  then
    invalid_argf
      "Bin_pred.confusion_matrix: scores and labels have different lengths (%d and %d)"
      (Array.length scores) (Array.length labels) () ;
  Array.fold2_exn scores labels ~init:zero ~f:(fun accu score label ->
      update accu ~threshold ~score ~label)
let positive cm = cm.tp + cm.fn
let negative cm = cm.tn + cm.fp
let cardinal cm = cm.tp + cm.tn + cm.fp + cm.fn

(* true when no example was predicted positive *)
let no_positives { tp ; fn = _ ; fp ; tn = _ } =
  tp = 0 && fp = 0
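(* Point metrics derived from a confusion matrix. Each ratio evaluates to nan
   when its denominator is zero. *)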
let sensitivity cm =
  float cm.tp /. float (cm.tp + cm.fn)

let recall = sensitivity

let false_positive_rate cm =
  float cm.fp /. float (cm.fp + cm.tn)

let accuracy cm =
  float (cm.tp + cm.tn) /. float (cardinal cm)

let specificity cm =
  float cm.tn /. float (cm.fp + cm.tn)

let positive_predictive_value cm =
  float cm.tp /. float (cm.tp + cm.fp)

let precision = positive_predictive_value

let negative_predictive_value cm =
  float cm.tn /. float (cm.tn + cm.fn)

let false_discovery_rate cm =
  float cm.fp /. float (cm.fp + cm.tp)

let f1_score cm =
  2. *. float cm.tp /. float (2 * cm.tp + cm.fp + cm.fn)
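(* Confusion matrices at all decision thresholds. Examples are sorted by
   decreasing score and folded in one at a time, starting from the
   all-negative prediction at threshold +infinity. A point is emitted for
   each distinct score s, and its matrix counts every example with score >= s
   as a positive prediction; tied scores therefore share a single point. *)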
let performance_curve ~scores ~labels =
  let n = Array.length scores in
  if n <> Array.length labels
  then
    invalid_argf
      "Bin_pred.performance_curve: scores and labels have different lengths (%d and %d)"
      n (Array.length labels) () ;
  let examples =
    (* (score, label) pairs sorted by decreasing score *)
    let r = Array.map2_exn scores labels ~f:(fun x y -> x, y) in
    Array.sort ~compare:(Fn.flip Poly.compare) r ;
    r
  in
  let np = Array.count labels ~f:ident in            (* number of positive labels *)
  let nn = Array.count labels ~f:(fun x -> not x) in (* number of negative labels *)
  let initial = { tp = 0 ; tn = nn ; fp = 0 ; fn = np } in
  let rec loop acc current_threshold current_matrix i =
    if i = n then
      List.rev ((current_threshold, current_matrix) :: acc)
    else
      let score, label = examples.(i) in
      let acc =
        (* emit a point each time the score strictly decreases *)
        if Float.(score < current_threshold) then
          (current_threshold, current_matrix) :: acc
        else
          acc
      in
      let new_matrix =
        if label then
          { current_matrix with tp = current_matrix.tp + 1 ;
                                fn = current_matrix.fn - 1 }
        else
          { current_matrix with fp = current_matrix.fp + 1 ;
                                tn = current_matrix.tn - 1 }
      in
      loop acc score new_matrix (i + 1)
  in
  loop [] Float.infinity initial 0
  |> Array.of_list
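(* Area under a piecewise-linear curve given as (x, y) points ordered by
   increasing x, obtained by summing trapezoids between consecutive points. *)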
let trapez_area x1 x2 y1 y2 = 0.5 *. (y1 +. y2) *. (x2 -. x1)

let auc points =
  let f ((x1, y1), sum) ((x2, y2) as p) = (p, sum +. trapez_area x1 x2 y1 y2) in
  Array.fold points ~f ~init:(points.(0), 0.) |> snd
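(* ROC curve: (false positive rate, sensitivity) at every threshold, together
   with the area under the curve. *)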
let roc_curve ~scores ~labels =
  let matrices = performance_curve ~scores ~labels in
  let curve =
    Array.map matrices ~f:(fun (_, m) -> false_positive_rate m, sensitivity m)
  in
  let auc = auc curve in
  curve, auc
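(* Average precision as the step-wise sum
   AP = sum_i precision.(i + 1) *. (recall.(i + 1) -. recall.(i))
   over consecutive curve points; the sklearn comparison test below checks
   this estimator against scikit-learn's value. *)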
let average_precision ~precision ~recall =
  let n = Array.length precision in
  Array.init (n - 1) ~f:(fun i ->
      precision.(i + 1) *. (recall.(i + 1) -. recall.(i))
    )
  |> Array.sum (module Float) ~f:Fn.id
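(* Precision-recall curve: (recall, precision) at every threshold, plus its
   average precision. When no example is predicted positive the point is
   reported as (0, 1), which avoids the 0/0 precision at the highest
   threshold. *)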
let recall_precision_curve ~scores ~labels =
  let matrices = performance_curve ~scores ~labels in
  let curve =
    Array.map matrices ~f:(fun (_, m) ->
        if no_positives m then 0., 1. else recall m, precision m
      )
  in
  let recall, precision = Array.unzip curve in
  curve, average_precision ~recall ~precision
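(* Inline tests (ppx_expect / ppx_inline_test): a small hand-checked
   performance curve, a perfectly ranked dataset, and a comparison of the
   average precision against a value computed with scikit-learn. *)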
let%expect_test "performance curve 1" =
  let scores = [| 2.1 ; 1.2 ; 5.6 ; 0. |] in
  let labels = [| true ; false ; true ; false |] in
  let curve = performance_curve ~scores ~labels in
  print_endline (Sexp.to_string_hum (sexp_of_curve curve)) ;
  [%expect "
((INF ((tp 0) (tn 2) (fp 0) (fn 2))) (5.6 ((tp 1) (tn 2) (fp 0) (fn 1)))
(2.1 ((tp 2) (tn 2) (fp 0) (fn 0))) (1.2 ((tp 2) (tn 1) (fp 1) (fn 0)))
(0 ((tp 2) (tn 0) (fp 2) (fn 0))))"]
let%test "rp_curve perfect recognition" =
  let scores = [| 2.1 ; 1.2 ; 5.6 ; 0. |] in
  let labels = [| true ; false ; true ; false |] in
  let _, auc = recall_precision_curve ~scores ~labels in
  Float.(auc = 1.)
let%test "rp_curve against sklearn" =
  let scores = [|
    -0.20078869; 0.30423874; 0.20105976; 0.27523711; 0.42593404;
    -0.15043726; -0.08794601; -0.12733462; 0.22931596; -0.23913518;
    -0.06386267; -0.14958466; -0.04914839; 0.09898417; 0.0515638 ;
    -0.1142941 ; 0.16106135; 0.04871897; -0.08258102; -0.26105668;
    0.24693291; -0.18029058; -0.38384994; 0.26336904; 0.12585371;
    -0.03991278; 0.39424539; 0.42411536; -0.4790443 ; -0.30529061;
    -0.09281931; 0.01213433; -0.20204098; 0.40148935; -0.04536122;
    0.12179099; 0.06493837; -0.07007139; 0.0032915 ; -0.39635676;
    0.02619439; 0.20018683; 0.065023 ; 0.49589616; -0.28221895;
    0.31364573; 0.1906223 ; 0.11549516; 0.03145977; 0.22408591 |] in
  let labels = [|
    true; true; true; true; true; false; true; false; true; false; false; true; false; false; true; false; false; false; true; false; true; false;
    false; true; true; true; true; true; false; false; false; true; false; true; false; true; false; false; false; false; true; true; true; true;
    false; true; true; false; true; false |] in
  let curve, _ = recall_precision_curve ~scores ~labels in
  let recall, precision = Array.unzip curve in
  let ap = average_precision ~recall ~precision in
  Float.robustly_compare ap 0.8783170534965226 = 0