package biocaml

You can search for identifiers within the package.

in-package search v0.2.0

biocaml
- Library biocaml.base
  - Biocaml_base
    
    Bed
    
    Bed3
    
    Bed4
    
    Bed5_raw
    
    Bed5
    
    Fasta
    
    Parser0
    
    Parser
    
    Gff
    
    Line
    
    Lines
    
    Parser
    
    Macs2
    
    Xls
    
    Broad_peaks
    
    Table
    
    Field
    
    Ucsc_genome_browser
- Library biocaml.ez
  - Biocaml_ez
    
    Bam
    
    Header
    
    Alignment0
    
    Bamstats
    
    Fragment_length_histogram
    
    Chr_histogram
    
    Fasta
    
    Fastq
    
    Illumina
    
    Lines
    
    MakeIO
    
    Buffer
    
    Transform
    
    Phred_score
    
    Range
    
    Roman_num
    
    Sam
    
    Flags
    
    MakeIO
    
    Seq_range
    
    Identifier
    
    Make
    
    Strand
- Library biocaml.unix
  - Biocaml_unix
    
    Accu
    
    Counter
    
    Relation
    
    Bins
    
    Bam
    
    Header
    
    Alignment0
    
    Bamstats
    
    Fragment_length_histogram
    
    Chr_histogram
    
    Bar
    
    Bed
    
    Error
    
    Transform
    
    Bgzf
    
    Bin_pred
    
    Biocaml_result
    
    Let_syntax
    
    Monad_infix
    
    Error
    
    Export
    
    Stable
    
    List
    
    Bpmap
    
    Cel
    
    Chr
    
    Error
    
    Entrez
    
    Fetch
    
    Make
    
    Object_id
    
    Dbtag
    
    Gene_ref
    
    PubmedSummary
    
    Pubmed
    
    Gene
    
    Fasta
    
    Fastq
    
    MakeIO
    
    Illumina
    
    File_mapper
    
    Future
    
    S
    
    Deferred
    
    Monad_infix
    
    Let_syntax
    
    Let_syntax
    
    Open_on_rhs
    
    Result
    
    List
    
    Or_error
    
    List
    
    In_thread
    
    Pipe
    
    Reader
    
    Reader
    
    Read_result
    
    Writer
    
    Future_unix
    
    Deferred
    
    Monad_infix
    
    Let_syntax
    
    Let_syntax
    
    Open_on_rhs
    
    Result
    
    List
    
    Or_error
    
    List
    
    In_thread
    
    Pipe
    
    Reader
    
    Reader
    
    Read_result
    
    Writer
    
    GenomeMap
    
    Chromosome
    
    Make
    
    Selection
    
    Signal
    
    LSet
    
    LMap
    
    Gff
    
    Error
    
    Tags
    
    Transform
    
    Histogram
    
    Interval_tree
    
    Iset
    
    Jaspar
    
    Line
    
    Lines
    
    MakeIO
    
    Buffer
    
    Transform
    
    Math
    
    Msg
    
    Tree
    
    MzData
    
    Precursor
    
    Phred_score
    
    Pos
    
    Psl
    
    Error
    
    Transform
    
    Pwm
    
    RSet
    
    Range
    
    Roman_num
    
    Sam
    
    Flags
    
    MakeIO
    
    Sbml
    
    Seq
    
    Seq_range
    
    Identifier
    
    Replace_polymorphic_compare
    
    Map
    
    Key
    
    Provide_of_sexp
    
    Provide_bin_io
    
    Provide_hash
    
    Set
    
    Elt
    
    Named
    
    Provide_of_sexp
    
    Provide_bin_io
    
    Provide_hash
    
    Make
    
    Sgr
    
    Solexa_score
    
    Strand
    
    Table
    
    Row
    
    Tags
    
    Error
    
    Transform
    
    Tfxm
    
    object_t
    
    Track
    
    Error
    
    Transform
    
    Transcripts
    
    Vcf
    
    Transform
    
    Wig
    
    Error
    
    Tags
    
    Transform
    
    Zip
    
    Default
    
    Error
    
    Transform
- Sources
  - biocaml.base
    
    bed.ml
    
    biocaml_base.ml
    
    fasta.ml
    
    gff.ml
    
    line.ml
    
    lines.ml
    
    macs2.ml
    
    table.ml
    
    ucsc_genome_browser.ml
  - biocaml.ez
    
    bam.ml
    
    bamstats.ml
    
    biocaml_ez.ml
    
    fasta.ml
    
    fastq.ml
    
    lines.ml
    
    phred_score.ml
    
    range.ml
    
    roman_num.ml
    
    sam.ml
    
    seq_range.ml
    
    strand.ml
  - biocaml.unix
    
    accu.ml
    
    bam.ml
    
    bamstats.ml
    
    bar.ml
    
    bed.ml
    
    bgzf.ml
    
    bin_pred.ml
    
    biocaml_result.ml
    
    biocaml_unix.ml
    
    bpmap.ml
    
    cel.ml
    
    chr.ml
    
    entrez.ml
    
    fasta.ml
    
    fastq.ml
    
    file_mapper.ml
    
    future.ml
    
    future_unix.ml
    
    genomeMap.ml
    
    gff.ml
    
    histogram.ml
    
    interval_tree.ml
    
    iset.ml
    
    jaspar.ml
    
    line.ml
    
    lines.ml
    
    math.ml
    
    msg.ml
    
    mzData.ml
    
    phred_score.ml
    
    pos.ml
    
    psl.ml
    
    pwm.ml
    
    rSet.ml
    
    range.ml
    
    roman_num.ml
    
    sam.ml
    
    sbml.ml
    
    seq.ml
    
    seq_range.ml
    
    sgr.ml
    
    solexa_score.ml
    
    strand.ml
    
    table.ml
    
    tfxm.ml
    
    track.ml
    
    transcripts.ml
    
    vcf.ml
    
    wig.ml
    
    zip.ml

Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file `bin_pred.ml`

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
(** Outcome counts of a binary classifier on a set of labelled
    examples. An example is "predicted positive" when its score passes
    the decision threshold; its label tells whether it is actually
    positive. *)
type confusion_matrix = {
  tp : int ; (** true positives: predicted positive, labelled [true] *)
  tn : int ; (** true negatives: predicted negative, labelled [false] *)
  fp : int ; (** false positives: predicted positive, labelled [false] *)
  fn : int ; (** false negatives: predicted negative, labelled [true] *)
}
[@@deriving sexp]

(** A sequence of [(threshold, matrix)] points, as built by
    [performance_curve]: each confusion matrix is paired with the
    score threshold it was obtained for. *)
type curve = (float * confusion_matrix) array
[@@deriving sexp]

(** The confusion matrix of an empty set of examples: every count is 0. *)
let zero = { tp = 0 ; tn = 0 ; fp = 0 ; fn = 0 }

(** [update accu ~threshold ~score ~label] returns [accu] with one more
    example counted. The example is predicted positive when [score] is
    strictly greater than [threshold]; [label] is its true class. *)
let update accu ~threshold ~score ~label =
  let predicted_positive = Float.(score > threshold) in
  if predicted_positive then
    if label
    then { accu with tp = accu.tp + 1 }
    else { accu with fp = accu.fp + 1 }
  else if label
  then { accu with fn = accu.fn + 1 }
  else { accu with tn = accu.tn + 1 }

(** [confusion_matrix ~scores ~labels ~threshold] counts the outcomes
    obtained when every example whose score is strictly above
    [threshold] is predicted positive.

    @raise Invalid_argument if [scores] and [labels] have different
    lengths. *)
let confusion_matrix ~scores ~labels ~threshold =
  let n_scores = Array.length scores
  and n_labels = Array.length labels in
  if n_scores <> n_labels
  then
    invalid_argf
      "Bin_pred.confusion_matrix: scores and labels have different lengths (%d and %d)"
      n_scores n_labels () ;
  let count_example cm score label = update cm ~threshold ~score ~label in
  Array.fold2_exn scores labels ~init:zero ~f:count_example

(** Number of examples labelled positive ([tp + fn]). *)
let positive { tp ; fn ; _ } = tp + fn
(** Number of examples labelled negative ([tn + fp]). *)
let negative { tn ; fp ; _ } = tn + fp

(** Total number of examples counted in the matrix. *)
let cardinal { tp ; tn ; fp ; fn } = tp + tn + fp + fn

(** [true] when no example was predicted positive, i.e. both [tp] and
    [fp] are zero (the case where precision would be [0/0]). *)
let no_positives cm =
  cm.tp = 0 && cm.fp = 0

(** True positive rate: [tp / (tp + fn)]. *)
let sensitivity { tp ; fn ; _ } =
  float tp /. float (tp + fn)

(** Recall is another name for [sensitivity]. *)
let recall cm = sensitivity cm

(** False positive rate: [fp / (fp + tn)]. *)
let false_positive_rate { fp ; tn ; _ } =
  float fp /. float (fp + tn)

(** Fraction of examples that were classified correctly:
    [(tp + tn) / cardinal]. *)
let accuracy cm =
  let correct = cm.tp + cm.tn in
  let total = cardinal cm in
  float correct /. float total

(** True negative rate: [tn / (fp + tn)]. *)
let specificity { tn ; fp ; _ } =
  float tn /. float (fp + tn)

(** Fraction of predicted positives that are labelled positive:
    [tp / (tp + fp)]. *)
let positive_predictive_value { tp ; fp ; _ } =
  float tp /. float (tp + fp)

(** Precision is another name for [positive_predictive_value]. *)
let precision cm = positive_predictive_value cm

(** Fraction of predicted negatives that are labelled negative:
    [tn / (tn + fn)]. *)
let negative_predictive_value { tn ; fn ; _ } =
  float tn /. float (tn + fn)

(** Fraction of predicted positives that are labelled negative:
    [fp / (fp + tp)]. *)
let false_discovery_rate { fp ; tp ; _ } =
  float fp /. float (fp + tp)

(** F1 score: [2 tp / (2 tp + fp + fn)]. *)
let f1_score { tp ; fp ; fn ; _ } =
  let numerator = 2. *. float tp in
  let denominator = float (2 * tp + fp + fn) in
  numerator /. denominator

(** [performance_curve ~scores ~labels] sweeps a decision threshold
    over the observed scores, from the highest to the lowest, and
    records the confusion matrix reached at each distinct threshold.

    The sweep starts at threshold [infinity] with every example
    predicted negative. Examples are then visited by decreasing score;
    the matrix paired with a threshold [t] counts as predicted positive
    every example whose score is greater than or equal to [t]. Examples
    sharing the same score contribute to a single point.

    @raise Invalid_argument if [scores] and [labels] have different
    lengths. *)
let performance_curve ~scores ~labels =
  let n = Array.length scores in
  if n <> Array.length labels
  then
    invalid_argf
      "Bin_pred.performance_curve: scores and labels have different lengths (%d and %d)"
      n (Array.length labels) () ;
  (* (score, label) pairs sorted by decreasing score; on equal scores,
     positively labelled examples come first. *)
  let examples =
    let r = Array.map2_exn scores labels ~f:(fun x y -> x, y) in
    Array.sort ~compare:(Fn.flip Poly.compare) r ;
    r
  in
  let np = Array.count labels ~f:Fn.id in
  (* Every label is either positive or negative, and both arrays have
     length [n] at this point. *)
  let nn = n - np in
  (* Above every score nothing is predicted positive: all positives are
     false negatives, all negatives are true negatives. *)
  let initial = { tp = 0 ; tn = nn ; fp = 0 ; fn = np } in
  let rec loop acc current_threshold current_matrix i =
    if i = n then
      List.rev ((current_threshold, current_matrix) :: acc)
    else
      let score, label = examples.(i) in
      (* A strictly lower score closes the current threshold: its
         matrix is complete and can be emitted. *)
      let acc =
        if Float.(score < current_threshold) then
          (current_threshold, current_matrix) :: acc
        else
          acc
      in
      (* Lowering the threshold to [score] turns this example into a
         predicted positive. *)
      let new_matrix =
        if label then
          { current_matrix
            with tp = current_matrix.tp + 1 ;
                 fn = current_matrix.fn - 1 }
        else
          { current_matrix
            with fp = current_matrix.fp + 1 ;
                 tn = current_matrix.tn - 1 }
      in
      loop acc score new_matrix (i + 1)
  in
  loop [] Float.infinity initial 0
  |> Array.of_list

(** Signed area of the trapezoid lying between the x-axis and the
    segment from [(x1, y1)] to [(x2, y2)]. *)
let trapez_area x1 x2 y1 y2 =
  let half_height_sum = 0.5 *. (y1 +. y2) in
  let width = x2 -. x1 in
  half_height_sum *. width

(* Area under the piecewise-linear curve through [points], computed
   with the trapezoidal rule over consecutive points.

   Each step contributes a signed area, so the result is positive when
   x-coordinates are non-decreasing along the array. This is the case
   for the points [roc_curve] passes in: false positive rates can only
   grow as the threshold goes down. An empty array, like a single
   point, has area [0.]. *)
let auc points =
  if Array.length points = 0 then 0.
  else
    (* The accumulator carries the previous point and the running sum;
       seeding it with the first point makes the first step a
       zero-width trapezoid. *)
    let f ((x1,y1), sum) ((x2,y2) as p) = (p, sum +. trapez_area x1 x2 y1 y2) in
    Array.fold points ~f ~init:(points.(0), 0.) |> snd

(** [roc_curve ~scores ~labels] returns the ROC curve, as an array of
    [(false positive rate, sensitivity)] points derived from
    [performance_curve], together with the area under that curve. *)
let roc_curve ~scores ~labels =
  let roc_point (_threshold, cm) = false_positive_rate cm, sensitivity cm in
  let points =
    performance_curve ~scores ~labels
    |> Array.map ~f:roc_point
  in
  points, auc points

(** [average_precision ~precision ~recall] sums, over consecutive
    points of a recall-precision curve, the precision at a point
    weighted by the recall gained since the previous point:
    [sum_i precision.(i + 1) * (recall.(i + 1) - recall.(i))].

    Indices are driven by the length of [precision]. A curve with
    fewer than two points (including an empty one) has no step and
    yields [0.].

    @raise Invalid_argument (out-of-bounds access) if [recall] is
    shorter than [precision]. *)
let average_precision ~precision ~recall =
  let n = Array.length precision in
  (* Terms are accumulated left to right, starting from [0.]. *)
  let rec sum_steps acc i =
    if i >= n - 1 then acc
    else
      let step = precision.(i + 1) *. (recall.(i + 1) -. recall.(i)) in
      sum_steps (acc +. step) (i + 1)
  in
  sum_steps 0. 0

(** [recall_precision_curve ~scores ~labels] returns the
    recall-precision curve, as an array of [(recall, precision)]
    points derived from [performance_curve], together with its average
    precision. A point at which nothing is predicted positive is
    reported as [(0., 1.)]. *)
let recall_precision_curve ~scores ~labels =
  let rp_point (_threshold, cm) =
    if no_positives cm then 0., 1.
    else recall cm, precision cm
  in
  let points = Array.map (performance_curve ~scores ~labels) ~f:rp_point in
  let recall, precision = Array.unzip points in
  points, average_precision ~precision ~recall

(* Four examples with distinct scores, two of each class: the curve
   must start at threshold INF with everything predicted negative,
   then gain one predicted positive per threshold, in decreasing score
   order. *)
let%expect_test "performance curve 1" =
  let scores = [| 2.1 ; 1.2 ; 5.6 ; 0. |] in
  let labels = [| true ; false ; true ; false |] in
  let curve = performance_curve ~scores ~labels in
  print_endline (Sexp.to_string_hum (sexp_of_curve curve)) ;
  [%expect "
    ((INF ((tp 0) (tn 2) (fp 0) (fn 2))) (5.6 ((tp 1) (tn 2) (fp 0) (fn 1)))
     (2.1 ((tp 2) (tn 2) (fp 0) (fn 0))) (1.2 ((tp 2) (tn 1) (fp 1) (fn 0)))
     (0 ((tp 2) (tn 0) (fp 2) (fn 0))))"]

(* Both positives score higher than both negatives, so the average
   precision must be exactly 1. *)
let%test "rp_curve perfect recognition" =
  let labels = [| true ; false ; true ; false |] in
  let scores = [| 2.1 ; 1.2 ; 5.6 ; 0. |] in
  let _curve, ap = recall_precision_curve ~scores ~labels in
  Float.equal ap 1.

(* Regression test on 50 scored examples: the average precision must
   match the reference value 0.8783170534965226 up to float tolerance
   (going by the test name, the reference presumably comes from
   scikit-learn; verify before changing it). The value returned
   alongside the curve is discarded and recomputed from the curve's
   own points through [average_precision]. *)
let%test "rp_curve against sklearn" =
  let scores = [|
    -0.20078869;  0.30423874;  0.20105976;  0.27523711;  0.42593404;
    -0.15043726; -0.08794601; -0.12733462;  0.22931596; -0.23913518;
    -0.06386267; -0.14958466; -0.04914839;  0.09898417;  0.0515638 ;
    -0.1142941 ;  0.16106135;  0.04871897; -0.08258102; -0.26105668;
    0.24693291; -0.18029058; -0.38384994;  0.26336904;  0.12585371;
    -0.03991278;  0.39424539;  0.42411536; -0.4790443 ; -0.30529061;
    -0.09281931;  0.01213433; -0.20204098;  0.40148935; -0.04536122;
    0.12179099;  0.06493837; -0.07007139;  0.0032915 ; -0.39635676;
    0.02619439;  0.20018683;  0.065023  ;  0.49589616; -0.28221895;
    0.31364573;  0.1906223 ;  0.11549516;  0.03145977;  0.22408591 |] in
  let labels = [|
    true; true; true; true; true; false; true; false; true; false; false; true; false; false; true; false; false; false; true; false; true; false;
    false; true; true; true; true; true; false; false; false; true; false; true; false; true; false; false; false; false; true; true; true; true;
    false; true; true; false; true; false |] in
  let curve, _ = recall_precision_curve ~scores ~labels in
  let recall, precision = Array.unzip curve in
  let ap = average_precision ~recall ~precision in
  Float.robustly_compare ap 0.8783170534965226 = 0

package biocaml

Source file bin_pred.ml

Source file `bin_pred.ml`