package biocaml

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file bamstats.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119


type t = {
  total : int ;
  qc_pass : int ; (** [not_passing_quality_controls] returns [false],
                      assumed for all other counts *)
  single_reads : int ; (** [has_multiple_segments] returns [false] *)
  read_pairs : int ; (** [has_multiple_segments] and [first_segment] *)
  mapped_reads : int ; (** [!segment_unmapped] and [!secondary_alignment]
                           and [!supplementary_alignment] *)
  mapped_pairs : int ; (** [has_multiple_segments] and [first_segment]
                           and [each_segment_properly_aligned]
                           and [!secondary_alignment]
                           and [!supplementary_alignment] *)
}
[@@deriving sexp]

let zero = {
  total = 0 ;
  qc_pass = 0 ;
  single_reads = 0 ;
  read_pairs = 0 ;
  mapped_reads = 0 ;
  mapped_pairs = 0 ;
}


let incr_if b i = if b then i + 1 else i

let update_gen s flags =
  let open Sam.Flags in
  let total = s.total + 1 in
  if not_passing_quality_controls flags then
    { s with total }
  else
    let qc_pass = s.qc_pass + 1 in
    let single_reads = incr_if (not (has_multiple_segments flags)) s.single_reads in
    let pair_witness = has_multiple_segments flags && first_segment flags in
    let read_pairs = incr_if pair_witness s.read_pairs in
    let main_mapping =
      not (segment_unmapped flags ||
           secondary_alignment flags ||
           supplementary_alignment flags)
    in
    let mapped_reads = incr_if main_mapping s.mapped_reads in
    let mapped_pairs = incr_if (main_mapping && pair_witness) s.mapped_pairs in
    { total ; qc_pass ; single_reads ; read_pairs ; mapped_reads ; mapped_pairs }

let update s al =
  update_gen s al.Sam.flags

let update0 s al =
  let open Or_error.Monad_infix in
  Bam.Alignment0.flags al
  >>| update_gen s



module Fragment_length_histogram = struct
  type t = {
    min_mapq : int ;
    counts : int Accu.Counter.t ;
  }

  let create ?(min_mapq = 0) () = {
    min_mapq ;
    counts = Accu.Counter.create () ;
  }

  let with_mapped_pair ~min_mapq al f =
    let open Or_error.Monad_infix in
    Bam.Alignment0.flags al >>= fun fl ->
    let multi_segment = Sam.Flags.has_multiple_segments fl in
    let each_segment_properly_aligned = Sam.Flags.each_segment_properly_aligned fl in
    let segment_unmapped = Sam.Flags.segment_unmapped fl in
    let qc_ok = match Bam.Alignment0.mapq al with
      | Some mapq -> mapq >= min_mapq
      | None -> false
    in
    let mapped_fragment =
      multi_segment && not segment_unmapped && each_segment_properly_aligned
    in
    if mapped_fragment && qc_ok then f () else Ok ()

  let update0 { min_mapq ; counts } al =
    with_mapped_pair ~min_mapq al (fun () ->
        let length = match Bam.Alignment0.tlen al with
          | Some l -> Int.abs l
          | _ -> assert false (* both reads are mapped so there should be a length *)
        in
        Accu.Counter.tick counts length ;
        Ok ()
      )

end

module Chr_histogram = struct
  type t = {
    min_mapq : int ;
    bam_header : Bam.Header.t ;
    counts : string Accu.Counter.t ;
  }

  let create ?(min_mapq = 0) bam_header = {
    min_mapq ;
    bam_header ;
    counts = Accu.Counter.create () ;
  }

  let update0 { min_mapq ; counts ; bam_header } al =
    Fragment_length_histogram.with_mapped_pair ~min_mapq al (fun () ->
        match Bam.Alignment0.rname al bam_header with
        | Ok (Some chr) ->
          Accu.Counter.tick counts chr ;
          Ok ()
        | Ok None -> Ok ()
        | Error _ as e -> e
      )
end
OCaml

Innovation. Community. Security.