package biocaml

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file transcripts.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
(*
type 'a transcript = {
  exons : (int * int) list;
  lo : int;
  hi : int;
  chr : string;
  info : 'a
}

type 'a t = 'a transcript list

module II = struct
  type t = int * int
  let compare = Pervasives.compare
end

module SIIMap = MMap.Make(String)(II)
module SSMap = MMap.Make(String)(String)

let add_length_to_transcripts transcripts =
  let f trx =
    let length =
      let g acc (lo,hi) = hi - lo + acc in
      List.fold_left g 0 trx.exons
    in
    { trx with info = trx.info,length }
  in
  List.map f transcripts

let of_composite_channel
    ?(chr_map=identity)
    ?(increment_lo_hi=(0,0))
    ic =
  let f acc l =
    let lst = String.nsplit l "\t" in
    let inclo,inchi = increment_lo_hi in
    let (nm,chr,st,fn) =
      List.nth lst 0,
      chr_map (List.nth lst 1),
      int_of_string (List.nth lst 2) + inclo,
      int_of_string (List.nth lst 3) + inchi
    in
    let g (nm,chr,st,fn) prev = match prev with
      | None ->
          {
            exons = [st,fn];
            lo = st;
            hi= fn;
            chr = chr;
            info = nm;
          }
      | Some trx ->
          {
            exons = (st,fn)::(trx.exons);
            lo = if st < trx.lo then st else trx.lo;
            hi = if fn > trx.hi then fn else trx.hi;
            chr = chr;
            info = nm;
          }
    in
    SSMap.add_with nm chr (g (nm,chr,st,fn)) acc
  in
  let ans = In_channel.fold_lines ic ~init:SSMap.empty ~f in
  let folder k1 k2 acc elem = elem::acc in
  let ans = List.rev (SSMap.fold folder [] ans) in
  add_length_to_transcripts ans

let of_composite_file ?(chr_map=identity) ?(increment_lo_hi=(0,0)) file =
  try_finally
    (of_composite_channel ~chr_map ~increment_lo_hi) close_in (open_in file)

let of_bed_channel ?(chr_map=identity) ?(increment_lo_hi=(1,0)) ic =
  let bed = Bed.to_list (Bed.of_channel ~chr_map ~increment_lo_hi ic) in
  let f acc (chr,s,f) =
    {
      exons = [s,f];
      lo = s;
      hi = f;
      chr = chr_map chr;
      info = "";
    }::acc
  in
  let ans = List.rev (List.fold_left f [] bed) in
  add_length_to_transcripts ans

let of_bed_file ?(chr_map=identity) ?(increment_lo_hi=(1,0)) file =
  try_finally (of_bed_channel ~chr_map ~increment_lo_hi) close_in (open_in file)

let of_gff transcript_name_of_exon gff =
  let f transcript_name row prev =
    let lo,hi = row.Gff.pos in
    match prev with
      | None -> {
          exons = [row.Gff.pos];
          lo = lo;
          hi = hi;
          chr = row.Gff.chr;
          info = transcript_name
        }
      | Some prev -> {
          exons = row.Gff.pos::prev.exons;
          lo = if lo < prev.lo then lo else prev.lo;
          hi = if hi > prev.hi then hi else prev.hi;
          chr = if row.Gff.chr = prev.chr then row.Gff.chr
            else failwithf "chromosome of transcript %s changed from %s to %s"
              transcript_name prev.chr row.Gff.chr ();
          info = (assert (transcript_name = prev.info); transcript_name)
        }
  in
  let g ans row = match transcript_name_of_exon row with
    | None -> ans
    | Some transcript_name ->
      StringMap.add_with transcript_name (f transcript_name row) ans
  in
  let ans = Gff.fold g StringMap.empty gff in
  StringMap.fold (fun _ x ans -> x::ans) ans []

let all_probes_in
    (trx_lst:'a t)
    (prbs: (string * int * int * 'b) list)
    : ('a * 'b array) t =
  let insert x prev = match prev with None -> [x] | Some l -> x::l in
  let siimap_of_exons =
    let f acc trx =
      SIIMap.add trx.chr (trx.lo,trx.hi) (trx.exons,trx.info) acc in
    List.fold_left f SIIMap.empty trx_lst
  in
  let stringmap_of_intervaltrees =
    let f acc trx = StringMap.add_with trx.chr (insert (trx.lo,trx.hi)) acc in
    let ans = List.fold_left f StringMap.empty trx_lst in
    StringMap.map IntervalTree.create ans
  in
  let f acc (chr,s,f,v) =
    let itree = StringMap.find chr stringmap_of_intervaltrees in
    let trxs = IntervalTree.within itree (s,f) in
    let g accu trx =
      let (exons,info) = SIIMap.find chr trx siimap_of_exons in
      let g_insert (info,u) prev =
        match prev with
          | None -> info,[v]
          | Some (i,lst) -> (assert (i = info); i,(v::lst))
      in
      match IntervalTree.within (IntervalTree.create exons) (s,f) with
        | [] -> accu
        | a::b -> SIIMap.add_with chr trx (g_insert (info,v)) accu
    in
    List.fold_left g acc trxs
  in
  let ans = List.fold_left f SIIMap.empty prbs in
  let ans = SIIMap.map (fun (info,lst) -> (info,Array.of_list lst)) ans in
  let f acc trx =
    try
      {
        exons = trx.exons;
        lo = trx.lo;
        hi = trx.hi;
        chr = trx.chr;
        info = SIIMap.find trx.chr (trx.lo,trx.hi) ans
      }::acc
    with Not_found -> acc
  in
  List.rev (List.fold_left f [] trx_lst)

let all_points_in
    (trx_lst:'a t)
    (points: (string * int * 'b) list)
    : ('a * 'b array) t =
  let probes = List.map (fun (x,y,z) -> (x,y,y,z)) points in
  all_probes_in trx_lst probes
*)
OCaml

Innovation. Community. Security.