package biocaml

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file wig.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294

open CFStream

type comment = [
| `comment of string
]
[@@deriving sexp]
type variable_step = [
| `variable_step_state_change of string * int option (* name x span *)
| `variable_step_value of int * float
]
[@@deriving sexp]
type fixed_step = [
| `fixed_step_state_change of string * int * int * int option
(* name, start, step, span *)
| `fixed_step_value of float
]
[@@deriving sexp]
type bed_graph_value = string * int * int * float
[@@deriving sexp]

type item = [comment | variable_step | fixed_step | `bed_graph_value of bed_graph_value ]
[@@deriving sexp]

(* `module_error` should progressively allow to “tag” error values. *)
let module_error e = Error (`wig e)

module Tags = struct
  type t = {
    allow_empty_lines: bool;
    sharp_comments: bool;
  }
  [@@deriving sexp]

  let default = {allow_empty_lines = false; sharp_comments = true}

  let to_string t = sexp_of_t t |> Sexplib.Sexp.to_string
  let of_string s =
    try Ok (Sexplib.Sexp.of_string s |> t_of_sexp)
    with e -> module_error (`tags_of_string e)

end

module Error = struct
  type parsing = [
    | `cannot_parse_key_values of Pos.t * string
    | `empty_line of Pos.t
    | `incomplete_input of Pos.t * string list * string option
    | `missing_chrom_value of Pos.t * string
    | `missing_start_value of Pos.t * string
    | `missing_step_value of Pos.t * string
    | `wrong_start_value of Pos.t * string
    | `wrong_step_value of Pos.t * string
    | `unrecognizable_line of Pos.t * string list
    | `wrong_bed_graph_value of Pos.t * string
    | `wrong_fixed_step_value of Pos.t * string
    | `wrong_span_value of Pos.t * string
    | `wrong_variable_step_value of Pos.t * string
  ]
  [@@deriving sexp]

  let parsing_error_to_string =
    let pos () a = Pos.to_string a in
    function
    | `cannot_parse_key_values (p, s) ->
      sprintf "cannot_parse_key_values (%a, %S)" pos p s
    | `empty_line p -> sprintf "empty_line (%a)" pos p
    | `incomplete_input (p, vs, vo) -> (* Pos.t * string list *string option *)
      sprintf "incomplete_input (%a, %s, %S)" pos p
        (String.concat ~sep:"; " vs) (Option.value ~default:"" vo)
    | `missing_chrom_value (p, v) -> (* Pos.t * string *)
      sprintf "missing_chrom_value (%a, %s)" pos p v
    | `missing_start_value (p, v) -> (* Pos.t * string *)
      sprintf "missing_start_value (%a, %s)" pos p v
    | `missing_step_value (p, v) -> (* Pos.t * string *)
      sprintf "missing_step_value (%a, %s)" pos p v
    | `wrong_start_value (p, v) -> (* Pos.t * string *)
      sprintf "wrong_start_value (%a, %s)" pos p v
    | `wrong_step_value (p, v) -> (* Pos.t * string *)
      sprintf "wrong_step_value (%a, %s)" pos p v
    | `unrecognizable_line (p, v) -> (* Pos.t * string list *)
      sprintf "unrecognizable_line (%a, %s)" pos p (String.concat ~sep:" " v)
    | `wrong_bed_graph_value (p, v) -> (* Pos.t * string *)
      sprintf "wrong_bed_graph_value (%a, %s)" pos p v
    | `wrong_fixed_step_value (p, v) -> (* Pos.t * string *)
      sprintf "wrong_fixed_step_value (%a, %s)" pos p v
    | `wrong_span_value (p, v) -> (* Pos.t * string *)
      sprintf "wrong_span_value (%a, %s)" pos p v
    | `wrong_variable_step_value (p, v) -> (* Pos.t * string *)
      sprintf "wrong_variable_step_value (%a, %s)" pos p v

  type to_bed_graph = [`not_in_variable_step_state | `not_in_fixed_step_state]
  [@@deriving sexp]

  type t = [ parsing | to_bed_graph ] [@@deriving sexp]
end

module Transform = struct
  open Result.Monad_infix

  let explode_key_value loc s =
    try
      let by_space =
        String.split_on_chars s ~on:[' '; '\n'; '\t'; '\r']
        |> List.filter ~f:(String.(<>) "") in
      Ok (List.map by_space ~f:(fun s ->
        begin match String.split ~on:'=' s with
        | [ key; value ] -> (key, value)
        | _ -> raise Caml.Not_found
        end))
    with
      Caml.Not_found -> Error (`cannot_parse_key_values (loc, s))

  let rec next ~tags p =
    let open Lines.Buffer in
    let assoc_find ~missing l v =
      match List.Assoc.find ~equal:String.equal l v with | Some v -> Ok v | None -> Error missing in
    let assoc_find_map ~missing ~wrong ~f l v =
      match List.Assoc.find ~equal:String.equal l v with
      | Some v -> (try Ok (f v) with _ -> Error wrong)
      | None -> Error missing in
    match (next_line p :> string option) with
    | Some "" ->
      if tags.Tags.allow_empty_lines
      then `output (Error (`empty_line (current_position p)))
      else next ~tags p
    | Some l when tags.Tags.sharp_comments && String.is_prefix l ~prefix:"#" ->
      `output (Ok (`comment String.(sub l ~pos:1 ~len:(length l - 1))))
    | Some l when String.is_prefix l ~prefix:"fixedStep" ->
      let output_m =
        explode_key_value (current_position p)
          String.(chop_prefix_exn l ~prefix:"fixedStep")
        >>= fun assoc ->
        assoc_find assoc "chrom"
          ~missing:(`missing_chrom_value (current_position p, l))
        >>= fun chrom ->
        assoc_find_map assoc "start"
          ~missing:(`missing_start_value (current_position p, l))
          ~f:Int.of_string ~wrong:(`wrong_start_value (current_position p, l))
        >>= fun start ->
        assoc_find_map assoc "step"
          ~missing:(`missing_step_value (current_position p, l))
          ~f:Int.of_string ~wrong:(`wrong_step_value (current_position p, l))
        >>= fun step ->
        begin match List.Assoc.find ~equal:String.equal assoc "span" with
        | None ->
          Ok (`fixed_step_state_change (chrom, start, step, None))
        | Some span ->
          begin match Option.try_with (fun () -> Int.of_string span) with
          | Some i ->
            Ok (`fixed_step_state_change (chrom, start, step, Some i))
          | None -> Error (`wrong_span_value (current_position p, span))
          end
        end
      in
      `output output_m
    | Some l when String.is_prefix l ~prefix:"variableStep" ->
      let output_m =
        explode_key_value (current_position p)
          String.(chop_prefix_exn l ~prefix:"variableStep")
        >>= fun assoc ->
        assoc_find assoc "chrom"
          ~missing:(`missing_chrom_value (current_position p, l))
        >>= fun chrom ->
        begin match List.Assoc.find ~equal:String.equal assoc "span" with
        | None -> Ok (`variable_step_state_change (chrom, None))
        | Some span ->
          begin match Option.try_with (fun () -> Int.of_string span) with
          | Some i -> Ok (`variable_step_state_change (chrom, Some i))
          | None -> Error (`wrong_span_value (current_position p, span))
          end
        end
      in
      `output output_m
    | Some l ->
      let by_space =
        String.split_on_chars l ~on:[' '; '\n'; '\t'; '\r']
        |> List.filter ~f:(String.(<>) "") in
      begin match by_space with
      | [ one_value ] ->
        (try `output (Ok (`fixed_step_value Float.(of_string one_value)))
         with _ -> `output (Error (`wrong_fixed_step_value (current_position p, l))))
      | [ fst_val; snd_val] ->
        (try `output (Ok (`variable_step_value (Int.of_string fst_val,
                                            Float.of_string snd_val)))
         with _ -> `output (Error (`wrong_variable_step_value (current_position p, l))))
      | [ chr; b; e; v; ] ->
        (try `output (Ok (`bed_graph_value (chr,
                                        Int.of_string b,
                                        Int.of_string e,
                                        Float.of_string v)))
         with _ -> `output (Error (`wrong_bed_graph_value (current_position p, l))))
      | l ->
        `output (Error (`unrecognizable_line (current_position p, l)))
      end
    | None ->
      `not_ready


  let string_to_item ?filename ?(tags=Tags.default) () =
    let name = sprintf "wig_parser:%s" Option.(value ~default:"<>" filename) in
    let next = next ~tags in
    Lines.Transform.make_merge_error ~name ?filename ~next ()


  let item_to_string ?(tags=Tags.default) () =
    let to_string = function
    | `comment c -> if tags.Tags.sharp_comments then sprintf "#%s\n" c else ""
    | `variable_step_state_change (chrom, span) ->
      sprintf "variableStep chrom=%s%s\n" chrom
        Option.(value_map ~default:"" span ~f:(sprintf " span=%d"))
    | `variable_step_value (pos, v) -> sprintf "%d %g\n" pos v
    | `fixed_step_state_change (chrom, start, step, span) ->
      sprintf "fixedStep chrom=%s start=%d step=%d%s\n" chrom start step
        Option.(value_map ~default:"" span ~f:(sprintf " span=%d"))
    | `fixed_step_value v -> sprintf "%g\n" v
    | `bed_graph_value (chrom, start, stop, v) ->
      sprintf "%s %d %d %g\n" chrom start stop v in
    Tfxm.of_function ~name:"wig_to_string" to_string

  let item_to_bed_graph () =
    let queue = Queue.create () in
    let current_state = ref None in
    Tfxm.make ~name:"wig_to_variable_step" ()
      ~feed:(function
      | `comment _ -> ()
      | `bed_graph_value already_done ->
        Queue.enqueue queue (`output (Ok already_done))
      | `variable_step_state_change (chrom, span) ->
        current_state := Some (`variable (chrom, span))
      | `variable_step_value (pos, v) ->
        begin match !current_state with
        | Some (`variable (chrom, span)) ->
          let stop = pos + Option.(value ~default:1 span) - 1 in
          Queue.enqueue queue (`output (Ok  (chrom, pos, stop, v)))
        | _ ->
          Queue.enqueue queue (`output (Error (`not_in_variable_step_state)))
        end
      | `fixed_step_state_change (chrom, start, step, span) ->
        current_state := Some (`fixed (chrom, start, step , span, 0))
      | `fixed_step_value v ->
        begin match !current_state with
        | Some (`fixed (chrom, start, step, span, current)) ->
          let pos = start + (step * current) in
          let stop = pos + Option.(value ~default:1 span) - 1 in
          Queue.enqueue queue (`output (Ok (chrom, pos, stop, v)));
          current_state := Some  (`fixed (chrom, start, step , span, current + 1))
        | _ ->
          Queue.enqueue queue (`output (Error (`not_in_fixed_step_state)))
        end)
      ~next:(fun stopped ->
        match Queue.dequeue queue with
        | None -> if stopped then `end_of_stream else `not_ready
        | Some v -> v)

end

exception Error of  Error.t
let error_to_exn e = Error e

let in_channel_to_item_stream ?(buffer_size=65536) ?filename ?tags inp =
  let x = Transform.string_to_item ?filename ?tags () in
  Tfxm.(in_channel_strings_to_stream inp x ~buffer_size)

let in_channel_to_item_stream_exn ?buffer_size ?filename ?tags inp =
  Stream.result_to_exn ~error_to_exn
    (in_channel_to_item_stream ?filename ?buffer_size ?tags inp)

let in_channel_to_bed_graph ?(buffer_size=65536) ?filename ?tags inp =
  let x = Transform.string_to_item ?filename ?tags () in
  let y = Transform.item_to_bed_graph () in
  Tfxm.(
    compose_results x y ~on_error:(function `left x -> x | `right x -> x)
    |> in_channel_strings_to_stream ~buffer_size inp
  )

let in_channel_to_bed_graph_exn ?buffer_size ?filename ?tags inp =
  Stream.result_to_exn ~error_to_exn
    (in_channel_to_bed_graph ?filename ?buffer_size ?tags inp)


let item_to_string ?(tags=Tags.default) =
  function
  | `comment c -> if tags.Tags.sharp_comments then sprintf "#%s\n" c else ""
  | `variable_step_state_change (chrom, span) ->
    sprintf "variableStep chrom=%s%s\n" chrom
      Option.(value_map ~default:"" span ~f:(sprintf " span=%d"))
  | `variable_step_value (pos, v) -> sprintf "%d %g\n" pos v
  | `fixed_step_state_change (chrom, start, step, span) ->
    sprintf "fixedStep chrom=%s start=%d step=%d%s\n" chrom start step
      Option.(value_map ~default:"" span ~f:(sprintf " span=%d"))
  | `fixed_step_value v -> sprintf "%g\n" v
  | `bed_graph_value (chrom, start, stop, v) ->
    sprintf "%s %d %d %g\n" chrom start stop v
OCaml

Innovation. Community. Security.