package biocaml

  1. Overview
  2. Docs
The OCaml Bioinformatics Library

Install

Dune Dependency

Authors

Maintainers

Sources

biocaml-0.11.2.tbz
sha256=fae219e66db06f81f3fd7d9e44717ccf2d6d85701adb12004ab4ae6d3359dd2d
sha512=f6abd60dac2e02777be81ce3b5acdc0db23b3fa06731f5b2d0b32e6ecc9305fe64f407bbd95a3a9488b14d0a7ac7c41c73a7e18c329a8f18febfc8fd50eccbc6

doc/src/biocaml.unix/table.ml.html

Source file table.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146


module Row = struct

  let module_error e = Error (`table_row e)


  type item_type = [`type_int | `type_float | `type_string ] [@@deriving sexp]
  type t_type = item_type array [@@deriving sexp]

  type item = [`int of int | `float of float | `string of string ] [@@deriving sexp]
  type t = item array [@@deriving sexp]

  module Tags = struct

    type t = [
      | `separator of char
      | `strict_about of [ `row_length | `cell_type ]
      | `format of item_type array
    ] list
    [@@deriving sexp]

    let separators tags =
      List.filter_map tags ~f:(function
        | `separator c -> Some c
        | _ -> None)

    let strict_row_length tags =
      List.exists tags ~f:(function `strict_about `row_length -> true | _ -> false)

    let strict_cell_type tags =
      List.exists tags ~f:(function `strict_about `cell_type -> true | _ -> false)

    let format tags =
      List.find_map tags ~f:(function `format f -> Some f | _ -> None)

    let default = [ `separator '\t' ]

    let default_extension tags =
      match separators tags with
      | '\t' :: _ -> "tsv"
      | ',' :: _ -> "csv"
      | _ -> "table"

    let to_string t = sexp_of_t t |> Sexplib.Sexp.to_string
    let of_string s =
      try Ok (Sexplib.Sexp.of_string s |> t_of_sexp)
      with e -> module_error (`tags_of_string e)

  end

  module Error = struct

    type line_parsing =
      [ `wrong_format of
          [ `column_number
          | `float_of_string of string
          | `int_of_string of string ] * t_type * string ]
    [@@deriving sexp]

    type t = line_parsing [@@deriving sexp]

  end

  let of_line ?(separators=[' '; '\t']) ?(strict_row_length=false)
      ?(strict_cell_type=false) ?format line =
    let l = (line : Line.t :> string) in
    let module With_exns = struct
      exception Int_of_string of string
      exception Float_of_string of string
      let int s =
        try Int.of_string s with _ -> raise (Int_of_string s)
      let float s =
        try Float.of_string s with _ -> raise (Float_of_string s)
      let of_line ~format l =
        let tokens =
          String.split_on_chars ~on:separators l |> List.filter ~f:String.((<>) "")
          |> Array.of_list in
        begin match format with
        | None ->
          Ok (Array.map tokens ~f:(fun s -> `string s))
        | Some format ->
          begin try
            if strict_row_length && Array.length format > Array.length tokens
            then Error (`wrong_format (`column_number, format, l))
            else begin
              let row =
                Array.mapi tokens ~f:(fun i tok ->
                  let typ =
                    if strict_cell_type then format.(i)
                    else (try format.(i) with _ -> `type_string) in
                  match typ with
                | `type_int -> `int (int tok)
                | `type_float -> `float (float tok)
                | `type_string -> `string tok) in
              Ok row
            end
          with
          | Invalid_argument _ (* should be the array access *) ->
            Error (`wrong_format (`column_number, format, l))
          | Int_of_string s ->
            Error (`wrong_format (`int_of_string s, format, l))
          | Float_of_string s ->
            Error (`wrong_format (`float_of_string s, format, l))
          end
        end
    end in
    (With_exns.of_line ~format l : (t, _) Result.t)

  let to_line ~sep t =
    let item_to_string = function
      | `int i -> Int.to_string i
      | `float f -> sprintf "%g" f
      | `string s -> s in
    Line.of_string_unsafe
      (String.concat_array ~sep (Array.map t ~f:item_to_string))


  module Transform = struct

    let line_to_item ?(tags: Tags.t = Tags.default) () =
      let separators = Tags.separators tags in
      let strict_row_length = Tags.strict_row_length tags in
      let strict_cell_type = Tags.strict_cell_type tags in
      let format = Tags.format tags in
      Tfxm.on_output
        ~f:begin fun s ->
          of_line ~separators ~strict_row_length ~strict_cell_type ?format
            (s : Lines.item)
          |> begin function
          | Ok o -> Ok o
          | Error e -> Error (`table_row (e : Error.line_parsing))
          end
        end
        (Tfxm.identity ())

    let item_to_line  ?(tags: Tags.t = Tags.default) () =
      let sep =
        Tags.separators tags |> List.hd |> Option.value ~default:'\t'
        |> Char.to_string in
      Tfxm.on_output ~f:(to_line ~sep)
        (Tfxm.identity ())

  end

end
OCaml

Innovation. Community. Security.