package biocaml
The OCaml Bioinformatics Library
Install
Dune Dependency
Authors
Maintainers
Sources
biocaml-0.11.2.tbz
sha256=fae219e66db06f81f3fd7d9e44717ccf2d6d85701adb12004ab4ae6d3359dd2d
sha512=f6abd60dac2e02777be81ce3b5acdc0db23b3fa06731f5b2d0b32e6ecc9305fe64f407bbd95a3a9488b14d0a7ac7c41c73a7e18c329a8f18febfc8fd50eccbc6
doc/src/biocaml.unix/gff.ml.html
Source file gff.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
open CFStream (* Version 2: http://www.sanger.ac.uk/resources/software/gff/spec.html http://gmod.org/wiki/GFF2 Version 3: http://www.sequenceontology.org/gff3.shtml http://gmod.org/wiki/GFF3 *) type record = { seqname: string; source: string option; feature: string option; pos: int * int; score: float option; strand: [`plus | `minus | `not_applicable | `unknown ]; phase: int option; attributes: (string * string list) list; } [@@deriving sexp] type item = [ `comment of string | `record of record ] [@@deriving sexp] (* `module_error` should progressively allow to “tag” error values. *) let module_error e = Error (`gff e) module Tags = struct type t = { version: [`two | `three]; allow_empty_lines: bool; sharp_comments: bool; } [@@deriving sexp] let default = {version = `three; allow_empty_lines = false; sharp_comments = true} let to_string t = sexp_of_t t |> Sexplib.Sexp.to_string let of_string s = try Ok (Sexplib.Sexp.of_string s |> t_of_sexp) with e -> module_error (`tags_of_string e) end module Error = struct type parsing = [ `cannot_parse_float of Pos.t * string | `cannot_parse_int of Pos.t * string | `cannot_parse_strand of Pos.t * string | `cannot_parse_string of Pos.t * string | `empty_line of Pos.t | `incomplete_input of Pos.t * string list * string option | `wrong_attributes of Pos.t * string | `wrong_row of Pos.t * string | `wrong_url_escaping of Pos.t * string ] [@@deriving sexp] type t = parsing [@@deriving sexp] end module Transform = struct open Result.Monad_infix let parse_string msg pos i = begin try Ok (Scanf.sscanf i "%S " Fun.id) with | _ -> begin match (Scanf.sscanf i "%s " Fun.id) with | "" -> Error (`cannot_parse_string (pos, msg)) | s -> Ok (Uri.pct_decode s) end end let parse_string_opt m pos i = parse_string m pos i >>= fun s -> begin match s with | "." -> Ok None | s -> Ok (Some s) end let parse_int msg pos i = parse_string msg pos i >>= fun s -> (try Ok (Int.of_string s) with _ -> Error (`cannot_parse_int (pos, msg))) let parse_float_opt msg pos i = parse_string_opt msg pos i >>= function | Some s -> (try Ok (Some (Float.of_string s)) with _ -> Error (`cannot_parse_float (pos, msg))) | None -> Ok None let parse_int_opt msg pos i = parse_string_opt msg pos i >>= function | Some s -> (try Ok (Some (Int.of_string s)) with _ -> Error (`cannot_parse_int (pos, msg))) | None -> Ok None let parse_attributes_version_3 position i = let whole_thing = String.concat ~sep:"\t" i in (* let b = Buffer.create 42 in *) (* String.iter (String.concat ~sep:"\t" i) (function *) (* | ' ' -> Buffer.add_string b "%20" *) (* | c -> Buffer.add_char b c); *) (* Buffer.contents b *) (* in *) let get_csv s = List.map (String.split ~on:',' s) ~f:(fun s -> parse_string "value" position String.(strip s)) |> List.partition_map ~f:Result.to_either |> (function | (ok, []) -> Ok ok | (_, notok :: _) -> Error notok) in let rec loop pos acc = begin match String.lfindi whole_thing ~pos ~f:(fun _ c -> Char.equal c '=') with | Some equal -> parse_string "tag" position (String.slice whole_thing pos equal) >>= fun tag -> let pos = equal + 1 in begin match String.lfindi whole_thing ~pos ~f:(fun _ c -> Char.equal c ';') with | Some semicolon -> let delimited = String.slice whole_thing pos semicolon in get_csv delimited >>= fun values -> loop (semicolon + 1) ((tag, values) :: acc) | None -> let delimited = String.(sub whole_thing ~pos ~len:(length whole_thing - pos)) in get_csv delimited >>= fun values -> Ok ((tag, values) :: acc) end | None -> if pos >= String.length whole_thing then Ok acc else Error (`wrong_attributes (position, whole_thing)) end in (try loop 0 [] with _ -> Error (`wrong_attributes (position, whole_thing))) >>| List.rev let parse_attributes_version_2 position l = let whole_thing = String.(concat ~sep:"\t" l |> strip) in let parse_string i = begin try Some (Scanf.bscanf i "%S " Fun.id) with | _ -> begin match (Scanf.bscanf i "%s " Fun.id) with | "" -> None | s -> Some s end end in let inch = Scanf.Scanning.from_string whole_thing in let tokens = Stream.(from (fun _ -> parse_string inch) |> Fn.flip npeek Int.max_value) in let rec go_3_by_3 acc = function | k :: v :: [] -> Ok (List.rev ((k, [v]) :: acc)) | k :: v :: ";" :: rest -> go_3_by_3 ((k, [v]) :: acc) rest | [] | [";"] -> Ok (List.rev acc) | _ -> Error (`wrong_attributes (position, whole_thing)) in go_3_by_3 [] tokens let parse_row ~version pos s = let fields = String.split ~on:'\t' s in begin match fields with | seqname :: source :: feature :: start :: stop :: score :: strand :: phase :: rest -> let result = parse_string "Sequence name" pos seqname >>= fun seqname -> parse_string_opt "Source" pos source >>= fun source -> parse_string_opt "Feature" pos feature >>= fun feature -> parse_int "Start Position" pos start >>= fun start -> parse_int "Stop Position" pos stop >>= fun stop -> parse_float_opt "Score" pos score >>= fun score -> parse_string_opt "Strand" pos strand >>= (function | Some "+" -> Ok `plus | None -> Ok `not_applicable | Some "-" -> Ok `minus | Some "?" -> Ok `unknown | Some s -> Error (`cannot_parse_strand (pos, s))) >>= fun strand -> parse_int_opt "Phase/Frame" pos phase >>= fun phase -> begin match version with | `two -> parse_attributes_version_2 pos rest | `three -> parse_attributes_version_3 pos rest end >>= fun attributes -> Ok (`record {seqname; source; feature; pos = (start, stop); score; strand; phase; attributes}) in `output result | _ -> `output (Error (`wrong_row (pos, s))) end let rec next ~ p = let open Lines.Buffer in match (next_line p :> string option) with | None -> `not_ready | Some "" -> if tags.Tags.allow_empty_lines then `output (Error (`empty_line (current_position p))) else next ~tags p | Some l when tags.Tags.sharp_comments && String.(is_prefix (strip l) ~prefix:"#") -> `output (Ok (`comment String.(sub l ~pos:1 ~len:(length l - 1)))) | Some l -> parse_row ~version:tags.Tags.version (current_position p) l let string_to_item ?filename ~ () = let name = sprintf "gff_parser:%s" Option.(value ~default:"<>" filename) in let next = next ~tags in Lines.Transform.make_merge_error ~name ?filename ~next () let item_to_string_pure version = (function | `comment c -> sprintf "#%s\n" c | `record t -> let escape = match version with | `three -> (fun s -> Uri.pct_encode s) | `two -> sprintf "%S" in let optescape o = Option.value_map ~default:"." o ~f:escape in String.concat ~sep:"\t" [ escape t.seqname; optescape t.source; optescape t.feature; sprintf "%d" (fst t.pos); sprintf "%d" (snd t.pos); Option.value_map ~default:"." ~f:(sprintf "%g") t.score; (match t.strand with`plus -> "+" | `minus -> "-" | `not_applicable -> "." | `unknown -> "?"); Option.value_map ~default:"." ~f:(sprintf "%d") t.phase; String.concat ~sep:";" (List.map t.attributes ~f:(fun (k,v) -> match version with | `three -> sprintf "%s=%s" (Uri.pct_encode k) (List.map v ~f:Uri.pct_encode |> String.concat ~sep:",") | `two -> sprintf "%S %s" k (List.map v ~f:escape |> String.concat ~sep:",") )); ] ^ "\n" ) let item_to_string ~ () = Tfxm.of_function ~name:"gff_to_string" (item_to_string_pure tags.Tags.version) end exception Error of Error.t let error_to_exn e = Error e let in_channel_to_item_stream ?(buffer_size=65536) ?filename ?(=Tags.default) inp = let x = Transform.string_to_item ~tags ?filename () in Tfxm.(in_channel_strings_to_stream inp x ~buffer_size) let in_channel_to_item_stream_exn ?buffer_size ? inp = Stream.result_to_exn ~error_to_exn (in_channel_to_item_stream ?buffer_size ?tags inp) let item_to_string ?(=Tags.default) item = Transform.item_to_string_pure tags.Tags.version item
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>