package biocaml
The OCaml Bioinformatics Library
Install
Dune Dependency
Authors
Maintainers
Sources
biocaml-0.11.2.tbz
sha256=fae219e66db06f81f3fd7d9e44717ccf2d6d85701adb12004ab4ae6d3359dd2d
sha512=f6abd60dac2e02777be81ce3b5acdc0db23b3fa06731f5b2d0b32e6ecc9305fe64f407bbd95a3a9488b14d0a7ac7c41c73a7e18c329a8f18febfc8fd50eccbc6
doc/src/biocaml.unix/jaspar.ml.html
Source file jaspar.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
open CFStream

(** Path concatenation shorthand: [dir / file] is [Filename.concat dir file]. *)
let (/) = Filename.concat

(** The collections recognised by {!collection_of_string}. *)
type collection =
  | Core
  | Phylofacts
  | CNE
  | PBM
  | PBM_HOMEO
  | PBM_HLH
  | FAM
  | SPLICE
  | POLII

(** A motif as assembled by {!load} from the three data files of a
    directory. *)
type motif = {
  id : string ;               (** first field of the [MATRIX.txt] record *)
  jaspar_id : string ;        (** third field of the [MATRIX.txt] record *)
  collection : collection ;   (** second field of the [MATRIX.txt] record *)
  factor_name : string ;      (** fifth field of the [MATRIX.txt] record *)
  factor_class : string ;     (** annotation stored under the [class] field *)
  family : string option ;    (** annotation stored under [family], if any *)
  comment : string option ;   (** annotation stored under [comment]; a value
                                  of "-" is reported as [None] *)
  medline : string ;          (** annotation stored under the [medline] field *)
  matrix : int array array ;  (** one inner array per column; see
                                  [load_matrix_data] for the ordering *)
}

(** Converts the textual collection name found in [MATRIX.txt] into a
    {!collection}.

    @raise Failure if the name is not one of the known collections. *)
let collection_of_string = function
  | "CNE" -> CNE
  | "FAM" -> FAM
  | "PHYLOFACTS" -> Phylofacts
  | "CORE" -> Core
  | "PBM" -> PBM
  | "PBM_HOMEO" -> PBM_HOMEO
  | "PBM_HLH" -> PBM_HLH
  | "SPLICE" -> SPLICE
  | "POLII" -> POLII
  (* The message used to read [Jaspa.collection_of_string], which does not
     match the name of this module. *)
  | s -> failwithf "Jaspar.collection_of_string: unknown collection %s" s ()

(** [fold_data_file name ~init ~f] opens file [name], splits every line on
    tab characters and folds [f] over the resulting field lists, starting
    from [init]. The channel is managed by [In_channel.with_file]. *)
let fold_data_file name ~init ~f =
  let add_item accu l =
    let fields = String.split ~on:'\t' (l : Line.t :> string) in
    f accu fields
  in
  In_channel.with_file name ~f:(fun ic ->
      Stream.fold (Lines.of_channel ic) ~init ~f:add_item
    )

(** [load_matrix fn] reads [fn/MATRIX.txt] and returns a map from the first
    field of each record to an object exposing [collection], [jaspar_id] and
    [factor_name]. The fourth field is ignored.

    The [collection] method calls {!collection_of_string} each time it is
    invoked, so an unknown collection name is only reported when the method
    is called, not while the file is being read.

    @raise Assert_failure if a line does not have exactly five
    tab-separated fields. *)
let load_matrix fn =
  fold_data_file
    (fn / "MATRIX.txt")
    ~init:String.Map.empty
    ~f:(fun accu -> function
        | [ db_id ; collection ; jaspar_id ; _ ; factor_name ] ->
          String.Map.set accu ~key:db_id ~data:(object
            method collection = collection_of_string collection
            method jaspar_id = jaspar_id
            method factor_name = factor_name
          end)
        | _ -> assert false
      )

(** [load_matrix_data fn] reads [fn/MATRIX_DATA.txt] and returns a map from
    matrix identifier to its count matrix.

    Every record is expected to have four tab-separated fields: identifier,
    base, column and count. Records are grouped by identifier, then by
    column (in increasing integer order); inside a column the counts are
    ordered by [String.compare] on the base field. Counts are read with
    [Float.of_string] and converted with [Int.of_float].

    @raise Assert_failure if a record does not have exactly four fields. *)
let load_matrix_data fn =
  (* Turns one split line into an object; only the column is converted
     eagerly, the count stays a string until [vector_of_lines]. *)
  let parse = function
    | [ id ; base ; col ; count ] ->
      let col = int_of_string col in
      object
        method id = id
        method base = base
        method col = col
        method count = count
      end
    | _ -> assert false
  in
  (* Builds the count vector of a single column. *)
  let vector_of_lines l =
    List.sort ~compare:(fun l1 l2 -> String.compare l1#base l2#base) l
    |> List.map ~f:(fun l -> Int.of_float (Float.of_string l#count))
    |> Array.of_list
  in
  (* Builds the [(id, matrix)] pair for all the records sharing one
     identifier. [l] is a group produced by [List.group] below, which is
     why [List.hd_exn] is used on it. *)
  let matrix_of_lines l =
    let id = (List.hd_exn l)#id in
    let matrix =
      List.sort l ~compare:(fun x y -> compare x#col y#col)
      |> List.group ~break:(fun l1 l2 -> l1#col <> l2#col)
      |> List.map ~f:vector_of_lines
      |> Array.of_list
    in
    id, matrix
  in
  let data =
    In_channel.with_file (fn / "MATRIX_DATA.txt") ~f:(fun ic ->
        Lines.of_channel ic
        (* NOTE(review): the first line is dropped, presumably a header;
           confirm against the data files. *)
        |> Stream.skip ~n:1
        |> Stream.map ~f:(Line.split ~on:'\t')
        |> Stream.to_list
        (* Sort on the identifier so that [List.group] sees all the records
           of a matrix as one contiguous run. *)
        |> List.sort ~compare:(fun x y -> Poly.compare (List.hd x) (List.hd y))
        |> List.group ~break:Poly.(fun x y -> List.hd x <> List.hd y)
        |> List.map ~f:(List.map ~f:parse)
        |> List.map ~f:matrix_of_lines
      )
  in
  String.Map.of_alist_exn data

(** Pairs of strings, used as [(matrix id, annotation field)] keys. *)
module SS = struct
  include Tuple.Make(String)(String)
  include Tuple.Comparable(String)(String)
end

(** Maps keyed by {!SS} pairs. *)
module SSM = Map.Make(SS)

(** [load_annotation fn] reads [fn/MATRIX_ANNOTATION.txt] and returns a map
    from [(id, field)] to the third field of the record. Extra fields are
    ignored; when a key occurs several times the last record wins.

    @raise Assert_failure if a line has fewer than three fields. *)
let load_annotation fn =
  fold_data_file
    (fn / "MATRIX_ANNOTATION.txt")
    ~init:SSM.empty
    ~f:(fun accu -> function
        | id :: field :: data :: _ ->
          SSM.set accu ~key:(id, field) ~data
        | _ -> assert false
      )

(** [load fn] reads [MATRIX.txt], [MATRIX_DATA.txt] and
    [MATRIX_ANNOTATION.txt] from directory [fn] and returns one {!motif} per
    entry of [MATRIX.txt], as listed by [String.Map.data].

    The [class] and [medline] annotations and the count matrix are looked up
    with [find_exn], so an entry lacking any of them makes the whole call
    raise. [family] and [comment] are optional. *)
let load fn =
  let matrix = load_matrix fn in
  let matrix_data = load_matrix_data fn in
  let annotations = load_annotation fn in
  let res =
    String.Map.mapi matrix ~f:(fun ~key ~data ->
        {
          id = key ;
          jaspar_id = data#jaspar_id ;
          collection = data#collection ;
          factor_name = data#factor_name ;
          factor_class = SSM.find_exn annotations (key, "class") ;
          comment = (
            match SSM.find annotations (key, "comment") with
            | Some "-" -> None
            | x -> x
          ) ;
          family = SSM.find annotations (key, "family") ;
          medline = SSM.find_exn annotations (key, "medline") ;
          matrix = String.Map.find_exn matrix_data key ;
        })
  in
  String.Map.data res
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>