package biocaml
The OCaml Bioinformatics Library
Install
Dune Dependency
Authors
Maintainers
Sources
biocaml-0.11.2.tbz
sha256=fae219e66db06f81f3fd7d9e44717ccf2d6d85701adb12004ab4ae6d3359dd2d
sha512=f6abd60dac2e02777be81ce3b5acdc0db23b3fa06731f5b2d0b32e6ecc9305fe64f407bbd95a3a9488b14d0a7ac7c41c73a7e18c329a8f18febfc8fd50eccbc6
doc/src/biocaml.unix/pwm.ml.html
Source file pwm.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
open CFStream type count_matrix = int array array type background = float array type t = float array array let int_of_char = function | 'a' | 'A' -> 0 | 'c' | 'C' -> 1 | 'g' | 'G' -> 2 | 't' | 'T' -> 3 | _ -> 4 let flat_background () = Caml.Array.make 4 0.25 let background_of_sequence seq pc = let counts = Caml.Array.make 4 0 and n = ref 0 in for i = 0 to String.length seq - 1 do let k = int_of_char seq.[i] in if k < 4 then ( counts.(k) <- counts.(k) + 1 ; incr n ) done ; Array.map ~f:(fun c -> (float c +. pc) /. (float !n +. 4. *. pc)) counts let reverse_complement a = let open Array in let n = length a in init n ~f:(fun i -> let r = copy a.(n - 1 - i) in swap r 0 3 ; swap r 1 2 ; r ) let make mat bg = let open Array in map mat ~f:(fun p -> let n = fold ~f:( + ) ~init:0 p in let r = mapi p ~f:(fun i x -> log ((float x +. bg.(i)) /. float n /. bg.(i)) ) in let n_case = Stream.Infix.(0 --^ (Array.length p)) |> Stream.fold ~f:(fun accu i -> accu +. bg.(i) *. r.(i)) ~init:0. in append r [| n_case |]) let tandem ?(orientation = `direct) ~spacer mat1 mat2 bg = Array.concat [ (if Poly.(orientation = `everted) then reverse_complement else Fun.id) (make mat1 bg) ; Array.init spacer ~f:(fun _ -> Caml.Array.make 5 0.) ; (if Poly.(orientation = `inverted) then reverse_complement else Fun.id) (make mat2 bg) ] let gen_scan f init mat seq tol = let r = ref init and n = String.length seq and m = Array.length mat in let seq = Array.init n ~f:(fun i -> int_of_char seq.[i]) in for i = n - m downto 0 do let score = ref 0. in for j = 0 to m - 1 do score := !score +. Array.(unsafe_get (unsafe_get mat j) (unsafe_get seq (i + j))) done ; if Float.(!score > tol) then r := f i !score !r done ; !r let scan = gen_scan (fun pos score l -> (pos, score) :: l) [] let best_hit mat seq = let (pos, _) as r = gen_scan (fun p1 s1 ((_, s2) as r2) -> if Float.(s1 > s2) then (p1, s1) else r2) (-1, Float.neg_infinity) mat seq Float.neg_infinity in if pos < 0 then raise (Invalid_argument "Pwm.best_hit: sequence shorter than the matrix") else r external stub_fast_scan : t -> int array -> float -> (int * float) list = "biocaml_pwm_scan" let fast_scan mat seq tol = let n = String.length seq in let seq = Array.init n ~f:(fun i -> int_of_char seq.[i]) in stub_fast_scan mat seq tol
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>