package biocaml
Install
Dune Dependency
Authors
Maintainers
Sources
md5=486aeb3e552dabae85839e2af30d6c52
sha512=4ed2df0b7cbd80bd6e29bd8fee9d2dacd9379ad0f4ff142bd8e16ade3f1507f6cc7cbe4c614943b8feb8fa4705935695cb458606b0da813dbf255b1e566a43cf
doc/biocaml.unix/Biocaml_unix/Sam/index.html
Module Biocaml_unix.Sam
Source
SAM files. Documentation here assumes familiarity with the SAM specification.
Types
Header Types
Header item tags define the different types of header lines. The term "tag" in this context should not be confused with its use in "tag-value" pairs, which comprise the content of header items.
A tag-value pair comprising the content of header items. Tag-value pairs occur in other places too, but this type is specifically for those in the header.
type header_line = private {
version : string;
(*VN
*)sort_order : sort_order option;
(*SO
*)group_order : group_order option;
(*GO
*)
}
@HD. A header consists of different types of lines. Confusingly, one of these types is called the "header line", which is what this type refers to. It does not refer generically to any line within a header.
type ref_seq = private {
name : string;
(*SN
*)length : int;
(*LN
*)assembly : string option;
(*AS
*)md5 : string option;
(*M5
*)species : string option;
(*SP
*)uri : string option;
(*UR
*)
}
@SQ. Reference sequence.
type read_group = private {
id : string;
(*ID
*)seq_center : string option;
(*CN
*)description : string option;
(*DS
*)run_date : [ `Date of string | `Time of string ] option;
(*DT
*)flow_order : string option;
(*FO
*)key_seq : string option;
(*KS
*)library : string option;
(*LB
*)program : string option;
(*PG
*)predicted_median_insert_size : int option;
(*PI
*)platform : platform option;
(*PL
*)platform_unit : string option;
(*PU
*)sample : string option;
(*SM
*)
}
@RG.
type program = private {
id : string;
(*ID
*)name : string option;
(*PN
*)command_line : string option;
(*CL
*)previous_id : string option;
(*PP
*)description : string option;
(*DS
*)version : string option;
(*VN
*)
}
@PG.
type header_item = private [<
| `HD of header_line
| `SQ of ref_seq
| `RG of read_group
| `PG of program
| `CO of string
| `Other of string * tag_value list
]
type header = private {
version : string option;
sort_order : sort_order option;
group_order : group_order option;
ref_seqs : ref_seq list;
read_groups : read_group list;
programs : program list;
comments : string list;
others : (string * tag_value list) list;
}
sort_order: Guaranteed to be None if version = None.
ref_seqs: List of @SQ items. Order matters; it dictates alignment sorting order when sort_order = `Coordinate.
read_groups
: Unordered list of @RG items.
programs
: List of @PG lines. Currently unordered, but we should topologically sort.
comments
: Unordered list of @CO lines.
type cigar_op = private [<
| `Alignment_match of int
| `Insertion of int
| `Deletion of int
| `Skipped of int
| `Soft_clipping of int
| `Hard_clipping of int
| `Padding of int
| `Seq_match of int
| `Seq_mismatch of int
]
CIGAR operations.
type optional_field_value = private [<
| `A of char
| `i of Core_kernel.Int64.t
| `f of float
| `Z of string
| `H of string
| `B of char * string list
]
The constructor encodes the TYPE and each carries its corresponding VALUE.
type alignment = private {
qname : string option;
(*QNAME
*)flags : Flags.t;
(*FLAG
*)rname : string option;
(*RNAME
*)pos : int option;
(*POS
*)mapq : int option;
(*MAPQ
*)cigar : cigar_op list;
(*CIGAR
*)rnext : rnext option;
(*RNEXT
*)pnext : int option;
(*PNEXT
*)tlen : int option;
(*TLEN
*)seq : string option;
(*SEQ
*)qual : Phred_score.t list;
(*QUAL
*)optional_fields : optional_field list;
}
For cigar and qual, an empty list indicates that no value, i.e. '*', was given.
include sig ... end
val read :
?start:Pos.t ->
Future_unix.Reader.t ->
(header * alignment Core_kernel.Or_error.t Future_unix.Pipe.Reader.t)
Core_kernel.Or_error.t
Future_unix.Deferred.t
val write :
Future_unix.Writer.t ->
?header:header ->
alignment Future_unix.Pipe.Reader.t ->
unit Future_unix.Deferred.t
val write_file :
?perm:int ->
?append:bool ->
string ->
?header:header ->
alignment Future_unix.Pipe.Reader.t ->
unit Future_unix.Deferred.t
Low-level Parsers and Constructors
val header_line :
version:string ->
?sort_order:sort_order ->
?group_order:group_order ->
unit ->
header_line Core_kernel.Or_error.t
Low-level Header Parsers and Constructors
val read_group :
id:string ->
?seq_center:string ->
?description:string ->
?run_date:string ->
?flow_order:string ->
?key_seq:string ->
?library:string ->
?program:string ->
?predicted_median_insert_size:int ->
?platform:platform ->
?platform_unit:string ->
?sample:string ->
unit ->
read_group Core_kernel.Or_error.t
The run_date string will be parsed as a Date.t or Time.t, whichever is possible. If it is a time without a timezone, the local timezone will be assumed.
val header :
?version:string ->
?sort_order:sort_order ->
?group_order:group_order ->
?ref_seqs:ref_seq list ->
?read_groups:read_group list ->
?programs:program list ->
?comments:string list ->
?others:(string * tag_value list) list ->
unit ->
header Core_kernel.Or_error.t
Low-level Optional field Parsers and Constructors
Low-level Optional field Parsers and Constructors
val optional_field_value_B :
char ->
string list ->
optional_field_value Core_kernel.Or_error.t
val alignment :
?ref_seqs:Core_kernel.String.Set.t ->
?qname:string ->
flags:Flags.t ->
?rname:string ->
?pos:int ->
?mapq:int ->
?cigar:cigar_op list ->
?rnext:rnext ->
?pnext:int ->
?tlen:int ->
?seq:string ->
?qual:Phred_score.t list ->
?optional_fields:optional_field list ->
unit ->
alignment Core_kernel.Or_error.t
Low-level Alignment Parsers and Constructors
Low-level Printers
Low-level Header Printers
Low-level Alignment Printers