package biocaml
Install
Dune Dependency
Authors
Maintainers
Sources
sha256=fae219e66db06f81f3fd7d9e44717ccf2d6d85701adb12004ab4ae6d3359dd2d
sha512=f6abd60dac2e02777be81ce3b5acdc0db23b3fa06731f5b2d0b32e6ecc9305fe64f407bbd95a3a9488b14d0a7ac7c41c73a7e18c329a8f18febfc8fd50eccbc6
doc/biocaml.unix/Biocaml_unix/Sam/index.html
Module Biocaml_unix.Sam
Source
SAM files. Documentation here assumes familiarity with the SAM specification.
Types
Header Types
Header item tags define the different types of header lines. The term "tag" in this context should not be confused with its use in "tag-value" pairs, which comprise the content of header items.
A tag-value pair comprising the content of header items. Tag-value pairs occur in other places too, but this type is specifically for those in the header.
type header_line = private {
version : string;
(*VN
*)sort_order : sort_order option;
(*SO
*)group_order : group_order option;
(*GO
*)
}
@HD. A header consists of different types of lines. Confusingly, one of these types is called the "header line", which is what this type refers to. It does not refer generically to any line within a header.
type ref_seq = private {
name : string;
(*SN
*)length : int;
(*LN
*)assembly : string option;
(*AS
*)md5 : string option;
(*M5
*)species : string option;
(*SP
*)uri : string option;
(*UR
*)
}
@SQ. Reference sequence.
type read_group = private {
id : string;
(*ID
*)seq_center : string option;
(*CN
*)description : string option;
(*DS
*)run_date : [ `Date of string | `Time of string ] option;
(*DT
*)flow_order : string option;
(*FO
*)key_seq : string option;
(*KS
*)library : string option;
(*LB
*)program : string option;
(*PG
*)predicted_median_insert_size : int option;
(*PI
*)platform : platform option;
(*PL
*)platform_unit : string option;
(*PU
*)sample : string option;
(*SM
*)
}
@RG.
type program = private {
id : string;
(*ID
*)name : string option;
(*PN
*)command_line : string option;
(*CL
*)previous_id : string option;
(*PP
*)description : string option;
(*DS
*)version : string option;
(*VN
*)
}
@PG.
type header_item = private [<
| `HD of header_line
| `SQ of ref_seq
| `RG of read_group
| `PG of program
| `CO of string
| `Other of string * tag_value list
]
type header = private {
version : string option;
sort_order : sort_order option;
group_order : group_order option;
ref_seqs : ref_seq list;
read_groups : read_group list;
programs : program list;
comments : string list;
others : (string * tag_value list) list;
}
sort_order: Guaranteed to be None if version = None.
ref_seqs: List of @SQ items. Order matters; it dictates the alignment sorting order when sort_order = `Coordinate.
read_groups: Unordered list of @RG items.
programs: List of @PG lines. Currently unordered, but it should eventually be topologically sorted.
comments: Unordered list of @CO lines.
type cigar_op = private [<
| `Alignment_match of int
| `Insertion of int
| `Deletion of int
| `Skipped of int
| `Soft_clipping of int
| `Hard_clipping of int
| `Padding of int
| `Seq_match of int
| `Seq_mismatch of int
]
CIGAR operations.
type optional_field_value = private [<
| `A of char
| `i of Core.Int64.t
| `f of float
| `Z of string
| `H of string
| `B of char * string list
]
The constructor encodes the TYPE and each carries its corresponding VALUE.
type alignment = private {
qname : string option;
(*QNAME
*)flags : Flags.t;
(*FLAG
*)rname : string option;
(*RNAME
*)pos : int option;
(*POS
*)mapq : int option;
(*MAPQ
*)cigar : cigar_op list;
(*CIGAR
*)rnext : rnext option;
(*RNEXT
*)pnext : int option;
(*PNEXT
*)tlen : int option;
(*TLEN
*)seq : string option;
(*SEQ
*)qual : Phred_score.t list;
(*QUAL
*)optional_fields : optional_field list;
}
For cigar and qual, an empty list indicates that no value (i.e. '*') was given.
include sig ... end
val read :
?start:Pos.t ->
Future_unix.Reader.t ->
(header * alignment Core.Or_error.t Future_unix.Pipe.Reader.t)
Core.Or_error.t
Future_unix.Deferred.t
val write :
Future_unix.Writer.t ->
?header:header ->
alignment Future_unix.Pipe.Reader.t ->
unit Future_unix.Deferred.t
val write_file :
?perm:int ->
?append:bool ->
string ->
?header:header ->
alignment Future_unix.Pipe.Reader.t ->
unit Future_unix.Deferred.t
Low-level Parsers and Constructors
val header_line :
version:string ->
?sort_order:sort_order ->
?group_order:group_order ->
unit ->
header_line Core.Or_error.t
Low-level Header Parsers and Constructors
val ref_seq :
name:string ->
length:int ->
?assembly:string ->
?md5:string ->
?species:string ->
?uri:string ->
unit ->
ref_seq Core.Or_error.t
val read_group :
id:string ->
?seq_center:string ->
?description:string ->
?run_date:string ->
?flow_order:string ->
?key_seq:string ->
?library:string ->
?program:string ->
?predicted_median_insert_size:int ->
?platform:platform ->
?platform_unit:string ->
?sample:string ->
unit ->
read_group Core.Or_error.t
The run_date string will be parsed as a Date.t or Time.t, whichever is possible. If it is a time without a timezone, the local timezone will be assumed.
val header :
?version:string ->
?sort_order:sort_order ->
?group_order:group_order ->
?ref_seqs:ref_seq list ->
?read_groups:read_group list ->
?programs:program list ->
?comments:string list ->
?others:(string * tag_value list) list ->
unit ->
header Core.Or_error.t
Low-level Optional field Parsers and Constructors
Low-level Optional field Parsers and Constructors
val alignment :
?ref_seqs:Core.String.Set.t ->
?qname:string ->
flags:Flags.t ->
?rname:string ->
?pos:int ->
?mapq:int ->
?cigar:cigar_op list ->
?rnext:rnext ->
?pnext:int ->
?tlen:int ->
?seq:string ->
?qual:Phred_score.t list ->
?optional_fields:optional_field list ->
unit ->
alignment Core.Or_error.t
Low-level Alignment Parsers and Constructors
Low-level Printers
Low-level Header Printers
Low-level Alignment Printers