Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file fastq.ml
(* FASTQ records: in-memory representation, Illumina identifier parsing,
   line-level parsers, printing, and reader/writer functions. *)

(** One FASTQ record. [name] and [comment] are stored without their leading
    ['@'] and ['+'] markers (see {!name_of_line} and {!comment_of_line}). *)
type item = {
  name : string;
  sequence : string;
  comment : string;
  qualities : string;
}
[@@deriving sexp]

(** [split_name s] cuts [s] at its first space. Returns [(s, None)] when [s]
    contains no space, otherwise [(before, Some after)]. *)
let split_name s =
  match String.lsplit2 s ~on:' ' with
  | None -> (s, None)
  | Some (x, y) -> (x, Some y)

(******************************************************************************)
(* Illumina-specific formats                                                  *)
(******************************************************************************)
module Illumina = struct
  type surface = [ `Top | `Bottom ]

  type tile = {
    surface : surface;
    swath : int;
    number : int;
  }

  (** [tile_of_string s] parses a tile code of exactly four decimal digits:
      - digit 0 is the surface: ['1'] is [`Top], ['2'] is [`Bottom];
      - digit 1 is the swath and must be ['1'], ['2'] or ['3'];
      - digits 2-3 are the tile number, which must be strictly positive.

      Returns [Error _] describing the first check that fails. *)
  let tile_of_string s =
    let open Result.Monad_infix in
    if String.length s <> 4 || not (String.for_all s ~f:Char.is_digit) then
      error "invalid tile" s sexp_of_string
    else (
      (match s.[0] with
       | '1' -> Ok `Top
       | '2' -> Ok `Bottom
       | x -> error "invalid surface" x sexp_of_char)
      >>= fun surface ->
      (match s.[1] with
       | '1' -> Ok 1
       | '2' -> Ok 2
       | '3' -> Ok 3
       | x -> error "invalid swath" x sexp_of_char)
      >>= fun swath ->
      (let digits = String.sub s ~pos:2 ~len:(String.length s - 2) in
       match Int.of_string digits with
       (* On conversion failure the whole tile string [s] is reported,
          not just the two trailing digits. *)
       | exception Failure _ ->
         error "tile number not an int" s sexp_of_string
       | n when n <= 0 -> error "invalid tile number" n sexp_of_int
       | n -> Ok n)
      >>= fun number ->
      Ok { surface; swath; number })

  (** [tile_to_string t] renders [t] as surface digit, swath, then the tile
      number zero-padded to at least two digits. *)
  let tile_to_string t =
    let surface_char =
      match t.surface with
      | `Top -> '1'
      | `Bottom -> '2'
    in
    sprintf "%c%d%02d" surface_char t.swath t.number

  type sequence_id = {
    instrument : string;
    run_number : int;
    flowcell_id : string;
    lane : int;
    tile : tile;
    x_pos : int;
    y_pos : int;
    read : int;
    is_filtered : bool;
    control_number : int;
    index : string;
  }

  (** [sequence_id_of_string s] parses an identifier made of two
      space-separated groups of colon-separated fields:
      [instrument:run_number:flowcell_id:lane:tile:x_pos:y_pos] followed by
      [read:is_filtered:control_number:index].

      [is_filtered] must be exactly [Y] or [N]. The numeric fields are
      converted with [Int.of_string] and [tile] with {!tile_of_string}.
      Fields are validated left to right and the first failure is returned.
      Any other shape yields an "invalid Illumina sequence identifier"
      error carrying [s]. *)
  let sequence_id_of_string s =
    let open Result.Monad_infix in
    (* [i name value]: integer field parser; [name] only labels the error. *)
    let i name value =
      try Ok (Int.of_string value)
      with Failure _ ->
        error (sprintf "%s not an int" name) value sexp_of_string
    in
    (* [b name value]: Y/N flag parser; [name] only labels the error. *)
    let b name value =
      match value with
      | "Y" -> Ok true
      | "N" -> Ok false
      | _ -> error (sprintf "%s must be Y or N" name) value sexp_of_string
    in
    match String.lsplit2 s ~on:' ' with
    | Some (x, y) -> (
        match (String.split x ~on:':', String.split y ~on:':') with
        | ( [ instrument; run_number; flowcell_id; lane; tile; x_pos; y_pos ],
            [ read; is_filtered; control_number; index ] ) ->
          i "run_number" run_number >>= fun run_number ->
          i "lane" lane >>= fun lane ->
          tile_of_string tile >>= fun tile ->
          i "x_pos" x_pos >>= fun x_pos ->
          i "y_pos" y_pos >>= fun y_pos ->
          i "read" read >>= fun read ->
          b "is_filtered" is_filtered >>= fun is_filtered ->
          i "control_number" control_number >>= fun control_number ->
          Ok
            {
              instrument;
              run_number;
              flowcell_id;
              lane;
              tile;
              x_pos;
              y_pos;
              read;
              is_filtered;
              control_number;
              index;
            }
        | _ -> error "invalid Illumina sequence identifier" s sexp_of_string)
    | _ -> error "invalid Illumina sequence identifier" s sexp_of_string
end

(******************************************************************************)
(* Printing                                                                   *)
(******************************************************************************)

(** [item_to_string r] renders [r] as four newline-terminated lines, adding
    the ['@'] and ['+'] markers back in front of the name and comment. *)
let item_to_string r =
  sprintf "@%s\n%s\n+%s\n%s\n" r.name r.sequence r.comment r.qualities

(******************************************************************************)
(* Parsing                                                                    *)
(******************************************************************************)

(** [name_of_line ?pos line] returns [line] without its first character.
    Fails with "invalid name" (carrying [pos] and the line) when [line] is
    empty or does not start with ['@']. [pos] defaults to [Pos.unknown]. *)
let name_of_line ?(pos = Pos.unknown) line =
  let line = (line : Line.t :> string) in
  let n = String.length line in
  if n = 0 || Char.(line.[0] <> '@') then
    error "invalid name" (pos, line) [%sexp_of: Pos.t * string]
  else Ok (String.sub line ~pos:1 ~len:(n - 1))

(** [sequence_of_line ?pos line] is [line] viewed as a plain string. No
    validation is performed and [pos] is ignored. *)
let sequence_of_line ?pos:_ line = (line : Line.t :> string)

(** [comment_of_line ?pos line] returns [line] without its first character.
    Fails with "invalid comment" (carrying [pos] and the line) when [line]
    is empty or does not start with ['+']. [pos] defaults to
    [Pos.unknown]. *)
let comment_of_line ?(pos = Pos.unknown) line =
  let line = (line : Line.t :> string) in
  let n = String.length line in
  if n = 0 || Char.(line.[0] <> '+') then
    error "invalid comment" (pos, line) [%sexp_of: Pos.t * string]
  else Ok (String.sub line ~pos:1 ~len:(n - 1))

(** [qualities_of_line ?pos ?sequence line] returns [line] as a string. When
    [sequence] is supplied, the two strings must have the same length,
    otherwise an error carrying [pos], the sequence and the line is
    returned. Without [sequence] no check is made. *)
let qualities_of_line ?(pos = Pos.unknown) ?sequence line =
  let line = (line : Line.t :> string) in
  match sequence with
  | None -> Ok line
  | Some sequence ->
    if String.length sequence <> String.length line then
      error "length of sequence and qualities differ" (pos, sequence, line)
        [%sexp_of: Pos.t * string * string]
    else Ok line

(******************************************************************************)
(* Input/Output                                                               *)
(******************************************************************************)
module MakeIO (Future : Future.S) = struct
  open Future

  (** [read_item ic] reads one four-line record from [ic].

      - [`Eof] is returned only when the input ends before the first line.
      - If the input ends after the first line, the result is
        [`Ok (Error _)] with message "incomplete input".
      - A line rejected by its parser yields [`Ok (Error _)]; the remaining
        lines of that record are not read.
      - Qualities are checked against the length of the sequence line. *)
  let read_item ic : item Or_error.t Reader.Read_result.t Deferred.t =
    (* The line parsers return [string Or_error.t] while this function
       returns [item Or_error.t], so failures are rebuilt with [Error e]
       instead of reusing the matched value. *)
    let failed e = return (`Ok (Error e)) in
    let incomplete () =
      return (`Ok (Error (Error.of_string "incomplete input")))
    in
    (* Read one more line of the current record and pass it to [k];
       running out of input here means the record is truncated. *)
    let next_line k =
      Reader.read_line ic >>= function
      | `Eof -> incomplete ()
      | `Ok line -> k (Line.of_string_unsafe line)
    in
    Reader.read_line ic >>= function
    | `Eof -> return `Eof
    | `Ok line -> (
        match name_of_line (Line.of_string_unsafe line) with
        | Error e -> failed e
        | Ok name ->
          next_line (fun line ->
              let sequence = sequence_of_line line in
              next_line (fun line ->
                  match comment_of_line line with
                  | Error e -> failed e
                  | Ok comment ->
                    next_line (fun line ->
                        match qualities_of_line ~sequence line with
                        | Error e -> failed e
                        | Ok qualities ->
                          return
                            (`Ok (Ok { name; sequence; comment; qualities }))))))

  (** [read ic] reads records from [ic] by repeatedly applying
      {!read_item} through [Reader.read_all]. *)
  let read ic = Reader.read_all ic read_item

  (** [write_item w x] writes [x] to [w] as four lines, prefixing the name
      with ['@'] and the comment with ['+']. *)
  let write_item (w : Writer.t) (x : item) : unit Deferred.t =
    let open Writer in
    write_char w '@' >>= fun () ->
    write_line w x.name >>= fun () ->
    write_line w x.sequence >>= fun () ->
    write_char w '+' >>= fun () ->
    write_line w x.comment >>= fun () ->
    write_line w x.qualities

  (** [write w pipe_r] writes every item received from [pipe_r] to [w]. *)
  let write w pipe_r = Pipe.iter pipe_r ~f:(write_item w)

  (** [write_file ?perm ?append file pipe_r] opens [file] with
      [Writer.with_file], forwarding [perm] and [append], and writes every
      item received from [pipe_r] to it. *)
  let write_file ?perm ?append file pipe_r =
    Writer.with_file ?perm ?append file ~f:(fun w -> write w pipe_r)
end

include MakeIO (Future_unix)