Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file fasta.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260moduleResult=Biocaml_resultopenCFStreamtypeheader=stringlisttypeitem={description:string;sequence:string;}typefmt={allow_sharp_comments:bool;allow_semicolon_comments:bool;allow_empty_lines:bool;comments_only_at_top:bool;max_line_length:intoption;alphabet:stringoption;}letdefault_fmt={allow_sharp_comments=true;allow_semicolon_comments=false;allow_empty_lines=false;comments_only_at_top=true;max_line_length=None;alphabet=None;}typeitem0=[|`Commentofstring|`Empty_line|`Descriptionofstring|`Partial_sequenceofstring]letsequence_to_int_lists=String.splits~on:' '|>Result.List.map~f:(funx->tryOk(Int.of_stringx)withFailure_->error"invalid int"xsexp_of_string)(******************************************************************************)(* Low-level Parsing *)(******************************************************************************)letparse_item0?(allow_sharp_comments=true)?(allow_semicolon_comments=false)?(allow_empty_lines=false)?max_line_length?alphabetline=letopenResult.Monad_infixinlets=(line:Line.t:>string)inletn=String.lengthsin(matchmax_line_lengthwith|None->Ok()|Somex->ifx<=nthenOk()elseerror"max_line_length exceeded"(x,n)[%sexp_of:int*int])>>=fun()->ifallow_empty_lines&&(String.for_alls~f:Char.is_whitespace)thenOk`Empty_lineelseif(notallow_empty_lines&&n=0)thenOr_error.error_string"allow_empty_lines is false but got empty line"(* n > 0 if we got here *)elseifs.[0]='>'thenOk(`Description(String.slices1n))elsematchallow_sharp_comments,allow_semicolon_comments,s.[0]with|true,true,(';'|'#')|true,false,'#'|false,true,';'->Ok(`Comment(String.slices1n))|false,false,(';'|'#')->Or_error.error_string"comments lines are not allowed"|_->(matchalphabetwith|None->Ok(`Partial_sequences)|Somealphabet->ifString.for_alls~f:(String.memalphabet)thenOk(`Partial_sequences)else(* TODO: report which character is outside alphabet *)error"sequence contains string outside allowed alphabet"(s,alphabet)[%sexp_of:string*string])(******************************************************************************)(* Input/Output *)(******************************************************************************)moduleLines=Lines.MakeIO(Future_unix)letread0?(start=Pos.(incr_lineunknown))?(allow_sharp_comments=true)?(allow_semicolon_comments=false)?(allow_empty_lines=false)?max_line_length?alphabetr=letpos=refstartinStream.map(Lines.readr)~f:(funline->letcurrent_pos=!posinpos:=Pos.incr_line!pos;parse_item0line~allow_sharp_comments~allow_semicolon_comments~allow_empty_lines?max_line_length?alphabet|>funx->Or_error.tag_argx"position"current_posPos.sexp_of_t)(** Return the initial comment lines. Upon return, [item0s] will point
to first item0 that is not a `Comment, but there may still be
additional `Comment items later. *)letread_header?(allow_empty_lines=false)(item0s:item0Or_error.tStream.t):headerOr_error.t=letrecloopaccum:headerOr_error.t=matchStream.peekitem0swith|Some(Ok(`Commentx))->(Stream.junkitem0s;loop(x::accum))|Some(Ok`Empty_line)->(ifallow_empty_linesthenloopaccumelseOr_error.error_string"allow_empty_lines is false but got empty line in header")|Some(Ok(`Description_))|Some(Ok(`Partial_sequence_))->Okaccum|Some(Error_ase)->e|None->Okaccuminloop[]|>Result.map~f:List.revletread?start?(fmt=default_fmt)r=let{allow_sharp_comments;allow_semicolon_comments;allow_empty_lines;comments_only_at_top;max_line_length;alphabet}=fmtinleterror_strings=Some(Or_error.error_strings)inletitem0s=read0r?start~allow_sharp_comments~allow_semicolon_comments~allow_empty_lines?max_line_length?alphabetinmatchread_header~allow_empty_linesitem0swith|Error_ase->e|Okheader->letrecfdescriptionpartial_seqs:itemOr_error.toption=matchStream.peekitem0swith|Some(Ok(`Comment_))->beginifcomments_only_at_topthenerror_string"comments_only_at_top = true but got comment later"else(Stream.junkitem0s;fdescriptionpartial_seqs)end|Some(Ok`Empty_line)->beginifallow_empty_linesthen(Stream.junkitem0s;fdescriptionpartial_seqs)elseerror_string"allow_empty_lines = false but got empty line"end|Some(Ok(`Descriptionx))->beginmatchdescription,partial_seqswith|None,[]->(Stream.junkitem0s;f(Somex)[])|None,_::_->(* `Partial_sequence branch assures this doesn't happen*)assertfalse|Some_,[]->error_string"previous description line not followed by sequence"|Somedescription,partial_seqs->Some(Ok{description;sequence=partial_seqs|>List.rev|>String.concat~sep:"";})end|Some(Ok(`Partial_sequencex))->beginmatchdescription,partial_seqswith|None,_->error_string"sequence not preceded by description line"|Some_,partial_seqs->(Stream.junkitem0s;fdescription(x::partial_seqs))end|Some(Error_ase)->beginStream.junkitem0s;Someeend|None->beginmatchdescription,partial_seqswith|None,[]->None|None,_::_->(* `Partial_sequence branch assures this doesn't happen*)assertfalse|Some_,[]->error_string"description line not followed by sequence, reached end-of-file"|Somedescription,partial_seqs->Some(Ok{description;sequence=partial_seqs|>List.rev|>String.concat~sep:"";})endinOk(header,Stream.from(fun_->fNone[]))letwith_file?fmtfile~f=letstart=Pos.make~source:file~line:1()inIn_channel.with_filefile~f:(funcin->matchread~start?fmtcinwith|Error_ase->e|Ok(header,strm)->fheaderstrm)