Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file gff.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275openCFStream(*
Version 2:
http://www.sanger.ac.uk/resources/software/gff/spec.html
http://gmod.org/wiki/GFF2
Version 3:
http://www.sequenceontology.org/gff3.shtml
http://gmod.org/wiki/GFF3
*)typerecord={seqname:string;source:stringoption;feature:stringoption;pos:int*int;score:floatoption;strand:[`plus|`minus|`not_applicable|`unknown];phase:intoption;attributes:(string*stringlist)list;}[@@derivingsexp]typeitem=[`commentofstring|`recordofrecord][@@derivingsexp](* `module_error` should progressively allow to “tag” error values. *)letmodule_errore=Error(`gffe)moduleTags=structtypet={version:[`two|`three];allow_empty_lines:bool;sharp_comments:bool;}[@@derivingsexp]letdefault={version=`three;allow_empty_lines=false;sharp_comments=true}letto_stringt=sexp_of_tt|>Sexplib.Sexp.to_stringletof_strings=tryOk(Sexplib.Sexp.of_strings|>t_of_sexp)withe->module_error(`tags_of_stringe)endmoduleError=structtypeparsing=[`cannot_parse_floatofPos.t*string|`cannot_parse_intofPos.t*string|`cannot_parse_strandofPos.t*string|`cannot_parse_stringofPos.t*string|`empty_lineofPos.t|`incomplete_inputofPos.t*stringlist*stringoption|`wrong_attributesofPos.t*string|`wrong_rowofPos.t*string|`wrong_url_escapingofPos.t*string][@@derivingsexp]typet=parsing[@@derivingsexp]endmoduleTransform=structopenResult.Monad_infixletparse_stringmsgposi=begintryOk(Scanf.sscanfi"%S "ident)with|_->beginmatch(Scanf.sscanfi"%s "ident)with|""->Error(`cannot_parse_string(pos,msg))|s->Ok(Uri.pct_decodes)endendletparse_string_optmposi=parse_stringmposi>>=funs->beginmatchswith|"."->OkNone|s->Ok(Somes)endletparse_intmsgposi=parse_stringmsgposi>>=funs->(tryOk(Int.of_strings)with_->Error(`cannot_parse_int(pos,msg)))letparse_float_optmsgposi=parse_string_optmsgposi>>=function|Somes->(tryOk(Some(Float.of_strings))with_->Error(`cannot_parse_float(pos,msg)))|None->OkNoneletparse_int_optmsgposi=parse_string_optmsgposi>>=function|Somes->(tryOk(Some(Int.of_strings))with_->Error(`cannot_parse_int(pos,msg)))|None->OkNoneletparse_attributes_version_3positioni=letwhole_thing=String.concat~sep:"\t"iin(* let b = Buffer.create 42 in *)(* String.iter (String.concat ~sep:"\t" i) (function *)(* | ' ' -> Buffer.add_string b "%20" *)(* | c -> Buffer.add_char b c); *)(* Buffer.contents b *)(* in *)letget_csvs=List.map(String.split~on:','s)~f:(funs->parse_string"value"positionString.(strips))|>List.partition_map~f:Result.ok_fst|>(function|(ok,[])->Okok|(_,notok::_)->Errornotok)inletrecloopposacc=beginmatchString.lfindiwhole_thing~pos~f:(fun_c->c='=')with|Someequal->parse_string"tag"position(String.slicewhole_thingposequal)>>=funtag->letpos=equal+1inbeginmatchString.lfindiwhole_thing~pos~f:(fun_c->c=';')with|Somesemicolon->letdelimited=String.slicewhole_thingpossemicoloninget_csvdelimited>>=funvalues->loop(semicolon+1)((tag,values)::acc)|None->letdelimited=String.(subwhole_thing~pos~len:(lengthwhole_thing-pos))inget_csvdelimited>>=funvalues->Ok((tag,values)::acc)end|None->ifpos>=String.lengthwhole_thingthenOkaccelseError(`wrong_attributes(position,whole_thing))endin(tryloop0[]with_->Error(`wrong_attributes(position,whole_thing)))>>|List.revletparse_attributes_version_2positionl=letwhole_thing=String.(concat~sep:"\t"l|>strip)inletparse_stringi=begintrySome(Scanf.bscanfi"%S "ident)with|_->beginmatch(Scanf.bscanfi"%s "ident)with|""->None|s->Somesendendinletinch=Scanf.Scanning.from_stringwhole_thinginlettokens=Stream.(from(fun_->parse_stringinch)|>Fn.flipnpeekInt.max_value)inletrecgo_3_by_3acc=function|k::v::[]->Ok(List.rev((k,[v])::acc))|k::v::";"::rest->go_3_by_3((k,[v])::acc)rest|[]|[";"]->Ok(List.revacc)|_->Error(`wrong_attributes(position,whole_thing))ingo_3_by_3[]tokensletparse_row~versionposs=letfields=String.split~on:'\t'sinbeginmatchfieldswith|seqname::source::feature::start::stop::score::strand::phase::rest->letresult=parse_string"Sequence name"posseqname>>=funseqname->parse_string_opt"Source"possource>>=funsource->parse_string_opt"Feature"posfeature>>=funfeature->parse_int"Start Position"posstart>>=funstart->parse_int"Stop Position"posstop>>=funstop->parse_float_opt"Score"posscore>>=funscore->parse_string_opt"Strand"posstrand>>=(function|Some"+"->Ok`plus|None->Ok`not_applicable|Some"-"->Ok`minus|Some"?"->Ok`unknown|Somes->Error(`cannot_parse_strand(pos,s)))>>=funstrand->parse_int_opt"Phase/Frame"posphase>>=funphase->beginmatchversionwith|`two->parse_attributes_version_2posrest|`three->parse_attributes_version_3posrestend>>=funattributes->Ok(`record{seqname;source;feature;pos=(start,stop);score;strand;phase;attributes})in`outputresult|_->`output(Error(`wrong_row(pos,s)))endletrecnext~tagsp=letopenLines.Bufferinmatch(next_linep:>stringoption)with|None->`not_ready|Some""->iftags.Tags.allow_empty_linesthen`output(Error(`empty_line(current_positionp)))elsenext~tagsp|Somelwhentags.Tags.sharp_comments&&String.(is_prefix(stripl)~prefix:"#")->`output(Ok(`commentString.(subl~pos:1~len:(lengthl-1))))|Somel->parse_row~version:tags.Tags.version(current_positionp)lletstring_to_item?filename~tags()=letname=sprintf"gff_parser:%s"Option.(value~default:"<>"filename)inletnext=next~tagsinLines.Transform.make_merge_error~name?filename~next()letitem_to_string_pureversion=(function|`commentc->sprintf"#%s\n"c|`recordt->letescape=matchversionwith|`three->(funs->Uri.pct_encodes)|`two->sprintf"%S"inletoptescapeo=Option.value_map~default:"."o~f:escapeinString.concat~sep:"\t"[escapet.seqname;optescapet.source;optescapet.feature;sprintf"%d"(fstt.pos);sprintf"%d"(sndt.pos);Option.value_map~default:"."~f:(sprintf"%g")t.score;(matcht.strandwith`plus->"+"|`minus->"-"|`not_applicable->"."|`unknown->"?");Option.value_map~default:"."~f:(sprintf"%d")t.phase;String.concat~sep:";"(List.mapt.attributes~f:(fun(k,v)->matchversionwith|`three->sprintf"%s=%s"(Uri.pct_encodek)(List.mapv~f:Uri.pct_encode|>String.concat~sep:",")|`two->sprintf"%S %s"k(List.mapv~f:escape|>String.concat~sep:",")));]^"\n")letitem_to_string~tags()=Tfxm.of_function~name:"gff_to_string"(item_to_string_puretags.Tags.version)endexceptionErrorofError.tleterror_to_exne=Erroreletin_channel_to_item_stream?(buffer_size=65536)?filename?(tags=Tags.default)inp=letx=Transform.string_to_item~tags?filename()inTfxm.(in_channel_strings_to_streaminpx~buffer_size)letin_channel_to_item_stream_exn?buffer_size?tagsinp=Stream.result_to_exn~error_to_exn(in_channel_to_item_stream?buffer_size?tagsinp)letitem_to_string?(tags=Tags.default)item=Transform.item_to_string_puretags.Tags.versionitem