Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file mzData.ml
(* File: biocaml_mzXML.ml
Copyright (C) 2011
Christophe Troestler <Christophe.Troestler@umons.ac.be>
WWW: http://math.umons.ac.be/an/software/
This library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License version 3 or
later as published by the Free Software Foundation, with the special
exception on linking described in the file LICENSE.
This library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
LICENSE for more details. *)

(* http://www.umanitoba.ca/afs/plant_science/psgendb/local/install/ncbi_cxx--Jun_15_2010/src/algo/ms/formats/mzdata/mzData.dtd *)

open Bigarray

(* One-dimensional, Fortran-layout bigarray of 64-bit floats. *)
type vec = (float, float64_elt, fortran_layout) Array1.t

(* One-dimensional, Fortran-layout bigarray of native integers. *)
type int_vec = (int, int_elt, fortran_layout) Array1.t

(* Decoding of base64 text into float vectors.  The decoding itself is
   delegated to the external (C) primitives declared below. *)
module Base64 = struct
  external init : unit -> unit = "biocaml_base64_init"

  (* The stubs are initialised once, when this module is loaded. *)
  let () = init ()

  external little32 :
    string -> npeaks:int -> (float, float64_elt, _) Array1.t -> unit
    = "biocaml_base64_little32"

  external big32 :
    string -> npeaks:int -> (float, float64_elt, _) Array1.t -> unit
    = "biocaml_base64_big32"

  external little64 :
    string -> npeaks:int -> (float, float64_elt, _) Array1.t -> unit
    = "biocaml_base64_little64"

  external big64 :
    string -> npeaks:int -> (float, float64_elt, _) Array1.t -> unit
    = "biocaml_base64_big64"

  (* [decode ~precision ~little_endian s] allocates a fresh vector and
     lets the stub matching [precision] (32 or 64) and the endianness
     fill it from the base64 text [s].

     The number of values is computed from the length of [s]: every
     group of 4 characters stands for 3 bytes, and a value occupies 4
     bytes (32 bits) or 8 bytes (64 bits); both divisions truncate.

     @raise Invalid_argument if [precision] is neither 32 nor 64. *)
  let decode ~precision ~little_endian s =
    let quads = String.length s / 4 in
    let bytes_per_peak, fill =
      match precision, little_endian with
      | 32, true -> 4, little32
      | 32, false -> 4, big32
      | 64, true -> 8, little64
      | 64, false -> 8, big64
      | _ -> invalid_arg "MzData: <peak> precision must be 32 or 64"
    in
    let npeaks = quads * 3 / bytes_per_peak in
    let v = Array1.create float64 fortran_layout npeaks in
    fill s ~npeaks v;
    v
end

(* XML helper functions
***********************************************************************)

(* [skip_tag_loop xml depth] reads signals from [xml] and discards
   them.  [depth] is the number of elements opened since the loop was
   entered and not yet closed; the loop stops right after reading an
   end tag while [depth] is 0. *)
let rec skip_tag_loop xml depth =
  match Xmlm.input xml with
  | `El_start _ -> skip_tag_loop xml (depth + 1)
  | `El_end when depth > 0 -> skip_tag_loop xml (depth - 1)
  | `El_end -> ()
  | `Data _ | `Dtd _ -> skip_tag_loop xml depth

(* The start tag is supposed to be already read.  Skip everything up
   to and including the corresponding closing tag. *)
let skip_tag xml = skip_tag_loop xml 0

(* [get_next_data xml] returns the next character data found in [xml].
   Elements opened before any data is seen are skipped entirely.
   @raise Failure if an end tag is read before any data. *)
let rec get_next_data xml =
  match Xmlm.input xml with
  | `Data text ->
    (* No two consecutive `Data signals are expected, so there is
       nothing to concatenate. *)
    text
  | `El_start _ ->
    (* Make sure the corresponding close tag is read as well. *)
    skip_tag xml;
    get_next_data xml
  | `El_end -> failwith "MzData.spectrums: got tag while looking for XML data"
  | `Dtd _ -> get_next_data xml

(* [return_on_end_tag xml v] reads [xml] until an end tag shows up at
   the current nesting level (nested elements are skipped), then
   returns [v] unchanged. *)
let rec return_on_end_tag xml v =
  match Xmlm.input xml with
  | `El_end -> v
  | `El_start _ ->
    skip_tag xml;
    return_on_end_tag xml v
  | `Data _ | `Dtd _ -> return_on_end_tag xml v

(* [attribute_exn name attrs] returns the value of the first attribute
   of [attrs] whose local name is [name]; namespaces are ignored.
   @raise Failure if there is no such attribute. *)
let rec attribute_exn name attrs =
  match attrs with
  | [] -> failwith "MzData.spectrums: attribute not found"
  | ((_, local), value) :: _ when String.equal local name -> value
  | _ :: rest -> attribute_exn name rest

(* mzData parsing
***********************************************************************)

module Precursor = struct
  type t = {
    mslevel: int;   (** 1: MS, 2: MS/MS,... *)
    mz: float;      (** MassToChargeRatio *)
    z: float;       (** ChargeState *)
    int: float;     (** Intensity *)
  }

  (* Commission of atomic weights and isotopic abundance *)
  let mass_proton = 1.00727646677 (* Dalton *)

  (* [mass p] is [(p.mz - mass_proton) * p.z]. *)
  let mass p = (p.mz -. mass_proton) *. p.z

  (* Get <ionSelection> content.  FIXME: the spec does not define which
     param. should be present.

     [get_ionSelection xml p depth] must be called with [depth = 0]
     right after <ionSelection> was read.  It returns [p] updated with
     the recognised <cvParam> values once </ionSelection> has been
     consumed; [depth] tracks the elements opened inside it. *)
  let rec get_ionSelection xml p depth =
    match Xmlm.input xml with
    | `El_start ((_, "cvParam"), attr) ->
      let depth = depth + 1 in (* for </cvParam> *)
      let name = attribute_exn "name" attr in
      let value = attribute_exn "value" attr in
      let p =
        match name with
        | "MassToChargeRatio" -> { p with mz = Float.of_string value }
        | "ChargeState" -> { p with z = Float.of_string value }
        | "Intensity" -> { p with int = Float.of_string value }
        | _ -> p
      in
      get_ionSelection xml p depth
    | `El_start _ -> get_ionSelection xml p (depth + 1)
    | `El_end ->
      if depth = 0 then p (* </ionSelection> *)
      else get_ionSelection xml p (depth - 1)
    | `Data _ | `Dtd _ -> get_ionSelection xml p depth (* skip *)

  (* Knowing that <precursor> was just read, return [p] updated with
     the content of its <ionSelection> children.  The whole element,
     closing </precursor> included, is consumed. *)
  let rec get_precursor xml p =
    match Xmlm.input xml with
    | `El_start ((_, "ionSelection"), _) ->
      (* Keep reading after </ionSelection>: returning here would leave
         the rest of <precursor> (e.g. <activation>) and its closing
         tag unread, and the caller would take </precursor> for the end
         of the list. *)
      get_precursor xml (get_ionSelection xml p 0)
    | `El_start _ ->
      skip_tag xml;
      get_precursor xml p (* <activation> *)
    | `El_end -> p (* </precursor> *)
    | `Data _ | `Dtd _ -> get_precursor xml p

  (* Knowing that <precursorList> was just read, parse the [xml] to
     get the list of precursors, accumulated in front of [pl] (so the
     last precursor read comes first).

     The closing </precursorList> is only peeked at, never consumed:
     the caller still has to read it.
     @raise Failure if a <precursor> has no integer "msLevel". *)
  let rec add_list xml pl =
    match Xmlm.peek xml with
    | `El_end -> pl (* </precursorList>, left in the stream *)
    | `El_start ((_, "precursor"), attr) ->
      ignore (Xmlm.input xml : Xmlm.signal);
      let mslevel = int_of_string (attribute_exn "msLevel" attr) in
      let p =
        get_precursor xml
          { mslevel; mz = Float.nan; z = Float.nan; int = Float.nan }
      in
      add_list xml (p :: pl)
    | `El_start _ ->
      ignore (Xmlm.input xml : Xmlm.signal);
      skip_tag xml;
      add_list xml pl
    | `Data _ | `Dtd _ ->
      ignore (Xmlm.input xml : Xmlm.signal);
      add_list xml pl

  (* [list xml] is [add_list xml []]. *)
  let list xml = add_list xml []
end

(* Data gathered from one <spectrum> element. *)
type spectrum = {
  id: int;                      (* "id" attribute of <spectrum> *)
  mslevel: int;                 (* "msLevel" of <spectrumInstrument> *)
  precursor: Precursor.t list;  (* content of <precursorList> *)
  mz: vec;                      (* decoded <mzArrayBinary> *)
  int: vec;                     (* decoded <intenArrayBinary> *)
  sup: (string * vec) list;
}

(* Parse and decode <data>.

   Signals are discarded until a <data> start tag is read.  Its
   character data is base64-decoded according to the "precision" and
   "endian" attributes, and the matching </data> is consumed before the
   vector is returned.
   @raise Failure if the number of decoded values differs from the
   "length" attribute, or if a required attribute is missing. *)
let rec vec_of_binary_data xml =
  match Xmlm.input xml with
  | `El_start ((_, "data"), atts) ->
    let precision = int_of_string (attribute_exn "precision" atts) in
    let length = int_of_string (attribute_exn "length" atts) in
    let little_endian = String.(attribute_exn "endian" atts = "little") in
    let data = get_next_data xml in
    let v = Base64.decode ~precision ~little_endian data in
    if Array1.dim v <> length then
      failwith (sprintf "MzData: Invalid XML: <data> expected \
                         length: %i, got: %i" length (Array1.dim v));
    return_on_end_tag xml v (* </data> *)
  | _ -> vec_of_binary_data xml

(* [get_spectrum xml spec 0] returns [spec] updated with the content
of the <spectrum> block. *)
(* The <spectrum> start tag must already have been read.  [depth] is
   the number of elements opened inside <spectrum> whose end tag has
   not been read yet; the function returns when an end tag is read at
   depth 0. *)
let rec get_spectrum xml spec depth =
  match Xmlm.input xml with
  | `El_start ((_, "spectrumInstrument"), atts) ->
    let mslevel = int_of_string (attribute_exn "msLevel" atts) in
    let spec = { spec with mslevel } in
    get_spectrum xml spec (depth + 1)
  | `El_start ((_, "precursorList"), _) ->
    let spec = { spec with precursor = Precursor.list xml } in
    (* NOTE(review): [depth + 1] assumes that </precursorList> is still
       to be read when [Precursor.list] returns. *)
    get_spectrum xml spec (depth + 1)
  | `El_start ((_, "mzArrayBinary"), _) ->
    let spec = { spec with mz = vec_of_binary_data xml } in
    (* [vec_of_binary_data] stops after </data>: the end tag of the
       enclosing element is still pending, hence [depth + 1]. *)
    get_spectrum xml spec (depth + 1)
  | `El_start ((_, "intenArrayBinary"), _) ->
    let spec = { spec with int = vec_of_binary_data xml } in
    get_spectrum xml spec (depth + 1)
  (* | `El_start((_, "supDataArray"), _) -> *)
  (* | `El_start((_, "supDataArrayBinary"), _) -> *)
  | `El_start _ -> get_spectrum xml spec (depth + 1)
  | `El_end ->
    if depth = 0 then spec
    else get_spectrum xml spec (depth - 1)
  | _ -> get_spectrum xml spec depth (* skip *)

(* Zero-length vector used to initialise the [mz] and [int] fields. *)
let empty_vec = Array1.create float64 fortran_layout 0

(* [of_file fname] parses the file [fname] and returns one [spectrum]
   per <spectrum> element.  Spectra are accumulated in front of the
   result, so the last one of the file comes first.

   The channel is closed before returning, whether parsing succeeds or
   raises.
   @raise Failure if a <spectrum> lacks an integer "id" attribute (the
   parsing helpers may also raise [Failure]). *)
let of_file fname =
  In_channel.with_file fname ~f:(fun fh ->
    let xml = Xmlm.make_input ~enc:(Some `UTF_8) (`Channel fh) in
    let scans = ref [] in
    while not (Xmlm.eoi xml) do
      match Xmlm.input xml with
      | `El_start ((_, "spectrum"), atts) ->
        let id = int_of_string (attribute_exn "id" atts) in
        (* retentionTime ? *)
        let scan =
          { id; mslevel = 0; precursor = [];
            mz = empty_vec; int = empty_vec; sup = [] }
        in
        let scan = get_spectrum xml scan 0 in
        scans := scan :: !scans
      | _ -> ()
    done;
    !scans)

(* Local Variables: *)
(* compile-command: "make -k -C ../.." *)
(* End: *)