Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file ocsigen_lib.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517(* Ocsigen
* Copyright (C) 2005 Vincent Balat
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, with linking exception;
* either version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*)includeOcsigen_lib_basemoduleString=String_base(*****************************************************************************)moduleIp_address=structexceptionNo_such_hostletget_inet_addr?(v6=false)host=letrecaux=function|[]->Lwt.failNo_such_host|{Unix.ai_addr=Unix.ADDR_INET(inet_addr,_);_}::_->Lwt.returninet_addr|_::l->auxlinletoptions=[ifv6thenLwt_unix.AI_FAMILYLwt_unix.PF_INET6elseLwt_unix.AI_FAMILYLwt_unix.PF_INET]inLwt.bind(Lwt_unix.getaddrinfohost""options)auxletof_sockaddr=function|Unix.ADDR_INET(ip,_port)->ip|_->raise(Ocsigen_Internal_Error"ip of unix socket")end(*****************************************************************************)moduleFilename=structincludeFilenameletbasenamef=letn=String.lengthfinleti=tryString.rindexf'\\'+1withNot_found->0inletj=tryString.rindexf'/'+1withNot_found->0inletk=maxijinifk<nthenString.subfk(n-k)else"none"letextension_no_directoryfilename=tryletpos=String.rindexfilename'.'inString.subfilename(pos+1)((String.lengthfilename)-pos-1)withNot_found->raiseNot_foundletextensionfilename=tryletpos=String.rindexfilename'.'andslash=tryString.rindexfilename'/'withNot_found->-1inifpos>slashthenString.subfilename(pos+1)((String.lengthfilename)-pos-1)else(* Dot before a directory separator *)raiseNot_foundwithNot_found->(* No dot in filename *)raiseNot_foundend(*****************************************************************************)letmake_cryptographic_safe_string=letrng=Cryptokit.Random.device_rng"/dev/urandom"infun()->letrandom_part=letrandom_number=Cryptokit.Random.stringrng20inletto_b64=Cryptokit.Base64.encode_compact()inCryptokit.transform_stringto_b64random_numberandsequential_part=(*VVV Use base 64 also here *)Printf.sprintf"%Lx"(Int64.bits_of_float(Unix.gettimeofday()))inrandom_part^sequential_part(* The string is produced from the concatenation of two components:
a 160-bit random sequence obtained from /dev/urandom, and a
64-bit sequential component derived from the system clock. The
former is supposed to prevent session spoofing. The assumption
is that given the high cryptographic quality of /dev/urandom, it
is impossible for an attacker to deduce the sequence of random
numbers produced. As for the latter component, it exists to
prevent a theoretical (though infinitesimally unlikely) session
ID collision if the server were to be restarted.
*)moduleNetstring_pcre=structletregexps=Pcre.regexp~flags:[`MULTILINE]slettempl_re=Pcre.regexp"(?:\\\\\\d)|[\\$\\\\]";;lettr_templs=(* Convert \n to $n etc. *)(* Unfortunately we cannot just replace \ by $. *)letrectrl=matchlwithPcre.Delim"$"::l'->"$$"::trl'|Pcre.Delim"\\"::Pcre.Delim"$"::l'->"$$"::trl'|Pcre.Delim"\\"::Pcre.Delims::l'->s::trl'|Pcre.Delim"\\"::Pcre.Texts::l'->s::trl'|[Pcre.Delim"\\"]->failwith"trailing backslash"|Pcre.Delimd::l'->assert(d.[0]='\\');letn=Char.coded.[1]-Char.code'0'inifn=0then"$&"::trl'else("$"^string_of_intn^"$!")::trl'|Pcre.Textt::l'->t::trl'|Pcre.Group(_,_)::_->assertfalse|Pcre.NoGroup::_->assertfalse|[]->[]inletl=Pcre.full_split~rex:templ_re~max:(-1)sinString.concat""(trl)letmatched_groupresultn_=ifn<0||n>=Pcre.num_of_subsresultthenraiseNot_found;ignore(Pcre.get_substring_ofsresultn);Pcre.get_substringresultnletmatched_stringresult_=ignore(Pcre.get_substring_ofsresult0);Pcre.get_substringresult0letglobal_replacepattempls=Pcre.replace~rex:pat~itempl:(Pcre.subst(tr_templtempl))sletglobal_substitutepatsubsts=Pcre.substitute_substrings~rex:pat~subst:(funr->substrs)sletsearch_forwardpatspos=letresult=Pcre.exec~rex:pat~possinfst(Pcre.get_substring_ofsresult0),resultletstring_aftersn=String.subsn(String.lengths-n)letbounded_splitexprtextnum=letstart=tryletstart_substrs=Pcre.exec~rex:expr~flags:[`ANCHORED]textin(* or Not_found *)let(_,match_end)=Pcre.get_substring_ofsstart_substrs0inmatch_endwithNot_found->0inletrecsplitstartn=ifstart>=String.lengthtextthen[]elseifn=1then[string_aftertextstart]elsetryletnext_substrs=Pcre.exec~rex:expr~pos:starttextin(* or Not_found *)letpos,match_end=Pcre.get_substring_ofsnext_substrs0inString.subtextstart(pos-start)::splitmatch_end(n-1)withNot_found->[string_aftertextstart]insplitstartnumletsplitseps=bounded_splitseps0letstring_matchpatspos=tryletresult=Pcre.exec~rex:pat~flags:[`ANCHORED]~possinSomeresultwithNot_found->NoneendmoduleUrl=structincludeUrl_base(* Taken from Neturl version 1.1.2 *)letproblem_re1=Netstring_pcre.regexp"[ <>\"{}|\\\\^\\[\\]`]"letfixup_url_string1=Netstring_pcre.global_substituteproblem_re1(funms->Printf.sprintf"%%%02x"(Char.codes.[fst(Pcre.get_substring_ofsm0)]))(* I add this fixup to handle %uxxxx sent by browsers.
Translated to %xx%xx *)letproblem_re2=Netstring_pcre.regexp"\\%u(..)(..)"letfixup_url_strings=fixup_url_string1(Netstring_pcre.global_substituteproblem_re2(funms->String.concat""["%";Netstring_pcre.matched_groupm1s;"%";Netstring_pcre.matched_groupm2s])s)(*VVV This is in Netencoding but we have a problem with ~
(not encoded by browsers). Here is a patch that does not encode '~': *)moduleMyUrl=structletpercent_encode=letlengths=letl=Array.make2563inString.iter(func->l.(Char.codec)<-1)(* Unreserved Characters (section 2.3 of RFC 3986) *)"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~";linfuns->letl=String.lengthsinletl'=ref0infori=0tol-1dol':=!l'+lengths.(Char.codes.[i])done;ifl=!l'thenselselets'=Bytes.create!l'inletj=ref0inlethex="0123456789ABCDEF"infori=0tol-1doletc=s.[i]inletn=Char.codes.[i]inletd=lengths.(n)inifd=1thenBytes.sets'!jcelsebeginBytes.sets'!j'%';Bytes.sets'(!j+1)hex.[nlsr4];Bytes.sets'(!j+2)hex.[nland0xf]end;j:=!j+ddone;Bytes.unsafe_to_strings'letencode_plus=letlengths=letl=Array.make2563inString.iter(func->l.(Char.codec)<-1)(* Unchanged characters + space (HTML spec) *)"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.* ";linfuns->letl=String.lengthsinletl'=ref0infori=0tol-1dol':=!l'+lengths.(Char.codes.[i])done;lets'=Bytes.create!l'inletj=ref0inlethex="0123456789ABCDEF"infori=0tol-1doletc=s.[i]inletn=Char.codes.[i]inletd=lengths.(n)inifd=1thenBytes.sets'!j(ifc=' 'then'+'elsec)elsebeginBytes.sets'!j'%';Bytes.sets'(!j+1)hex.[nlsr4];Bytes.sets'(!j+2)hex.[nland0xf]end;j:=!j+ddone;Bytes.unsafe_to_strings'letencode?(plus=true)s=ifplusthenencode_plusselsepercent_encodesendleturl_decoding_re=Netstring_pcre.regexp"\\+\\|%..\\|%.\\|%";;letof_hex1c=matchcwith|('0'..'9')->Char.codec-Char.code'0'|('A'..'F')->Char.codec-Char.code'A'+10|('a'..'f')->Char.codec-Char.code'a'+10|_->raiseNot_foundletencode=MyUrl.encodeletdecode?(plus=true)s=letpos=0andlen=Noneinlets_l=String.lengthsinlets1=ifpos=0&&len=Nonethenselseletlen=matchlenwithSomen->n|None->s_linString.subsposleninletl=String.lengths1inNetstring_pcre.global_substituteurl_decoding_re(funr_->matchNetstring_pcre.matched_stringrs1with|"+"->ifplusthen" "else"+"|_->leti=fst(Pcre.get_substring_ofsr0)in(* Assertion: s1.[i] = '%' *)ifi+2>=lthenfailwith"decode";letc1=s1.[i+1]inletc2=s1.[i+2]inbegintryletk1=of_hex1c1inletk2=of_hex1c2inString.make1(Char.chr((k1lsl4)lork2))withNot_found->failwith"decode"end)s1letmake_encoded_parametersparams=String.concat"&"(List.map(fun(name,value)->encodename^"="^encodevalue)params)letstring_of_url_path~encodel=ifencodethenfixup_url_string(String.concat"/"(List.map(*Netencoding.Url.encode*)(MyUrl.encode~plus:false)l))(* ' ' are not encoded to '+' in paths *)elseString.concat"/"l(* BYXXX : check illicit characters *)leturl_split_re=Str.regexp"[&=]";;(* taken from Ocamlnet 4.1.2 *)letdest_url_encoded_parametersparstr=letrecparse_after_amptl=matchtlwith|Str.Textname::Str.Delim"="::Str.Textvalue::tl'->(decodename,decodevalue)::parse_nexttl'|Str.Textname::Str.Delim"="::Str.Delim"&"::tl'->(decodename,"")::parse_after_amptl'|Str.Textname::Str.Delim"="::[]->[decodename,""]|_->failwith"dest_url_encoded_parameters"andparse_nexttl=matchtlwith|[]->[]|Str.Delim"&"::tl'->parse_after_amptl'|_->failwith"dest_url_encoded_parameters"inlettoklist=Str.full_spliturl_split_reparstrinmatchtoklistwith|[]->[]|_->parse_after_amptoklistletparse=(* We do not accept http://login:pwd@host:port (should we?). *)leturl_re=Netstring_pcre.regexp"^([Hh][Tt][Tt][Pp][Ss]?)://([0-9a-zA-Z.-]+|\\[[0-9A-Fa-f:.]+\\])(:([0-9]+))?/([^\\?]*)(\\?(.*))?$"inletshort_url_re=Netstring_pcre.regexp"^/([^\\?]*)(\\?(.*))?$"in(* let url_relax_re = Netstring_pcre.regexp "^[Hh][Tt][Tt][Pp][Ss]?://[^/]+" in
*)funurl->letmatch_re=Netstring_pcre.string_matchurl_reurl0inlet(https,host,port,pathstring,query)=matchmatch_rewith|None->(matchNetstring_pcre.string_matchshort_url_reurl0with|None->raiseOcsigen_Bad_Request|Somem->letpath=fixup_url_string(Netstring_pcre.matched_groupm1url)inletquery=trySome(fixup_url_string(Netstring_pcre.matched_groupm3url))withNot_found->Nonein(None,None,None,path,query))|Somem->letpath=fixup_url_string(Netstring_pcre.matched_groupm5url)inletquery=trySome(fixup_url_string(Netstring_pcre.matched_groupm7url))withNot_found->Noneinlethttps=try(matchNetstring_pcre.matched_groupm1urlwith|"http"->Somefalse|"https"->Sometrue|_->None)withNot_found->Noneinlethost=trySome(Netstring_pcre.matched_groupm2url)withNot_found->Noneinletport=trySome(int_of_string(Netstring_pcre.matched_groupm4url))withNot_found->Nonein(https,host,port,path,query)in(* Note that the fragment (string after #) is not sent by browsers *)(*20110707 ' ' is encoded to '+' in queries, but not in paths.
Warning: if we write the URL manually, we must encode ' ' to '+' manually
(not done by the browser).
--Vincent
*)letget_params=lazybeginletparams_string=matchquerywithNone->""|Somes->sintrydest_url_encoded_parametersparams_stringwithFailure_->raiseOcsigen_Bad_Requestendinletpath=List.map(decode~plus:false)(split_pathpathstring)inletpath=remove_dotdotpath(* and remove "//" *)(* here we remove .. from paths, as it is dangerous.
But in some very particular cases, we may want them?
I prefer forbid that. *)inleturi_string=matchquerywith|None->pathstring|Somes->String.concat"?"[pathstring;s]in(https,host,port,uri_string,path,query,get_params)letprefix_and_path_of_turl=let(https,host,port,_,path,_,_)=parseurlinlethttps_str=matchhttpswith|None->""|Somex->ifxthen"https://"else"http://"inlethost_str=matchhostwith|None->""|Somex->xinletport_str=matchportwith|None->""|Somex->string_of_intxin(https_str^host_str^":"^port_str,path)endmoduleDate=structletname_of_day=function|0->"Sun"|1->"Mon"|2->"Tue"|3->"Wed"|4->"Thu"|5->"Fri"|6->"Sat"|_->failwith"name_of_day"letname_of_month=function|0->"Jan"|1->"Feb"|2->"Mar"|3->"Apr"|4->"May"|5->"Jun"|6->"Jul"|7->"Aug"|8->"Sep"|9->"Oct"|10->"Nov"|11->"Dec"|_->failwith"name_of_month"letto_stringd=let{Unix.tm_wday;tm_mday;tm_mon;tm_year;tm_hour;tm_min;tm_sec;_}=Unix.gmtimedinPrintf.sprintf"%s, %02d %s %d %02d:%02d:%02d GMT"(name_of_daytm_wday)tm_mday(name_of_monthtm_mon)(tm_year+1900)tm_hourtm_mintm_secend