Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file batGenlex.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540openBatInnerPervasivesopenBatParserCoopenBatCharParserincludeGenlexletstring_of_token=function|Kwds->Printf.sprintf"Kwd %S"s|Idents->Printf.sprintf"Ident %S"s|Inti->Printf.sprintf"Int %d"i|Floatf->Printf.sprintf"Float %f"f|Strings->Printf.sprintf"String %S"s|Charc->Printf.sprintf"Char %C"ctypelexer_error=|IllegalCharacterofchar|NotReallyAChar|NotReallyAnEscape|EndOfStreamexception LexerErroroflexer_error*intexceptionEarlyEndOfStreamtypeenum={mutable position:int;content:charBatEnum.t}letjunk e=e.position <-e.position+1;BatEnum.junke.contentletpeeke=BatEnum.peeke.contenttypet=(string,token)Hashtbl.tletof_listx=letkwd_table=Hashtbl.create(List.lengthx)inList.iter(funs->Hashtbl.addkwd_tables(Kwds))x;kwd_tableletto_enum_filterkwd_table=letinitial_buffer=Bytes.create32inletbuffer=refinitial_bufferinletbufpos=ref0inletreset_buffer()=buffer:=initial_buffer;bufpos:=0inletstorec=if!bufpos>=Bytes.length!bufferthenbeginletnewbuffer=Bytes.create(2*!bufpos)inBytes.blit!buffer0newbuffer0!bufpos;buffer:=newbufferend;Bytes.set !buffer!bufposc;incrbufposinletget_string()=lets=Bytes.sub_string!buffer0!bufposinbuffer:=initial_buffer;sinletident_or_keyword id=tryHashtbl.findkwd_tableidwithNot_found-> Identidandkeyword_or_errorcpos=lets=BatString.of_charcintryHashtbl.find kwd_tableswithNot_found->raise(LexerError(IllegalCharacterc,pos))inletrecnext_token(enum:enum)=match peek enum withSome(' '|'\010'|'\013'|'\009'|'\026'|'\012')->junkenum;next_tokenenum|Some('A'..'Z'|'a'..'z'|'_'|'\192'..'\255'asc)->junkenum;lets=enuminreset_buffer();store c;idents|Some('!'|'%'|'&'|'$'|'#'|'+'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'|'~'|'^'|'|'|'*'asc)->junkenum;lets=enuminreset_buffer();store c;ident2s|Some('0'..'9'asc)->junkenum;lets=enuminreset_buffer();store c;numbers|Some'\''->junkenum;letc=trycharenumwithEarlyEndOfStream ->raise(LexerError(NotReallyAChar,enum.position))inbeginmatchpeekenumwithSome'\''->junkenum;Some(Charc)|None->raiseEarlyEndOfStream|_->raise(LexerError(NotReallyAChar,enum.position))end|Some '"'->junkenum;lets=enuminreset_buffer();Some (String(strings))|Some'-'->junkenum;neg_numberenum|Some'('->junkenum;maybe_commentenum|Somec->junk enum;Some(keyword_or_errorcenum.position)|_->Noneandident(enum:enum)=match peek enumwithSome('A'..'Z'|'a'..'z'|'\192'..'\255'|'0'..'9'|'_'|'\''asc)->junkenum;lets=enuminstorec;idents|_->Some(ident_or_keyword (get_string()))andident2(enum:enum)=match peek enumwithSome('!'|'%'|'&'|'$'|'#'|'+'|'-'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'|'~'|'^'|'|'|'*'asc)->junkenum;lets=enuminstorec;ident2s|_->Some(ident_or_keyword(get_string()))andneg_number(enum:enum)=match peek enumwithSome('0'..'9'asc)->junkenum;lets=enuminreset_buffer();store '-';storec;numbers|_->lets=enum inreset_buffer();store '-';ident2sandnumber(enum:enum)=match peek enumwithSome('0'..'9'asc)->junkenum;lets=enuminstorec;numbers|Some'.'->junkenum;lets=enuminstore'.';decimal_parts|Some('e'|'E')->junkenum;lets=enuminstore'E';exponent_parts|_->Some(Int(int_of_string(get_string())))anddecimal_part(enum:enum)=matchpeekenumwithSome('0'..'9'asc)->junkenum;lets=enuminstorec;decimal_parts|Some('e'|'E')->junkenum;lets=enuminstore'E';exponent_parts|_->Some(Float(float_of_string(get_string())))andexponent_part(enum:enum)=matchpeekenumwithSome('+'|'-'asc)->junkenum;lets=enuminstorec;end_exponent_parts|_->end_exponent_part enumandend_exponent_part (enum:enum)=match peek enumwithSome('0'..'9'asc)->junkenum;lets=enuminstorec;end_exponent_parts|_->Some(Float(float_of_string(get_string())))andstring(enum :enum)=match peek enumwithSome'"'->junkenum;get_string()|Some '\\'->junkenum;letc=tryescapeenumwithEarlyEndOfStream ->raise(LexerError(NotReallyAnEscape,enum.position))inlets=enuminstorec;strings|Somec->junkenum;lets=enuminstorec;strings|_->raiseEarlyEndOfStreamandchar(enum:enum)=match peek enumwithSome'\\'->junkenum;begintryescapeenumwith|EarlyEndOfStream ->raise(LexerError(NotReallyAChar,enum.position))end|Somec->junkenum;c|_->raiseEarlyEndOfStreamandescape(enum:enum)=match peek enumwithSome'n'->junkenum;'\n'|Some'r'->junkenum;'\r'|Some't'->junkenum;'\t'|Some('0'..'9'asc1)->junkenum;beginmatchpeek enumwithSome('0'..'9'asc2)->junkenum;beginmatchpeekenumwithSome ('0'..'9'asc3)->junkenum;Char.chr((Char.codec1-48)*100+(Char.codec2-48)*10+(Char.codec3-48))|Some _->raise(LexerError(NotReallyAnEscape,enum.position))|None->raiseEarlyEndOfStreamend|Some_->raise(LexerError(NotReallyAnEscape,enum.position))|_->raiseEarlyEndOfStreamend|Somec->junkenum;c|_->raiseEarlyEndOfStreamandmaybe_comment(enum:enum)=matchpeekenumwithSome'*'->junkenum;lets=enumincomments;next_tokens|_->Some(keyword_or_error '('enum.position)andcomment(enum:enum)=match peek enumwithSome'('->junkenum;maybe_nested_commentenum|Some'*'->junkenum;maybe_end_commentenum|Some_->junkenum;commentenum|_->raiseEarlyEndOfStreamandmaybe_nested_comment(enum:enum)=matchpeekenumwithSome'*'->junkenum;lets=enumincomments;comments|Some_->junkenum;commentenum|_->raiseEarlyEndOfStreamandmaybe_end_comment(enum:enum)=match peek enumwithSome')'->junkenum;()|Some'*'->junkenum;maybe_end_commentenum|Some_->junkenum;commentenum|_->raiseEarlyEndOfStreaminfuninput->BatEnum.from_while(fun_count->next_token {position=0;content=input})letto_stream_filter(kwd_table:t)(x:charStream.t):tokenStream.t=(BatStream.of_enum(to_enum_filterkwd_table(BatStream.enumx)))letto_lazy_list_filterkwd_tablex=(BatLazyList.of_enum(to_enum_filterkwd_table(BatLazyList.enumx)))letocaml_escape=label"OCaml-style escaped character"(any>>=function|'n' -> return'\n'|'r'->return '\r'|'t'->return '\t'|'\\'->return'\\'|'b'->return '\b'|'"'->return '"'|'x'->times2hex>>=funt->return (Char.chr(BatInt.of_string(BatString.implode('0'::'x'::t))))|'0'..'9'asx->times2digit>>=funt->return (Char.chr(BatInt.of_string(BatString.implode(x::t))))|_->fail)moduleLanguages=structmoduletypeDefinition=sigvalcomment_delimiters:(string*string)optionval line_comment_start :stringoptionvalnested_comments :boolvalident_start:(char,char,position)BatParserCo.tvalident_letter:(char,char,position)BatParserCo.tvalop_start:(char,char,position)BatParserCo.tvalop_letter:(char,char,position)BatParserCo.tvalreserved_names:stringlistvalcase_sensitive:boolendmoduleLibrary =struct(**A good approximation of language definition for OCaml's lexer*)moduleOCaml=structletcomment_delimiters=Some("(*","*)")letline_comment_start=Noneletnested_comments=trueletident_start =either[uppercase;lowercase;one_of['_';'`']]letident_letter=either[uppercase;lowercase;digit;one_of['\'';'_']]letop_start=satisfy(BatChar.is_symbol)letop_letter=op_startletreserved_names=["fun";"let";"module";"begin";"end";"sig";"function";"{";"}";";";"|";",";":";".";](*@TODO: Complete*)letcase_sensitive=trueend(**A good approximation of language definition for C++'s lexer*)moduleC=structletcomment_delimiters=Some("/*","*/")letline_comment_start=Some"//"letnested_comments=trueletident_start =either[uppercase;lowercase;char'_']letident_letter =either[ident_start;digit]letop_start=one_of[';';':';'!';'$';'%';'&';'*';'+';'.';'/';'<';'=';'>';'?';'^';'|';'-';'~']letop_letter=op_startletreserved_names=["continue";"volatile";"register";"unsigned";"typedef";"default";"sizeof";"switch";"return";"extern";"struct";"static";"signed";"while";"break";"union";"const";"else";"case";"enum";"auto";"goto";"for";"if";"do"]letcase_sensitive=trueendend(** Create a lexer based on conventions*)moduleMake(M:Definition)=structopenM(** {6 Case management} *)letchar=ifcase_sensitivethencharelsecase_charletstring=ifcase_sensitivethenstringelsecase_stringletadapt_case=ifcase_sensitivethenidentityelse##V<5##BatString.lowercase##V>=5##BatString.lowercase_asciiletstring_compare=ifcase_sensitivethenString.compareelseBatString.icompare(** {6 Whitespace management} *)letline_comment=match line_comment_startwith|None->fail|Some s->(*label "Line comment"*)label""(strings>>=fun_->ignore_zero_plus(not_char'\n')>>=fun_->newline>>=fun_->return())(*Note: we use [string] rather than [CharParser.string], as the line comment
may be introduced by a word rather than a symbol (e.g. Basic's [REM]), hence
may depend on case sensitivity.*)letmultiline_comment=matchcomment_delimiterswith|None->fail|Some (l,r)->(*label "Multi-line comment"*)label""(letl0=String.getl0andr0=String.getr0andstring_r=stringrinletin_comment()=ifnested_commentsthenletnot_lr=label("Neither \""^l^"\" nor ^\""^r^"\"")(none_of[r0;l0])inletrecaux()=label"aux"(either[stringr>>=(fun_->return());stringl>>=(fun_->aux()>>=fun_->aux ());(ignore_one_plusnot_lr)>>=fun _->aux ()])inaux()elsestringl>>>label"Contents of comments"(letrecaux()=maybestring_r>>=function|Some_->return()|None->any>>>aux()inaux())inin_comment())let comment=(line_comment<|>multiline_comment)>>>return()letwhitespaces=ignore_zero_plus (either[satisfyBatChar.is_whitespace>>=(fun_->return());comment])letto_symbolp=p>>=funr->whitespaces>>=fun_->return(BatString.of_listr)letlexemep=p>>=funr->whitespaces>>=fun_->returnr(** {6 Actual content} *)letidentifier_content=either[ident_start>::zero_plusident_letter;op_start>::zero_plusop_letter]letis_reserveds=List.memsreserved_namesletident_or_kwd=label"identifier or reserved"(label""(to_symbolidentifier_content>>=funs->(* Printf.eprintf "Got something %S\n" s;*)return(adapt_cases)))letident=label"identifier or operator"(label""(ident_or_kwd>>=funs->ifis_reservedsthenfailelse((*Printf.eprintf "Got ident %S\n" s;*)returns)))(* let kwd = label "keyword"
(ident_or_kwd >>= fun s ->
if is_reserved s then (Printf.eprintf "Got reserved %S\n" s; return s)
else fail)*)letkwd=label"keyword"(ident_or_kwd)letidentifiers=label("specific identifier \""^s^"\"")(label""(ident>>=funs'->ifstring_comparess'=0thenreturn()elsefail))letkeywords=label("specific keyword \""^s^"\"")(label""(kwd>>=funs'->ifstring_comparess'=0thenreturn()elsefail))(* let as_identifier p = p >>= fun s -> if List.mem s reserved_names then fail else return s
let as_operator p = p >>= fun s -> if List.mem s reserved_op_names then fail else return s*)(* let any_reserved = label "reserved name"
( to_symbol (ident_start >:: zero_plus ident_letter) >>= fun s ->
if List.mem s reserved_names then return s
else fail)
let any_reserved_op = label "reserved operator"
( to_symbol (op_start >:: zero_plus op_letter) >>= fun s ->
if List.mem s reserved_op_names then return s
else fail)*)letchar_literal=label"Character literal"(BatCharParser.char'\''>>=fun_->any>>=function|'\\'->ocaml_escape|c->returnc)>>=func->BatCharParser.char'\''>>=fun_->return cletstring_literal=label"String Literal"(lexeme(BatCharParser.char'"'>>>letreccontentchars=any>>=function|'"'->returnchars|'\\'->ocaml_escape>>=fune->content(e::chars)|e->(*Printf.eprintf "Just received char %c\n" e;*)content(e::chars)incontent[]>>=func->(*Printf.eprintf "Sending full string %S\n" (String.of_list (List.rev c));*)return(BatString.of_list(List.revc))))letinteger=label "OCaml-style integer"(lexeme(maybe(BatCharParser.char'-')>>=funsign->one_plusdigit>>=fundigits ->letnumber=BatInt.of_string(BatString.of_listdigits)inmatchsignwith|Some_->return(~-number)|None->returnnumber))letfloat=label "OCaml-style floating-point number"(lexeme(maybe(BatCharParser.char'-')>>=funsign->post_mapBatString.of_list(zero_plusdigit)>>=funint_part->maybe(BatCharParser.char'.'>>=fun_->post_mapBatString.of_list(zero_plusdigit))>>=fundecimal_part->maybe(BatCharParser.char'E'>>=fun_->maybe(BatCharParser.char'+'<|>BatCharParser.char'-')>>=funsign->letsign=BatOption.default'+'signinone_plusdigit>>=funexpo->return("E"^(BatString.of_charsign)^(BatString.of_listexpo)))>>=funexpo->letnumber=match(decimal_part,expo)with|Somed,Somee->Some(int_part^"."^d^e)|Somed,None->Some(int_part^"."^d)|None,Somee->Some(int_part^e)|None,None->Noneinmatchnumberwith|None->fail|Somen->letabsolute=BatFloat.of_stringninreturn(matchsignwith|None->absolute|Some_->~-.absolute)))letnumber=(float >>=funf->return (`Floatf))<|>(integer>>= funi->return (`Integeri))(** Getting it all together. *)letcheck_reserved=ifcase_sensitivethenfunx->ifis_reservedxthen(Kwdx)else(Identx)elsefunx->letx=##V<5##BatString.lowercase##V>=5##BatString.lowercase_asciixinifis_reservedxthen(Kwdx)else(Identx)letas_parser=whitespaces>>=fun_->either[ident_or_kwd>>=(funx->return(check_reservedx));float>>=(funx->return(Floatx));integer>>=(funx->return(Intx));string_literal>>=(funx->return(Stringx));char_literal>>=(funx->return(Charx))]letfeed=source_mapas_parserletstart=whitespacesendend