Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file float.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050open!Importopen!PrintfmoduleBytes=Bytes0includeFloat0letraise_s=Error.raise_smoduleT=structtypet=float[@@deriving_inlinehash,sexp,sexp_grammar]let(hash_fold_t:Ppx_hash_lib.Std.Hash.state->t->Ppx_hash_lib.Std.Hash.state)=hash_fold_floatand(hash:t->Ppx_hash_lib.Std.Hash.hash_value)=letfunc=hash_floatinfunx->funcx;;lett_of_sexp=(float_of_sexp:Sexplib0.Sexp.t->t)letsexp_of_t=(sexp_of_float:t->Sexplib0.Sexp.t)let(t_sexp_grammar:tSexplib0.Sexp_grammar.t)=float_sexp_grammar[@@@end]lethashable:tHashable.t={hash;compare;sexp_of_t}letcompare=Float_replace_polymorphic_compare.compareendincludeTincludeComparator.Make(T)(* Open replace_polymorphic_compare after including functor instantiations so they do not
shadow its definitions. This is here so that efficient versions of the comparison
functions are available within this module. *)openFloat_replace_polymorphic_compareletinvariant(_:t)=()letto_floatx=xletof_floatx=xletof_strings=tryfloat_of_stringswith|_->invalid_argf"Float.of_string %s"s();;externalformat_float:string->float->string="caml_format_float"(* Stolen from [pervasives.ml]. Adds a "." at the end if needed. It is in
[pervasives.mli], but it also says not to use it directly, so we copy and paste the
code. It makes the assumption on the string passed in argument that it was returned by
[format_float]. *)letvalid_float_lexems=letl=String.lengthsinletrecloopi=ifInt_replace_polymorphic_compare.(>=)ilthens^"."else(matchs.[i]with|'0'..'9'|'-'->loop(i+1)|_->s)inloop0;;(* Let [y] be a power of 2. Then the next representable float is:
[z = y * (1 + 2 ** -52)]
and the previous one is
[x = y * (1 - 2 ** -53)]
In general, every two adjacent floats are within a factor of between [1 + 2**-53]
and [1 + 2**-52] from each other, that is within [1 + 1.1e-16] and [1 + 2.3e-16].
So if the decimal representation of a float starts with "1", then its adjacent floats
will usually differ from it by 1, and sometimes by 2, at the 17th significant digit
(counting from 1).
On the other hand, if the decimal representation starts with "9", then the adjacent
floats will be off by no more than 23 at the 16th and 17th significant digits.
E.g.:
{v
# sprintf "%.17g" (1024. *. (1. -. 2.** (-53.)));;
11111111
1234 5678901234567
- : string = "1023.9999999999999"
v}
Printing a couple of extra digits reveals that the difference indeed is roughly 11 at
digits 17th and 18th (that is, 13th and 14th after "."):
{v
# sprintf "%.19g" (1024. *. (1. -. 2.** (-53.)));;
1111111111
1234 567890123456789
- : string = "1023.999999999999886"
v}
The ulp (the difference between adjacent floats) is twice as big on the other side of
1024.:
{v
# sprintf "%.19g" (1024. *. (1. +. 2.** (-52.)));;
1111111111
1234 567890123456789
- : string = "1024.000000000000227"
v}
Now take a power of 2 which starts with 99:
{v
# 2.**93. ;;
1111111111
1 23456789012345678
- : float = 9.9035203142830422e+27
# 2.**93. *. (1. +. 2.** (-52.));;
- : float = 9.9035203142830444e+27
# 2.**93. *. (1. -. 2.** (-53.));;
- : float = 9.9035203142830411e+27
v}
The difference between 2**93 and its two neighbors is slightly more than, respectively,
1 and 2 at significant digit 16.
Those examples show that:
- 17 significant digits is always sufficient to represent a float without ambiguity
- 15th significant digit can always be represented accurately
- converting a decimal number with 16 significant digits to its nearest float and back
can change the last decimal digit by no more than 1
To make sure that floats obtained by conversion from decimal fractions (e.g. "3.14")
are printed without trailing non-zero digits, one should choose the first among the
'%.15g', '%.16g', and '%.17g' representations which does round-trip:
{v
# sprintf "%.15g" 3.14;;
- : string = "3.14" (* pick this one *)
# sprintf "%.16g" 3.14;;
- : string = "3.14"
# sprintf "%.17g" 3.14;;
- : string = "3.1400000000000001" (* do not pick this one *)
# sprintf "%.15g" 8.000000000000002;;
- : string = "8" (* do not pick this one--does not round-trip *)
# sprintf "%.16g" 8.000000000000002;;
- : string = "8.000000000000002" (* prefer this one *)
# sprintf "%.17g" 8.000000000000002;;
- : string = "8.0000000000000018" (* this one has one digit of junk at the end *)
v}
Skipping the '%.16g' in the above procedure saves us some time, but it means that, as
seen in the second example above, occasionally numbers with exactly 16 significant
digits will have an error introduced at the 17th digit. That is probably OK for
typical use, because a number with 16 significant digits is "ugly" already. Adding one
more doesn't make it much worse for a human reader.
On the other hand, we cannot skip '%.15g' and only look at '%.16g' and '%.17g', since
the inaccuracy at the 16th digit might introduce the noise we want to avoid:
{v
# sprintf "%.15g" 9.992;;
- : string = "9.992" (* pick this one *)
# sprintf "%.16g" 9.992;;
- : string = "9.992000000000001" (* do not pick this one--junk at the end *)
# sprintf "%.17g" 9.992;;
- : string = "9.9920000000000009"
v}
*)letto_stringx=valid_float_lexem(lety=format_float"%.15g"xiniffloat_of_stringy=xthenyelseformat_float"%.17g"x);;letmax_value=infinityletmin_value=neg_infinityletmin_positive_subnormal_value=2.**-1074.letmin_positive_normal_value=2.**-1022.letzero=0.letone=1.letminus_one=-1.letpi=0x3.243F6A8885A308D313198A2E037073letsqrt_pi=0x1.C5BF891B4EF6AA79C3B0520D5DB938letsqrt_2pi=0x2.81B263FEC4E0B2CAF9483F5CE459DCleteuler=0x0.93C467E37DB0C7A4D1BE3F810152CBletof_int=Int.to_floatletto_int=Int.of_floatletof_int63i=Int63.to_floatiletof_int64i=Caml.Int64.to_floatiletto_int64=Caml.Int64.of_floatletiround_lbound=lower_bound_for_intInt.num_bitsletiround_ubound=upper_bound_for_intInt.num_bits(* The performance of the "exn" rounding functions is important, so they are written
out separately, and tuned individually. (We could have the option versions call
the "exn" versions, but that imposes arguably gratuitous overhead---especially
in the case where the capture of backtraces is enabled upon "with"---and that seems
not worth it when compared to the relatively small amount of code duplication.) *)(* Error reporting below is very carefully arranged so that, e.g., [iround_nearest_exn]
itself can be inlined into callers such that they don't need to allocate a box for the
[float] argument. This is done with a box [box] function carefully chosen to allow the
compiler to create a separate box for the float only in error cases. See, e.g.,
[../../zero/test/price_test.ml] for a mechanical test of this property when building
with [X_LIBRARY_INLINING=true]. *)letiround_upt=ift>0.0then(lett'=ceiltinift'<=iround_uboundthenSome(Int.of_float_uncheckedt')elseNone)elseift>=iround_lboundthenSome(Int.of_float_uncheckedt)elseNone;;let[@ocaml.inlinealways]iround_up_exnt=ift>0.0then(lett'=ceiltinift'<=iround_uboundthenInt.of_float_uncheckedt'elseinvalid_argf"Float.iround_up_exn: argument (%f) is too large"(boxt)())elseift>=iround_lboundthenInt.of_float_uncheckedtelseinvalid_argf"Float.iround_up_exn: argument (%f) is too small or NaN"(boxt)();;letiround_downt=ift>=0.0thenift<=iround_uboundthenSome(Int.of_float_uncheckedt)elseNoneelse(lett'=floortinift'>=iround_lboundthenSome(Int.of_float_uncheckedt')elseNone);;let[@ocaml.inlinealways]iround_down_exnt=ift>=0.0thenift<=iround_uboundthenInt.of_float_uncheckedtelseinvalid_argf"Float.iround_down_exn: argument (%f) is too large"(boxt)()else(lett'=floortinift'>=iround_lboundthenInt.of_float_uncheckedt'elseinvalid_argf"Float.iround_down_exn: argument (%f) is too small or NaN"(boxt)());;letiround_towards_zerot=ift>=iround_lbound&&t<=iround_uboundthenSome(Int.of_float_uncheckedt)elseNone;;let[@ocaml.inlinealways]iround_towards_zero_exnt=ift>=iround_lbound&&t<=iround_uboundthenInt.of_float_uncheckedtelseinvalid_argf"Float.iround_towards_zero_exn: argument (%f) is out of range or NaN"(boxt)();;(* Outside of the range (round_nearest_lb..round_nearest_ub), all representable doubles
are integers in the mathematical sense, and [round_nearest] should be identity.
However, for odd numbers with the absolute value between 2**52 and 2**53, the formula
[round_nearest x = floor (x + 0.5)] does not hold:
{v
# let naive_round_nearest x = floor (x +. 0.5);;
# let x = 2. ** 52. +. 1.;;
val x : float = 4503599627370497.
# naive_round_nearest x;;
- : float = 4503599627370498.
v}
*)letround_nearest_lb=-.(2.**52.)letround_nearest_ub=2.**52.(* For [x = one_ulp `Down 0.5], the formula [floor (x +. 0.5)] for rounding to nearest
does not work, because the exact result is halfway between [one_ulp `Down 1.] and [1.],
and it gets rounded up to [1.] due to the round-ties-to-even rule. *)letone_ulp_less_than_half=one_ulp`Down0.5let[@ocaml.inlinealways]add_half_for_round_nearestt=t+.ift=one_ulp_less_than_halfthenone_ulp_less_than_half(* since t < 0.5, make sure the result is < 1.0 *)else0.5;;letiround_nearest_32t=ift>=0.then(lett'=add_half_for_round_nearesttinift'<=iround_uboundthenSome(Int.of_float_uncheckedt')elseNone)else(lett'=floor(t+.0.5)inift'>=iround_lboundthenSome(Int.of_float_uncheckedt')elseNone);;letiround_nearest_64t=ift>=0.thenift<round_nearest_ubthenSome(Int.of_float_unchecked(add_half_for_round_nearestt))elseift<=iround_uboundthenSome(Int.of_float_uncheckedt)elseNoneelseift>round_nearest_lbthenSome(Int.of_float_unchecked(floor(t+.0.5)))elseift>=iround_lboundthenSome(Int.of_float_uncheckedt)elseNone;;letiround_nearest=matchWord_size.word_sizewith|W64->iround_nearest_64|W32->iround_nearest_32;;letiround_nearest_exn_32t=ift>=0.then(lett'=add_half_for_round_nearesttinift'<=iround_uboundthenInt.of_float_uncheckedt'elseinvalid_argf"Float.iround_nearest_exn: argument (%f) is too large"(boxt)())else(lett'=floor(t+.0.5)inift'>=iround_lboundthenInt.of_float_uncheckedt'elseinvalid_argf"Float.iround_nearest_exn: argument (%f) is too small"(boxt)());;let[@ocaml.inlinealways]iround_nearest_exn_64t=ift>=0.thenift<round_nearest_ubthenInt.of_float_unchecked(add_half_for_round_nearestt)elseift<=iround_uboundthenInt.of_float_uncheckedtelseinvalid_argf"Float.iround_nearest_exn: argument (%f) is too large"(boxt)()elseift>round_nearest_lbthenInt.of_float_unchecked(floor(t+.0.5))elseift>=iround_lboundthenInt.of_float_uncheckedtelseinvalid_argf"Float.iround_nearest_exn: argument (%f) is too small or NaN"(boxt)();;letiround_nearest_exn=matchWord_size.word_sizewith|W64->iround_nearest_exn_64|W32->iround_nearest_exn_32;;(* The following [iround_exn] and [iround] functions are slower than the ones above.
Their equivalence to those functions is tested in the unit tests below. *)let[@inline]iround_exn?(dir=`Nearest)t=matchdirwith|`Zero->iround_towards_zero_exnt|`Nearest->iround_nearest_exnt|`Up->iround_up_exnt|`Down->iround_down_exnt;;letiround?(dir=`Nearest)t=trySome(iround_exn~dirt)with|_->None;;letis_inft=1./.t=0.letis_finitet=t-.t=0.letmin_inan(x:t)y=ifis_nanythenxelseifis_nanxthenyelseifx<ythenxelsey;;letmax_inan(x:t)y=ifis_nanythenxelseifis_nanxthenyelseifx>ythenxelsey;;letadd=(+.)letsub=(-.)letneg=(~-.)letabs=abs_floatletscale=(*.)letsquarex=x*.xmoduleParts:sigtypetvalfractional:t->floatvalintegral:t->floatvalmodf:float->tend=structtypet=float*floatletfractionalt=fsttletintegralt=sndtletmodf=modfendletmodf=Parts.modfletround_down=floorletround_up=ceilletround_towards_zerot=ift>=0.thenround_downtelseround_upt(* see the comment above [round_nearest_lb] and [round_nearest_ub] for an explanation *)let[@ocaml.inline]round_nearest_inlinet=ift>round_nearest_lb&&t<round_nearest_ubthenfloor(add_half_for_round_nearestt)elset+.0.;;letround_nearestt=(round_nearest_inline[@ocaml.inlinedalways])tletround_nearest_half_to_event=ift<=round_nearest_lb||t>=round_nearest_ubthent+.0.else(letfloor=floortin(* [ceil_or_succ = if t is an integer then t +. 1. else ceil t]. Faster than [ceil]. *)letceil_or_succ=floor+.1.inletdiff_floor=t-.floorinletdiff_ceil=ceil_or_succ-.tinifdiff_floor<diff_ceilthenfloorelseifdiff_floor>diff_ceilthenceil_or_succelseif(* exact tie, pick the even *)mod_floatfloor2.=0.thenfloorelseceil_or_succ);;letint63_round_lbound=lower_bound_for_intInt63.num_bitsletint63_round_ubound=upper_bound_for_intInt63.num_bitsletint63_round_up_exnt=ift>0.0then(lett'=ceiltinift'<=int63_round_uboundthenInt63.of_float_uncheckedt'elseinvalid_argf"Float.int63_round_up_exn: argument (%f) is too large"(Float0.boxt)())elseift>=int63_round_lboundthenInt63.of_float_uncheckedtelseinvalid_argf"Float.int63_round_up_exn: argument (%f) is too small or NaN"(Float0.boxt)();;letint63_round_down_exnt=ift>=0.0thenift<=int63_round_uboundthenInt63.of_float_uncheckedtelseinvalid_argf"Float.int63_round_down_exn: argument (%f) is too large"(Float0.boxt)()else(lett'=floortinift'>=int63_round_lboundthenInt63.of_float_uncheckedt'elseinvalid_argf"Float.int63_round_down_exn: argument (%f) is too small or NaN"(Float0.boxt)());;letint63_round_nearest_portable_alloc_exnt0=lett=(round_nearest_inline[@ocaml.inlinedalways])t0inift>0.thenift<=int63_round_uboundthenInt63.of_float_uncheckedtelseinvalid_argf"Float.int63_round_nearest_portable_alloc_exn: argument (%f) is too large"(boxt0)()elseift>=int63_round_lboundthenInt63.of_float_uncheckedtelseinvalid_argf"Float.int63_round_nearest_portable_alloc_exn: argument (%f) is too small or NaN"(boxt0)();;letint63_round_nearest_arch64_noalloc_exnf=Int63.of_int(iround_nearest_exnf)letint63_round_nearest_exn=matchWord_size.word_sizewith|W64->int63_round_nearest_arch64_noalloc_exn|W32->int63_round_nearest_portable_alloc_exn;;letround?(dir=`Nearest)t=matchdirwith|`Nearest->round_nearestt|`Down->round_downt|`Up->round_upt|`Zero->round_towards_zerot;;moduleClass=structtypet=|Infinite|Nan|Normal|Subnormal|Zero[@@deriving_inlinecompare,enumerate,sexp,sexp_grammar]letcompare=(Ppx_compare_lib.polymorphic_compare:t->t->int)letall=([Infinite;Nan;Normal;Subnormal;Zero]:tlist)lett_of_sexp=(leterror_source__006_="float.ml.Class.t"infunction|Sexplib0.Sexp.Atom("infinite"|"Infinite")->Infinite|Sexplib0.Sexp.Atom("nan"|"Nan")->Nan|Sexplib0.Sexp.Atom("normal"|"Normal")->Normal|Sexplib0.Sexp.Atom("subnormal"|"Subnormal")->Subnormal|Sexplib0.Sexp.Atom("zero"|"Zero")->Zero|Sexplib0.Sexp.List(Sexplib0.Sexp.Atom("infinite"|"Infinite")::_)assexp__007_->Sexplib0.Sexp_conv_error.stag_no_argserror_source__006_sexp__007_|Sexplib0.Sexp.List(Sexplib0.Sexp.Atom("nan"|"Nan")::_)assexp__007_->Sexplib0.Sexp_conv_error.stag_no_argserror_source__006_sexp__007_|Sexplib0.Sexp.List(Sexplib0.Sexp.Atom("normal"|"Normal")::_)assexp__007_->Sexplib0.Sexp_conv_error.stag_no_argserror_source__006_sexp__007_|Sexplib0.Sexp.List(Sexplib0.Sexp.Atom("subnormal"|"Subnormal")::_)assexp__007_->Sexplib0.Sexp_conv_error.stag_no_argserror_source__006_sexp__007_|Sexplib0.Sexp.List(Sexplib0.Sexp.Atom("zero"|"Zero")::_)assexp__007_->Sexplib0.Sexp_conv_error.stag_no_argserror_source__006_sexp__007_|Sexplib0.Sexp.List(Sexplib0.Sexp.List_::_)assexp__005_->Sexplib0.Sexp_conv_error.nested_list_invalid_sumerror_source__006_sexp__005_|Sexplib0.Sexp.List[]assexp__005_->Sexplib0.Sexp_conv_error.empty_list_invalid_sumerror_source__006_sexp__005_|sexp__005_->Sexplib0.Sexp_conv_error.unexpected_stagerror_source__006_sexp__005_:Sexplib0.Sexp.t->t);;letsexp_of_t=(function|Infinite->Sexplib0.Sexp.Atom"Infinite"|Nan->Sexplib0.Sexp.Atom"Nan"|Normal->Sexplib0.Sexp.Atom"Normal"|Subnormal->Sexplib0.Sexp.Atom"Subnormal"|Zero->Sexplib0.Sexp.Atom"Zero":t->Sexplib0.Sexp.t);;let(t_sexp_grammar:tSexplib0.Sexp_grammar.t)={untyped=Variant{case_sensitivity=Case_sensitive_except_first_character;clauses=[No_tag{name="Infinite";clause_kind=Atom_clause};No_tag{name="Nan";clause_kind=Atom_clause};No_tag{name="Normal";clause_kind=Atom_clause};No_tag{name="Subnormal";clause_kind=Atom_clause};No_tag{name="Zero";clause_kind=Atom_clause}]}};;[@@@end]letto_stringt=string_of_sexp(sexp_of_tt)letof_strings=t_of_sexp(sexp_of_strings)endletclassifyt=letmoduleC=Classinmatchclassify_floattwith|FP_normal->C.Normal|FP_subnormal->C.Subnormal|FP_zero->C.Zero|FP_infinite->C.Infinite|FP_nan->C.Nan;;letinsert_underscores?(delimiter='_')?(strip_zero=false)string=matchString.lsplit2string~on:'.'with|None->Int_conversions.insert_delimiterstring~delimiter|Some(left,right)->letleft=Int_conversions.insert_delimiterleft~delimiterinletright=ifstrip_zerothenString.rstripright~drop:(func->Char.(=)c'0')elserightin(matchrightwith|""->left|_->left^"."^right);;letto_string_hum?delimiter?(decimals=3)?strip_zero?(explicit_plus=false)f=ifInt_replace_polymorphic_compare.(<)decimals0theninvalid_argf"to_string_hum: invalid argument ~decimals=%d"decimals();matchclassifyfwith|Class.Infinite->iff>0.then"inf"else"-inf"|Class.Nan->"nan"|Class.Normal|Class.Subnormal|Class.Zero->lets=ifexplicit_plusthensprintf"%+.*f"decimalsfelsesprintf"%.*f"decimalsfininsert_underscoress?delimiter?strip_zero;;letsexp_of_tt=letsexp=sexp_of_ttinmatch!Sexp.of_float_stylewith|`No_underscores->sexp|`Underscores->(matchsexpwith|List_->raise_s(Sexp.message"[sexp_of_float] produced strange sexp"["sexp",Sexp.sexp_of_tsexp])|Atomstring->ifString.containsstring'E'thensexpelseAtom(insert_underscoresstring));;letto_padded_compact_string_customt?(prefix="")~kilo~mega~giga~tera?peta()=(* Round a ratio toward the nearest integer, resolving ties toward the nearest even
number. For sane inputs (in particular, when [denominator] is an integer and
[abs numerator < 2e52]) this should be accurate. Otherwise, the result might be a
little bit off, but we don't really use that case. *)letiround_ratio_exn~numerator~denominator=letk=floor(numerator/.denominator)in(* if [abs k < 2e53], then both [k] and [k +. 1.] are accurately represented, and in
particular [k +. 1. > k]. If [denominator] is also an integer, and
[abs (denominator *. (k +. 1)) < 2e53] (and in some other cases, too), then [lower]
and [higher] are actually both accurate. Since (roughly)
[numerator = denominator *. k] then for [abs numerator < 2e52] we should be
fine. *)letlower=denominator*.kinlethigher=denominator*.(k+.1.)in(* Subtracting numbers within a factor of two from each other is accurate.
So either the two subtractions below are accurate, or k = 0, or k = -1.
In case of a tie, round to even. *)letdiff_right=higher-.numeratorinletdiff_left=numerator-.lowerinletk=iround_nearest_exnkinifdiff_right<diff_leftthenk+1elseifdiff_right>diff_leftthenkelseif(* a tie *)Int_replace_polymorphic_compare.(=)(kmod2)0thenkelsek+1inmatchclassifytwith|Class.Infinite->ift<0.0then"-inf "else"inf "|Class.Nan->"nan "|Class.Subnormal|Class.Normal|Class.Zero->letgot=letconv_onet=assert(0.<=t&&t<999.95);letx=prefix^format_float"%.1f"tin(* Fix the ".0" suffix *)ifString.is_suffixx~suffix:".0"then(letx=Bytes.of_stringxinletn=Bytes.lengthxinBytes.setx(n-1)' ';Bytes.setx(n-2)' ';Bytes.unsafe_to_string~no_mutation_while_string_reachable:x)elsexinletconvmagtdenominator=assert((denominator=100.&&t>=999.95)||(denominator>=100_000.&&t>=round_nearest(denominator*.9.999_5)));assert(t<round_nearest(denominator*.9_999.5));leti,d=letk=iround_ratio_exn~numerator:t~denominatorin(* [mod] is okay here because we know i >= 0. *)k/10,kmod10inletopenInt_replace_polymorphic_compareinassert(0<=i&&i<1000);assert(0<=d&&d<10);ifd=0thensprintf"%s%d%s "prefiximagelsesprintf"%s%d%s%d"prefiximagdin(* While the standard metric prefixes (e.g. capital "M" rather than "m", [1]) are
nominally more correct, this hinders readability in our case. E.g., 10G6 and
1066 look too similar. That's an extreme example, but in general k,m,g,t,p
probably stand out better than K,M,G,T,P when interspersed with digits.
[1] http://en.wikipedia.org/wiki/Metric_prefix *)(* The trick here is that:
- the first boundary (999.95) as a float is slightly over-represented (so it is
better approximated as "1k" than as "999.9"),
- the other boundaries are accurately represented, because they are integers.
That's why the strict equalities below do exactly what we want. *)ift<999.95E0thenconv_onetelseift<999.95E3thenconvkilot100.elseift<999.95E6thenconvmegat100_000.elseift<999.95E9thenconvgigat100_000_000.elseift<999.95E12thenconvterat100_000_000_000.else(matchpetawith|None->sprintf"%s%.1e"prefixt|Somepeta->ift<999.95E15thenconvpetat100_000_000_000_000.elsesprintf"%s%.1e"prefixt)inift>=0.thengotelse"-"^go~-.t;;letto_padded_compact_stringt=to_padded_compact_string_customt~kilo:"k"~mega:"m"~giga:"g"~tera:"t"~peta:"p"();;(* Performance note: Initializing the accumulator to 1 results in one extra
multiply; e.g., to compute x ** 4, we in principle only need 2 multiplies,
but this function will have 3 multiplies. However, attempts to avoid this
(like decrementing n and initializing accum to be x, or handling small
exponents as a special case) have not yielded anything that is a net
improvement.
*)letint_powxn=letopenInt_replace_polymorphic_compareinifn=0then1.else((* Using [x +. (-0.)] on the following line convinces the compiler to avoid a certain
boxing (that would result in allocation in each iteration). Soon, the compiler
shouldn't need this "hint" to avoid the boxing. The reason we add -0 rather than 0
is that [x +. (-0.)] is apparently always the same as [x], whereas [x +. 0.] is
not, in that it sends [-0.] to [0.]. This makes a difference because we want
[int_pow (-0.) (-1)] to return neg_infinity just like [-0. ** -1.] would. *)letx=ref(x+.-0.)inletn=refninletaccum=ref1.inif!n<0then((* x ** n = (1/x) ** -n *)x:=1./.!x;n:=~-(!n);if!n<0then((* n must have been min_int, so it is now so big that it has wrapped around.
We decrement it so that it looks positive again, but accordingly have
to put an extra factor of x in the accumulator.
*)accum:=!x;decrn));(* Letting [a] denote (the original value of) [x ** n], we maintain
the invariant that [(x ** n) *. accum = a]. *)while!n>1doif!nland1<>0thenaccum:=!x*.!accum;x:=!x*.!x;n:=!nlsr1done;(* n is necessarily 1 at this point, so there is one additional
multiplication by x. *)!x*.!accum);;letround_genx~how=ifx=0.then0.elseifnot(is_finitex)thenxelse((* Significant digits and decimal digits. *)letsd,dd=matchhowwith|`significant_digitssd->letdd=sd-to_int(round_up(log10(absx)))insd,dd|`decimal_digitsdd->letsd=dd+to_int(round_up(log10(absx)))insd,ddinletopenInt_replace_polymorphic_compareinifsd<0then0.elseifsd>=17thenxelse((* Choose the order that is exactly representable as a float. Small positive
integers are, but their inverses in most cases are not. *)letabs_dd=Int.absddinifabs_dd>22||sd>=16(* 10**22 is exactly representable as a float, but 10**23 is not, so use the slow
path. Similarly, if we need 16 significant digits in the result, then the integer
[round_nearest (x <op> order)] might not be exactly representable as a float, since
for some ranges we only have 15 digits of precision guaranteed.
That said, we are still rounding twice here:
1) first time when rounding [x *. order] or [x /. order] to the nearest float
(just the normal way floating-point multiplication or division works),
2) second time when applying [round_nearest_half_to_even] to the result of the
above operation
So for arguments within an ulp from a tie we might still produce an off-by-one
result. *)thenof_string(sprintf"%.*g"sdx)else(letorder=int_pow10.abs_ddinifdd>=0thenround_nearest_half_to_even(x*.order)/.orderelseround_nearest_half_to_even(x/.order)*.order)));;letround_significantx~significant_digits=ifInt_replace_polymorphic_compare.(<=)significant_digits0theninvalid_argf"Float.round_significant: invalid argument significant_digits:%d"significant_digits()elseround_genx~how:(`significant_digitssignificant_digits);;letround_decimalx~decimal_digits=round_genx~how:(`decimal_digitsdecimal_digits)letbetweent~low~high=low<=t&&t<=highletclamp_exnt~min~max=(* Also fails if [min] or [max] is nan *)assert(min<=max);(* clamp_unchecked is in float0.ml *)clamp_uncheckedt~min~max;;letclampt~min~max=(* Also fails if [min] or [max] is nan *)ifmin<=maxthenOk(clamp_uncheckedt~min~max)elseOr_error.error_s(Sexp.message"clamp requires [min <= max]"["min",T.sexp_of_tmin;"max",T.sexp_of_tmax]);;let(+)=(+.)let(-)=(-.)let(*)=(*.)let(**)=(**)let(/)=(/.)let(%)=(%.)let(~-)=(~-.)letsign_exnt:Sign.t=ift>0.thenPoselseift<0.thenNegelseift=0.thenZeroelseError.raise_s(Sexp.message"Float.sign_exn of NAN"["",sexp_of_tt]);;letsign_or_nant:Sign_or_nan.t=ift>0.thenPoselseift<0.thenNegelseift=0.thenZeroelseNan;;letieee_negativet=letbits=Caml.Int64.bits_of_floattinPoly.(bits<Caml.Int64.zero);;letexponent_bits=11letmantissa_bits=52letexponent_mask64=Int64.(shift_leftoneexponent_bits-one)letexponent_mask=Int64.to_int_exnexponent_mask64letmantissa_mask=Int63.(shift_leftonemantissa_bits-one)letmantissa_mask64=Int63.to_int64mantissa_maskletieee_exponentt=letbits=Caml.Int64.bits_of_floattinInt64.(bit_and(shift_right_logicalbitsmantissa_bits)exponent_mask64)|>Caml.Int64.to_int;;letieee_mantissat=letbits=Caml.Int64.bits_of_floattinInt63.of_int64_exnCaml.Int64.(logandbitsmantissa_mask64);;letcreate_ieee_exn~negative~exponent~mantissa=ifInt.(bit_andexponentexponent_mask<>exponent)thenfailwithf"exponent %d out of range [0, %d]"exponentexponent_mask()elseifInt63.(bit_andmantissamantissa_mask<>mantissa)thenfailwithf"mantissa %s out of range [0, %s]"(Int63.to_stringmantissa)(Int63.to_stringmantissa_mask)()else(letsign_bits=ifnegativethenCaml.Int64.min_intelseCaml.Int64.zeroinletexpt_bits=Caml.Int64.shift_left(Caml.Int64.of_intexponent)mantissa_bitsinletmant_bits=Int63.to_int64mantissainletbits=Caml.Int64.(logorsign_bits(logorexpt_bitsmant_bits))inCaml.Int64.float_of_bitsbits);;letcreate_ieee~negative~exponent~mantissa=Or_error.try_with(fun()->create_ieee_exn~negative~exponent~mantissa);;moduleTerse=structtypenonrect=tlett_of_sexp=t_of_sexpletto_stringx=Printf.sprintf"%.8G"xletsexp_of_tx=Sexp.Atom(to_stringx)letof_stringx=of_stringxlett_sexp_grammar=t_sexp_grammarendincludeComparable.With_zero(structincludeTletzero=zeroend)(* These are partly here as a performance hack to avoid some boxing we're getting with
the versions we get from [With_zero]. They also make [Float.is_negative nan] and
[Float.is_non_positive nan] return [false]; the versions we get from [With_zero] return
[true]. *)letis_positivet=t>0.letis_non_negativet=t>=0.letis_negativet=t<0.letis_non_positivet=t<=0.includePretty_printer.Register(structincludeTletmodule_name="Base.Float"letto_string=to_stringend)moduleO=structlet(+)=(+)let(-)=(-)let(*)=(*)let(/)=(/)let(%)=(%)let(~-)=(~-)let(**)=(**)include(Float_replace_polymorphic_compare:Comparisons.Infixwithtypet:=t)letabs=absletneg=negletzero=zeroletof_int=of_intletof_floatx=xendmoduleO_dot=structlet(*.)=(*)let(+.)=(+)let(-.)=(-)let(/.)=(/)let(%.)=(%)let(~-.)=(~-)let(**.)=(**)endmodulePrivate=structletbox=boxletclamp_unchecked=clamp_uncheckedletlower_bound_for_int=lower_bound_for_intletupper_bound_for_int=upper_bound_for_intletspecialized_hash=hash_floatletone_ulp_less_than_half=one_ulp_less_than_halfletint63_round_nearest_portable_alloc_exn=int63_round_nearest_portable_alloc_exnletint63_round_nearest_arch64_noalloc_exn=int63_round_nearest_arch64_noalloc_exnletiround_nearest_exn_64=iround_nearest_exn_64end(* Include type-specific [Replace_polymorphic_compare] at the end, after
including functor application that could shadow its definitions. This is
here so that efficient versions of the comparison functions are exported by
this module. *)includeFloat_replace_polymorphic_compare(* These functions specifically replace defaults in replace_polymorphic_compare.
The desired behavior here is to propagate a nan if either argument is nan. Because the
first comparison will always return false if either argument is nan, it suffices to
check if x is nan. Then, when x is nan or both x and y are nan, we return x = nan; and
when y is nan but not x, we return y = nan.
There are various ways to implement these functions. The benchmark below shows a few
different versions. This benchmark was run over an array of random floats (none of
which are nan).
┌────────────────────────────────────────────────┬──────────┐
│ Name │ Time/Run │
├────────────────────────────────────────────────┼──────────┤
│ if is_nan x then x else if x < y then x else y │ 2.42us │
│ if is_nan x || x < y then x else y │ 2.02us │
│ if x < y || is_nan x then x else y │ 1.88us │
└────────────────────────────────────────────────┴──────────┘
The benchmark below was run when x > y is always true (again, no nan values).
┌────────────────────────────────────────────────┬──────────┐
│ Name │ Time/Run │
├────────────────────────────────────────────────┼──────────┤
│ if is_nan x then x else if x < y then x else y │ 2.83us │
│ if is_nan x || x < y then x else y │ 1.97us │
│ if x < y || is_nan x then x else y │ 1.56us │
└────────────────────────────────────────────────┴──────────┘
*)letmin(x:t)y=ifx<y||is_nanxthenxelseyletmax(x:t)y=ifx>y||is_nanxthenxelsey