package melange
Toolchain to produce JS from Reason/OCaml
Install
Dune Dependency
Authors
Maintainers
Sources
melange-2.1.0.tbz
sha256=6112ecfe8185871b57ed570e0809b8101d7482b18b19e2d9200c90275dc6a32c
sha512=ee3a48e7d066bc8dd9a6a0c05a6e2e1ef117d4e7e545a710860d5f017d96c13acc39f6c26274b895ee8d23c324ccfb9292476c22059ac6f90b7393dea9f30db2
doc/src/melange_ppx/utf8_string.ml.html
Source file utf8_string.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576
(* Copyright (C) 2015-2016 Bloomberg Finance L.P.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * In addition to the permissions granted to you by the LGPL, you may combine
 * or link a "work that uses the Library" with a publicly distributed version
 * of this file to produce a combined library or application, then distribute
 * that combined work under the terms of your choosing, with no requirement
 * to comply with the obligations normally placed on you by section 4 of the
 * LGPL version 3 (or the corresponding section of a later version of the LGPL
 * should you choose to use a later version).
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *)

open Ppxlib

(* [valid_hex x] is [true] exactly when [x] is an ASCII hexadecimal digit
   (0-9, a-f or A-F). Shared by the \x and \u escape validators of both
   modules below. *)
let valid_hex x =
  match x with '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true | _ -> false

(* [merge_loc l r] combines two locations: a ghost location is discarded in
   favour of the other one; otherwise the result starts where [l] starts,
   ends where [r] ends, and is not ghost. *)
let merge_loc (l : location) (r : location) =
  if l.loc_ghost then r
  else if r.loc_ghost then l
  else
    match (l, r) with
    | { loc_start; _ }, { loc_end; _ } (* TODO: improve*) ->
        { loc_start; loc_end; loc_ghost = false }

(* Validation and escaping of a plain (non-interpolated) string body.
   The scanner copies the input into a buffer while escaping double quotes,
   line feeds and carriage returns, and while validating backslash escapes
   and multi-byte sequences. Malformed input raises [Error (offset, err)]. *)
module Utf8_string = struct
  (* Reasons the scanner can reject its input. *)
  type error =
    | Invalid_code_point
    | Unterminated_backslash
    | Invalid_hex_escape
    | Invalid_unicode_escape

  (* [pp_error fmt err] prints a fixed human-readable message for [err]. *)
  let pp_error fmt err =
    Format.pp_print_string fmt
    @@
    match err with
    | Invalid_code_point -> "Invalid code point"
    | Unterminated_backslash -> "\\ ended unexpectedly"
    | Invalid_hex_escape -> "Invalid \\x escape"
    | Invalid_unicode_escape -> "Invalid \\u escape"

  (* Raised by the scanner; the [int] is the [loc] counter at the point of
     failure. *)
  type exn += Error of int (* offset *) * error

  (* [error ~loc error] never returns: it raises [Error (loc, error)]. *)
  let error ~loc error = raise (Error (loc, error))

  (* let error ~loc ~pos error = *)
  (* [%expr *)
  (* [%ocaml.error *)
  (* [%e *)
  (* Ast_helper.Exp.constant *)
  (* (Pconst_string (Format.asprintf "%a" pp_error error, loc, None))]]] *)

  (* Note the [loc] really should be the utf8-offset, it has nothing to do
     with our escaping mechanism *)
  (* we can not just print new line in ES5
     seems we don't need
     escape "\b" "\f"
     we need escape "\n" "\r" since
     ocaml multiple-line allows [\n]
     visual input while es5 string
     does not*)

  (* [check_and_transform loc buf s byte_offset s_len] scans [s] from byte
     index [byte_offset] up to [s_len], appending the escaped output to [buf].
     [loc] is a running counter used only for error reporting: it advances by
     one per single-byte character and by one per whole multi-byte sequence,
     whereas [byte_offset] always advances in bytes.
     Raises [Error] on an invalid byte or a malformed escape. *)
  let rec check_and_transform (loc : int) (buf : Buffer.t) (s : string)
      (byte_offset : int) (s_len : int) =
    if byte_offset = s_len then ()
    else
      let current_char = s.[byte_offset] in
      match Ast_utf8_string.classify current_char with
      | Single 92 (* '\\' *) ->
          (* Backslash: the escape handler emits it after checking that a
             following character exists. *)
          escape_code (loc + 1) buf s (byte_offset + 1) s_len
      | Single 34 ->
          (* Double quote: emitted as backslash + quote. *)
          Buffer.add_string buf "\\\"";
          check_and_transform (loc + 1) buf s (byte_offset + 1) s_len
      | Single 10 ->
          (* Line feed: emitted as the two characters backslash, n. *)
          Buffer.add_string buf "\\n";
          check_and_transform (loc + 1) buf s (byte_offset + 1) s_len
      | Single 13 ->
          (* Carriage return: emitted as the two characters backslash, r. *)
          Buffer.add_string buf "\\r";
          check_and_transform (loc + 1) buf s (byte_offset + 1) s_len
      | Single _ ->
          (* Any other single-byte character is copied unchanged. *)
          Buffer.add_char buf current_char;
          check_and_transform (loc + 1) buf s (byte_offset + 1) s_len
      | Invalid | Cont _ -> error ~loc Invalid_code_point
      | Leading (n, _) ->
          (* [Ast_utf8_string.next] is defined outside this file; here a
             negative result is treated as an invalid sequence and any other
             result [i'] as the index of the last byte of the sequence
             (bytes [byte_offset .. i'] are copied verbatim). *)
          let i' = Ast_utf8_string.next s ~remaining:n byte_offset in
          if i' < 0 then error ~loc Invalid_code_point
          else (
            for k = byte_offset to i' do
              Buffer.add_char buf s.[k]
            done;
            check_and_transform (loc + 1) buf s (i' + 1) s_len)

  (* we share the same escape sequence with js *)
  (* [escape_code loc buf s offset s_len] handles the character following a
     backslash, located at byte index [offset]. The backslash and the escape
     character are both copied to [buf]; [u] and [x] additionally hand over
     to the hex-digit validators. *)
  and escape_code loc buf s offset s_len =
    (* The conditional ends at the semicolon: when it does not raise, the
       backslash is emitted and execution continues with the code below. *)
    if offset >= s_len then error ~loc Unterminated_backslash
    else Buffer.add_char buf '\\';
    let cur_char = s.[offset] in
    match cur_char with
    | '\\' | 'b' | 't' | 'n' | 'v' | 'f' | 'r' | '0' | '$' ->
        Buffer.add_char buf cur_char;
        check_and_transform (loc + 1) buf s (offset + 1) s_len
    | 'u' ->
        Buffer.add_char buf cur_char;
        unicode (loc + 1) buf s (offset + 1) s_len
    | 'x' ->
        Buffer.add_char buf cur_char;
        two_hex (loc + 1) buf s (offset + 1) s_len
    | _ ->
        (* Regular characters, like `a` in `\a`,
         * are valid escape sequences *)
        Buffer.add_char buf cur_char;
        check_and_transform (loc + 1) buf s (offset + 1) s_len

  (* [two_hex loc buf s offset s_len] validates the two characters after a
     backslash-x: both must exist and be hexadecimal digits, in which case
     they are copied and scanning resumes after them. *)
  and two_hex loc buf s offset s_len =
    if offset + 1 >= s_len then error ~loc Invalid_hex_escape;
    (*Location.raise_errorf ~loc "\\x need at least two chars";*)
    let a, b = (s.[offset], s.[offset + 1]) in
    if valid_hex a && valid_hex b then (
      Buffer.add_char buf a;
      Buffer.add_char buf b;
      check_and_transform (loc + 2) buf s (offset + 2) s_len)
    else error ~loc Invalid_hex_escape
  (*Location.raise_errorf ~loc "%c%c is not a valid hex code" a b*)

  (* [unicode loc buf s offset s_len] validates the four characters after a
     backslash-u: all four must exist and be hexadecimal digits, in which
     case they are copied and scanning resumes after them. *)
  and unicode loc buf s offset s_len =
    if offset + 3 >= s_len then error ~loc Invalid_unicode_escape
    (*Location.raise_errorf ~loc "\\u need at least four chars"*);
    let a0, a1, a2, a3 =
      (s.[offset], s.[offset + 1], s.[offset + 2], s.[offset + 3])
    in
    if valid_hex a0 && valid_hex a1 && valid_hex a2 && valid_hex a3 then (
      Buffer.add_char buf a0;
      Buffer.add_char buf a1;
      Buffer.add_char buf a2;
      Buffer.add_char buf a3;
      check_and_transform (loc + 4) buf s (offset + 4) s_len)
    else error ~loc Invalid_unicode_escape
  (*Location.raise_errorf ~loc "%c%c%c%c is not a valid unicode point"
    a0 a1 a2 a3 *)

  (* http://www.2ality.com/2015/01/es6-strings.html
     console.log('\uD83D\uDE80'); (* ES6*)
     console.log('\u{1F680}');
  *)

  (* [transform_test s] returns the escaped form of [s], or raises [Error].
     Its body is the same as [transform] below. *)
  let transform_test s =
    let s_len = String.length s in
    let buf = Buffer.create (s_len * 2) in
    check_and_transform 0 buf s 0 s_len;
    Buffer.contents buf

  (* [transform s] returns the escaped form of [s], or raises [Error].
     The buffer is pre-sized to twice the input length as an initial
     capacity only; [Buffer] grows on demand. *)
  let transform s =
    let s_len = String.length s in
    let buf = Buffer.create (s_len * 2) in
    check_and_transform 0 buf s 0 s_len;
    Buffer.contents buf
end

(* Interpolated strings: the body is split into literal segments and
   variable segments (introduced by a dollar sign, either bare or wrapped in
   parentheses), and then rebuilt as an expression concatenating them.
   Malformed input raises [Error (start, finish, err)]. *)
module Interp = struct
  (* Reasons the interpolating scanner can reject its input. *)
  type error =
    | Invalid_code_point
    | Unterminated_backslash
    | Invalid_escape_code of char
    | Invalid_hex_escape
    | Invalid_unicode_escape
    | Unterminated_variable
    | Unmatched_paren
    | Invalid_syntax_of_var of string

  type kind = String | Var of int * int
  (* [Var (loffset, roffset)]
     For parens it used to be (2,-1)
     for non-parens it used to be (1,0)
  *)

  (* Note the position is about code point *)
  (* A position inside the string body:
     - [lnum]: number of line feeds seen before this point (starts at 0);
     - [offset]: distance from the beginning of the current line, measured
       with the scanner's [loc] counter;
     - [byte_bol]: byte index, within the body, of the beginning of the
       current line. *)
  type pos = {
    lnum : int;
    offset : int;
    byte_bol : int;
        (* Note it actually needs to be in sync with OCaml's lexing semantics *)
  }

  (* One piece of the interpolated string, with its extent and raw text. *)
  type segment = { start : pos; finish : pos; kind : kind; content : string }
  type segments = segment list

  (* Mutable scanner state. [buf] accumulates the text of the segment being
     read; [segments] collects finished segments, most recent first. *)
  type cxt = {
    mutable segment_start : pos;
    buf : Buffer.t;
    s_len : int;
    mutable segments : segments;
    mutable pos_bol : int;
        (* record the abs position of current beginning line *)
    mutable byte_bol : int;
    mutable pos_lnum : int; (* record the line number *)
  }

  (* Raised by the scanner with the start of the current segment, the
     position of the failure, and the reason. *)
  type exn += Error of pos * pos * error

  (* [pp_error fmt err] prints a human-readable message for [err]. *)
  let pp_error fmt err =
    Format.pp_print_string fmt
    @@
    match err with
    | Invalid_code_point -> "Invalid code point"
    | Unterminated_backslash -> "\\ ended unexpectedly"
    | Invalid_escape_code c -> "Invalid escape code: " ^ String.make 1 c
    | Invalid_hex_escape -> "Invalid \\x escape"
    | Invalid_unicode_escape -> "Invalid \\u escape"
    | Unterminated_variable -> "$ unterminated"
    | Unmatched_paren -> "Unmatched paren"
    | Invalid_syntax_of_var s ->
        "`" ^ s ^ "' is not a valid syntax of interpolated identifer"

  (* First character of an interpolated identifier: lowercase ASCII letter
     or underscore. *)
  let valid_lead_identifier_char x =
    match x with 'a' .. 'z' | '_' -> true | _ -> false

  (* Subsequent characters: ASCII letters, digits, underscore or
     apostrophe. *)
  let valid_identifier_char x =
    match x with
    | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' | '\'' -> true
    | _ -> false

  (* Invariant: [valid_lead_identifier] has to be [valid_identifier] *)
  (* [valid_identifier s] is [true] when [s] is non-empty, starts with a
     valid lead character and continues with valid identifier characters
     only. [for_all_from s start p] checks [p] on every character of [s]
     from index [start] to the end; it is vacuously true when [start] is
     past the last index. *)
  let valid_identifier =
    let for_all_from =
      let rec unsafe_for_all_range s ~start ~finish p =
        start > finish
        || p (String.unsafe_get s start)
           && unsafe_for_all_range s ~start:(start + 1) ~finish p
      in
      fun s start p ->
        let len = String.length s in
        if start < 0 then invalid_arg "for_all_from"
        else unsafe_for_all_range s ~start ~finish:(len - 1) p
    in
    fun s ->
      let s_len = String.length s in
      if s_len = 0 then false
      else
        valid_lead_identifier_char s.[0]
        && for_all_from s 1 valid_identifier_char

  (* let is_space x =
     match x with
     | ' ' | '\n' | '\t' -> true
     | _ -> false *)

  (* FIXME: multiple line offset
     if there is no line offset. Note {|{j||} border will never trigger a new
     line *)
  (* [update_position border p pos] shifts the lexing position [pos] (the
     start of the enclosing literal) to the place described by the
     body-relative position [p]. [border] is the width of the opening
     delimiter that precedes the body. *)
  let update_position border ({ lnum; offset; byte_bol } : pos)
      (pos : Lexing.position) =
    if lnum = 0 then { pos with pos_cnum = pos.pos_cnum + border + offset }
      (* When no newline, the column number is [border + offset] *)
    else
      {
        pos with
        pos_lnum = pos.pos_lnum + lnum;
        pos_bol = pos.pos_cnum + border + byte_bol;
        pos_cnum = pos.pos_cnum + border + byte_bol + offset;
        (* when newline, the column number is [offset] *)
      }

  (* [update border start finish loc] narrows [loc] to the sub-range
     [start .. finish]; both ends are computed relative to [loc.loc_start]. *)
  let update border (start : pos) (finish : pos) (loc : Location.t) :
      Location.t =
    let start_pos = loc.loc_start in
    {
      loc with
      loc_start = update_position border start start_pos;
      loc_end = update_position border finish start_pos;
    }

  (* Records that a new line begins at counter value [loc] and byte index
     [byte_bol]. *)
  let update_newline ~byte_bol loc cxt =
    cxt.pos_lnum <- cxt.pos_lnum + 1;
    cxt.pos_bol <- loc;
    cxt.byte_bol <- byte_bol

  (* [pos_error cxt ~loc error] never returns: it raises [Error] spanning
     from the start of the current segment to the position derived from
     [loc] and the current line bookkeeping. *)
  let pos_error cxt ~loc error =
    raise
      (Error
         ( cxt.segment_start,
           {
             lnum = cxt.pos_lnum;
             offset = loc - cxt.pos_bol;
             byte_bol = cxt.byte_bol;
           },
           error ))

  (* [add_var_segment cxt loc loffset roffset] takes the buffered text as a
     variable name, clears the buffer, and either pushes a [Var] segment
     ending at [loc] (moving [segment_start] there) or raises
     [Invalid_syntax_of_var] when the name is not a valid identifier. *)
  let add_var_segment cxt loc loffset roffset =
    let content = Buffer.contents cxt.buf in
    Buffer.clear cxt.buf;
    let next_loc =
      {
        lnum = cxt.pos_lnum;
        offset = loc - cxt.pos_bol;
        byte_bol = cxt.byte_bol;
      }
    in
    if valid_identifier content then (
      cxt.segments <-
        {
          start = cxt.segment_start;
          finish = next_loc;
          kind = Var (loffset, roffset);
          content;
        }
        :: cxt.segments;
      cxt.segment_start <- next_loc)
    else pos_error cxt ~loc (Invalid_syntax_of_var content)

  (* [add_str_segment cxt loc] pushes the buffered text (possibly empty) as
     a [String] segment ending at [loc], clears the buffer and moves
     [segment_start] to [loc]. *)
  let add_str_segment cxt loc =
    let content = Buffer.contents cxt.buf in
    Buffer.clear cxt.buf;
    let next_loc =
      {
        lnum = cxt.pos_lnum;
        offset = loc - cxt.pos_bol;
        byte_bol = cxt.byte_bol;
      }
    in
    cxt.segments <-
      { start = cxt.segment_start; finish = next_loc; kind = String; content }
      :: cxt.segments;
    cxt.segment_start <- next_loc

  (* [check_and_transform loc s byte_offset cxt] scans [s] from byte index
     [byte_offset], escaping literal text into [cxt.buf] and cutting a new
     segment at every dollar sign and at the end of input. [loc] counts
     characters as in [Utf8_string.check_and_transform]; line feeds also
     update the line bookkeeping in [cxt]. Raises [Error] on malformed
     input. *)
  let rec check_and_transform (loc : int) s byte_offset
      ({ s_len; buf; _ } as cxt : cxt) =
    if byte_offset = s_len then add_str_segment cxt loc
    else
      let current_char = s.[byte_offset] in
      match Ast_utf8_string.classify current_char with
      | Single 92 (* '\\' *) -> escape_code (loc + 1) s (byte_offset + 1) cxt
      | Single 34 ->
          Buffer.add_string buf "\\\"";
          check_and_transform (loc + 1) s (byte_offset + 1) cxt
      | Single 10 ->
          Buffer.add_string buf "\\n";
          let loc = loc + 1 in
          let byte_offset = byte_offset + 1 in
          (* The new line starts right after the line feed. *)
          update_newline ~byte_bol:byte_offset loc cxt;
          (* Note variable could not have new-line *)
          check_and_transform loc s byte_offset cxt
      | Single 13 ->
          Buffer.add_string buf "\\r";
          check_and_transform (loc + 1) s (byte_offset + 1) cxt
      | Single 36 ->
          (* $ *)
          (* Close the literal text read so far, then read a variable:
             parenthesised when an opening paren follows, bare otherwise. *)
          add_str_segment cxt loc;
          let offset = byte_offset + 1 in
          if offset >= s_len then pos_error ~loc cxt Unterminated_variable
          else
            let cur_char = s.[offset] in
            if cur_char = '(' then expect_var_paren (loc + 2) s (offset + 1) cxt
            else expect_simple_var (loc + 1) s offset cxt
      | Single _ ->
          Buffer.add_char buf current_char;
          check_and_transform (loc + 1) s (byte_offset + 1) cxt
      | Invalid | Cont _ -> pos_error ~loc cxt Invalid_code_point
      | Leading (n, _) ->
          (* Same treatment of [Ast_utf8_string.next] as in [Utf8_string]:
             negative means invalid, otherwise bytes [byte_offset .. i'] are
             copied verbatim. *)
          let i' = Ast_utf8_string.next s ~remaining:n byte_offset in
          if i' < 0 then pos_error cxt ~loc Invalid_code_point
          else (
            for k = byte_offset to i' do
              Buffer.add_char buf s.[k]
            done;
            check_and_transform (loc + 1) s (i' + 1) cxt)

  (*Lets keep identifier simple, so that we could generating a function
    easier in the future for example
    let f = [%fn{| $x + $y = $x_add_y |}]
  *)
  (* [expect_simple_var loc s offset cxt] reads a bare variable name starting
     at byte index [offset]: the first character must be a valid lead
     character, then the longest run of identifier characters is taken. *)
  and expect_simple_var loc s offset ({ buf; s_len; _ } as cxt) =
    let v = ref offset in
    if not (offset < s_len && valid_lead_identifier_char s.[offset]) then
      pos_error cxt ~loc (Invalid_syntax_of_var String.empty)
    else (
      while !v < s_len && valid_identifier_char s.[!v] do
        (* TODO*)
        let cur_char = s.[!v] in
        Buffer.add_char buf cur_char;
        incr v
      done;
      let added_length = !v - offset in
      let loc = added_length + loc in
      add_var_segment cxt loc 1 0;
      check_and_transform loc s (added_length + offset) cxt)

  (* [expect_var_paren loc s offset cxt] reads a parenthesised variable name:
     everything from [offset] up to the next closing paren is buffered, and
     the name is validated by [add_var_segment]. Raises [Unmatched_paren]
     when the input ends before a closing paren is found. *)
  and expect_var_paren loc s offset ({ buf; s_len; _ } as cxt) =
    let v = ref offset in
    while !v < s_len && s.[!v] <> ')' do
      let cur_char = s.[!v] in
      Buffer.add_char buf cur_char;
      incr v
    done;
    let added_length = !v - offset in
    (* The extra 1 accounts for the closing paren. *)
    let loc = added_length + 1 + loc in
    if !v < s_len && s.[!v] = ')' then (
      add_var_segment cxt loc 2 (-1);
      check_and_transform loc s (added_length + 1 + offset) cxt)
    else pos_error cxt ~loc Unmatched_paren

  (* we share the same escape sequence with js *)
  (* [escape_code loc s offset cxt] handles the character following a
     backslash. Unlike [Utf8_string.escape_code], a character outside the
     listed set is rejected with [Invalid_escape_code]. *)
  and escape_code loc s offset ({ buf; s_len; _ } as cxt) =
    (* The conditional ends at the semicolon: when it does not raise, the
       backslash is emitted and execution continues with the code below. *)
    if offset >= s_len then pos_error cxt ~loc Unterminated_backslash
    else Buffer.add_char buf '\\';
    let cur_char = s.[offset] in
    match cur_char with
    | '\\' | 'b' | 't' | 'n' | 'v' | 'f' | 'r' | '0' | '$' ->
        Buffer.add_char buf cur_char;
        check_and_transform (loc + 1) s (offset + 1) cxt
    | 'u' ->
        Buffer.add_char buf cur_char;
        unicode (loc + 1) s (offset + 1) cxt
    | 'x' ->
        Buffer.add_char buf cur_char;
        two_hex (loc + 1) s (offset + 1) cxt
    | _ -> pos_error cxt ~loc (Invalid_escape_code cur_char)

  (* Validates and copies the two hexadecimal digits after a backslash-x. *)
  and two_hex loc s offset ({ buf; s_len; _ } as cxt) =
    if offset + 1 >= s_len then pos_error cxt ~loc Invalid_hex_escape;
    let a, b = (s.[offset], s.[offset + 1]) in
    if valid_hex a && valid_hex b then (
      Buffer.add_char buf a;
      Buffer.add_char buf b;
      check_and_transform (loc + 2) s (offset + 2) cxt)
    else pos_error cxt ~loc Invalid_hex_escape

  (* Validates and copies the four hexadecimal digits after a backslash-u. *)
  and unicode loc s offset ({ buf; s_len; _ } as cxt) =
    if offset + 3 >= s_len then pos_error cxt ~loc Invalid_unicode_escape;
    let a0, a1, a2, a3 =
      (s.[offset], s.[offset + 1], s.[offset + 2], s.[offset + 3])
    in
    if valid_hex a0 && valid_hex a1 && valid_hex a2 && valid_hex a3 then (
      Buffer.add_char buf a0;
      Buffer.add_char buf a1;
      Buffer.add_char buf a2;
      Buffer.add_char buf a3;
      check_and_transform (loc + 4) s (offset + 4) cxt)
    else pos_error cxt ~loc Invalid_unicode_escape

  (* TODO: test empty var $() $ failure,
     Allow identifers x.A.y *)

  open Ast_helper

  (* Longident.parse "Pervasives.^" *)
  (* Identifier of the string concatenation operator used to join
     segments. *)
  let concat_ident : Longident.t = Ldot (Lident "Stdlib", "^")
  (* FIXME: remove deps on `Pervasives` *)

  (* JS string concatMany *)
  (* Ldot (Ldot (Lident "Js", "String2"), "concat") *)

  (* Longident.parse "Js.String.make" *)
  (* Identifier applied to a variable when it is the only remaining segment
     (see [handle_segments]). *)
  let to_string_ident : Longident.t =
    Ldot (Ldot (Lident "Js", "String2"), "make")

  let escaped_j_delimiter = "*j" (* not user level syntax allowed *)
  let unescaped_j_delimiter = "j"
  let unescaped_js_delimiter = "js"

  (* Delimiter attached to every string constant produced by this module. *)
  let escaped = Some escaped_j_delimiter

  (* Width of the opening delimiter of a j-quoted string (three
     characters), used to shift body-relative positions. *)
  let border = String.length "{j|"

  (* [aux loc segment ~to_string_ident] turns one segment into an
     expression: a [String] segment becomes a string constant located on its
     own sub-range; a [Var] segment becomes the application of
     [to_string_ident] to the variable, with its location adjusted by the
     segment's two offsets. [to_string_ident] is unused for [String]
     segments. *)
  let aux loc (segment : segment) ~to_string_ident : Parsetree.expression =
    match segment with
    | { start; finish; kind; content } -> (
        match kind with
        | String ->
            let loc = update border start finish loc in
            Exp.constant (Pconst_string (content, loc, escaped))
        | Var (soffset, foffset) ->
            let loc =
              {
                loc with
                loc_start =
                  update_position (soffset + border) start loc.loc_start;
                loc_end =
                  update_position (foffset + border) finish loc.loc_start;
              }
            in
            [%expr
              [%e Exp.ident ~loc { loc; txt = to_string_ident }]
                [%e Exp.ident ~loc { loc; txt = Lident content }]])

  (* [concat_exp a_loc x ~lhs] builds the application of [concat_ident] to
     [lhs] and to the expression for segment [x]. Variables on the right of
     a concatenation are wrapped with [Obj.magic] instead of
     [to_string_ident]. *)
  let concat_exp a_loc x ~(lhs : Parsetree.expression) : Parsetree.expression
      =
    let loc = merge_loc a_loc lhs.pexp_loc in
    [%expr
      [%e Exp.ident { txt = concat_ident; loc }]
        [%e lhs]
        [%e aux loc x ~to_string_ident:(Longident.Ldot (Lident "Obj", "magic"))]]

  (* Invariant: the [lhs] is always of type string *)
  (* [handle_segments loc rev_segments] folds the segments, given most
     recent first, into a left-nested concatenation. An empty list yields
     the empty string constant. Segments with empty content are skipped,
     except when such a segment is the only element left, because the
     singleton case is matched first. *)
  let rec handle_segments loc (rev_segments : segment list) =
    match rev_segments with
    | [] -> Exp.constant (Pconst_string ("", loc, escaped))
    | [ segment ] -> aux loc segment ~to_string_ident (* string literal *)
    | { content = ""; _ } :: rest -> handle_segments loc rest
    | a :: rest -> concat_exp loc a ~lhs:(handle_segments loc rest)

  (* [transform_interp loc s] scans [s] with a fresh context and returns the
     expression built from its segments. Raises [Error] (from this module)
     on malformed input. *)
  let transform_interp loc s =
    let s_len = String.length s in
    let buf = Buffer.create (s_len * 2) in
    let cxt : cxt =
      {
        segment_start = { lnum = 0; offset = 0; byte_bol = 0 };
        buf;
        s_len;
        segments = [];
        pos_lnum = 0;
        byte_bol = 0;
        pos_bol = 0;
      }
    in
    check_and_transform 0 s 0 cxt;
    handle_segments loc cxt.segments

  (* [transform_test s] scans [s] with a fresh context and returns the
     segments in source order (the accumulated list is reversed). *)
  let transform_test s =
    let s_len = String.length s in
    let buf = Buffer.create (s_len * 2) in
    let cxt =
      {
        segment_start = { lnum = 0; offset = 0; byte_bol = 0 };
        buf;
        s_len;
        segments = [];
        pos_lnum = 0;
        byte_bol = 0;
        pos_bol = 0;
      }
    in
    check_and_transform 0 s 0 cxt;
    List.rev cxt.segments

  (* [transform e s loc delim] dispatches on the delimiter of the quoted
     string whose body is [s]: the js delimiter only escapes the body and
     replaces the constant in [e]; the j delimiter performs interpolation;
     any other delimiter returns [e] unchanged. May raise either module's
     [Error]. *)
  let transform (e : Parsetree.expression) s loc delim : Parsetree.expression
      =
    if String.equal delim unescaped_js_delimiter then
      {
        e with
        pexp_desc =
          Pexp_constant (Pconst_string (Utf8_string.transform s, loc, escaped));
      }
    else if String.equal delim unescaped_j_delimiter then
      transform_interp e.pexp_loc s
    else e

  (* Shadows the definition above: same result on success, but both kinds of
     scanner error are caught and turned into an [ocaml.error] extension
     node carrying the formatted message instead of propagating as
     exceptions. For interpolation errors the location is first narrowed to
     the offending range. *)
  let transform (e : Parsetree.expression) s loc delim : Parsetree.expression
      =
    try transform e s loc delim with
    | Utf8_string.Error (offset, error) ->
        [%expr
          [%ocaml.error
            [%e
              Exp.constant
                (Pconst_string
                   ( Format.asprintf "Offset: %d, %a" offset
                       Utf8_string.pp_error error,
                     loc,
                     None ))]]]
    | Error (start, pos, error) ->
        let loc = update border start pos loc in
        [%expr
          [%ocaml.error
            [%e
              Exp.constant
                (Pconst_string (Format.asprintf "%a" pp_error error, loc, None))]]]
end
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>