package metadata

  1. Overview
  2. Docs

Source file metadataCharEncoding.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
(** Signature shared by character-encoding converters. *)
module type T = sig
  (** [convert ?source s] converts the string [s], whose encoding is named by
      the optional [source] tag, and returns the converted string.

      The signature itself fixes neither the default for [source] nor the
      output encoding. The [Naive] implementation in this file treats a
      missing [source] as [`UTF_8] and emits UTF-8 when given UTF-16 input;
      presumably every implementation is expected to produce UTF-8 (to be
      confirmed against callers). *)
  val convert :
    ?source:[ `ISO_8859_1 | `UTF_8 | `UTF_16 | `UTF_16LE | `UTF_16BE ] ->
    string ->
    string
end

(** Standard-library-only implementation of {!T}.

    UTF-16 input is decoded and re-encoded as UTF-8; [`UTF_8] and
    [`ISO_8859_1] input is returned unchanged. *)
module Naive : T = struct
  (** [convert ?source s] returns [s] re-encoded as UTF-8 when [source] is one
      of the UTF-16 variants, and returns [s] untouched otherwise. [source]
      defaults to [`UTF_8].

      For UTF-16 input:
      - a byte order mark (the byte pair [FE FF] or [FF FE]) is dropped
        whenever the decoder reaches one;
      - with [`UTF_16] the mark also selects the byte order of the code units
        that follow (big-endian until a mark says otherwise); with
        [`UTF_16LE] and [`UTF_16BE] the requested byte order always wins;
      - malformed or truncated sequences are replaced by U+FFFD instead of
        raising. *)
  let convert ?source s =
    match Option.value source ~default:`UTF_8 with
      | `UTF_8 -> s
      (* NOTE(review): Latin-1 input is passed through as-is, not transcoded;
         this preserves the historical behaviour of the catch-all case. *)
      | `ISO_8859_1 -> s
      | (`UTF_16 | `UTF_16LE | `UTF_16BE) as source ->
          let len = String.length s in
          let buf = Buffer.create len in
          let decoder = function
            | `LE -> String.get_utf_16le_uchar
            | `BE -> String.get_utf_16be_uchar
          in
          (* Byte order announced by a BOM at [pos], if there is one. Both
             bytes of the pair are inspected: [pos] and [pos + 1]. *)
          let bom_at pos =
            if pos + 2 > len then None
            else (
              match (s.[pos], s.[pos + 1]) with
                | '\xfe', '\xff' -> Some `BE
                | '\xff', '\xfe' -> Some `LE
                | _ -> None)
          in
          (* The current byte order is threaded through the loop so that a BOM
             takes effect for every code unit decoded after it. *)
          let rec loop endianness pos =
            if pos >= len then Buffer.contents buf
            else (
              match bom_at pos with
                | Some detected ->
                    let endianness =
                      match source with
                        | `UTF_16 -> detected
                        (* An explicit byte order is never overridden. *)
                        | `UTF_16LE | `UTF_16BE -> endianness
                    in
                    loop endianness (pos + 2)
                | None ->
                    let d = decoder endianness s pos in
                    Buffer.add_utf_8_uchar buf (Uchar.utf_decode_uchar d);
                    (* The decode length is always >= 1 and never runs past the
                       end of [s], so the loop terminates. *)
                    loop endianness (pos + Uchar.utf_decode_length d))
          in
          let initial =
            match source with `UTF_16LE -> `LE | `UTF_16 | `UTF_16BE -> `BE
          in
          loop initial 0
end
OCaml

Innovation. Community. Security.