package calculon

  1. Overview
  2. Docs

Source file irclog.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109

(** {1 Small Parser for IRC Logs} *)

(** Continuation-style iterator: a function that takes a consumer
    callback of type ['a -> unit] and returns [unit]. The producers in
    this file call the callback once per element. *)
type 'a sequence = ('a -> unit) -> unit

(** One parsed chat line, as produced by [parse_record]. *)
type log_record = {
  author: string; (* second regex group, trimmed, with one leading + or @ removed *)
  time: string;   (* first regex group, trimmed; kept as raw text, not parsed *)
  msg: string;    (* third regex group, taken verbatim *)
}

(** [string_of_record r] renders [r] on a single line, listing its
    author, time and msg fields in that order. Intended for debugging
    output. *)
let string_of_record r =
  let {author; time; msg} = r in
  Printf.sprintf "{author=%s, time=%s, msg=%s}" author time msg

(** [pp_record out r] prints [r] on the formatter [out], using the same
    layout as [string_of_record]. Usable with the [%a] directive. *)
let pp_record out r =
  let {author; time; msg} = r in
  Format.fprintf out "{author=%s, time=%s, msg=%s}" author time msg

(** Compiled regex for irssi log lines such as [12:34 <@nick> hello].
    - group 1: the timestamp, a possibly empty run of digits, colons
      and spaces;
    - group 2: the text between the angle brackets (mode prefix + nick);
    - group 3: the rest of the line after the closing bracket and one
      space.

    The timestamp class includes the space character because irssi
    separates the timestamp from the opening bracket with a space.
    Without it the unanchored search can only start the match at the
    bracket itself, so group 1 is always empty. Surrounding blanks are
    removed by [parse_record], which trims this group. *)
let re_irssi = Re.Posix.re "([0-9: ]*)<([^>]*)> (.*)" |> Re.compile

(** Compiled regex for weechat log lines, which are made of three
    tab-separated parts: timestamp, author column, message.
    - group 1: a possibly empty run of digits, spaces and colons;
    - group 2: the author column, any run of characters other than a
      closing angle bracket;
    - group 3: the rest of the line.

    NOTE(review): the timestamp class has no dash, so a date written
    with dashes is only captured from its last dash onwards; and the
    author class does not exclude tabs. Confirm both against real logs
    before relying on the exact field contents. *)
let re_weechat =
  Re.compile (Re.Posix.re "([0-9 :]*)\t([^>]*)\t(.*)")

(** Supported log file formats. Each one selects a line regex through
    [re_of_fmt]. *)
type fmt =
  | Irssi   (* lines matched by [re_irssi]; spelled irssi in [fmt_of_string] *)
  | Weechat (* lines matched by [re_weechat]; spelled weechat in [fmt_of_string] *)

(** [re_of_fmt fmt] is the compiled line regex used to parse logs
    written in format [fmt]. *)
let re_of_fmt fmt =
  match fmt with
  | Weechat -> re_weechat
  | Irssi -> re_irssi

(** [fmt_of_string s] parses the lowercase name of a log format.
    @raise Invalid_argument if [s] is neither [irssi] nor [weechat]. *)
let fmt_of_string s =
  match s with
  | "weechat" -> Weechat
  | "irssi" -> Irssi
  | other -> invalid_arg ("unknown Irclog.fmt: " ^ other)

(** [string_of_fmt fmt] is the lowercase name of [fmt], the inverse of
    [fmt_of_string]. *)
let string_of_fmt fmt =
  match fmt with
  | Weechat -> "weechat"
  | Irssi -> "irssi"

(** Names of all supported formats, in the order Irssi then Weechat. *)
let fmt_l = [string_of_fmt Irssi; string_of_fmt Weechat]

(* [seq_lines_ ic yield] reads [ic] line by line and passes each line
   to [yield], stopping at end of file. The channel is not closed here.
   Only the read itself is guarded against [End_of_file]; an exception
   raised by [yield] propagates to the caller. *)
let rec seq_lines_ ic yield =
  match input_line ic with
  | line ->
    yield line;
    seq_lines_ ic yield
  | exception End_of_file -> ()

(** [norm_author s] drops a single leading [+] or [@] from [s], if
    present. Any other string, including the empty one, is returned
    unchanged. Only the first character is examined. *)
let norm_author s =
  let len = String.length s in
  if len > 0 && (s.[0] = '+' || s.[0] = '@')
  then String.sub s 1 (len - 1)
  else s

(** [parse_record fmt s] tries to parse the single log line [s] written
    in format [fmt].

    Returns [None] when the format's regex does not match [s], or when
    the line is a weechat line whose author column is one of the
    markers [--], [<--] or [-->]. Otherwise returns the record built
    from the three regex groups: time and author are trimmed, the
    author additionally goes through [norm_author], and the message is
    kept as is. *)
let parse_record fmt s =
  match Re.exec_opt (re_of_fmt fmt) s with
  | None -> None
  | Some groups ->
    let field i = Re.Group.get groups i in
    let time = String.trim (field 1) in
    let author = norm_author (String.trim (field 2)) in
    let msg = field 3 in
    (* weechat writes service lines (join/part and the like) with an
       arrow or dashes in the author column; those carry no message *)
    (match fmt, author with
     | Weechat, ("--" | "<--" | "-->") -> None
     | (Weechat | Irssi), _ -> Some {author; time; msg})

(* [seq_record_ fmt ic yield] reads every line of [ic], parses it with
   [parse_record fmt], and passes each successfully parsed record to
   [yield]. Lines that do not parse are silently skipped. *)
let seq_record_ fmt ic yield =
  let on_line line =
    match parse_record fmt line with
    | Some record -> yield record
    | None -> ()
  in
  seq_lines_ ic on_line

(** [iter_file fmt file yield] opens [file] through [CCIO.with_in] and
    calls [yield] on every record that parses in format [fmt]. *)
let iter_file fmt file yield =
  let read_all ic = seq_record_ fmt ic yield in
  CCIO.with_in file read_all

(* [seq_files_ dir yield] walks [dir] recursively and calls [yield] on
   the path of every entry that is not a directory. Paths are built
   with [Filename.concat] from [dir], so they are relative whenever
   [dir] is. The entries [.] and [..] are skipped. The directory handle
   is closed through [CCFun.finally1] once the walk of [dir] is over;
   it stays open while sub-directories are being visited. *)
let rec seq_files_ dir yield =
  let handle = Unix.opendir dir in
  let visit entry =
    let path = Filename.concat dir entry in
    match entry with
    | "." | ".." -> ()
    | _ ->
      if Sys.is_directory path
      then seq_files_ path yield
      else yield path
  in
  (* only the [readdir] call is guarded: [End_of_file] marks the end of
     the directory listing *)
  let rec walk h =
    match Unix.readdir h with
    | entry ->
      visit entry;
      walk h
    | exception End_of_file -> ()
  in
  CCFun.finally1 ~h:(fun () -> Unix.closedir handle) walk handle

(** [iter_dir fmt dir yield] parses every file found recursively under
    [dir] in format [fmt], and calls [yield] on each pair made of the
    file path and a record parsed from that file. *)
let iter_dir fmt dir yield =
  let visit file =
    iter_file fmt file (fun record -> yield (file, record))
  in
  seq_files_ dir visit

(** [iter_file_or_dir fmt s] is an iterator over the records found at
    path [s]: if [s] is a directory, the records of every file found
    recursively under it; otherwise the records of the file [s] itself.
    The directory test is performed when the function is applied to
    [fmt] and [s], before any consumer is supplied. *)
let iter_file_or_dir fmt s =
  match Sys.is_directory s with
  | true -> Iter.flat_map (iter_file fmt) (seq_files_ s)
  | false -> iter_file fmt s

OCaml

Innovation. Community. Security.