#!/usr/bin/gawk -f

# Track the language in effect for the current element.
#
# Attribute lines precede the open-tag line of the element they belong to,
# so an "Alang" line updates `lang` just before the matching "(" line.
/^Alang CDATA / {lang = $3}

# On every open tag, record the language in effect for that element
# (its own lang attribute if it had one, otherwise the inherited value).
/^\(/ {stack[level++] = lang}

# On every close tag, drop the closed element's entry and restore the
# language of the enclosing element.  Restoring stack[--level] instead would
# re-select the language of the element that was just closed, so siblings
# that follow it would wrongly inherit that language.
/^\)/ {
  if (level > 0)
    level--
  lang = (level > 0) ? stack[level - 1] : ""
}

# Remember the most recent id attribute; it is consumed by the rule that
# recognises an opening "(li" line.
/^Aid CDATA / {
  id = $3
}

# A class attribute line containing "updated" marks the start of a date
# element: raise in_date until the next close tag.
/^Aclass CDATA .*updated/ {
  in_date = 1
}

# Any close tag clears both the pending id and the date flag.
/^\)/ {
  id = ""
  in_date = ""
}

# An "(li" line seen while an id attribute is pending starts a new news
# item.  The captured content begins with the id attribute line; the "(li"
# line itself is appended by the copy rule below.
/^\(li$/ && id {
  in_li = 1
  newsid = id
  content = sprintf("Aid CDATA %s\n", id)
}

# Inside an item, copy open/close/attribute/data lines verbatim, except
# while the date element is being processed.
in_li && !in_date && /^[()A-]/ {
  content = content $0 "\n"
}

# Data line of the date element: strip spaces and the escaped \t, \r, \n
# sequences, read the date from fixed character positions (1-4, 6-7, 9-10),
# and emit a replacement span carrying the raw date as its title plus a
# formatted "day Mon Year" rendering.
# NOTE(review): in_date is cleared on the date element's own ")" line before
# the copy rule runs, so that close line appears to be copied in addition to
# the ")span" emitted here -- confirm the consumer expects this.
in_li && in_date && /^-/ {
  date = substr($0, 2)
  gsub(/ |\\t|\\r|\\n/, "", date)

  time = mktime(sprintf("%s %s %s 00 00 00", \
    substr(date, 1, 4), substr(date, 6, 2), substr(date, 9, 2)))

  content = content \
    "Aclass CDATA updated\n" \
    "Atitle CDATA " date "\n" \
    "(span\n" \
    strftime("(i%n-%-e%n)i%n- %b %Y", time) "\n" \
    ")span\n"
}
# End of a captured news item.
#
# The item is queued (keyed by its date so it can be sorted later) only if a
# date was found and its id has not been queued before.
/^\)li$/ && in_li {
  in_li = 0
  if (date && newsid && !seen[newsid]) {
    items[n++] = date "\t" content
    seen[newsid] = 1
  }
  # Always clear the per-item state, even when the item was skipped.
  # Otherwise the date of a skipped duplicate would leak into the next
  # item, and an undated item would be queued under that stale date.
  newsid = ""
  date = ""
  content = ""
}

# After all input: emit the (at most) five highest-sorted items, highest
# first, wrapped in a "dated" list.  Nothing is printed when no item was
# collected.
END {
  if (n) {
    # sort() is not a gawk built-in and is not defined in the visible part
    # of this file; presumably it orders items[0..n-1] ascending in place,
    # which would sort by the leading date field -- verify.
    sort(items, 0, n - 1)

    printf "Aclass CDATA dated\n(ul\n-\\n\n"
    for (shown = 0; shown < 5 && shown < n; shown++) {
      entry = items[n - 1 - shown]
      # Drop the "date<TAB>" sort key; print only the captured content.
      printf "%s-\\n\n", substr(entry, index(entry, "\t") + 1)
    }
    printf ")ul\n-\\n\n"
  }
}
