#!/usr/bin/gawk -f

# Print one HTML list item per input file, linking to the file and using the
# text of its <title> element as the link text:
#
#   <li lang=LANG><a href="FILE">TITLE</a>
#
# LANG comes from a lang= attribute found on the line containing "<html" and
# defaults to "en".  Nothing is printed for a file in which no "</title" is
# seen.  Requires gawk (IGNORECASE, three-argument match()).

# Escape the characters that would break a double-quoted HTML attribute value.
function attr_escape(s) {
  gsub(/&/, "\\&amp;", s)   # "\\&" is a literal "&" in a gsub replacement
  gsub(/"/, "\\&quot;", s)
  gsub(/</, "\\&lt;", s)
  gsub(/>/, "\\&gt;", s)
  return s
}

# Tag and attribute names are matched case-insensitively.
BEGIN {IGNORECASE = 1}

# Reset per-file state so a file without an <html lang=...> line does not
# inherit the language (or a half-collected title) of the previous file.
# FNR == 1 is used instead of BEGINFILE so that gawk's default handling of
# unreadable input files is left untouched.
FNR == 1 {lang = "en"; title = ""}

# Language: digits are allowed so tags such as "es-419" are kept whole, and
# at least one character is required so lang="" falls back to the default.
/<html/ {
  if (match($0, /lang=['"]?([a-z0-9-]+)/, a)) lang = a[1]; else lang = "en"
}

# Opening tag: collect from "<title" onwards so that markup sharing the line
# (e.g. <html><head><meta ...>) does not end up in the title text.
/<title/ {title = substr($0, match($0, /<title/))}

# Tag-free continuation lines of a title that spans several lines.
title && /^[^<]*$/ {title = title " " $0}

# Last line of a multi-line title: text followed by the closing tag.
/^[^<]*<\/title/ {title = title " " $0}

# Closing tag: clean up the collected text, emit the item, skip the rest.
/<\/title/ {
  sub(/<\/title.*/, "", title)    # drop the closing tag and anything after it
  gsub(/<[^>]*>/, "", title)      # drop the opening <title ...> tag
  gsub(/[ \t\r]+/, " ", title)    # collapse whitespace, incl. CR from CRLF files
  gsub(/^ | $/, "", title)        # trim
  print "<li lang=" lang "><a href=\"" attr_escape(FILENAME) "\">" title "</a>"
  nextfile
}
