#!/bin/bash
#
# CGI script for post-processing CSS3 modules
#
# CGI defines the following variables
# (see http://hoohoo.ncsa.uiuc.edu/cgi/)
#
# SERVER_SOFTWARE 
# SERVER_NAME 
# GATEWAY_INTERFACE 
# SERVER_PROTOCOL 
# SERVER_PORT 
# REQUEST_METHOD 
# PATH_INFO 
# PATH_TRANSLATED 
# SCRIPT_NAME 
# QUERY_STRING 
# REMOTE_HOST 
# REMOTE_ADDR 
# AUTH_TYPE 
# REMOTE_USER 
# REMOTE_IDENT 
# CONTENT_TYPE 
# CONTENT_LENGTH 
# HTTP_ACCEPT 
# HTTP_USER_AGENT
# HTTP_*
#
# The following variables are expected on input or in QUERY_STRING:
#
# - method
#   is either "text," "url" or "file"
# - text
#   should contain HTML source; it is only used if method is "text"
# - file
#   should contain HTML source; it is only used if method is "file"
# - url
#   is a URL to dereference to get HTML source; only used if method is "url"
# - output
#   is "err", "html", or "auto"
# - login
#   optional, used if method is url and the URL is password-protected
# - password
#   optional, used if method is url and the URL is password-protected
# - date
#   optional, default is today's date (format: YYYY-MM-DD)
# - ids
#   if on, adds IDs to all P and LI elements
# - group
#   The group that is writing the document, e.g., "CSS" or "FX". This
#   is used to select an appropriate status section.
# - omitdchtml
#   Normally, the postprocessor adds META/LINK elements according to
#   DC-HTML, to aid software for citation management in extracting
#   bibliographic data. If omitdchtml is on, no such elements are
#   added.  See http://dublincore.org/documents/2008/08/04/dc-html/
# - status
#   The status of the document (WD, CR, REC, ED, etc.). If not set,
#   the script looks for it in the "this version" URL. And if not
#   found, it defaults to "ED"
#
# The HTML source is transformed with a couple of programs. The output
# is either the transformed HTML (if "output" is "html") or the errors
# and warnings that were generated during the transformation (if
# "output" is "err"). If "output" is "auto," the output is the
# transformed HTML if there are no errors, and an HTTP 400 error
# otherwise.
#
# If group is "CSS" or "FX", the program also expands pairs of quotes
# as in 'property' and ''CSS code''.
#
# Copyright  2007-2012 W3C (MIT, ERCIM, Keio)
# See http://www.w3.org/Consortium/Legal/ipr-notice
#
# Author: Bert Bos <bert@w3.org>
# Created: 28 July 2000

# For testing:
#
#debugging=true

# In debugging mode, merge stderr into stdout and send a plain-text
# CGI header straight away, so that all later diagnostics end up in
# the body of the response.
if [ $debugging ]; then
  exec 2>&1
  echo "Content-Type: text/plain"
  echo
fi


# error -- send an error response to the client and end the script
#
# $1 - HTTP status code, e.g., 403
# $2 - reason; used both in the Status header and in the body
#
# Does not return: the script exits once the response is written.
#
error()
{
  printf 'Status: %s %s\n' "$1" "$2"
  printf 'Content-Type: text/plain\n\n'
  printf 'Error: %s\n' "$2"
  exit
}


# url_check -- succeed only if the URL is "safe" to retrieve
#
# $1 - the URL to test
#
# Returns 0 for "ftp:" and "http:" URLs and 1 for anything else. In
# particular, "file:" URLs are forbidden (security hole!).
#
url_check()
{
  if [[ $1 == ftp:* || $1 == http:* ]]; then
    return 0
  fi
  return 1
}


# ---------------------------- main ----------------------------

# All scratch files live in a private directory created by mktemp.
# Unlike names derived from the process ID, the name of that directory
# cannot be predicted, so other local users cannot create the files (or
# symbolic links with those names) in advance. The directory is made
# directly under /tmp, which keeps the paths free of white space: the
# rest of the script expands these variables without quotes.
#
WORKDIR=$(mktemp -d /tmp/process.cgi.XXXXXXXXXX) ||
  error 500 "Cannot create a temporary directory"

# The form fields are read from files called ${PREFIX}<fieldname>. The
# "form." prefix keeps them apart from the scratch files below, whatever
# the names of the submitted fields.
PREFIX=$WORKDIR/form.
TMPERR=$WORKDIR/err
TMPOUT4=$WORKDIR/out4
TMPOUT3=$WORKDIR/out3
TMPOUT2=$WORKDIR/out2
TMPOUT1=$WORKDIR/out1
TMPOUT=$WORKDIR/out
NORM=$WORKDIR/norm
NORMAUX=$WORKDIR/normaux
INFORM=$WORKDIR/inform
INFORMAUX=$WORKDIR/informaux

# Remove the directory and everything in it when the script exits,
# including exits through the error function.
trap 'rm -rf -- "$WORKDIR"' 0

# Name and base URL of the bibliography database, the directory with
# the boilerplate include files, and the URL used for dcterms.rights.
bibfile=biblio.ref
bibfileBase=http://dev.w3.org/CSS/biblio/
statusdir=/afs/w3.org/pub/WWW/Style/Group/css3-src
rights="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright"

# Programs in ./bin take precedence over installed ones.
PATH=$PWD/bin:$PATH

# In debugging mode, show which copy of each helper program is found.
if [ $debugging ]; then
  for tool in parseform hxnum hxtoc hxindex hxcite hxmkbib \
      special-include wget hxaddid hxpipe hxunpipe hxref hxnormalize \
      hxextract quotestoclasses hxselect propdeftodb propdefdbtotable; do
    echo $tool = $(which $tool)
  done
fi

# Sanity check: do we understand this CGI version? The variable is
# quoted, so that an unset, empty or multi-word GATEWAY_INTERFACE
# (e.g., when the script is not started by a web server) is reported
# as a plain mismatch, instead of making "[" fail with a syntax error.
#
[ "$GATEWAY_INTERFACE" = "CGI/1.1" ] || error 501 "CGI version mismatch"

# In debugging mode, dump all shell variables, which includes the CGI
# environment.
if [ $debugging ]; then
  echo "-------------------------------"
  set
  echo "-------------------------------"
fi

# Parse the form data (parseform understands GET, POST and file upload).
# Each submitted field is then expected in a file ${PREFIX}<fieldname>;
# a field that was not submitted leaves its variable untouched.
#
parseform $PREFIX

# These fields are reduced to letters and digits.
for field in output method group; do
  if [ -f ${PREFIX}$field ]; then
    printf -v $field '%s' "$(tr -c -d a-zA-Z0-9 <${PREFIX}$field)"
  fi
done

# The date is reduced to its digits (so YYYY-MM-DD becomes YYYYMMDD).
if [ -f ${PREFIX}date ]; then
  date=$(tr -c -d 0-9 <${PREFIX}date)
fi

# These fields are taken over as they are.
for field in login password ids status omitdchtml; do
  if [ -f ${PREFIX}$field ]; then
    printf -v $field '%s' "$(<${PREFIX}$field)"
  fi
done

if [ $debugging ]; then
  for field in method output group login password date ids status omitdchtml; do
    echo $field: \"${!field}\"
  done
fi

# Defaults for fields that are absent or empty.
: ${group:=none} ${output:=auto}

# Depending on the method, store the content of the "file," "text" or
# (indirectly) "url" variables in a temporary file.
#
# The submitted URL is untrusted. It is stripped of surrounding white
# space, rejected if it contains white space elsewhere, and always
# passed on as a single quoted word. Without the quotes, a value such
# as "http://x -O /some/file" would be split into several words and
# hand extra options to wget.
#
case $method in
  file)
    input=${PREFIX}file
    [ -f "$input" ] || error 403 "Incorrect query: missing \"file\" field";;
  text)
    input=${PREFIX}text
    [ -f "$input" ] || error 403 "Incorrect query: missing \"text\" field";;
  url)
    [ -f ${PREFIX}url ] || error 403 "Incorrect query: missing \"url\" field"
    f=$(< ${PREFIX}url)
    f=${f#"${f%%[![:space:]]*}"}	# Remove leading white space
    f=${f%"${f##*[![:space:]]}"}	# Remove trailing white space
    case $f in
      *[[:space:]]*)
	error 403 "Incorrect query: the URL must not contain white space";;
    esac
    url_check "$f" || error 403 "Only \"ftp:\" or \"http:\" URLs are allowed"
    # The message from wget ends up in the Status header, which must
    # be a single line: replace all newlines, not just the first.
    nl=$'\n'
    h=$(wget -nv --http-user="$login" --http-passwd="$password" -O $TMPOUT3 \
      "$f" 2>&1) || error 403 "Failed to retrieve from that URL: ${h//$nl/ }"
    input=$TMPOUT3;;
  *)
    error 403 "Incorrect query: missing or illegal field \"method\"";;
esac

if [ $debugging ]; then
  echo
  echo input: \"$input\"
fi

# Get a copy of the bibliography database. The -N option makes wget
# download the file only if the remote copy is newer than the local
# one. The copy is stored in the current directory, which is where the
# later hxcite and hxmkbib calls look for $bibfile.
#
# NOTE(review): the exit status is not checked, so a failed download
# goes unnoticed here; presumably an older local copy is then used --
# confirm that this is intended.
#
wget -N --no-remove-listing -nv $bibfileBase/$bibfile

# Heuristically derive the status, shortname and the format of the
# document, by looking for certain keywords and a "dated" URL in the
# first few lines of the file.
#
# Bug: The sed command to find the shortname assumes the <H1> is not
# on the same line as the DOCTYPE, so that it can skip the latter. 
# Files with CR instead of LF (old Mac files) will fail this. A "tr
# '\r' '\n'" can fix it, but seems unnecessary, in view of the rarity
# of such files.
#
# The document is treated as XML (format=-x) if its first line has an
# XML declaration or if one of the first 30 lines mentions an XHTML
# DTD. It is treated as HTML5 if one of the first 30 lines contains
# the HTML5 doctype (in upper or lower case).
#
# "grep -F" and "head -n" are the standard spellings of the obsolescent
# "fgrep" and "head -N"; recent versions of GNU grep print a warning
# for every call of fgrep. $input is quoted, so that an empty value
# results in an error message rather than in head waiting for stdin.
#
if head -n 1 "$input" | grep -F -q '<?xml'; then
  format=-x
elif head -n 30 "$input" | grep -F -q '//DTD XHTML'; then
  format=-x
else
  format=
fi
if head -n 30 "$input" | grep -F -q -i '<!doctype html>'; then
  is_html5=true
else
  is_html5=false
fi

if [ $debugging ]; then
  echo format: \"$format\"
  echo html5: $is_html5
fi

# The template for the bibliography and the pattern for citations
# depend on the HTML version: HTML5 has no rel="biblioentry". The
# copyright boilerplate is the same for both.
#
copyright=$statusdir/copyright.inc
if $is_html5; then
  biblioinc=$statusdir/biblio5.inc
  cite='<a href="%b#ref-%L">[%L]<!--{{%m%L}}--></a>'
else
  biblioinc=$statusdir/biblio.inc
  cite='<a href="%b#ref-%L" rel="biblioentry">[%L]<!--{{%m%L}}--></a>'
fi

# If not yet set, find the status in the "This version" URL, or default to "ED"
#
# The sed script first strips comments, processing instructions and
# DOCTYPEs; as long as one of those is still open at the end of the
# pattern space, it appends the next line and tries again. It gives up
# (without output) at the first "Latest version", "<dt>Editor",
# "<dt>Author" or "Abstract" heading. On any other line it looks for a
# URL of the form http://www.w3.org/TR/<segment>/<STATUS>-... and, if
# there is one, prints <STATUS> and stops.
#
# The "t check" / ":check" pair does nothing but reset the flag that
# sed keeps for successful substitutions, so that the "t found" further
# down only reacts to the substitution that extracts the status.
#
# Note that "LC" and "FPWD" are not among the prefixes the script looks
# for: LC can only come from the "status" form field, and FPWD from
# that field or from the check for a previous version below.
#
[[ -z "$status" ]] && status=`sed -r -n -e '
  # Remove comments, PIs and DOCTYPEs, including multi-line ones
  :start;
  s/<!--([^-]|-[^-]|--[^>])*-->//g
  s/<![Dd][Oo][Cc][Tt][Yy][Pp][Ee][^>]*>//g
  s/<\?[^>]*>//g
  /<!--/{N;bstart;}
  /<![Dd][Oo][Cc][Tt][Yy][Pp][Ee]/{N;bstart;}
  /<\?/{N;bstart;}
  # Stop looking as soon as we see "Latest version" or some other things
  /Latest  *([Pp]ubli(c|shed)  *)?[Vv]ersion/q
  /<dt>Editor/q
  /<h2[^>]*>Abstract</q
  /<dt>Author/q
  # Check if current line contains a status, if so, print it and quit
  t check
  :check
  s/.*http:\/\/www.w3.org\/TR\/[^\/]*\/(PER|REC|PR|CR|WD|NOTE|MO|ED)-.*/\1/
  t found
  b
  :found
  p
  q
' $input`
status=${status:-ED}

# If the status is WD, it may in fact be a first WD, which needs a
# different boilerplate. Check for a "Previous version" link.
#
# The sed script strips comments, PIs and DOCTYPEs in the same way as
# the one above and only looks at the text before the first </dl>.
# When it sees "Previous version", it keeps appending lines until the
# pattern space contains an "http:" URL (which it prints), or until a
# <dt> or </dl> shows that there is no URL. $prevlink thus holds the
# URL of the previous version, or is empty if none was found.
#
prevlink=`sed -r -n -e '
    # Remove comments, PIs and DOCTYPEs, including multi-line ones
    :start;
    s/<!--([^-]|-[^-]|--[^>])*-->//g
    s/<![Dd][Oo][Cc][Tt][Yy][Pp][Ee][^>]*>//g
    s/<\?[^>]*>//g
    /<!--/{N;bstart;}
    /<![Dd][Oo][Cc][Tt][Yy][Pp][Ee]/{N;bstart;}
    /<\?/{N;bstart;}
    # Stop processing at the first </dl>
    /<\/[Dd][Ll]/q
    # Once we find "Previous version", loop until we see a link or a <dt>/</dl>
    :findlink
    /Previous  *[Vv]ersion.*http:/{s/.*(http:[^ \n"'\''<>]*).*/\1/;p;q;}
    /Previous  *[Vv]ersion.*<[Dd][Tt]/q
    /Previous  *[Vv]ersion.*<\/[Dd][Ll]/q
    /Previous  *[Vv]ersion/{N;bfindlink;}
  ' $input`
# A working draft without a previous version is a first public
# working draft.
if [ "$status" = "WD" ] && [ -z "$prevlink" ]; then status=FPWD; fi

[ $debugging ] && echo status: \"$status\"

# Find the shortname in the first w3.org URL between the <H1> and the
# end of the first <DL>. Comments are removed first (joining lines as
# long as a comment is still open). Two shapes of URL are recognized:
#
# - a dated URL, .../<STATUS>-<shortname>-<date>/, where <date> is
#   eight digits or an upper case placeholder in square brackets;
# - any other URL, of which the last path segment is the shortname.
#
# The "t reset" / ":reset" pair only resets sed's substitution flag
# after the removal of comments, so that the "t found" reacts to the
# two substitutions for URLs alone. If no URL is found, the shortname
# becomes "unknown-shortname".
#
shortname=`sed -r -n -e '
  /<[Hh]1/,/<\/[Dd][Ll]/{
    # Remove comments, including multi-line ones
    :start
    s/<!--([^-]|-[^-]|--[^>])*-->//g
    /<!--/{N;bstart;}
    t reset
    :reset
    s#.*http://[a-z0-9-]+\.w3\.org/([^ "<>/]+/)*[A-Z]+-([a-zA-Z0-9_+-]+)-(\[[A-Z]+\]|[0-9]{8})/?($|[ "<>].*)#\2#
    s#.*http://[a-z0-9-]+\.w3\.org/([^ "<>/]*/)*([a-zA-Z0-9_+-]+)/?([ "<>].*)?$#\2#
    t found
    b
    :found
    p
    q
  }
' $input`
shortname=${shortname:-unknown-shortname}

[ $debugging ] && echo shortname: \"$shortname\"

# Generate the long version of the document type. A status that is
# not in the list ends the script with an error.
#
case $status in
  FPWD) longstatus="W3C First Public Working Draft";;
  WD)   longstatus="W3C Working Draft";;
  LC)   longstatus="W3C Last Call Working Draft";;
  CR)   longstatus="W3C Candidate Recommendation";;
  PR)   longstatus="W3C Proposed Recommendation";;
  REC)  longstatus="W3C Recommendation";;
  PER)  longstatus="W3C Proposed Edited Recommendation";;
  NOTE) longstatus="W3C Working Group Note";;
  MO)   longstatus="W3C Member-only Draft";;
  ED)   longstatus="Editor's Draft";;
  *) error 500 "Unknown document type. Must be FPWD, WD, LC, CR, PR, REC, PER, NOTE, MO or ED.";;
esac

# find_include -- print the most specific include file of a given kind
#
# $1 - kind of include file: "logo", "status" or "conformance"
#
# Include files may be specific to a document type and a group, to a
# document type only, or to a group only (tried in that order). If
# none of those exists, the generic file is used and if that doesn't
# exist either, /dev/null.
#
find_include()
{
  local candidate

  for candidate in \
      "${statusdir}/$1-${status}-${group}.inc" \
      "${statusdir}/$1-${status}.inc" \
      "${statusdir}/$1-${group}.inc" \
      "${statusdir}/$1.inc"; do
    if [ -f "$candidate" ]; then
      printf '%s\n' "$candidate"
      return
    fi
  done
  printf '%s\n' /dev/null
}

logofile=$(find_include logo)
statusfile=$(find_include status)
conformancefile=$(find_include conformance)

# The style sheet depends on the document type. The three kinds of
# working draft share a style sheet.
#
case "$status" in
  FPWD|WD|LC) style="http://www.w3.org/StyleSheets/TR/W3C-WD";;
  CR)   style="http://www.w3.org/StyleSheets/TR/W3C-CR";;
  PR)   style="http://www.w3.org/StyleSheets/TR/W3C-PR";;
  REC)  style="http://www.w3.org/StyleSheets/TR/W3C-REC";;
  PER)  style="http://www.w3.org/StyleSheets/TR/W3C-PER";;
  NOTE) style="http://www.w3.org/StyleSheets/TR/W3C-WG-NOTE";;
  MO)   style="http://www.w3.org/StyleSheets/TR/W3C-MO";;
  ED)   style="http://www.w3.org/StyleSheets/TR/W3C-ED";;
  *) error 500 "Bug in the postprocessor: unknown document type. Sorry!";;
esac

if [ $debugging ]; then
  echo "logofile: \"$logofile\""
  echo "statusfile: \"$statusfile\""
  echo "conformancefile: \"$conformancefile\""
  echo "style: \"$style\""
fi

# Find the document's title from the TITLE element
#
# The sed script handles a TITLE element that is spread over several
# lines: as long as the pattern space has a <title without a following
# end tag, it appends the next line. Once the element is complete, it
# keeps the content only, replaces newlines by spaces, escapes the
# characters ", < and > (and & if followed by a space), prints the
# result and stops. $title is thus a single line, or empty if there is
# no TITLE element. An ampersand that is not followed by a space (such
# as the one that starts "&amp;") is left as it is.
#
title=`sed -r -n -e '
  :start
  /<title[^>]*>.*<\//{
    s/.*<title[^>]*>(.*)<\/.*/\1/
    s/\n/ /g
    s/& /\&amp; /g
    s/"/\&quot;/g
    s/</\&lt;/g
    s/>/\&gt;/g
    p
    q
  }
  /<title/{
    N
    b start
  }
' $input`

# long_date -- convert a compact date to the form used in documents
#
# $1 - a date written as eight digits: YYYYMMDD
#
# Prints the date as "D Month YYYY", with an English month name and
# without a leading zero or space before the day. Returns 1, without
# printing anything, if $1 does not consist of exactly eight digits or
# if its month is not in 01-12 or its day not in 01-31.
#
long_date()
{
  local -a months=(January February March April May June July August
    September October November December)
  local eight_digits='^[0-9]{8}$'
  local mm dd

  [[ $1 =~ $eight_digits ]] || return 1

  # The "10#" forces base 10. Without it, "08" and "09" would be
  # rejected as malformed octal numbers.
  mm=$((10#${1:4:2}))
  dd=$((10#${1:6:2}))
  (( mm >= 1 && mm <= 12 && dd >= 1 && dd <= 31 )) || return 1

  echo "$dd ${months[mm-1]} ${1:0:4}"
}

# The different date formats, either based on today's date or the
# user-provided date (which at this point has been reduced to its
# digits). All three formats are derived from the same value of
# $cdate: that keeps them consistent with each other, even if the
# script runs around midnight, and makes the long date independent of
# the locale of the server.
#
if [ -z "$date" ]; then
  cdate=$(date +%Y%m%d)
else
  cdate=$date
fi
year=${cdate:0:4}
date=$(long_date "$cdate") ||
  error 403 "Incorrect query: \"date\" must have the format YYYY-MM-DD"

# Construct the URLs of the latest version and of this version, and
# the abbreviation of the status that occurs in dated URLs. First WDs
# and Last Call WDs have different boilerplate text, but the same URL
# as other WDs.
#
latest="http://www.w3.org/TR/$shortname/"
statusabbrev="$status"
if [[ "$status" == "MO" ]]; then
  version="http://www.w3.org/Style/Group/css3-src/$shortname/"
elif [[ "$status" == "ED" ]]; then
  version="http://dev.w3.org/csswg/$shortname/"
elif [[ "$status" == "FPWD" || "$status" == "LC" ]]; then
  version="http://www.w3.org/TR/$year/WD-$shortname-$cdate/"
  statusabbrev=WD
else
  version="http://www.w3.org/TR/$year/$status-$shortname-$cdate/"
fi

# sed_escape -- escape a string for use as replacement in sed's s|x|y|
#
# $1 - the text that must be inserted literally
#
# Prints $1 with a backslash before every backslash, every "&" (which
# sed would replace by the matched text) and every "|" (the delimiter
# that is used for the substitution).
#
sed_escape()
{
  printf '%s\n' "$1" | sed -e 's/[\\&|]/\\&/g'
}

# The title and the link to the previous version are copied from the
# document and may contain characters that are special to sed: a title
# with "&amp;" in it, e.g., would otherwise come out as "[TITLE]amp;"
# and a title with a "/" would make the sed command fail.
#
title_esc=$(sed_escape "$title")
prevlink_esc=$(sed_escape "$prevlink")

# Run the pipeline and generate output. An extra newline is added,
# because sed ignores the last line if it doesn't end with a newline.
# Do special CSS processing (replace 'foo' and ''foo'').
#
# (The space between the two opening parentheses makes sure the shell
# does not mistake them for the start of an arithmetic expression.)
#
( (hxnum -l 2 $input |\
  hxtoc $format -t -l 2 |\
  special-include logo $logofile |\
  special-include copyright $copyright |\
  special-include status $statusfile |\
  special-include conformance $conformancefile |\
  if [[ "$ids" == "on" ]]; then hxaddid $format p; else cat; fi |\
  if [[ "$ids" == "on" ]]; then hxaddid $format li; else cat; fi
  echo) |\
 sed \
 -e "s|http://www.w3.org/StyleSheets/TR/W3C-[A-Z][A-Z]*|$style|g" \
 -e "s/\\[SHORTNAME[^]]*]/$shortname/g" \
 -e "s/\\[LONGSTATUS[^]]*]/$longstatus/g" \
 -e "s/\\[STATUS[^]]*]/$statusabbrev/g" \
 -e "s|\\[LATEST[^]]*]|$latest|g" \
 -e "s|\\[VERSION[^]]*]|$version|g" \
 -e "s|\\[PREVIOUS[^]]*]|$prevlink_esc|g" \
 -e "s/\\[YEAR[^]]*]/$year/g" \
 -e "s/\\[DATE[^]]*]/$date/g" \
 -e "s|\\[TITLE[^]]*]|$title_esc|g" \
 -e "s/\\[CDATE[^]]*]/$cdate/g" |\
 if [[ "$group" == "CSS" ]] || [[ "$group" == "FX" ]]; then quotestoclasses;\
 else cat; fi |\
 hxcite -p "$cite" -c -a $NORMAUX -m '!' $bibfile |\
 hxcite -p "$cite" -c -a $INFORMAUX $bibfile) >$TMPOUT 2>>$TMPERR

# make_def_table -- collect all definitions of one kind into a table
#
# $1 - class of the elements with the definitions ("propdef" or
#      "descdef"); elements with class "$1-extra" are passed to
#      propdeftodb as the second class
# $2 - class for the generated table
# $3 - file in which to store the table
#
# Reads the document from $TMPOUT2.
#
make_def_table()
{
  hxpipe $TMPOUT2 |
    CLASS1=$1 CLASS2=$1-extra propdeftodb |
    sort -bdf |
    CLASS=$2 propdefdbtotable |
    hxunpipe >$3
}

# Extract the table of properties and insert it at the place of a
# <!--properties--> comment. Likewise, extract the table of
# descriptors and insert it at the place of a <!--descriptors-->
# comment. Errors are added to $TMPERR.
#
{
  mv $TMPOUT $TMPOUT1
  hxunent -b $TMPOUT1 | hxaddid -x dfn >$TMPOUT2
  make_def_table propdef properties $TMPOUT3
  make_def_table descdef descriptors $TMPOUT4
  special-include properties $TMPOUT3 <$TMPOUT2 |
    special-include descriptors $TMPOUT4 >$TMPOUT
} 2>>$TMPERR

#(sh propdeftotab $TMPOUT1 |\
# sh tabtoproptable $TMPOUT1 >$TMPOUT) 2>>$TMPERR
#
# Extract table of descriptors and insert it at the place of a
# <!--descriptors--> comment
#
#mv $TMPOUT $TMPOUT1
#(sh propdeftotab -c descdef $TMPOUT1 |\
# sh tabtoproptable $TMPOUT1 descriptors >$TMPOUT) 2>>$TMPERR

# If DC-HTML mark-up is requested, (heuristically) find the
# editors/authors and insert corresponding META and LINK elements.
# (We also need to duplicate the title for Google Scholar. :-( )
#
# Everything that is written to stderr inside this block ends up in
# $TMPERR and thus counts as an error or warning of the transformation.
# That is why "grep -E" and "grep -F" are used instead of egrep and
# fgrep: recent versions of GNU grep print a warning on stderr each
# time one of the latter two is called. And nothing in this block may
# write to stdout, because the CGI headers have not been sent yet.
#
# To do: Remove or merge any existing META and LINK elements for
# dcterms.title, dcterms.creator, dcterms.publisher, dcterms.type,
# dcterms.issued and dcterms.identifier, even if the prefix isn't
# "dcterms."
#
if [ "$omitdchtml" != "on" ]; then

  # Work on a copy with each element on a single line. $TMPOUT4 gets
  # the HEAD, to check for LINK and META elements that already exist.
  #
  hxnormalize -x -i 0 -l 99999 $TMPOUT >$TMPOUT1 2>/dev/null
  hxselect head <$TMPOUT1 >$TMPOUT4

  # Isolate the DDs that contain the editors or authors. Then use the
  # hcard mark-up, if any, to find the names; or apply some heuristics
  # to strip affiliations, e-mail addresses, etc.
  #
  h=$(sed -r \
    -e '1,/<dt[^>]*> *([Ee]dit|[Aa]uth)ors?[ ]*:/d' \
    -e '/<\/dl/,$d' \
    -e '/<dt/,$d' $TMPOUT1)
  names=$(hxselect -s '\n' -c .vcard .fn <<<"$h")
  if [ -z "$names" ]; then
    names=$(hxselect -s '\n' -c .vcard.fn <<<"$h")
  fi
  if [ -z "$names" ]; then
    names=$(sed -r \
      -e 's/<[^>]*>//g' \
      -e 's/,? *&lt;.*&gt;//g' \
      -e 's/,? *[a-zA-Z0-9_.-]*@[a-zA-Z0-9_.-]*//g' \
      -e 's/\([^)]*\) *$//' \
      -e 's/, *....*$//' \
      -e '/^ *$/d' \
      -e 's/   */ /g' \
      -e 's/^ //' \
      -e 's/ $//' <<<"$h")
  fi

  [ -n "$names" ] || echo "Warning: Could not find editors or authors." >&2

  [ $debugging ] && echo names: \"${names//$'\n'/, }\"

  isodate=${cdate:0:4}-${cdate:4:2}-${cdate:6:2}

  # Generate the LINK and META elements. Then insert those in the HTML
  # file and also add the DC-HTML specification to the PROFILE. HTML5
  # doesn't yet define schema.dcterms and dcterms.rights as keywords,
  # so they are commented-out for now.
  #
  rm -f $TMPOUT3
  if $is_html5; then echo '<!--' >>$TMPOUT3; fi
  echo "  <link href=\"http://purl.org/dc/terms/\" rel=\"schema.dcterms\">" >>$TMPOUT3
  if ! grep -E -q '<link[^>]*rel="dcterms\.rights"' $TMPOUT4; then
    echo "  <link href=\"$rights\" rel=\"dcterms.rights\">" >>$TMPOUT3
  elif ! grep -F -q "<link href=\"$rights\" rel=\"dcterms.rights\"" $TMPOUT4; then
    echo "Warning: Document already has a LINK with rel=dcterms.rights" >&2
  fi
  if $is_html5; then echo '  -->' >>$TMPOUT3; fi
  echo "<meta content=\"$title\" name=\"dcterms.title\">" >>$TMPOUT3
  echo "<meta content=\"text\" name=\"dcterms.type\">" >>$TMPOUT3
  echo "<meta content=\"$isodate\" name=\"dcterms.date\">" >>$TMPOUT3
  sed \
    -e 's/"/\&quot;/g' \
    -e 's/^/<meta name=dcterms.creator content="/' \
    -e 's/$/">/' <<<"$names" >>$TMPOUT3
  # Only warn about an existing publisher if it differs from ours.
  # (This is a fixed-string search: the dot must not be escaped, or
  # the pattern can never match.)
  if ! grep -E -q '<meta[^>]*name="dcterms\.publisher"' $TMPOUT4; then
     echo '<meta content="W3C" name="dcterms.publisher"/>' >>$TMPOUT3
  elif ! grep -F -q '<meta content="W3C" name="dcterms.publisher"' $TMPOUT4; then
    echo "Warning: Document already has a META with name=dcterms.publisher" >&2
  fi
  if ! grep -E -q '<meta[^>]*name="dcterms\.identifier"' $TMPOUT4; then
    echo "<meta content=\"$version\" name=\"dcterms.identifier\"/>" >>$TMPOUT3
  else
    echo "Warning: Document already has a META with name=dcterms.identifier" >&2
  fi
  if grep -E -q '<meta[^>]*name="dcterms\.(title|type|date|creator)"' $TMPOUT4; then
    echo "Warning: Removed existing META elements with dcterms.title, dcterms.type, dcterms.date and dcterms.creator" >&2
  fi

  if ! $is_html5; then		# Add PROFILE attribute
    dcterms='http://dublincore.org/documents/2008/08/04/dc-html/'
    # The -q keeps the matching line out of the response.
    if head -n 30 $TMPOUT1 | grep -F -i '<head' | grep -F -q "$dcterms"; then
      : 			# Profile is already there
    else
      # Join the lines of a start tag that is spread over several
      # lines, give a HEAD without attributes an empty PROFILE and
      # then put our URL at the start of the PROFILE.
      sed -e "
      :start
      /<head[^>]*\$/{N;b start;}
      s|<head[[:space:]]*>|<head profile=\"\">|
      s|<head[^>]*profile=['\"]|&$dcterms |" $TMPOUT1 >$TMPOUT
      mv $TMPOUT $TMPOUT1
    fi
  fi

  # Remove the META elements that are replaced by ours and insert the
  # new elements after the line with the end of the TITLE.
  sed -e "
    s/<meta[^>]*name=\"dcterms.title\"[^>]*>//
    s/<meta[^>]*name=\"dcterms.type\"[^>]*>//
    s/<meta[^>]*name=\"dcterms.date\"[^>]*>//
    s/<meta[^>]*name=\"dcterms.creator\"[^>]*>//
    /<\/title>/r $TMPOUT3" $TMPOUT1 >$TMPOUT

fi 2>>$TMPERR

# make_bibliography -- generate a formatted list of references
#
# $1 - auxiliary file (as written by hxcite) that lists the references
# $2 - file in which to store the result
#
# The bibliography is converted from UTF-8 to numerical character
# entities before use. Errors are added to $TMPERR.
#
make_bibliography()
{
  hxmkbib -a $1 $bibfile $biblioinc 2>>$TMPERR | xml2asc >$2
}

# Extract bibliography. Remove references from the list of informal
# references that are already in the normative list.
#
comm -23 <(sort -u $INFORMAUX) <(sort -u $NORMAUX) >$TMPOUT4
mv $TMPOUT4 $INFORMAUX
make_bibliography $NORMAUX $NORM
make_bibliography $INFORMAUX $INFORM

# Insert the bibliography. Make cross references to defining instances
# of terms.
#
mv $TMPOUT $TMPOUT1
{
  special-include normative $NORM $TMPOUT1 |
    special-include informative $INFORM |
    hxunent -b |
    hxref -l $format |
    LC_ALL=en_US hxindex $format -t -n |
    hxnormalize $format -i 1 -l 78
} >$TMPOUT 2>>$TMPERR

if [ $debugging ]; then
  wc $TMPOUT
  wc $TMPERR
fi

# send_response -- write the CGI response: headers and, unless the
# request is a HEAD request, the body
#
# Depending on "output," the body is either the errors and warnings
# (if output is "err", or if it is "auto" and $TMPERR is not empty) or
# the generated HTML from $TMPOUT. Two extra headers give the number
# of warnings (lines in $TMPERR that start with "Warning:") and the
# number of errors (all other lines).
#
# All variables are quoted. In particular, an unset REQUEST_METHOD is
# treated as "not HEAD", instead of causing a syntax error in the test
# and thereby the loss of the body.
#
# NOTE(review): the comment at the top of this file promises an HTTP
# 400 error if output is "auto" and there are errors, but no "Status:"
# header is written here -- confirm which of the two is intended.
#
send_response()
{
  local ctype show_errors

  if [[ "$output" == "err" ]] ||
     [[ "$output" == "auto" && -s "$TMPERR" ]]; then
    ctype=text/plain
    show_errors=true
  else
    ctype=text/html
    show_errors=false
  fi

  # The command substitutions are not quoted on purpose: some versions
  # of wc pad the number with spaces.
  echo "Content-Type: $ctype"
  echo "X-W3C-Postprocessor-Errors:" $(grep -E -i -v '^Warning:' "$TMPERR" | wc -l)
  echo "X-W3C-Postprocessor-Warnings:" $(grep -E -i '^Warning:' "$TMPERR" | wc -l)
  echo

  [[ "$REQUEST_METHOD" != "HEAD" ]] || return 0

  if $show_errors; then
    if [ -s "$TMPERR" ]; then cat "$TMPERR"; else echo "No errors"; fi
    echo "(Processed in $SECONDS seconds)"
  else
    cat "$TMPOUT"
  fi
}

send_response

# Local variables:
# mode: ksh
# End:

