#!/bin/sed -rf
#
# This script looks for words in single quotes and puts <code
# class=property> around them, on the assumption that such words
# represent property names.
#
# It also looks for any other phrases (i.e., not single words) in
# single quotes and adds <code class=css> around them, assuming that
# those represent pieces of CSS.
#
# Phrases in pairs of single quotes (''...'') also get <code
# class=css> around them.
#
# In the process, the single, straight quotes (') are replaced by
# &lsquo; and &rsquo; and those quotes are outside the code element,
# because xref can't link <dfn>foo</> and <code>&lsquo;foo&rsquo;</>
# (it sees lsquo as letters).
#
# Note: Option -r invokes extended regular expressions. Under Mac OS
# X, this option is called -E instead.
#
# The {} commands use a newline instead of a ";" between the commands,
# because sed on Mac OS X doesn't accept ";" after a "b" command :-(
#
# To do: the script gets confused by a typo such as ''foo'. If the
# script takes a long time and ends with a line containing a '', there
# is probably an unmatched '' earlier in the document...
#
# Author: Bert Bos <bert@w3.org>
# Created: 28 July 2005

# Keep pulling in input lines for as long as the pattern space ends in
# the middle of a tag (a "<" with no ">" after it), so that a tag is
# never processed in two halves.

:completetags
/<[^>]*$/!b escape-in-markup
N
b completetags

# A single quote inside a tag is not a quotation mark in the text:
# replace it by &#39;, one quote per pass, until none is left. (Known
# limitation: this assumes that attribute values are only ever
# delimited by double quotes.)

:escape-in-markup
s/(<[^>]*)'/\1\&#39;/
t escape-in-markup

# Single quotes inside <pre>...</pre> are literal text, not markers.
# Step 1: while a <pre> has been opened but its end tag is not yet in
# the pattern space, append the next input line.
# Step 2: replace every single quote inside the element by &#39;, one
# quote per pass.

:complete-pre
/<[Pp][Rr][Ee]([^<]|<[^/]|<\/[^Pp])*$/!b escape-in-pre
N
b complete-pre

:escape-in-pre
s/(<[Pp][Rr][Ee]([^<]|<[^/]|<\/[^Pp])*)'/\1\&#39;/
t escape-in-pre

# Same treatment for <code>...</code>: first gather the whole element
# into the pattern space, then replace each single quote inside it by
# &#39;, one quote per pass.

:complete-code
/<[Cc][Oo][Dd][Ee]([^<]|<[^/]|<\/[^Cc])*$/!b escape-in-code
N
b complete-code

:escape-in-code
s/(<[Cc][Oo][Dd][Ee]([^<]|<[^/]|<\/[^Cc])*)'/\1\&#39;/
t escape-in-code

# Simple rules to catch common mistakes: ''foo' '''foo''
#
# Each rule below works the same way: save the pattern space in the
# hold space (h), overwrite the pattern space with a warning message
# and write it to stderr, restore the original text (g) and write that
# to stderr as well, and finally repair the typo in the pattern space
# so that the rest of the script sees well-formed quotes.

# Rule 1: three quotes in a row. Repair: reduce every ''' to ''.

/'''/{
	h
	s/.*/*** Found three quotes in a row (''') at:/
	w /dev/stderr
	g
	w /dev/stderr
	s/'''/''/g
}

# Rule 2: a word (letters, digits, hyphens) opened by '' but closed by
# a single ' that is followed by some other character. Repair: insert
# the missing second closing quote.

/''[a-zA-Z0-9-]+'[^']/{
	h
	s/.*/*** Pair of quotes ('') closed by single quote (') at:/
	w /dev/stderr
	g
	w /dev/stderr
	s/(''[a-zA-Z0-9-]+')([^'])/\1'\2/g
}

# Rule 3: the same mistake, but with the lone closing quote at the
# very end of the pattern space. Repair: double that final quote.

/''[a-zA-Z0-9-]+'$/{
	h
	s/.*/*** Pair of quotes ('') closed by single quote (') at:/
	w /dev/stderr
	g
	w /dev/stderr
	s/'$/''/
}

# Gather a complete quotation into the pattern space. As long as the
# pattern space contains an opening quote (') or an opening pair of
# quotes ('') without its closing counterpart, the next input line is
# appended and the tests are run again. A quote that follows a letter
# is not treated as an opening quote, since it most likely belongs to
# a word such as "can't". On the last input line there is nothing left
# to append, so control goes straight to the final check.

:completequotes
$b check
/^'[^']*$/b append-next-line
/^([^']*[^a-zA-Z=']'[^']+')*[^']*[^a-zA-Z=']'[^']*$/b append-next-line
/^''([^']|'[^'])*$/b append-next-line
/^(([^']|'[^'])*''([^']|'[^'])*'')*([^']|'[^'])*''([^']|'[^']|'$)*$/b append-next-line
b check

:append-next-line
N
b completequotes

# Final check, reached from completequotes. If the pattern space still
# matches one of the "unmatched quote" patterns, a quote (or a pair of
# quotes) was opened but never closed: report it. Otherwise skip the
# report.

:check
/^'[^']*$/b error
/^([^']*[^a-zA-Z=']'[^']+')*[^']*[^a-zA-Z=']'[^']*$/b error
/^''([^']|'[^'])*$/b error
/^(([^']|'[^'])*''([^']|'[^'])*'')*([^']|'[^'])*''([^']|'[^']|'$)*$/b error
b noerror

# Report the problem on stderr: first a fixed message, then only the
# first line of the pattern space (everything after the first newline
# is replaced by two newlines), which is where the unmatched quote is
# most likely to be found.

:error
h
s/.*/*** Unmatched quote, detected near:/
w /dev/stderr
g
s/\n.*/\n\n/
w /dev/stderr
# The substitution above shortened the pattern space for the report
# only. Restore the full text from the hold space; without this, every
# line after the first would be missing from the output.
g

:noerror

# From here on the pattern space is converted to mark-up. The order of
# the substitutions matters: pairs of quotes first, then single-quoted
# property names, then any other single-quoted text.

# The special convention ''word'' (two single quotes on each side) is
# used to mark CSS code that isn't a property name, but could be
# confused with it if it only had one quote. The text between the
# pairs of quotes may itself contain isolated single quotes.

s/''(([^']|'[^'])*)''/\&lsquo;<code class=css>\1<\/code>\&rsquo;/g

# Escape any single quotes between the newly created <code>...</code>
# pairs, one quote per pass, so that the two substitutions below do
# not mistake them for quotation marks.

:escape-in-code-again
s/(<[Cc][Oo][Dd][Ee]([^<]|<[^/]|<\/[^Cc])*)'/\1\&#39;/
t escape-in-code-again

# If the current pattern space has words in single quotes, not already
# in mark-up, then put <code class=property> around those words. A
# word here is a letter or hyphen followed by lowercase letters and
# hyphens only. The opening quote must be at the start of the pattern
# space or follow a character that is not a letter, "=" or a quote.

s/(^|[^a-zA-Z='])'([a-zA-Z-][a-z-]*)'/\1\&lsquo;<code class=property>\2<\/code>\&rsquo;/g

# If the current pattern space has text (other than property names)
# enclosed in single quotes, mark the text up with <code class=css>.
# The text must contain at least one character that is neither a
# letter nor a hyphen.

s/(^|[^a-zA-Z='])'([^']*[^'a-zA-Z-][^']*)'/\1\&lsquo;<code class=css>\2<\/code>\&rsquo;/g

# Convert any escaped quotes back to literal quotes. This applies to
# every &#39; in the pattern space, including any that were already
# present in the input.

s/&#39;/'/g
