#!/usr/local/bin/perl
# Check for lack of space before a start tag and
# after an end tag.  Also reports probable duplicate words and
# punctuation that is missing surrounding space.
# Ian Jacobs
# $Id: chkspaces,v 1.10 2009-04-20 13:40:26 bbos Exp $
#
# Usage: chkspaces file...
# For every file, prints "Checking <file>" followed by one tab-indented
# line per suspected problem, all on STDOUT.

use strict;
use warnings;

# Script name without its directory part, used in error messages.
my $PROGNAME = substr($0, rindex($0, "/") + 1);

# Contents of the file most recently loaded by readfile().
my $buf = "";

# readfile(PATH)
# Copy the whole file PATH into $buf.
# Dies with a "$PROGNAME Error: ..." message if the file cannot be opened;
# returns true otherwise.
sub readfile {
    my ($path) = @_;

    # Three-argument open: a name taken from @ARGV is always treated as a
    # plain file to read, never as a mode or a command pipe.  (As a
    # consequence "-" names a file called "-", not standard input.)
    open(my $in, '<', $path)
        or die "$PROGNAME Error: Cannot open file: $path\n";

    local $/;                        # slurp mode: read everything at once
    $buf = <$in>;
    $buf = "" unless defined $buf;   # nothing could be read
    close($in);

    return 1;
}

# find_issues(TEXT)
# Run all checks over TEXT and return the list of report lines, each one
# already formatted as "\t<kind>\t<details>\n".  The reports are grouped
# by check, in this order:
#   1. an end tag directly followed by a word;
#   2. a word character directly followed by one or more start tags;
#   3. the same word twice in a row (possibly with tags in between);
#   4. punctuation with no space around it.
# Every check scans the complete TEXT.
sub find_issues {
    my ($text) = @_;

    # An end tag immediately followed by word characters: "</b>word".
    my $end_tag_then_word = qr{(<\/[^>]+>\w+)}si;

    # A word character immediately followed by start tags: "d<i>".
    # $2 is the element name of the last tag in the run.
    my $word_then_start_tag = qr{(\w(?:<(\w+)\s*[^>]*>)+)}si;

    # A word repeated after whitespace and/or tags (case-insensitive).
    my $duplicate_word =
        qr{(\b([a-z]\w*)(?:\s+|\s*(?:<[^>]+>)+\s*|\s*(?:<\/[^>]+>)+\s*)\2\b)}si;

    # A parenthesis squeezed between two word characters, or closing
    # punctuation immediately followed by a capital letter.
    my $tight_punctuation = qr{((?:\w([\(\)])\w)|(?:\w([.?:\"'])[A-Z]))}s;

    # Elements for which a missing space before the start tag is usually
    # harmless.  Anchored so that only the exact element name qualifies
    # ("abbr" must not match because it contains "br").
    my $harmless_element = qr{\A(?:br|th|td|sup|sub|ol|ul|li|dl|dt|dd)\z}i;

    my @reports;

    while ($text =~ /$end_tag_then_word/g) {
        push @reports, "\tMissing space?\t$1\n";
    }

    while ($text =~ /$word_then_start_tag/g) {
        # Copy the captures before the next match overwrites them.
        my ($match, $elem) = ($1, $2);
        if ($elem =~ /$harmless_element/) {
            push @reports,
                "\tMissing space?\t$match (Possibly harmless for $elem)\n";
        }
        else {
            push @reports, "\tMissing space?\t$match\n";
        }
    }

    while ($text =~ /$duplicate_word/g) {
        my ($context, $word) = ($1, $2);
        $context =~ s/\n/\\n/g;     # keep each report on a single line
        push @reports, "\tDuplicate word?\t`${word}' in `${context}'\n";
    }

    while ($text =~ /$tight_punctuation/g) {
        # Exactly one of $2 / $3 is set, depending on the alternative.
        my ($context, $word) = ($1, defined $2 ? $2 : $3);
        $context =~ s/\n/\\n/g;
        push @reports,
            "\tMissing space around punc?\t`${word}' in `${context}'\n";
    }

    return @reports;
}

foreach my $file (@ARGV) {
    print STDOUT "Checking $file\n";
    if (readfile($file)) {
        print STDOUT find_issues($buf);
    }
}