diff --git a/src/main/java/org/archive/resource/html/ExtractingParseObserver.java b/src/main/java/org/archive/resource/html/ExtractingParseObserver.java index e1f57b55..a29744c8 100644 --- a/src/main/java/org/archive/resource/html/ExtractingParseObserver.java +++ b/src/main/java/org/archive/resource/html/ExtractingParseObserver.java @@ -23,7 +23,7 @@ public class ExtractingParseObserver implements ParseObserver { protected static String cssUrlPatString = "url\\s*\\(\\s*([\\\\\"']*.+?[\\\\\"']*)\\s*\\)"; protected static String cssImportNoUrlPatString = - "@import\\s+(('[^']+')|(\"[^\"]+\")|(\\('[^']+'\\))|(\\(\"[^\"]+\"\\))|(\\([^)]+\\))|([a-z0-9_.:/\\\\-]+))\\s*;"; + "@import\\s+((?:'[^']+')|(?:\"[^\"]+\")|(?:\\('[^']+'\\))|(?:\\(\"[^\"]+\"\\))|(?:\\([^)]+\\))|(?:[a-z0-9_.:/\\\\-]+))\\s*;"; protected static Pattern cssImportNoUrlPattern = Pattern .compile(cssImportNoUrlPatString); @@ -372,36 +372,25 @@ private void patternCSSExtract(HTMLMetaData data, Pattern pattern, String conten Matcher m = pattern.matcher(content); int idx = 0; int contentLen = content.length(); - while((idx < contentLen) && m.find(idx)) { + while((idx < contentLen) && m.find()) { + idx = m.end(); String url = m.group(1); - int origUrlLength = url.length(); - int urlStart = m.start(1); - int urlEnd = m.end(1); - idx = urlEnd; if(url.length() < 2) { continue; } if ((url.charAt(0) == '(') - && (url.charAt(origUrlLength-1) == ')')) { - url = url.substring(1, origUrlLength - 1); - urlStart += 1; - origUrlLength -= 2; + && (url.charAt(url.length()-1) == ')')) { + url = url.substring(1, url.length() - 1); } - if (url.charAt(0) == '"') { - url = url.substring(1, origUrlLength - 1); - urlStart += 1; - } else if (url.charAt(0) == '\'') { - url = url.substring(1, origUrlLength - 1); - urlStart += 1; + if (url.charAt(0) == '"' || url.charAt(0) == '\'') { + url = url.substring(1, url.length() - 1); } else if (url.charAt(0) == '\\') { - if(url.length() == 2) + if(url.length() <= 4) { continue; - url = url.substring(2, origUrlLength - 2); - urlStart += 2; + } + url = url.substring(2, url.length() - 2); } - int urlLength = url.length(); data.addHref("path","STYLE/#text","href",url); - idx += urlLength; } } } diff --git a/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java b/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java index 24b6c18a..7d8c7ea8 100644 --- a/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java +++ b/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java @@ -19,7 +19,8 @@ public void testHandleStyleNodeExceptions() throws Exception { "url (' ')", "url('\")", "url(')", - "url('\"')" + "url('\"')", + "url('\\\"\"')" }; boolean except = false; HTMLMetaData md = new HTMLMetaData(new MetaData());