From 0dd8a167e31eb8c9cd592f2290cfd279aa8430f9 Mon Sep 17 00:00:00 2001 From: Gerhard Gossen Date: Wed, 2 Mar 2016 14:17:04 +0100 Subject: [PATCH] Fix some ARC issues --- pom.xml | 4 ++-- .../archive/extract/RealCDXExtractorOutput.java | 2 +- src/main/java/org/archive/io/ArchiveReader.java | 2 +- src/main/java/org/archive/io/arc/ARCRecord.java | 2 +- .../extract/RealCDXExtractorOutputTest.java | 14 ++++++++++++++ 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 7a32de08..5d8fffdd 100644 --- a/pom.xml +++ b/pom.xml @@ -98,8 +98,8 @@ org.apache.hadoop - hadoop-core - 0.20.2-cdh3u4 + hadoop-client + 2.6.0 commons-httpclient diff --git a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java index 8ca3ff82..e9922c56 100644 --- a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java +++ b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java @@ -314,7 +314,7 @@ static String resolve(String context, String spec) { URL cUrl = new URL(context); URL url = new URL(cUrl, spec); // this constructor escapes its arguments, if necessary - URI uri = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), url.getRef()); + URI uri = new URI(url.getProtocol(), url.getUserInfo(), url.getHost(), url.getPort(), url.getPath(), url.getQuery(), url.getRef()); return uri.toASCIIString(); } catch (URISyntaxException e) { diff --git a/src/main/java/org/archive/io/ArchiveReader.java b/src/main/java/org/archive/io/ArchiveReader.java index 66056d33..55faef17 100644 --- a/src/main/java/org/archive/io/ArchiveReader.java +++ b/src/main/java/org/archive/io/ArchiveReader.java @@ -79,7 +79,7 @@ public abstract class ArchiveReader implements ArchiveFileConstants, Iterable " + escaped); + assertTrue(escaped.indexOf(" ") < 0); + } + } }