diff --git a/pom.xml b/pom.xml
index e542d616..add79749 100644
--- a/pom.xml
+++ b/pom.xml
@@ -72,7 +72,7 @@
com.google.guava
guava
- 14.0.1
+ 17.0
diff --git a/src/main/java/org/archive/extract/DumpingExtractorOutput.java b/src/main/java/org/archive/extract/DumpingExtractorOutput.java
index a4151076..69591931 100644
--- a/src/main/java/org/archive/extract/DumpingExtractorOutput.java
+++ b/src/main/java/org/archive/extract/DumpingExtractorOutput.java
@@ -9,8 +9,8 @@
import org.archive.util.StreamCopy;
import org.json.JSONException;
+import com.google.common.io.ByteStreams;
import com.google.common.io.CountingOutputStream;
-import com.google.common.io.NullOutputStream;
public class DumpingExtractorOutput implements ExtractorOutput {
private static final Logger LOG =
@@ -22,7 +22,7 @@ public DumpingExtractorOutput(OutputStream out) {
}
public void output(Resource resource) throws IOException {
- NullOutputStream nullo = new NullOutputStream();
+ OutputStream nullo = ByteStreams.nullOutputStream();
CountingOutputStream co = new CountingOutputStream(nullo);
StreamCopy.copy(resource.getInputStream(), co);
long bytes = co.getCount();
diff --git a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java
index 306f67a3..62a423c5 100644
--- a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java
+++ b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java
@@ -1,6 +1,7 @@
package org.archive.extract;
import java.io.IOException;
+import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
@@ -23,8 +24,8 @@
import org.json.JSONException;
import org.json.JSONObject;
+import com.google.common.io.ByteStreams;
import com.google.common.io.CountingOutputStream;
-import com.google.common.io.NullOutputStream;
public class RealCDXExtractorOutput implements ExtractorOutput {
private static final Logger LOG =
@@ -72,7 +73,7 @@ public RealCDXExtractorOutput(PrintWriter out) {
// SimpleJSONPathSpec gzFooterLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Footer-Length");
// SimpleJSONPathSpec gzHeaderLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Header-Length");
public void output(Resource resource) throws IOException {
- NullOutputStream nullo = new NullOutputStream();
+ OutputStream nullo = ByteStreams.nullOutputStream();
CountingOutputStream co = new CountingOutputStream(nullo);
try {
StreamCopy.copy(resource.getInputStream(), co);
diff --git a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java
index 0d564a6f..ff46a914 100644
--- a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java
+++ b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java
@@ -1,6 +1,7 @@
package org.archive.extract;
import java.io.IOException;
+import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
@@ -21,8 +22,8 @@
import org.json.JSONException;
import org.json.JSONObject;
+import com.google.common.io.ByteStreams;
import com.google.common.io.CountingOutputStream;
-import com.google.common.io.NullOutputStream;
public class WARCMetadataRecordExtractorOutput implements ExtractorOutput {
private static final Logger LOG =
@@ -47,7 +48,7 @@ public WARCMetadataRecordExtractorOutput(PrintWriter out) {
}
public void output(Resource resource) throws IOException {
- NullOutputStream nullo = new NullOutputStream();
+ OutputStream nullo = ByteStreams.nullOutputStream();
CountingOutputStream co = new CountingOutputStream(nullo);
try {
StreamCopy.copy(resource.getInputStream(), co);
diff --git a/src/main/java/org/archive/resource/AbstractResource.java b/src/main/java/org/archive/resource/AbstractResource.java
index 409e7408..301c53d4 100755
--- a/src/main/java/org/archive/resource/AbstractResource.java
+++ b/src/main/java/org/archive/resource/AbstractResource.java
@@ -5,7 +5,7 @@
import org.archive.util.StreamCopy;
-import com.google.common.io.NullOutputStream;
+import com.google.common.io.ByteStreams;
public abstract class AbstractResource implements Resource {
protected ResourceContainer container;
@@ -44,7 +44,7 @@ public static void dumpShort(PrintStream out, Resource resource) throws IOExcept
// out.println("Headers Before");
// out.print(m.toString());
- long bytes = StreamCopy.copy(resource.getInputStream(), new NullOutputStream());
+ long bytes = StreamCopy.copy(resource.getInputStream(), ByteStreams.nullOutputStream());
out.println("Resource Was:"+bytes+" Long");
out.println("[\n]Headers After");
diff --git a/src/main/java/org/archive/resource/arc/ARCResource.java b/src/main/java/org/archive/resource/arc/ARCResource.java
index 5d63fd4d..b6e0a1c1 100644
--- a/src/main/java/org/archive/resource/arc/ARCResource.java
+++ b/src/main/java/org/archive/resource/arc/ARCResource.java
@@ -18,8 +18,8 @@
import org.archive.util.io.EOFObserver;
import org.archive.util.io.PushBackOneByteInputStream;
+import com.google.common.io.ByteStreams;
import com.google.common.io.CountingInputStream;
-import com.google.common.io.LimitInputStream;
public class ARCResource extends AbstractResource
@@ -54,7 +54,7 @@ public ARCResource(MetaData metaData, ResourceContainer container,
fields.putLong(DECLARED_LENGTH_KEY, arcMetaData.getLength());
countingIS = new CountingInputStream(
- new LimitInputStream(raw, arcMetaData.getLength()));
+ ByteStreams.limit(raw, arcMetaData.getLength()));
try {
digIS = new DigestInputStream(countingIS,
diff --git a/src/main/java/org/archive/resource/http/HTTPResponseResource.java b/src/main/java/org/archive/resource/http/HTTPResponseResource.java
index b5d189bc..cc325427 100644
--- a/src/main/java/org/archive/resource/http/HTTPResponseResource.java
+++ b/src/main/java/org/archive/resource/http/HTTPResponseResource.java
@@ -7,7 +7,6 @@
import java.security.NoSuchAlgorithmException;
import java.util.logging.Logger;
-
import org.archive.format.http.HttpHeader;
import org.archive.format.http.HttpResponse;
import org.archive.format.http.HttpResponseMessage;
@@ -20,8 +19,8 @@
import org.archive.util.io.EOFNotifyingInputStream;
import org.archive.util.io.EOFObserver;
+import com.google.common.io.ByteStreams;
import com.google.common.io.CountingInputStream;
-import com.google.common.io.LimitInputStream;
@@ -65,7 +64,7 @@ public HTTPResponseResource(MetaData metaData,
headers.putString(h.getName(),h.getValue());
}
if(forceCheck && (length >= 0)) {
- LimitInputStream lis = new LimitInputStream(response, length);
+ InputStream lis = ByteStreams.limit(response, length);
countingIS = new CountingInputStream(lis);
} else {
countingIS = new CountingInputStream(response);
diff --git a/src/main/java/org/archive/resource/warc/WARCResource.java b/src/main/java/org/archive/resource/warc/WARCResource.java
index ab9b6900..80929206 100644
--- a/src/main/java/org/archive/resource/warc/WARCResource.java
+++ b/src/main/java/org/archive/resource/warc/WARCResource.java
@@ -19,8 +19,8 @@
import org.archive.util.io.EOFObserver;
import org.archive.util.io.PushBackOneByteInputStream;
+import com.google.common.io.ByteStreams;
import com.google.common.io.CountingInputStream;
-import com.google.common.io.LimitInputStream;
public class WARCResource extends AbstractResource implements EOFObserver, ResourceConstants {
CountingInputStream countingIS;
@@ -51,7 +51,7 @@ public WARCResource(MetaData metaData, ResourceContainer container,
if(length >= 0) {
countingIS = new CountingInputStream(
- new LimitInputStream(response, length));
+ ByteStreams.limit(response, length));
} else {
throw new ResourceParseException(null);
}
diff --git a/src/main/java/org/archive/url/URLRegexTransformer.java b/src/main/java/org/archive/url/URLRegexTransformer.java
index 930f5b34..c5505a74 100644
--- a/src/main/java/org/archive/url/URLRegexTransformer.java
+++ b/src/main/java/org/archive/url/URLRegexTransformer.java
@@ -101,7 +101,7 @@ public static String hostToPublicSuffix(String host) {
InternetDomainName idn;
try {
- idn = InternetDomainName.fromLenient(host);
+ idn = InternetDomainName.from(host);
} catch(IllegalArgumentException e) {
return host;
}
@@ -109,7 +109,7 @@ public static String hostToPublicSuffix(String host) {
if(tmp == null) {
return host;
}
- String pubSuff = tmp.name();
+ String pubSuff = tmp.toString();
int idx = host.lastIndexOf(".", host.length() - (pubSuff.length()+2));
if(idx == -1) {
return host;
diff --git a/src/main/java/org/archive/util/TextUtils.java b/src/main/java/org/archive/util/TextUtils.java
index 707f93c7..9061a161 100644
--- a/src/main/java/org/archive/util/TextUtils.java
+++ b/src/main/java/org/archive/util/TextUtils.java
@@ -36,8 +36,9 @@
import org.apache.commons.lang.StringEscapeUtils;
-import com.google.common.base.Function;
-import com.google.common.collect.MapMaker;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
public class TextUtils {
private static final String FIRSTWORD = "^([^\\s]*).*$";
@@ -51,11 +52,11 @@ protected Map initialValue() {
};
/** global soft-cache of Patterns, by string key */
- private static final ConcurrentMap PATTERNS = new MapMaker()
+ private static final LoadingCache PATTERNS = CacheBuilder.newBuilder()
.concurrencyLevel(16)
.softValues()
- .makeComputingMap(new Function() {
- public Pattern apply(String regex) {
+ .build(new CacheLoader() {
+ public Pattern load(String regex) {
return Pattern.compile(regex);
}
});
@@ -84,7 +85,7 @@ public static Matcher getMatcher(String pattern, CharSequence input) {
final Map matchers = TL_MATCHER_MAP.get();
Matcher m = (Matcher)matchers.get(pattern);
if(m == null) {
- m = PATTERNS.get(pattern).matcher(input);
+ m = PATTERNS.getUnchecked(pattern).matcher(input);
} else {
matchers.put(pattern,null);
m.reset(input);
diff --git a/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java
index 621c6bce..93757a45 100644
--- a/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java
+++ b/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java
@@ -6,7 +6,7 @@
import org.apache.hadoop.fs.FSDataInputStream;
import org.archive.util.binsearch.AbstractSeekableLineReader;
-import com.google.common.io.LimitInputStream;
+import com.google.common.io.ByteStreams;
public class HDFSSeekableLineReader extends AbstractSeekableLineReader {
private FSDataInputStream fsdis;
@@ -23,7 +23,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException {
fsdis.seek(offset);
if (maxLength >= 0) {
- return new LimitInputStream(fsdis, maxLength);
+ return ByteStreams.limit(fsdis, maxLength);
} else {
return fsdis;
}
diff --git a/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java
index b211db16..5131dd06 100644
--- a/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java
+++ b/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java
@@ -7,7 +7,7 @@
import org.archive.util.binsearch.AbstractSeekableLineReader;
-import com.google.common.io.LimitInputStream;
+import com.google.common.io.ByteStreams;
public class RandomAccessFileSeekableLineReader extends AbstractSeekableLineReader {
@@ -24,7 +24,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException {
FileInputStream fis = new FileInputStream(raf.getFD());
if (maxLength > 0) {
- return new LimitInputStream(fis, maxLength);
+ return ByteStreams.limit(fis, maxLength);
} else {
return fis;
}
diff --git a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java
index d3dc1ff6..710ff069 100644
--- a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java
+++ b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java
@@ -30,7 +30,7 @@
import org.archive.util.ArchiveUtils;
import org.archive.util.zip.GZIPMembersInputStream;
-import com.google.common.io.NullOutputStream;
+import com.google.common.io.ByteStreams;
import com.google.common.primitives.Bytes;
/**
@@ -70,14 +70,14 @@ public static void main(String [] args) {
public void testFullReadAllFour() throws IOException {
GZIPMembersInputStream gzin =
new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz));
- int count = IOUtils.copy(gzin, new NullOutputStream());
+ int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong length uncompressed data", 1024+(32*1024)+1+5, count);
}
public void testFullReadSixSmall() throws IOException {
GZIPMembersInputStream gzin =
new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz));
- int count = IOUtils.copy(gzin, new NullOutputStream());
+ int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong length uncompressed data", 1+5+1+5+1+5, count);
}
@@ -85,31 +85,31 @@ public void testReadPerMemberAllFour() throws IOException {
GZIPMembersInputStream gzin =
new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz));
gzin.setEofEachMember(true);
- int count0 = IOUtils.copy(gzin, new NullOutputStream());
+ int count0 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 1k member count", 1024, count0);
assertEquals("wrong member number", 0, gzin.getMemberNumber());
assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart());
assertEquals("wrong member0 end", noise1k_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
- int count1 = IOUtils.copy(gzin, new NullOutputStream());
+ int count1 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 32k member count", (32*1024), count1);
assertEquals("wrong member number", 1, gzin.getMemberNumber());
assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
- int count2 = IOUtils.copy(gzin, new NullOutputStream());
+ int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 1-byte member count", 1, count2);
assertEquals("wrong member number", 2, gzin.getMemberNumber());
assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
- int count3 = IOUtils.copy(gzin, new NullOutputStream());
+ int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 5-byte member count", 5, count3);
assertEquals("wrong member number", 3, gzin.getMemberNumber());
assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
- int countEnd = IOUtils.copy(gzin, new NullOutputStream());
+ int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong eof count", 0, countEnd);
}
@@ -118,14 +118,14 @@ public void testReadPerMemberSixSmall() throws IOException {
new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz));
gzin.setEofEachMember(true);
for(int i = 0; i < 3; i++) {
- int count2 = IOUtils.copy(gzin, new NullOutputStream());
+ int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 1-byte member count", 1, count2);
gzin.nextMember();
- int count3 = IOUtils.copy(gzin, new NullOutputStream());
+ int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 5-byte member count", 5, count3);
gzin.nextMember();
}
- int countEnd = IOUtils.copy(gzin, new NullOutputStream());
+ int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong eof count", 0, countEnd);
}
@@ -172,19 +172,19 @@ public void testMemberSeek() throws IOException {
new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz));
gzin.setEofEachMember(true);
gzin.compressedSeek(noise1k_gz.length+noise32k_gz.length);
- int count2 = IOUtils.copy(gzin, new NullOutputStream());
+ int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 1-byte member count", 1, count2);
// assertEquals("wrong Member number", 2, gzin.getMemberNumber());
assertEquals("wrong Member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong Member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
- int count3 = IOUtils.copy(gzin, new NullOutputStream());
+ int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 5-byte member count", 5, count3);
// assertEquals("wrong Member number", 3, gzin.getMemberNumber());
assertEquals("wrong Member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong Member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
- int countEnd = IOUtils.copy(gzin, new NullOutputStream());
+ int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong eof count", 0, countEnd);
}
@@ -195,7 +195,7 @@ public void testMemberIterator() throws IOException {
Iterator iter = gzin.memberIterator();
assertTrue(iter.hasNext());
GZIPMembersInputStream gzMember0 = iter.next();
- int count0 = IOUtils.copy(gzMember0, new NullOutputStream());
+ int count0 = IOUtils.copy(gzMember0, ByteStreams.nullOutputStream());
assertEquals("wrong 1k member count", 1024, count0);
assertEquals("wrong member number", 0, gzin.getMemberNumber());
assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart());
@@ -203,7 +203,7 @@ public void testMemberIterator() throws IOException {
assertTrue(iter.hasNext());
GZIPMembersInputStream gzMember1 = iter.next();
- int count1 = IOUtils.copy(gzMember1, new NullOutputStream());
+ int count1 = IOUtils.copy(gzMember1, ByteStreams.nullOutputStream());
assertEquals("wrong 32k member count", (32*1024), count1);
assertEquals("wrong member number", 1, gzin.getMemberNumber());
assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart());
@@ -211,7 +211,7 @@ public void testMemberIterator() throws IOException {
assertTrue(iter.hasNext());
GZIPMembersInputStream gzMember2 = iter.next();
- int count2 = IOUtils.copy(gzMember2, new NullOutputStream());
+ int count2 = IOUtils.copy(gzMember2, ByteStreams.nullOutputStream());
assertEquals("wrong 1-byte member count", 1, count2);
assertEquals("wrong member number", 2, gzin.getMemberNumber());
assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart());
@@ -219,7 +219,7 @@ public void testMemberIterator() throws IOException {
assertTrue(iter.hasNext());
GZIPMembersInputStream gzMember3 = iter.next();
- int count3 = IOUtils.copy(gzMember3, new NullOutputStream());
+ int count3 = IOUtils.copy(gzMember3, ByteStreams.nullOutputStream());
assertEquals("wrong 5-byte member count", 5, count3);
assertEquals("wrong member number", 3, gzin.getMemberNumber());
assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart());