diff --git a/pom.xml b/pom.xml index e542d616..add79749 100644 --- a/pom.xml +++ b/pom.xml @@ -72,7 +72,7 @@ com.google.guava guava - 14.0.1 + 17.0 diff --git a/src/main/java/org/archive/extract/DumpingExtractorOutput.java b/src/main/java/org/archive/extract/DumpingExtractorOutput.java index a4151076..69591931 100644 --- a/src/main/java/org/archive/extract/DumpingExtractorOutput.java +++ b/src/main/java/org/archive/extract/DumpingExtractorOutput.java @@ -9,8 +9,8 @@ import org.archive.util.StreamCopy; import org.json.JSONException; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; public class DumpingExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -22,7 +22,7 @@ public DumpingExtractorOutput(OutputStream out) { } public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new CountingOutputStream(nullo); StreamCopy.copy(resource.getInputStream(), co); long bytes = co.getCount(); diff --git a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java index 306f67a3..62a423c5 100644 --- a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java +++ b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java @@ -1,6 +1,7 @@ package org.archive.extract; import java.io.IOException; +import java.io.OutputStream; import java.io.PrintWriter; import java.net.MalformedURLException; import java.net.URISyntaxException; @@ -23,8 +24,8 @@ import org.json.JSONException; import org.json.JSONObject; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; public class RealCDXExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -72,7 +73,7 @@ public RealCDXExtractorOutput(PrintWriter out) { // SimpleJSONPathSpec gzFooterLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Footer-Length"); // SimpleJSONPathSpec gzHeaderLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Header-Length"); public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new CountingOutputStream(nullo); try { StreamCopy.copy(resource.getInputStream(), co); diff --git a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java index 0d564a6f..ff46a914 100644 --- a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java +++ b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java @@ -1,6 +1,7 @@ package org.archive.extract; import java.io.IOException; +import java.io.OutputStream; import java.io.PrintWriter; import java.net.MalformedURLException; import java.net.URISyntaxException; @@ -21,8 +22,8 @@ import org.json.JSONException; import org.json.JSONObject; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; public class WARCMetadataRecordExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -47,7 +48,7 @@ public WARCMetadataRecordExtractorOutput(PrintWriter out) { } public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new CountingOutputStream(nullo); try { StreamCopy.copy(resource.getInputStream(), co); diff --git a/src/main/java/org/archive/resource/AbstractResource.java b/src/main/java/org/archive/resource/AbstractResource.java index 409e7408..301c53d4 100755 --- a/src/main/java/org/archive/resource/AbstractResource.java +++ b/src/main/java/org/archive/resource/AbstractResource.java @@ -5,7 +5,7 @@ import org.archive.util.StreamCopy; -import com.google.common.io.NullOutputStream; +import com.google.common.io.ByteStreams; public abstract class AbstractResource implements Resource { protected ResourceContainer container; @@ -44,7 +44,7 @@ public static void dumpShort(PrintStream out, Resource resource) throws IOExcept // out.println("Headers Before"); // out.print(m.toString()); - long bytes = StreamCopy.copy(resource.getInputStream(), new NullOutputStream()); + long bytes = StreamCopy.copy(resource.getInputStream(), ByteStreams.nullOutputStream()); out.println("Resource Was:"+bytes+" Long"); out.println("[\n]Headers After"); diff --git a/src/main/java/org/archive/resource/arc/ARCResource.java b/src/main/java/org/archive/resource/arc/ARCResource.java index 5d63fd4d..b6e0a1c1 100644 --- a/src/main/java/org/archive/resource/arc/ARCResource.java +++ b/src/main/java/org/archive/resource/arc/ARCResource.java @@ -18,8 +18,8 @@ import org.archive.util.io.EOFObserver; import org.archive.util.io.PushBackOneByteInputStream; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import com.google.common.io.LimitInputStream; public class ARCResource extends AbstractResource @@ -54,7 +54,7 @@ public ARCResource(MetaData metaData, ResourceContainer container, fields.putLong(DECLARED_LENGTH_KEY, arcMetaData.getLength()); countingIS = new CountingInputStream( - new LimitInputStream(raw, arcMetaData.getLength())); + ByteStreams.limit(raw, arcMetaData.getLength())); try { digIS = new DigestInputStream(countingIS, diff --git a/src/main/java/org/archive/resource/http/HTTPResponseResource.java b/src/main/java/org/archive/resource/http/HTTPResponseResource.java index b5d189bc..cc325427 100644 --- a/src/main/java/org/archive/resource/http/HTTPResponseResource.java +++ b/src/main/java/org/archive/resource/http/HTTPResponseResource.java @@ -7,7 +7,6 @@ import java.security.NoSuchAlgorithmException; import java.util.logging.Logger; - import org.archive.format.http.HttpHeader; import org.archive.format.http.HttpResponse; import org.archive.format.http.HttpResponseMessage; @@ -20,8 +19,8 @@ import org.archive.util.io.EOFNotifyingInputStream; import org.archive.util.io.EOFObserver; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import com.google.common.io.LimitInputStream; @@ -65,7 +64,7 @@ public HTTPResponseResource(MetaData metaData, headers.putString(h.getName(),h.getValue()); } if(forceCheck && (length >= 0)) { - LimitInputStream lis = new LimitInputStream(response, length); + InputStream lis = ByteStreams.limit(response, length); countingIS = new CountingInputStream(lis); } else { countingIS = new CountingInputStream(response); diff --git a/src/main/java/org/archive/resource/warc/WARCResource.java b/src/main/java/org/archive/resource/warc/WARCResource.java index ab9b6900..80929206 100644 --- a/src/main/java/org/archive/resource/warc/WARCResource.java +++ b/src/main/java/org/archive/resource/warc/WARCResource.java @@ -19,8 +19,8 @@ import org.archive.util.io.EOFObserver; import org.archive.util.io.PushBackOneByteInputStream; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import com.google.common.io.LimitInputStream; public class WARCResource extends AbstractResource implements EOFObserver, ResourceConstants { CountingInputStream countingIS; @@ -51,7 +51,7 @@ public WARCResource(MetaData metaData, ResourceContainer container, if(length >= 0) { countingIS = new CountingInputStream( - new LimitInputStream(response, length)); + ByteStreams.limit(response, length)); } else { throw new ResourceParseException(null); } diff --git a/src/main/java/org/archive/url/URLRegexTransformer.java b/src/main/java/org/archive/url/URLRegexTransformer.java index 930f5b34..c5505a74 100644 --- a/src/main/java/org/archive/url/URLRegexTransformer.java +++ b/src/main/java/org/archive/url/URLRegexTransformer.java @@ -101,7 +101,7 @@ public static String hostToPublicSuffix(String host) { InternetDomainName idn; try { - idn = InternetDomainName.fromLenient(host); + idn = InternetDomainName.from(host); } catch(IllegalArgumentException e) { return host; } @@ -109,7 +109,7 @@ public static String hostToPublicSuffix(String host) { if(tmp == null) { return host; } - String pubSuff = tmp.name(); + String pubSuff = tmp.toString(); int idx = host.lastIndexOf(".", host.length() - (pubSuff.length()+2)); if(idx == -1) { return host; diff --git a/src/main/java/org/archive/util/TextUtils.java b/src/main/java/org/archive/util/TextUtils.java index 707f93c7..9061a161 100644 --- a/src/main/java/org/archive/util/TextUtils.java +++ b/src/main/java/org/archive/util/TextUtils.java @@ -36,8 +36,9 @@ import org.apache.commons.lang.StringEscapeUtils; -import com.google.common.base.Function; -import com.google.common.collect.MapMaker; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; public class TextUtils { private static final String FIRSTWORD = "^([^\\s]*).*$"; @@ -51,11 +52,11 @@ protected Map initialValue() { }; /** global soft-cache of Patterns, by string key */ - private static final ConcurrentMap PATTERNS = new MapMaker() + private static final LoadingCache PATTERNS = CacheBuilder.newBuilder() .concurrencyLevel(16) .softValues() - .makeComputingMap(new Function() { - public Pattern apply(String regex) { + .build(new CacheLoader() { + public Pattern load(String regex) { return Pattern.compile(regex); } }); @@ -84,7 +85,7 @@ public static Matcher getMatcher(String pattern, CharSequence input) { final Map matchers = TL_MATCHER_MAP.get(); Matcher m = (Matcher)matchers.get(pattern); if(m == null) { - m = PATTERNS.get(pattern).matcher(input); + m = PATTERNS.getUnchecked(pattern).matcher(input); } else { matchers.put(pattern,null); m.reset(input); diff --git a/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java index 621c6bce..93757a45 100644 --- a/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java +++ b/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java @@ -6,7 +6,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.archive.util.binsearch.AbstractSeekableLineReader; -import com.google.common.io.LimitInputStream; +import com.google.common.io.ByteStreams; public class HDFSSeekableLineReader extends AbstractSeekableLineReader { private FSDataInputStream fsdis; @@ -23,7 +23,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException { fsdis.seek(offset); if (maxLength >= 0) { - return new LimitInputStream(fsdis, maxLength); + return ByteStreams.limit(fsdis, maxLength); } else { return fsdis; } diff --git a/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java index b211db16..5131dd06 100644 --- a/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java +++ b/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java @@ -7,7 +7,7 @@ import org.archive.util.binsearch.AbstractSeekableLineReader; -import com.google.common.io.LimitInputStream; +import com.google.common.io.ByteStreams; public class RandomAccessFileSeekableLineReader extends AbstractSeekableLineReader { @@ -24,7 +24,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException { FileInputStream fis = new FileInputStream(raf.getFD()); if (maxLength > 0) { - return new LimitInputStream(fis, maxLength); + return ByteStreams.limit(fis, maxLength); } else { return fis; } diff --git a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java index d3dc1ff6..710ff069 100644 --- a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java +++ b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java @@ -30,7 +30,7 @@ import org.archive.util.ArchiveUtils; import org.archive.util.zip.GZIPMembersInputStream; -import com.google.common.io.NullOutputStream; +import com.google.common.io.ByteStreams; import com.google.common.primitives.Bytes; /** @@ -70,14 +70,14 @@ public static void main(String [] args) { public void testFullReadAllFour() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); - int count = IOUtils.copy(gzin, new NullOutputStream()); + int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong length uncompressed data", 1024+(32*1024)+1+5, count); } public void testFullReadSixSmall() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz)); - int count = IOUtils.copy(gzin, new NullOutputStream()); + int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong length uncompressed data", 1+5+1+5+1+5, count); } @@ -85,31 +85,31 @@ public void testReadPerMemberAllFour() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); - int count0 = IOUtils.copy(gzin, new NullOutputStream()); + int count0 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1k member count", 1024, count0); assertEquals("wrong member number", 0, gzin.getMemberNumber()); assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart()); assertEquals("wrong member0 end", noise1k_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count1 = IOUtils.copy(gzin, new NullOutputStream()); + int count1 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 32k member count", (32*1024), count1); assertEquals("wrong member number", 1, gzin.getMemberNumber()); assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count2 = IOUtils.copy(gzin, new NullOutputStream()); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); assertEquals("wrong member number", 2, gzin.getMemberNumber()); assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count3 = IOUtils.copy(gzin, new NullOutputStream()); + int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); assertEquals("wrong member number", 3, gzin.getMemberNumber()); assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); + int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong eof count", 0, countEnd); } @@ -118,14 +118,14 @@ public void testReadPerMemberSixSmall() throws IOException { new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz)); gzin.setEofEachMember(true); for(int i = 0; i < 3; i++) { - int count2 = IOUtils.copy(gzin, new NullOutputStream()); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); gzin.nextMember(); - int count3 = IOUtils.copy(gzin, new NullOutputStream()); + int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); gzin.nextMember(); } - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); + int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong eof count", 0, countEnd); } @@ -172,19 +172,19 @@ public void testMemberSeek() throws IOException { new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); gzin.compressedSeek(noise1k_gz.length+noise32k_gz.length); - int count2 = IOUtils.copy(gzin, new NullOutputStream()); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); // assertEquals("wrong Member number", 2, gzin.getMemberNumber()); assertEquals("wrong Member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong Member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count3 = IOUtils.copy(gzin, new NullOutputStream()); + int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); // assertEquals("wrong Member number", 3, gzin.getMemberNumber()); assertEquals("wrong Member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong Member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); + int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong eof count", 0, countEnd); } @@ -195,7 +195,7 @@ public void testMemberIterator() throws IOException { Iterator iter = gzin.memberIterator(); assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember0 = iter.next(); - int count0 = IOUtils.copy(gzMember0, new NullOutputStream()); + int count0 = IOUtils.copy(gzMember0, ByteStreams.nullOutputStream()); assertEquals("wrong 1k member count", 1024, count0); assertEquals("wrong member number", 0, gzin.getMemberNumber()); assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart()); @@ -203,7 +203,7 @@ public void testMemberIterator() throws IOException { assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember1 = iter.next(); - int count1 = IOUtils.copy(gzMember1, new NullOutputStream()); + int count1 = IOUtils.copy(gzMember1, ByteStreams.nullOutputStream()); assertEquals("wrong 32k member count", (32*1024), count1); assertEquals("wrong member number", 1, gzin.getMemberNumber()); assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); @@ -211,7 +211,7 @@ public void testMemberIterator() throws IOException { assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember2 = iter.next(); - int count2 = IOUtils.copy(gzMember2, new NullOutputStream()); + int count2 = IOUtils.copy(gzMember2, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); assertEquals("wrong member number", 2, gzin.getMemberNumber()); assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); @@ -219,7 +219,7 @@ public void testMemberIterator() throws IOException { assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember3 = iter.next(); - int count3 = IOUtils.copy(gzMember3, new NullOutputStream()); + int count3 = IOUtils.copy(gzMember3, ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); assertEquals("wrong member number", 3, gzin.getMemberNumber()); assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart());