Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>14.0.1</version>
<version>17.0</version>
</dependency>

<dependency>
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/archive/extract/DumpingExtractorOutput.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import org.archive.util.StreamCopy;
import org.json.JSONException;

import com.google.common.io.ByteStreams;
import com.google.common.io.CountingOutputStream;
import com.google.common.io.NullOutputStream;

public class DumpingExtractorOutput implements ExtractorOutput {
private static final Logger LOG =
Expand All @@ -22,7 +22,7 @@ public DumpingExtractorOutput(OutputStream out) {
}

public void output(Resource resource) throws IOException {
NullOutputStream nullo = new NullOutputStream();
OutputStream nullo = ByteStreams.nullOutputStream();
CountingOutputStream co = new CountingOutputStream(nullo);
StreamCopy.copy(resource.getInputStream(), co);
long bytes = co.getCount();
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/archive/extract/RealCDXExtractorOutput.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.archive.extract;

import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
Expand All @@ -23,8 +24,8 @@
import org.json.JSONException;
import org.json.JSONObject;

import com.google.common.io.ByteStreams;
import com.google.common.io.CountingOutputStream;
import com.google.common.io.NullOutputStream;

public class RealCDXExtractorOutput implements ExtractorOutput {
private static final Logger LOG =
Expand Down Expand Up @@ -72,7 +73,7 @@ public RealCDXExtractorOutput(PrintWriter out) {
// SimpleJSONPathSpec gzFooterLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Footer-Length");
// SimpleJSONPathSpec gzHeaderLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Header-Length");
public void output(Resource resource) throws IOException {
NullOutputStream nullo = new NullOutputStream();
OutputStream nullo = ByteStreams.nullOutputStream();
CountingOutputStream co = new CountingOutputStream(nullo);
try {
StreamCopy.copy(resource.getInputStream(), co);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.archive.extract;

import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
Expand All @@ -21,8 +22,8 @@
import org.json.JSONException;
import org.json.JSONObject;

import com.google.common.io.ByteStreams;
import com.google.common.io.CountingOutputStream;
import com.google.common.io.NullOutputStream;

public class WARCMetadataRecordExtractorOutput implements ExtractorOutput {
private static final Logger LOG =
Expand All @@ -47,7 +48,7 @@ public WARCMetadataRecordExtractorOutput(PrintWriter out) {
}

public void output(Resource resource) throws IOException {
NullOutputStream nullo = new NullOutputStream();
OutputStream nullo = ByteStreams.nullOutputStream();
CountingOutputStream co = new CountingOutputStream(nullo);
try {
StreamCopy.copy(resource.getInputStream(), co);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/archive/resource/AbstractResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import org.archive.util.StreamCopy;

import com.google.common.io.NullOutputStream;
import com.google.common.io.ByteStreams;

public abstract class AbstractResource implements Resource {
protected ResourceContainer container;
Expand Down Expand Up @@ -44,7 +44,7 @@ public static void dumpShort(PrintStream out, Resource resource) throws IOExcept
// out.println("Headers Before");
// out.print(m.toString());

long bytes = StreamCopy.copy(resource.getInputStream(), new NullOutputStream());
long bytes = StreamCopy.copy(resource.getInputStream(), ByteStreams.nullOutputStream());
out.println("Resource Was:"+bytes+" Long");

out.println("[\n]Headers After");
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/archive/resource/arc/ARCResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
import org.archive.util.io.EOFObserver;
import org.archive.util.io.PushBackOneByteInputStream;

import com.google.common.io.ByteStreams;
import com.google.common.io.CountingInputStream;
import com.google.common.io.LimitInputStream;

public class ARCResource extends AbstractResource

Expand Down Expand Up @@ -54,7 +54,7 @@ public ARCResource(MetaData metaData, ResourceContainer container,
fields.putLong(DECLARED_LENGTH_KEY, arcMetaData.getLength());

countingIS = new CountingInputStream(
new LimitInputStream(raw, arcMetaData.getLength()));
ByteStreams.limit(raw, arcMetaData.getLength()));

try {
digIS = new DigestInputStream(countingIS,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import java.security.NoSuchAlgorithmException;
import java.util.logging.Logger;


import org.archive.format.http.HttpHeader;
import org.archive.format.http.HttpResponse;
import org.archive.format.http.HttpResponseMessage;
Expand All @@ -20,8 +19,8 @@
import org.archive.util.io.EOFNotifyingInputStream;
import org.archive.util.io.EOFObserver;

import com.google.common.io.ByteStreams;
import com.google.common.io.CountingInputStream;
import com.google.common.io.LimitInputStream;



Expand Down Expand Up @@ -65,7 +64,7 @@ public HTTPResponseResource(MetaData metaData,
headers.putString(h.getName(),h.getValue());
}
if(forceCheck && (length >= 0)) {
LimitInputStream lis = new LimitInputStream(response, length);
InputStream lis = ByteStreams.limit(response, length);
countingIS = new CountingInputStream(lis);
} else {
countingIS = new CountingInputStream(response);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/archive/resource/warc/WARCResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
import org.archive.util.io.EOFObserver;
import org.archive.util.io.PushBackOneByteInputStream;

import com.google.common.io.ByteStreams;
import com.google.common.io.CountingInputStream;
import com.google.common.io.LimitInputStream;

public class WARCResource extends AbstractResource implements EOFObserver, ResourceConstants {
CountingInputStream countingIS;
Expand Down Expand Up @@ -51,7 +51,7 @@ public WARCResource(MetaData metaData, ResourceContainer container,

if(length >= 0) {
countingIS = new CountingInputStream(
new LimitInputStream(response, length));
ByteStreams.limit(response, length));
} else {
throw new ResourceParseException(null);
}
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/archive/url/URLRegexTransformer.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,15 @@ public static String hostToPublicSuffix(String host) {
InternetDomainName idn;

try {
idn = InternetDomainName.fromLenient(host);
idn = InternetDomainName.from(host);
} catch(IllegalArgumentException e) {
return host;
}
InternetDomainName tmp = idn.publicSuffix();
if(tmp == null) {
return host;
}
String pubSuff = tmp.name();
String pubSuff = tmp.toString();
int idx = host.lastIndexOf(".", host.length() - (pubSuff.length()+2));
if(idx == -1) {
return host;
Expand Down
13 changes: 7 additions & 6 deletions src/main/java/org/archive/util/TextUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@

import org.apache.commons.lang.StringEscapeUtils;

import com.google.common.base.Function;
import com.google.common.collect.MapMaker;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

public class TextUtils {
private static final String FIRSTWORD = "^([^\\s]*).*$";
Expand All @@ -51,11 +52,11 @@ protected Map<String,Matcher> initialValue() {
};

/** global soft-cache of Patterns, by string key */
private static final ConcurrentMap<String, Pattern> PATTERNS = new MapMaker()
private static final LoadingCache<String, Pattern> PATTERNS = CacheBuilder.newBuilder()
.concurrencyLevel(16)
.softValues()
.makeComputingMap(new Function<String, Pattern>() {
public Pattern apply(String regex) {
.build(new CacheLoader<String, Pattern>() {
public Pattern load(String regex) {
return Pattern.compile(regex);
}
});
Expand Down Expand Up @@ -84,7 +85,7 @@ public static Matcher getMatcher(String pattern, CharSequence input) {
final Map<String,Matcher> matchers = TL_MATCHER_MAP.get();
Matcher m = (Matcher)matchers.get(pattern);
if(m == null) {
m = PATTERNS.get(pattern).matcher(input);
m = PATTERNS.getUnchecked(pattern).matcher(input);
} else {
matchers.put(pattern,null);
m.reset(input);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import org.apache.hadoop.fs.FSDataInputStream;
import org.archive.util.binsearch.AbstractSeekableLineReader;

import com.google.common.io.LimitInputStream;
import com.google.common.io.ByteStreams;

public class HDFSSeekableLineReader extends AbstractSeekableLineReader {
private FSDataInputStream fsdis;
Expand All @@ -23,7 +23,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException {
fsdis.seek(offset);

if (maxLength >= 0) {
return new LimitInputStream(fsdis, maxLength);
return ByteStreams.limit(fsdis, maxLength);
} else {
return fsdis;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import org.archive.util.binsearch.AbstractSeekableLineReader;

import com.google.common.io.LimitInputStream;
import com.google.common.io.ByteStreams;

public class RandomAccessFileSeekableLineReader extends AbstractSeekableLineReader {

Expand All @@ -24,7 +24,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException {
FileInputStream fis = new FileInputStream(raf.getFD());

if (maxLength > 0) {
return new LimitInputStream(fis, maxLength);
return ByteStreams.limit(fis, maxLength);
} else {
return fis;
}
Expand Down
36 changes: 18 additions & 18 deletions src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import org.archive.util.ArchiveUtils;
import org.archive.util.zip.GZIPMembersInputStream;

import com.google.common.io.NullOutputStream;
import com.google.common.io.ByteStreams;
import com.google.common.primitives.Bytes;

/**
Expand Down Expand Up @@ -70,46 +70,46 @@ public static void main(String [] args) {
public void testFullReadAllFour() throws IOException {
GZIPMembersInputStream gzin =
new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz));
int count = IOUtils.copy(gzin, new NullOutputStream());
int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong length uncompressed data", 1024+(32*1024)+1+5, count);
}

public void testFullReadSixSmall() throws IOException {
GZIPMembersInputStream gzin =
new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz));
int count = IOUtils.copy(gzin, new NullOutputStream());
int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong length uncompressed data", 1+5+1+5+1+5, count);
}

public void testReadPerMemberAllFour() throws IOException {
GZIPMembersInputStream gzin =
new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz));
gzin.setEofEachMember(true);
int count0 = IOUtils.copy(gzin, new NullOutputStream());
int count0 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 1k member count", 1024, count0);
assertEquals("wrong member number", 0, gzin.getMemberNumber());
assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart());
assertEquals("wrong member0 end", noise1k_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
int count1 = IOUtils.copy(gzin, new NullOutputStream());
int count1 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 32k member count", (32*1024), count1);
assertEquals("wrong member number", 1, gzin.getMemberNumber());
assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
int count2 = IOUtils.copy(gzin, new NullOutputStream());
int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 1-byte member count", 1, count2);
assertEquals("wrong member number", 2, gzin.getMemberNumber());
assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
int count3 = IOUtils.copy(gzin, new NullOutputStream());
int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 5-byte member count", 5, count3);
assertEquals("wrong member number", 3, gzin.getMemberNumber());
assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
int countEnd = IOUtils.copy(gzin, new NullOutputStream());
int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong eof count", 0, countEnd);
}

Expand All @@ -118,14 +118,14 @@ public void testReadPerMemberSixSmall() throws IOException {
new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz));
gzin.setEofEachMember(true);
for(int i = 0; i < 3; i++) {
int count2 = IOUtils.copy(gzin, new NullOutputStream());
int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 1-byte member count", 1, count2);
gzin.nextMember();
int count3 = IOUtils.copy(gzin, new NullOutputStream());
int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 5-byte member count", 5, count3);
gzin.nextMember();
}
int countEnd = IOUtils.copy(gzin, new NullOutputStream());
int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong eof count", 0, countEnd);
}

Expand Down Expand Up @@ -172,19 +172,19 @@ public void testMemberSeek() throws IOException {
new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz));
gzin.setEofEachMember(true);
gzin.compressedSeek(noise1k_gz.length+noise32k_gz.length);
int count2 = IOUtils.copy(gzin, new NullOutputStream());
int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 1-byte member count", 1, count2);
// assertEquals("wrong Member number", 2, gzin.getMemberNumber());
assertEquals("wrong Member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong Member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
int count3 = IOUtils.copy(gzin, new NullOutputStream());
int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong 5-byte member count", 5, count3);
// assertEquals("wrong Member number", 3, gzin.getMemberNumber());
assertEquals("wrong Member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong Member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd());
gzin.nextMember();
int countEnd = IOUtils.copy(gzin, new NullOutputStream());
int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream());
assertEquals("wrong eof count", 0, countEnd);
}

Expand All @@ -195,31 +195,31 @@ public void testMemberIterator() throws IOException {
Iterator<GZIPMembersInputStream> iter = gzin.memberIterator();
assertTrue(iter.hasNext());
GZIPMembersInputStream gzMember0 = iter.next();
int count0 = IOUtils.copy(gzMember0, new NullOutputStream());
int count0 = IOUtils.copy(gzMember0, ByteStreams.nullOutputStream());
assertEquals("wrong 1k member count", 1024, count0);
assertEquals("wrong member number", 0, gzin.getMemberNumber());
assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart());
assertEquals("wrong member0 end", noise1k_gz.length, gzin.getCurrentMemberEnd());

assertTrue(iter.hasNext());
GZIPMembersInputStream gzMember1 = iter.next();
int count1 = IOUtils.copy(gzMember1, new NullOutputStream());
int count1 = IOUtils.copy(gzMember1, ByteStreams.nullOutputStream());
assertEquals("wrong 32k member count", (32*1024), count1);
assertEquals("wrong member number", 1, gzin.getMemberNumber());
assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd());

assertTrue(iter.hasNext());
GZIPMembersInputStream gzMember2 = iter.next();
int count2 = IOUtils.copy(gzMember2, new NullOutputStream());
int count2 = IOUtils.copy(gzMember2, ByteStreams.nullOutputStream());
assertEquals("wrong 1-byte member count", 1, count2);
assertEquals("wrong member number", 2, gzin.getMemberNumber());
assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart());
assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd());

assertTrue(iter.hasNext());
GZIPMembersInputStream gzMember3 = iter.next();
int count3 = IOUtils.copy(gzMember3, new NullOutputStream());
int count3 = IOUtils.copy(gzMember3, ByteStreams.nullOutputStream());
assertEquals("wrong 5-byte member count", 5, count3);
assertEquals("wrong member number", 3, gzin.getMemberNumber());
assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart());
Expand Down