From f130aad04b255e7d8cd4eee4bac86c25b0cbbf36 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 6 Jan 2015 15:54:59 -0800 Subject: [PATCH 1/5] move RecordingOutputStreamTest.java from heritrix to webarchive-commons --- .../archive/io/RecordingOutputStreamTest.java | 260 ++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 src/test/java/org/archive/io/RecordingOutputStreamTest.java diff --git a/src/test/java/org/archive/io/RecordingOutputStreamTest.java b/src/test/java/org/archive/io/RecordingOutputStreamTest.java new file mode 100644 index 00000000..1c53549b --- /dev/null +++ b/src/test/java/org/archive/io/RecordingOutputStreamTest.java @@ -0,0 +1,260 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.archive.io; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; + +import org.archive.util.TmpDirTestCase; + + +/** + * Test casesfor RecordingOutputStream. + * + * @author stack + */ +public class RecordingOutputStreamTest extends TmpDirTestCase +{ + /** + * Size of buffer used in tests. + */ + private static final int BUFFER_SIZE = 5; + + /** + * How much to write total to testing RecordingOutputStream. + */ + private static final int WRITE_TOTAL = 10; + + + /* + * @see TmpDirTestCase#setUp() + */ + protected void setUp() throws Exception + { + super.setUp(); + } + + /** + * Test reusing instance of RecordingOutputStream. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording. + */ + public void testReuse() + throws IOException + { + final String BASENAME = "testReuse"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Bkg.txt")).getAbsolutePath()); + for (int i = 0; i < 3; i++) + { + reuse(BASENAME, ros, i); + } + } + + private void reuse(String baseName, RecordingOutputStream ros, int index) + throws IOException + { + final String BASENAME = baseName + Integer.toString(index); + File f = writeIntRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Do again to test that I can get a new ReplayInputStream on same + // RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Method to test for void write(int). + * + * Uses small buffer size and small write size. Test mark and reset too. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording. + */ + public void testWriteint() + throws IOException + { + final String BASENAME = "testWriteint"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeIntRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Do again to test that I can get a new ReplayInputStream on same + // RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Method to test for void write(byte []). + * + * Uses small buffer size and small write size. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording. + */ + public void testWritebytearray() + throws IOException + { + final String BASENAME = "testWritebytearray"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Do again to test that I can get a new ReplayInputStream on same + // RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Test mark and reset. + * @throws IOException + */ + public void testMarkReset() throws IOException + { + final String BASENAME = "testMarkReset"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + ReplayInputStream ris = ros.getReplayInputStream(); + ris.mark(10 /*Arbitrary value*/); + // Read from the stream. + ris.read(); + ris.read(); + ris.read(); + // Reset it. It should be back at zero. + ris.reset(); + assertEquals("Reset to zero", ris.read(), 0); + assertEquals("Reset to zero char 1", ris.read(), 1); + assertEquals("Reset to zero char 2", ris.read(), 2); + // Mark stream. Here. Next character should be '3'. + ris.mark(10 /* Arbitrary value*/); + ris.read(); + ris.read(); + ris.reset(); + assertEquals("Reset to zero char 3", ris.read(), 3); + } + + /** + * Record a file write. + * + * Write a file w/ characters that start at null and ascend to + * filesize. Record the writing w/ passed ros + * recordingoutputstream. Return the file recorded as result of method. + * The file output stream that is recorded is named + * basename + ".txt". + * + *

This method writes a character at a time. + * + * @param ros RecordingOutputStream to record with. + * @param basename Basename of file. + * @param size How many characters to write. + * @return Recorded output stream. + */ + private File writeIntRecordedFile(RecordingOutputStream ros, + String basename, int size) + throws IOException + { + File f = new File(getTmpDir(), basename + ".txt"); + FileOutputStream fos = new FileOutputStream(f); + ros.open(fos); + for (int i = 0; i < WRITE_TOTAL; i++) + { + ros.write(i); + } + ros.close(); + fos.close(); + assertEquals("Content-Length test", size, + ros.getResponseContentLength()); + return f; + } + + /** + * Record a file byte array write. + * + * Write a file w/ characters that start at null and ascend to + * filesize. Record the writing w/ passed ros + * recordingoutputstream. Return the file recorded as result of method. + * The file output stream that is recorded is named + * basename + ".txt". + * + *

This method writes using a byte array. + * + * @param ros RecordingOutputStream to record with. + * @param basename Basename of file. + * @param size How many characters to write. + * @return Recorded output stream. + */ + private File writeByteRecordedFile(RecordingOutputStream ros, + String basename, int size) + throws IOException + { + File f = new File(getTmpDir(), basename + ".txt"); + FileOutputStream fos = new FileOutputStream(f); + ros.open(fos); + byte [] b = new byte[size]; + for (int i = 0; i < size; i++) + { + b[i] = (byte)i; + } + ros.write(b); + ros.close(); + fos.close(); + assertEquals("Content-Length test", size, + ros.getResponseContentLength()); + return f; + } + + /** + * Verify what was written is both in the file written to and in the + * recording stream. + * + * @param ros Stream to check. + * @param f File that was recorded. Stream should have its content + * exactly. + * @param size Amount of bytes written. + * + * @exception IOException Failure reading streams. + */ + private void verifyRecording(RecordingOutputStream ros, File f, + int size) throws IOException + { + assertEquals("Recorded file size.", size, f.length()); + FileInputStream fis = new FileInputStream(f); + assertNotNull("FileInputStream not null", fis); + ReplayInputStream ris = ros.getReplayInputStream(); + assertNotNull("ReplayInputStream not null", ris); + for (int i = 0; i < size; i++) + { + assertEquals("ReplayInputStream content verification", i, + ris.read()); + assertEquals("Recorded file content verification", i, + fis.read()); + } + assertEquals("ReplayInputStream at EOF", -1, ris.read()); + fis.close(); + ris.close(); + } +} From da5d63d41d83fe4d5ea6d14165830e75c568c9a2 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 6 Jan 2015 15:58:31 -0800 Subject: [PATCH 2/5] fix for https://github.com/iipc/webarchive-commons/issues/38 - detect end of http protocol headers in a smarter way, to avoid calling write(byte) repeatedly; add unit tests --- .../org/archive/io/RecordingOutputStream.java | 49 +++++++-- .../archive/io/RecordingOutputStreamTest.java | 100 ++++++++++++++++++ 2 files changed, 142 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/archive/io/RecordingOutputStream.java b/src/main/java/org/archive/io/RecordingOutputStream.java index fe05701c..7d2ff212 100644 --- a/src/main/java/org/archive/io/RecordingOutputStream.java +++ b/src/main/java/org/archive/io/RecordingOutputStream.java @@ -242,6 +242,26 @@ public void write(int b) throws IOException { checkLimits(); } + private int findMessageBodyBeginMark(byte[] b, int off, int len) { + if ((lastTwoBytes[1] == '\n' || lastTwoBytes[0] == '\n' && lastTwoBytes[1] == '\r') + && len >= 1 && b[off] == '\n') { + return 1; + } else if (lastTwoBytes[1] == '\n' && len >= 2 && b[off] == '\r' && b[off+1] == '\n') { + return 2; + } + + for (int i = off; i < off + len - 1; i++) { + if (b[i] == '\n' && b[i+1] == '\n') { + return i + 2; + } else if (b[i] == '\n' && b[i+1] == '\r' + && i + 2 < off + len && b[i+2] == '\n') { + return i + 3; + } + } + + return -1; + } + public void write(byte[] b, int off, int len) throws IOException { if(position < maxPosition) { if(position+len<=maxPosition) { @@ -255,20 +275,35 @@ public void write(byte[] b, int off, int len) throws IOException { off += consumeRange; len -= consumeRange; } - - // see comment on int[] lastTwoBytes - while (messageBodyBeginMark < 0 && len > 0) { - write(b[off]); - off++; - len--; + + if (messageBodyBeginMark < 0) { + // see comment on int[] lastTwoBytes + int mark = findMessageBodyBeginMark(b, off, len); + if (mark > 0) { + if(recording) { + record(b, off, mark - off); + } + if (this.out != null) { + this.out.write(b, off, mark - off); + } + markMessageBodyBegin(); + len = len - (mark - off); + off = mark; + } } - + if(recording) { record(b, off, len); } if (this.out != null) { this.out.write(b, off, len); } + if (len >= 1) { + lastTwoBytes[1] = b[off + len - 1]; + if (len >= 2) { + lastTwoBytes[0] = b[off + len - 2]; + } + } checkLimits(); } diff --git a/src/test/java/org/archive/io/RecordingOutputStreamTest.java b/src/test/java/org/archive/io/RecordingOutputStreamTest.java index 1c53549b..f697ff31 100644 --- a/src/test/java/org/archive/io/RecordingOutputStreamTest.java +++ b/src/test/java/org/archive/io/RecordingOutputStreamTest.java @@ -18,11 +18,13 @@ */ package org.archive.io; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import org.archive.util.Base32; import org.archive.util.TmpDirTestCase; @@ -257,4 +259,102 @@ private void verifyRecording(RecordingOutputStream ros, File f, fis.close(); ris.close(); } + + public void testMessageBodyBegin() throws IOException { + final String BASENAME = "testMessageBodyBegin"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + ros.setSha1Digest(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\nabcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\r\n\r\nabcdefghij".getBytes()); + assertEquals(14, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r\nabcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes()); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes()); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\r\nabcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r".getBytes()); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes()); + ros.write('\n'); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes()); + ros.write('\n'); + ros.write('\n'); + for (int b: "abcdefghij".getBytes()) { + ros.write(b); + } + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes()); + ros.write('\n'); + ros.write('\r'); + ros.write('\n'); + for (int b: "abcdefghij".getBytes()) { + ros.write(b); + } + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes()); + ros.write('\n'); + ros.write("abcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r".getBytes()); + ros.write('\n'); + ros.write("abcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + } } From 808dcfe76002ebc126c168abb5b6f00b5d3b7e07 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 6 Jan 2015 16:08:48 -0800 Subject: [PATCH 3/5] move TmpDirTestCase.java from heritrix to webarchive-commons --- .../java/org/archive/util/TmpDirTestCase.java | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 src/main/java/org/archive/util/TmpDirTestCase.java diff --git a/src/main/java/org/archive/util/TmpDirTestCase.java b/src/main/java/org/archive/util/TmpDirTestCase.java new file mode 100644 index 00000000..09ec345b --- /dev/null +++ b/src/main/java/org/archive/util/TmpDirTestCase.java @@ -0,0 +1,119 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.util; + +import java.io.File; +import java.io.IOException; + +import junit.framework.TestCase; + + +/** + * Base class for TestCases that want access to a tmp dir for the writing + * of files. + * + * @author stack + */ +public abstract class TmpDirTestCase extends TestCase +{ + /** + * Name of the system property that holds pointer to tmp directory into + * which we can safely write files. + */ + public static final String TEST_TMP_SYSTEM_PROPERTY_NAME = "testtmpdir"; + + /** + * Default test tmp. + */ + public static final String DEFAULT_TEST_TMP_DIR = File.separator + "tmp" + + File.separator + "heritrix-junit-tests"; + + /** + * Directory to write temporary files to. + */ + private File tmpDir = null; + + + public TmpDirTestCase() + { + super(); + } + + public TmpDirTestCase(String testName) + { + super(testName); + } + + /* + * @see TestCase#setUp() + */ + protected void setUp() throws Exception { + super.setUp(); + this.tmpDir = tmpDir(); + } + + /** + * @return Returns the tmpDir. + */ + public File getTmpDir() + { + return this.tmpDir; + } + + /** + * Delete any files left over from previous run. + * + * @param basename Base name of files we're to clean up. + */ + public void cleanUpOldFiles(String basename) { + cleanUpOldFiles(getTmpDir(), basename); + } + + /** + * Delete any files left over from previous run. + * + * @param prefix Base name of files we're to clean up. + * @param basedir Directory to start cleaning in. + */ + public void cleanUpOldFiles(File basedir, String prefix) { + File [] files = FileUtils.getFilesWithPrefix(basedir, prefix); + if (files != null) { + for (int i = 0; i < files.length; i++) { + org.apache.commons.io.FileUtils.deleteQuietly(files[i]); + } + } + } + + + public static File tmpDir() throws IOException { + String tmpDirStr = System.getProperty(TEST_TMP_SYSTEM_PROPERTY_NAME); + tmpDirStr = (tmpDirStr == null)? DEFAULT_TEST_TMP_DIR: tmpDirStr; + File tmpDir = new File(tmpDirStr); + FileUtils.ensureWriteableDirectory(tmpDir); + + if (!tmpDir.canWrite()) + { + throw new IOException(tmpDir.getAbsolutePath() + + " is unwriteable."); + } + + return tmpDir; + } +} From eda46e2554f52d0514de04b6624f81964e67289d Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 6 Jan 2015 16:24:39 -0800 Subject: [PATCH 4/5] update junit dependency since TmpDirTestCase.java is not in the "test" area --- pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6664efd8..df8d0928 100644 --- a/pom.xml +++ b/pom.xml @@ -65,7 +65,6 @@ junit junit 3.8.1 - test From c77d6f5b0dcd899f5adff3db8eab87319cc162ed Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 27 Jan 2015 14:55:45 -0800 Subject: [PATCH 5/5] update CHANGES.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 7fb2f7c4..b872846d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ * [Escape redirect URLs in RealCDXExtractorOutput](https://github.com/iipc/webarchive-commons/pull/36) * [Tests fail on Windows](https://github.com/iipc/webarchive-commons/issues/2) * [Test fails on Java 8](https://github.com/iipc/webarchive-commons/issues/31) +* [RecordingOutputStream can affect tcp packets sent in an undesirable way](https://github.com/iipc/webarchive-commons/issues/38) 1.1.4 -----