diff --git a/src/main/java/org/archive/io/RecordingOutputStream.java b/src/main/java/org/archive/io/RecordingOutputStream.java
index fe05701c..7d2ff212 100644
--- a/src/main/java/org/archive/io/RecordingOutputStream.java
+++ b/src/main/java/org/archive/io/RecordingOutputStream.java
@@ -242,6 +242,26 @@ public void write(int b) throws IOException {
checkLimits();
}
+ private int findMessageBodyBeginMark(byte[] b, int off, int len) {
+ if ((lastTwoBytes[1] == '\n' || lastTwoBytes[0] == '\n' && lastTwoBytes[1] == '\r')
+ && len >= 1 && b[off] == '\n') {
+ return 1;
+ } else if (lastTwoBytes[1] == '\n' && len >= 2 && b[off] == '\r' && b[off+1] == '\n') {
+ return 2;
+ }
+
+ for (int i = off; i < off + len - 1; i++) {
+ if (b[i] == '\n' && b[i+1] == '\n') {
+ return i + 2;
+ } else if (b[i] == '\n' && b[i+1] == '\r'
+ && i + 2 < off + len && b[i+2] == '\n') {
+ return i + 3;
+ }
+ }
+
+ return -1;
+ }
+
public void write(byte[] b, int off, int len) throws IOException {
if(position < maxPosition) {
if(position+len<=maxPosition) {
@@ -255,20 +275,35 @@ public void write(byte[] b, int off, int len) throws IOException {
off += consumeRange;
len -= consumeRange;
}
-
- // see comment on int[] lastTwoBytes
- while (messageBodyBeginMark < 0 && len > 0) {
- write(b[off]);
- off++;
- len--;
+
+ if (messageBodyBeginMark < 0) {
+ // see comment on int[] lastTwoBytes
+ int mark = findMessageBodyBeginMark(b, off, len);
+ if (mark > 0) {
+ if(recording) {
+ record(b, off, mark - off);
+ }
+ if (this.out != null) {
+ this.out.write(b, off, mark - off);
+ }
+ markMessageBodyBegin();
+ len = len - (mark - off);
+ off = mark;
+ }
}
-
+
if(recording) {
record(b, off, len);
}
if (this.out != null) {
this.out.write(b, off, len);
}
+ if (len >= 1) {
+ lastTwoBytes[1] = b[off + len - 1];
+ if (len >= 2) {
+ lastTwoBytes[0] = b[off + len - 2];
+ }
+ }
checkLimits();
}
diff --git a/src/main/java/org/archive/util/TmpDirTestCase.java b/src/main/java/org/archive/util/TmpDirTestCase.java
new file mode 100644
index 00000000..09ec345b
--- /dev/null
+++ b/src/main/java/org/archive/util/TmpDirTestCase.java
@@ -0,0 +1,119 @@
+/*
+ * This file is part of the Heritrix web crawler (crawler.archive.org).
+ *
+ * Licensed to the Internet Archive (IA) by one or more individual
+ * contributors.
+ *
+ * The IA licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.archive.util;
+
+import java.io.File;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+
+/**
+ * Base class for TestCases that want access to a tmp dir for the writing
+ * of files.
+ *
+ * @author stack
+ */
+public abstract class TmpDirTestCase extends TestCase
+{
+ /**
+ * Name of the system property that holds pointer to tmp directory into
+ * which we can safely write files.
+ */
+ public static final String TEST_TMP_SYSTEM_PROPERTY_NAME = "testtmpdir";
+
+ /**
+ * Default test tmp.
+ */
+ public static final String DEFAULT_TEST_TMP_DIR = File.separator + "tmp" +
+ File.separator + "heritrix-junit-tests";
+
+ /**
+ * Directory to write temporary files to.
+ */
+ private File tmpDir = null;
+
+
+ public TmpDirTestCase()
+ {
+ super();
+ }
+
+ public TmpDirTestCase(String testName)
+ {
+ super(testName);
+ }
+
+ /*
+ * @see TestCase#setUp()
+ */
+ protected void setUp() throws Exception {
+ super.setUp();
+ this.tmpDir = tmpDir();
+ }
+
+ /**
+ * @return Returns the tmpDir.
+ */
+ public File getTmpDir()
+ {
+ return this.tmpDir;
+ }
+
+ /**
+ * Delete any files left over from previous run.
+ *
+ * @param basename Base name of files we're to clean up.
+ */
+ public void cleanUpOldFiles(String basename) {
+ cleanUpOldFiles(getTmpDir(), basename);
+ }
+
+ /**
+ * Delete any files left over from previous run.
+ *
+ * @param prefix Base name of files we're to clean up.
+ * @param basedir Directory to start cleaning in.
+ */
+ public void cleanUpOldFiles(File basedir, String prefix) {
+ File [] files = FileUtils.getFilesWithPrefix(basedir, prefix);
+ if (files != null) {
+ for (int i = 0; i < files.length; i++) {
+ org.apache.commons.io.FileUtils.deleteQuietly(files[i]);
+ }
+ }
+ }
+
+
+ public static File tmpDir() throws IOException {
+ String tmpDirStr = System.getProperty(TEST_TMP_SYSTEM_PROPERTY_NAME);
+ tmpDirStr = (tmpDirStr == null)? DEFAULT_TEST_TMP_DIR: tmpDirStr;
+ File tmpDir = new File(tmpDirStr);
+ FileUtils.ensureWriteableDirectory(tmpDir);
+
+ if (!tmpDir.canWrite())
+ {
+ throw new IOException(tmpDir.getAbsolutePath() +
+ " is unwriteable.");
+ }
+
+ return tmpDir;
+ }
+}
diff --git a/src/test/java/org/archive/io/RecordingOutputStreamTest.java b/src/test/java/org/archive/io/RecordingOutputStreamTest.java
new file mode 100644
index 00000000..f697ff31
--- /dev/null
+++ b/src/test/java/org/archive/io/RecordingOutputStreamTest.java
@@ -0,0 +1,360 @@
+/*
+ * This file is part of the Heritrix web crawler (crawler.archive.org).
+ *
+ * Licensed to the Internet Archive (IA) by one or more individual
+ * contributors.
+ *
+ * The IA licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.archive.io;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.archive.util.Base32;
+import org.archive.util.TmpDirTestCase;
+
+
+/**
+ * Test casesfor RecordingOutputStream.
+ *
+ * @author stack
+ */
+public class RecordingOutputStreamTest extends TmpDirTestCase
+{
+ /**
+ * Size of buffer used in tests.
+ */
+ private static final int BUFFER_SIZE = 5;
+
+ /**
+ * How much to write total to testing RecordingOutputStream.
+ */
+ private static final int WRITE_TOTAL = 10;
+
+
+ /*
+ * @see TmpDirTestCase#setUp()
+ */
+ protected void setUp() throws Exception
+ {
+ super.setUp();
+ }
+
+ /**
+ * Test reusing instance of RecordingOutputStream.
+ *
+ * @throws IOException Failed open of backing file or opening of
+ * input streams verifying recording.
+ */
+ public void testReuse()
+ throws IOException
+ {
+ final String BASENAME = "testReuse";
+ cleanUpOldFiles(BASENAME);
+ RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE,
+ (new File(getTmpDir(), BASENAME + "Bkg.txt")).getAbsolutePath());
+ for (int i = 0; i < 3; i++)
+ {
+ reuse(BASENAME, ros, i);
+ }
+ }
+
+ private void reuse(String baseName, RecordingOutputStream ros, int index)
+ throws IOException
+ {
+ final String BASENAME = baseName + Integer.toString(index);
+ File f = writeIntRecordedFile(ros, BASENAME, WRITE_TOTAL);
+ verifyRecording(ros, f, WRITE_TOTAL);
+ // Do again to test that I can get a new ReplayInputStream on same
+ // RecordingOutputStream.
+ verifyRecording(ros, f, WRITE_TOTAL);
+ }
+
+ /**
+ * Method to test for void write(int).
+ *
+ * Uses small buffer size and small write size. Test mark and reset too.
+ *
+ * @throws IOException Failed open of backing file or opening of
+ * input streams verifying recording.
+ */
+ public void testWriteint()
+ throws IOException
+ {
+ final String BASENAME = "testWriteint";
+ cleanUpOldFiles(BASENAME);
+ RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE,
+ (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath());
+ File f = writeIntRecordedFile(ros, BASENAME, WRITE_TOTAL);
+ verifyRecording(ros, f, WRITE_TOTAL);
+ // Do again to test that I can get a new ReplayInputStream on same
+ // RecordingOutputStream.
+ verifyRecording(ros, f, WRITE_TOTAL);
+ }
+
+ /**
+ * Method to test for void write(byte []).
+ *
+ * Uses small buffer size and small write size.
+ *
+ * @throws IOException Failed open of backing file or opening of
+ * input streams verifying recording.
+ */
+ public void testWritebytearray()
+ throws IOException
+ {
+ final String BASENAME = "testWritebytearray";
+ cleanUpOldFiles(BASENAME);
+ RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE,
+ (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath());
+ File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL);
+ verifyRecording(ros, f, WRITE_TOTAL);
+ // Do again to test that I can get a new ReplayInputStream on same
+ // RecordingOutputStream.
+ verifyRecording(ros, f, WRITE_TOTAL);
+ }
+
+ /**
+ * Test mark and reset.
+ * @throws IOException
+ */
+ public void testMarkReset() throws IOException
+ {
+ final String BASENAME = "testMarkReset";
+ cleanUpOldFiles(BASENAME);
+ RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE,
+ (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath());
+ File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL);
+ verifyRecording(ros, f, WRITE_TOTAL);
+ ReplayInputStream ris = ros.getReplayInputStream();
+ ris.mark(10 /*Arbitrary value*/);
+ // Read from the stream.
+ ris.read();
+ ris.read();
+ ris.read();
+ // Reset it. It should be back at zero.
+ ris.reset();
+ assertEquals("Reset to zero", ris.read(), 0);
+ assertEquals("Reset to zero char 1", ris.read(), 1);
+ assertEquals("Reset to zero char 2", ris.read(), 2);
+ // Mark stream. Here. Next character should be '3'.
+ ris.mark(10 /* Arbitrary value*/);
+ ris.read();
+ ris.read();
+ ris.reset();
+ assertEquals("Reset to zero char 3", ris.read(), 3);
+ }
+
+ /**
+ * Record a file write.
+ *
+ * Write a file w/ characters that start at null and ascend to
+ * filesize. Record the writing w/ passed ros
+ * recordingoutputstream. Return the file recorded as result of method.
+ * The file output stream that is recorded is named
+ * basename + ".txt".
+ *
+ * This method writes a character at a time.
+ *
+ * @param ros RecordingOutputStream to record with.
+ * @param basename Basename of file.
+ * @param size How many characters to write.
+ * @return Recorded output stream.
+ */
+ private File writeIntRecordedFile(RecordingOutputStream ros,
+ String basename, int size)
+ throws IOException
+ {
+ File f = new File(getTmpDir(), basename + ".txt");
+ FileOutputStream fos = new FileOutputStream(f);
+ ros.open(fos);
+ for (int i = 0; i < WRITE_TOTAL; i++)
+ {
+ ros.write(i);
+ }
+ ros.close();
+ fos.close();
+ assertEquals("Content-Length test", size,
+ ros.getResponseContentLength());
+ return f;
+ }
+
+ /**
+ * Record a file byte array write.
+ *
+ * Write a file w/ characters that start at null and ascend to
+ * filesize. Record the writing w/ passed ros
+ * recordingoutputstream. Return the file recorded as result of method.
+ * The file output stream that is recorded is named
+ * basename + ".txt".
+ *
+ *
This method writes using a byte array.
+ *
+ * @param ros RecordingOutputStream to record with.
+ * @param basename Basename of file.
+ * @param size How many characters to write.
+ * @return Recorded output stream.
+ */
+ private File writeByteRecordedFile(RecordingOutputStream ros,
+ String basename, int size)
+ throws IOException
+ {
+ File f = new File(getTmpDir(), basename + ".txt");
+ FileOutputStream fos = new FileOutputStream(f);
+ ros.open(fos);
+ byte [] b = new byte[size];
+ for (int i = 0; i < size; i++)
+ {
+ b[i] = (byte)i;
+ }
+ ros.write(b);
+ ros.close();
+ fos.close();
+ assertEquals("Content-Length test", size,
+ ros.getResponseContentLength());
+ return f;
+ }
+
+ /**
+ * Verify what was written is both in the file written to and in the
+ * recording stream.
+ *
+ * @param ros Stream to check.
+ * @param f File that was recorded. Stream should have its content
+ * exactly.
+ * @param size Amount of bytes written.
+ *
+ * @exception IOException Failure reading streams.
+ */
+ private void verifyRecording(RecordingOutputStream ros, File f,
+ int size) throws IOException
+ {
+ assertEquals("Recorded file size.", size, f.length());
+ FileInputStream fis = new FileInputStream(f);
+ assertNotNull("FileInputStream not null", fis);
+ ReplayInputStream ris = ros.getReplayInputStream();
+ assertNotNull("ReplayInputStream not null", ris);
+ for (int i = 0; i < size; i++)
+ {
+ assertEquals("ReplayInputStream content verification", i,
+ ris.read());
+ assertEquals("Recorded file content verification", i,
+ fis.read());
+ }
+ assertEquals("ReplayInputStream at EOF", -1, ris.read());
+ fis.close();
+ ris.close();
+ }
+
+ public void testMessageBodyBegin() throws IOException {
+ final String BASENAME = "testMessageBodyBegin";
+ cleanUpOldFiles(BASENAME);
+ RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE,
+ (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath());
+ ros.setSha1Digest();
+
+ ros.open(new ByteArrayOutputStream());
+ ros.write("0123456789\n\nabcdefghij".getBytes());
+ assertEquals(12, ros.getMessageBodyBegin());
+ assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
+ ros.close();
+
+ ros.open(new ByteArrayOutputStream());
+ ros.write("0123456789\r\n\r\nabcdefghij".getBytes());
+ assertEquals(14, ros.getMessageBodyBegin());
+ assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
+ ros.close();
+
+ ros.open(new ByteArrayOutputStream());
+ ros.write("0123456789\n\r\nabcdefghij".getBytes());
+ assertEquals(13, ros.getMessageBodyBegin());
+ assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
+ ros.close();
+
+ ros.open(new ByteArrayOutputStream());
+ ros.write("0123456789\n".getBytes());
+ assertEquals(-1, ros.getMessageBodyBegin());
+ ros.write("\nabcdefghij".getBytes());
+ assertEquals(12, ros.getMessageBodyBegin());
+ assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
+ ros.close();
+
+ ros.open(new ByteArrayOutputStream());
+ ros.write("0123456789\n".getBytes());
+ assertEquals(-1, ros.getMessageBodyBegin());
+ ros.write("\r\nabcdefghij".getBytes());
+ assertEquals(13, ros.getMessageBodyBegin());
+ assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
+ ros.close();
+
+ ros.open(new ByteArrayOutputStream());
+ ros.write("0123456789\n\r".getBytes());
+ assertEquals(-1, ros.getMessageBodyBegin());
+ ros.write("\nabcdefghij".getBytes());
+ assertEquals(13, ros.getMessageBodyBegin());
+ assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
+ ros.close();
+
+ ros.open(new ByteArrayOutputStream());
+ ros.write("0123456789".getBytes());
+ ros.write('\n');
+ assertEquals(-1, ros.getMessageBodyBegin());
+ ros.write("\nabcdefghij".getBytes());
+ assertEquals(12, ros.getMessageBodyBegin());
+ assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
+ ros.close();
+
+ ros.open(new ByteArrayOutputStream());
+ ros.write("0123456789".getBytes());
+ ros.write('\n');
+ ros.write('\n');
+ for (int b: "abcdefghij".getBytes()) {
+ ros.write(b);
+ }
+ assertEquals(12, ros.getMessageBodyBegin());
+ assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
+ ros.close();
+
+ ros.open(new ByteArrayOutputStream());
+ ros.write("0123456789".getBytes());
+ ros.write('\n');
+ ros.write('\r');
+ ros.write('\n');
+ for (int b: "abcdefghij".getBytes()) {
+ ros.write(b);
+ }
+ assertEquals(13, ros.getMessageBodyBegin());
+ assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
+ ros.close();
+
+ ros.open(new ByteArrayOutputStream());
+ ros.write("0123456789\n".getBytes());
+ ros.write('\n');
+ ros.write("abcdefghij".getBytes());
+ assertEquals(12, ros.getMessageBodyBegin());
+ assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
+ ros.close();
+
+ ros.open(new ByteArrayOutputStream());
+ ros.write("0123456789\n\r".getBytes());
+ ros.write('\n');
+ ros.write("abcdefghij".getBytes());
+ assertEquals(13, ros.getMessageBodyBegin());
+ assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
+ ros.close();
+ }
+}