Skip to content

Commit 8decd8f

Browse files
author
Kristinn Sigurðsson
committed
Merge pull request iipc#39 from nlevitt/issue-38
fix for iipc#38 - detect end of http protocol headers in a smarter way, to avoid calling write(byte) repeatedly; add unit tests
2 parents 598c524 + c77d6f5 commit 8decd8f

5 files changed

Lines changed: 522 additions & 8 deletions

File tree

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* [Escape redirect URLs in RealCDXExtractorOutput](https://github.com/iipc/webarchive-commons/pull/36)
44
* [Tests fail on Windows](https://github.com/iipc/webarchive-commons/issues/2)
55
* [Test fails on Java 8](https://github.com/iipc/webarchive-commons/issues/31)
6+
* [RecordingOutputStream can affect tcp packets sent in an undesirable way](https://github.com/iipc/webarchive-commons/issues/38)
67

78
1.1.4
89
-----

pom.xml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@
6565
<groupId>junit</groupId>
6666
<artifactId>junit</artifactId>
6767
<version>3.8.1</version>
68-
<scope>test</scope>
6968
</dependency>
7069

7170
<dependency>

src/main/java/org/archive/io/RecordingOutputStream.java

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,26 @@ public void write(int b) throws IOException {
242242
checkLimits();
243243
}
244244

245+
private int findMessageBodyBeginMark(byte[] b, int off, int len) {
246+
if ((lastTwoBytes[1] == '\n' || lastTwoBytes[0] == '\n' && lastTwoBytes[1] == '\r')
247+
&& len >= 1 && b[off] == '\n') {
248+
return 1;
249+
} else if (lastTwoBytes[1] == '\n' && len >= 2 && b[off] == '\r' && b[off+1] == '\n') {
250+
return 2;
251+
}
252+
253+
for (int i = off; i < off + len - 1; i++) {
254+
if (b[i] == '\n' && b[i+1] == '\n') {
255+
return i + 2;
256+
} else if (b[i] == '\n' && b[i+1] == '\r'
257+
&& i + 2 < off + len && b[i+2] == '\n') {
258+
return i + 3;
259+
}
260+
}
261+
262+
return -1;
263+
}
264+
245265
public void write(byte[] b, int off, int len) throws IOException {
246266
if(position < maxPosition) {
247267
if(position+len<=maxPosition) {
@@ -255,20 +275,35 @@ public void write(byte[] b, int off, int len) throws IOException {
255275
off += consumeRange;
256276
len -= consumeRange;
257277
}
258-
259-
// see comment on int[] lastTwoBytes
260-
while (messageBodyBeginMark < 0 && len > 0) {
261-
write(b[off]);
262-
off++;
263-
len--;
278+
279+
if (messageBodyBeginMark < 0) {
280+
// see comment on int[] lastTwoBytes
281+
int mark = findMessageBodyBeginMark(b, off, len);
282+
if (mark > 0) {
283+
if(recording) {
284+
record(b, off, mark - off);
285+
}
286+
if (this.out != null) {
287+
this.out.write(b, off, mark - off);
288+
}
289+
markMessageBodyBegin();
290+
len = len - (mark - off);
291+
off = mark;
292+
}
264293
}
265-
294+
266295
if(recording) {
267296
record(b, off, len);
268297
}
269298
if (this.out != null) {
270299
this.out.write(b, off, len);
271300
}
301+
if (len >= 1) {
302+
lastTwoBytes[1] = b[off + len - 1];
303+
if (len >= 2) {
304+
lastTwoBytes[0] = b[off + len - 2];
305+
}
306+
}
272307
checkLimits();
273308
}
274309

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
* This file is part of the Heritrix web crawler (crawler.archive.org).
3+
*
4+
* Licensed to the Internet Archive (IA) by one or more individual
5+
* contributors.
6+
*
7+
* The IA licenses this file to You under the Apache License, Version 2.0
8+
* (the "License"); you may not use this file except in compliance with
9+
* the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
*/
19+
20+
package org.archive.util;
21+
22+
import java.io.File;
23+
import java.io.IOException;
24+
25+
import junit.framework.TestCase;
26+
27+
28+
/**
29+
* Base class for TestCases that want access to a tmp dir for the writing
30+
* of files.
31+
*
32+
* @author stack
33+
*/
34+
public abstract class TmpDirTestCase extends TestCase
35+
{
36+
/**
37+
* Name of the system property that holds pointer to tmp directory into
38+
* which we can safely write files.
39+
*/
40+
public static final String TEST_TMP_SYSTEM_PROPERTY_NAME = "testtmpdir";
41+
42+
/**
43+
* Default test tmp.
44+
*/
45+
public static final String DEFAULT_TEST_TMP_DIR = File.separator + "tmp" +
46+
File.separator + "heritrix-junit-tests";
47+
48+
/**
49+
* Directory to write temporary files to.
50+
*/
51+
private File tmpDir = null;
52+
53+
54+
public TmpDirTestCase()
55+
{
56+
super();
57+
}
58+
59+
public TmpDirTestCase(String testName)
60+
{
61+
super(testName);
62+
}
63+
64+
/*
65+
* @see TestCase#setUp()
66+
*/
67+
protected void setUp() throws Exception {
68+
super.setUp();
69+
this.tmpDir = tmpDir();
70+
}
71+
72+
/**
73+
* @return Returns the tmpDir.
74+
*/
75+
public File getTmpDir()
76+
{
77+
return this.tmpDir;
78+
}
79+
80+
/**
81+
* Delete any files left over from previous run.
82+
*
83+
* @param basename Base name of files we're to clean up.
84+
*/
85+
public void cleanUpOldFiles(String basename) {
86+
cleanUpOldFiles(getTmpDir(), basename);
87+
}
88+
89+
/**
90+
* Delete any files left over from previous run.
91+
*
92+
* @param prefix Base name of files we're to clean up.
93+
* @param basedir Directory to start cleaning in.
94+
*/
95+
public void cleanUpOldFiles(File basedir, String prefix) {
96+
File [] files = FileUtils.getFilesWithPrefix(basedir, prefix);
97+
if (files != null) {
98+
for (int i = 0; i < files.length; i++) {
99+
org.apache.commons.io.FileUtils.deleteQuietly(files[i]);
100+
}
101+
}
102+
}
103+
104+
105+
public static File tmpDir() throws IOException {
106+
String tmpDirStr = System.getProperty(TEST_TMP_SYSTEM_PROPERTY_NAME);
107+
tmpDirStr = (tmpDirStr == null)? DEFAULT_TEST_TMP_DIR: tmpDirStr;
108+
File tmpDir = new File(tmpDirStr);
109+
FileUtils.ensureWriteableDirectory(tmpDir);
110+
111+
if (!tmpDir.canWrite())
112+
{
113+
throw new IOException(tmpDir.getAbsolutePath() +
114+
" is unwriteable.");
115+
}
116+
117+
return tmpDir;
118+
}
119+
}

0 commit comments

Comments
 (0)