Skip to content

Commit 41d3a58

Browse files
committed
CDX: Add CloseableCompositeIterator which iterates in sequence, optimization for zipnum clusters to be loaded sequentially when only looking for last line
1 parent 28c2995 commit 41d3a58

3 files changed

Lines changed: 110 additions & 0 deletions

File tree

src/main/java/org/archive/format/cdx/MultiCDXInputSource.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import org.archive.format.gzip.zipnum.ZipNumIndex;
1010
import org.archive.format.gzip.zipnum.ZipNumParams;
11+
import org.archive.util.iterator.CloseableCompositeIterator;
1112
import org.archive.util.iterator.CloseableIterator;
1213
import org.archive.util.iterator.SortedCompositeIterator;
1314

@@ -70,9 +71,36 @@ public CloseableIterator<String> getCDXIterator(String key, String prefix, boole
7071
return scitr;
7172
}
7273

74+
public CloseableIterator<String> createSeqIterator(String key, String start, String end, ZipNumParams params)
75+
{
76+
CloseableCompositeIterator<String> composite = new CloseableCompositeIterator<String>();
77+
CloseableIterator<String> iter = null;
78+
79+
for (CDXInputSource cdxReader : cdx) {
80+
try {
81+
iter = cdxReader.getCDXIterator(key, start, end, params);
82+
83+
if (!params.isReverse()) {
84+
composite.addLast(iter);
85+
} else {
86+
composite.addFirst(iter);
87+
}
88+
89+
} catch (IOException io) {
90+
LOGGER.warning(io.toString());
91+
}
92+
}
93+
94+
return composite;
95+
}
96+
7397

7498
public CloseableIterator<String> getCDXIterator(String key, String start, String end, ZipNumParams params) throws IOException {
7599

100+
if (params.isSequential()) {
101+
return this.createSeqIterator(key, start, end, params);
102+
}
103+
76104
SortedCompositeIterator<String> scitr = new SortedCompositeIterator<String>(cdx.size(), params.isReverse() ? reverseComparator : comparator);
77105

78106
CloseableIterator<String> iter = null;

src/main/java/org/archive/format/gzip/zipnum/ZipNumParams.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ public class ZipNumParams
66
protected int timestampDedupLength = 0;
77
protected int maxBlocks = 0;
88
private boolean reverse = false;
9+
private boolean sequential = false;
910

1011
public ZipNumParams()
1112
{
@@ -56,4 +57,12 @@ public boolean isReverse() {
5657
public void setReverse(boolean reverse) {
5758
this.reverse = reverse;
5859
}
60+
61+
public boolean isSequential() {
62+
return sequential;
63+
}
64+
65+
public void setSequential(boolean sequential) {
66+
this.sequential = sequential;
67+
}
5968
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
package org.archive.util.iterator;
2+
3+
import java.io.IOException;
4+
import java.util.Iterator;
5+
import java.util.LinkedList;
6+
7+
public class CloseableCompositeIterator<E> implements CloseableIterator<E> {
8+
9+
protected LinkedList<CloseableIterator<E>> iters;
10+
protected Iterator<CloseableIterator<E>> iterPtr;
11+
protected CloseableIterator<E> currIter;
12+
13+
public CloseableCompositeIterator()
14+
{
15+
iters = new LinkedList<CloseableIterator<E>>();
16+
}
17+
18+
public void addFirst(CloseableIterator<E> e)
19+
{
20+
iters.addFirst(e);
21+
}
22+
23+
public void addLast(CloseableIterator<E> e)
24+
{
25+
iters.addLast(e);
26+
}
27+
28+
@Override
29+
public boolean hasNext() {
30+
31+
if (iterPtr == null) {
32+
iterPtr = iters.iterator();
33+
currIter = iterPtr.next();
34+
}
35+
36+
if (currIter == null) {
37+
return false;
38+
}
39+
40+
while (currIter != null) {
41+
if (currIter.hasNext()) {
42+
return true;
43+
}
44+
45+
currIter = (iterPtr.hasNext() ? iterPtr.next() : null);
46+
}
47+
48+
return false;
49+
}
50+
51+
@Override
52+
public E next() {
53+
return currIter.next();
54+
}
55+
56+
@Override
57+
public void remove() {
58+
currIter.remove();
59+
}
60+
61+
@Override
62+
public void close() throws IOException {
63+
for (CloseableIterator<E> e : iters) {
64+
if (e != null) {
65+
try {
66+
e.close();
67+
} catch (IOException io) {
68+
69+
}
70+
}
71+
}
72+
}
73+
}

0 commit comments

Comments
 (0)