Skip to content

Commit 19fbc19

Browse files
committed
Added a JMH benchmark to compare the most common CSV parsers
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1658276 13f79535-47bb-0310-9956-ffa450edef68
1 parent fd533e1 commit 19fbc19

2 files changed

Lines changed: 284 additions & 0 deletions

File tree

pom.xml

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@ CSV files of various types.
7272
<email>yonik@apache.org</email>
7373
<organization>The Apache Software Foundation</organization>
7474
</developer>
75+
<developer>
76+
<name>Emmanuel Bourg</name>
77+
<id>ebourg</id>
78+
<email>ebourg@apache.org</email>
79+
<organization>Apache</organization>
80+
</developer>
7581
<developer>
7682
<name>Gary Gregory</name>
7783
<id>ggregory</id>
@@ -361,6 +367,112 @@ CSV files of various types.
361367
</plugins>
362368
</build>
363369
</profile>
370+
371+
372+
<!-- Profile to build and run the benchmarks. Use 'mvn test -Pbenchmark', and add '-Dbenchmark=foo' to run only the foo benchmark -->
373+
<profile>
  <id>benchmark</id>

  <!--
    Libraries needed only by the benchmark sources. They are all test-scoped
    (or system-scoped for the jar that is not published) so that activating
    this profile never adds them to the main artifact's compile classpath.
  -->
  <dependencies>
    <!-- JMH runtime -->
    <dependency>
      <groupId>org.openjdk.jmh</groupId>
      <artifactId>jmh-core</artifactId>
      <version>1.5.2</version>
      <scope>test</scope>
    </dependency>

    <!-- JMH annotation processor; kept at the same version as jmh-core -->
    <dependency>
      <groupId>org.openjdk.jmh</groupId>
      <artifactId>jmh-generator-annprocess</artifactId>
      <version>1.5.2</version>
      <scope>test</scope>
    </dependency>

    <!-- CSV parsers compared against Commons CSV -->
    <dependency>
      <groupId>genjava</groupId>
      <artifactId>gj-csv</artifactId>
      <version>1.0</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>net.sourceforge.javacsv</groupId>
      <artifactId>javacsv</artifactId>
      <version>2.0</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>com.opencsv</groupId>
      <artifactId>opencsv</artifactId>
      <version>3.1</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>net.sf.supercsv</groupId>
      <artifactId>super-csv</artifactId>
      <version>2.2.1</version>
      <!-- test scope, like the other benchmarked parsers -->
      <scope>test</scope>
    </dependency>

    <!-- Not in Maven Central: download manually from http://kasparov.skife.org/csv/csv-1.0.jar and copy it into the base directory -->
    <dependency>
      <groupId>org.skife.kasparov</groupId>
      <artifactId>csv</artifactId>
      <version>1.0</version>
      <scope>system</scope>
      <systemPath>${basedir}/csv-1.0.jar</systemPath>
    </dependency>
  </dependencies>

  <properties>
    <!-- Skip the regular unit tests so that the test phase only runs the benchmarks -->
    <skipTests>true</skipTests>
    <!-- Default benchmark filter passed to JMH; override with -Dbenchmark=foo -->
    <benchmark>org.apache</benchmark>
  </properties>

  <build>
    <plugins>
      <!-- Enable the compilation of the benchmarks -->
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <!-- combine.self="override" replaces the inherited configuration instead of merging with it -->
        <configuration combine.self="override">
          <testIncludes>
            <testInclude>**/*</testInclude>
          </testIncludes>
        </configuration>
      </plugin>

      <!-- Hook the benchmarks to the test phase -->
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <executions>
          <execution>
            <id>benchmark</id>
            <phase>test</phase>
            <goals>
              <goal>exec</goal>
            </goals>
            <configuration>
              <!-- Run a separate JVM on the test classpath with the JMH launcher as main class -->
              <classpathScope>test</classpathScope>
              <executable>java</executable>
              <arguments>
                <argument>-classpath</argument>
                <classpath/>
                <argument>org.openjdk.jmh.Main</argument>
                <!-- Write the results as JSON to target/jmh-result.json -->
                <argument>-rf</argument>
                <argument>json</argument>
                <argument>-rff</argument>
                <argument>target/jmh-result.json</argument>
                <!-- Benchmark filter, see the 'benchmark' property above -->
                <argument>${benchmark}</argument>
              </arguments>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</profile>
364476
</profiles>
365477

366478
</project>
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.csv;
19+
20+
import java.io.BufferedReader;
21+
import java.io.FileReader;
22+
import java.io.IOException;
23+
import java.util.List;
24+
import java.util.concurrent.TimeUnit;
25+
26+
import com.generationjava.io.CsvReader;
27+
import org.openjdk.jmh.annotations.Benchmark;
28+
import org.openjdk.jmh.annotations.BenchmarkMode;
29+
import org.openjdk.jmh.annotations.Fork;
30+
import org.openjdk.jmh.annotations.Measurement;
31+
import org.openjdk.jmh.annotations.Mode;
32+
import org.openjdk.jmh.annotations.OutputTimeUnit;
33+
import org.openjdk.jmh.annotations.Threads;
34+
import org.openjdk.jmh.annotations.Warmup;
35+
import org.openjdk.jmh.infra.Blackhole;
36+
import org.supercsv.io.CsvListReader;
37+
import org.supercsv.prefs.CsvPreference;
38+
39+
@BenchmarkMode(Mode.AverageTime)
40+
@Fork(value = 1, jvmArgs = "-server")
41+
@Threads(1)
42+
@Warmup(iterations = 10)
43+
@Measurement(iterations = 10)
44+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
45+
public class CSVBenchmark {
46+
47+
private BufferedReader getReader() throws IOException {
48+
return new BufferedReader(new FileReader("worldcitiespop.txt"));
49+
}
50+
51+
@Benchmark
52+
public int baseline(Blackhole bh) throws Exception {
53+
BufferedReader in = getReader();
54+
int count = 0;
55+
String line;
56+
while ((line = in.readLine()) != null) {
57+
count++;
58+
}
59+
60+
bh.consume(count);
61+
in.close();
62+
return count;
63+
}
64+
65+
@Benchmark
66+
public int parseCommonsCSV(Blackhole bh) throws Exception {
67+
BufferedReader in = getReader();
68+
69+
CSVFormat format = CSVFormat.DEFAULT.withHeader();
70+
71+
int count = 0;
72+
for (CSVRecord record : format.parse(in)) {
73+
count++;
74+
}
75+
76+
bh.consume(count);
77+
in.close();
78+
return count;
79+
}
80+
81+
@Benchmark
82+
public int parseGenJavaCSV(Blackhole bh) throws Exception {
83+
BufferedReader in = getReader();
84+
85+
CsvReader reader = new CsvReader(in);
86+
reader.setFieldDelimiter(',');
87+
88+
int count = 0;
89+
String[] record = null;
90+
while ((record = reader.readLine()) != null) {
91+
count++;
92+
}
93+
94+
bh.consume(count);
95+
in.close();
96+
return count;
97+
}
98+
99+
@Benchmark
100+
public int parseJavaCSV(Blackhole bh) throws Exception {
101+
BufferedReader in = getReader();
102+
103+
com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ',');
104+
reader.setRecordDelimiter('\n');
105+
106+
int count = 0;
107+
while (reader.readRecord()) {
108+
count++;
109+
}
110+
111+
bh.consume(count);
112+
in.close();
113+
return count;
114+
}
115+
116+
@Benchmark
117+
public int parseOpenCSV(Blackhole bh) throws Exception {
118+
BufferedReader in = getReader();
119+
120+
com.opencsv.CSVReader reader = new com.opencsv.CSVReader(in, ',');
121+
122+
int count = 0;
123+
while (reader.readNext() != null) {
124+
count++;
125+
}
126+
127+
bh.consume(count);
128+
in.close();
129+
return count;
130+
}
131+
132+
@Benchmark
133+
public int parseSkifeCSV(Blackhole bh) throws Exception {
134+
BufferedReader in = getReader();
135+
136+
org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader();
137+
reader.setSeperator(',');
138+
139+
CountingReaderCallback callback = new CountingReaderCallback();
140+
reader.parse(in, callback);
141+
142+
bh.consume(callback);
143+
in.close();
144+
return callback.count;
145+
}
146+
147+
private static class CountingReaderCallback implements org.skife.csv.ReaderCallback {
148+
public int count = 0;
149+
150+
@Override
151+
public void onRow(String[] fields) {
152+
count++;
153+
}
154+
}
155+
156+
@Benchmark
157+
public int parseSuperCSV(Blackhole bh) throws Exception {
158+
BufferedReader in = getReader();
159+
160+
CsvListReader reader = new CsvListReader(in, CsvPreference.STANDARD_PREFERENCE);
161+
162+
int count = 0;
163+
List<String> record = null;
164+
while ((record = reader.read()) != null) {
165+
count++;
166+
}
167+
168+
bh.consume(count);
169+
in.close();
170+
return count;
171+
}
172+
}

0 commit comments

Comments
 (0)