Skip to content

Commit 65ab9db

Browse files
committed
Make it easy to provide an alternative lexer if required
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1303620 13f79535-47bb-0310-9956-ffa450edef68
1 parent 7592782 commit 65ab9db

3 files changed

Lines changed: 106 additions & 71 deletions

File tree

src/main/java/org/apache/commons/csv/CSVLexer.java

Lines changed: 4 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,14 @@
2121

2222
import static org.apache.commons.csv.Token.Type.*;
2323

24-
class CSVLexer {
24+
class CSVLexer extends Lexer {
2525

2626
private final StringBuilder wsBuf = new StringBuilder();
2727

28-
private final CSVFormat format;
29-
30-
/** The input stream */
31-
private final ExtendedBufferedReader in;
32-
3328
CSVLexer(CSVFormat format, ExtendedBufferedReader in) {
34-
this.format = format;
35-
this.in = in;
36-
}
37-
38-
public int getLineNumber() {
39-
return in.getLineNumber();
29+
super(format, in);
4030
}
41-
31+
4232
/**
4333
* Returns the next token.
4434
* <p/>
@@ -48,6 +38,7 @@ public int getLineNumber() {
4838
* @return the next token found
4939
* @throws java.io.IOException on stream access error
5040
*/
41+
@Override
5142
Token nextToken(Token tkn) throws IOException {
5243
wsBuf.setLength(0); // reuse
5344

@@ -182,16 +173,6 @@ private Token simpleTokenLexer(Token tkn, int c) throws IOException {
182173
return tkn;
183174
}
184175

185-
private void trimTrailingSpaces(StringBuilder buffer) {
186-
int length = buffer.length();
187-
while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
188-
length = length - 1;
189-
}
190-
if (length != buffer.length()) {
191-
buffer.setLength(length);
192-
}
193-
}
194-
195176
/**
196177
* An encapsulated token lexer
197178
* <p/>
@@ -253,51 +234,4 @@ private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException {
253234
}
254235
}
255236

256-
private int readEscape(int c) throws IOException {
257-
// assume c is the escape char (normally a backslash)
258-
c = in.read();
259-
switch (c) {
260-
case 'r':
261-
return '\r';
262-
case 'n':
263-
return '\n';
264-
case 't':
265-
return '\t';
266-
case 'b':
267-
return '\b';
268-
case 'f':
269-
return '\f';
270-
default:
271-
return c;
272-
}
273-
}
274-
275-
/**
276-
* @return true if the given char is a whitespace character
277-
*/
278-
private boolean isWhitespace(int c) {
279-
return (c != format.getDelimiter()) && Character.isWhitespace((char) c);
280-
}
281-
282-
/**
283-
* Greedy - accepts \n, \r and \r\n
284-
* This checker consumes silently the second control-character...
285-
*
286-
* @return true if the given character is a line-terminator
287-
*/
288-
private boolean isEndOfLine(int c) throws IOException {
289-
// check if we have \r\n...
290-
if (c == '\r' && in.lookAhead() == '\n') {
291-
// note: does not change c outside of this method !!
292-
c = in.read();
293-
}
294-
return (c == '\n' || c == '\r');
295-
}
296-
297-
/**
298-
* @return true if the given character indicates end of file
299-
*/
300-
private boolean isEndOfFile(int c) {
301-
return c == ExtendedBufferedReader.END_OF_STREAM;
302-
}
303237
}

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
*/
6363
public class CSVParser implements Iterable<CSVRecord> {
6464

65-
private final CSVLexer lexer;
65+
private final Lexer lexer;
6666
private final Map<String, Integer> headerMapping;
6767

6868
// the following objects are shared to reduce garbage
src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*
17+
*/
18+
19+
package org.apache.commons.csv;
20+
21+
import java.io.IOException;
22+
23+
/**
24+
* Abstract lexer class; contains common utility routines shared by lexers
25+
*/
26+
abstract class Lexer {
27+
28+
/** The CSV format supplied at construction; its delimiter is consulted by {@link #isWhitespace(int)}. */
final CSVFormat format;
29+
30+
/** The input stream */
31+
final ExtendedBufferedReader in;
32+
33+
/**
 * Stores the format and the reader for use by the helper methods below.
 *
 * @param format the CSV format this lexer works with
 * @param in the reader that characters are pulled from
 */
Lexer(CSVFormat format, ExtendedBufferedReader in) {
34+
this.format = format;
35+
this.in = in;
36+
}
37+
38+
/**
 * Reports the line number by delegating to the underlying reader.
 *
 * @return the value of {@code in.getLineNumber()}
 */
int getLineNumber() {
39+
return in.getLineNumber();
40+
}
41+
42+
/**
 * Reads the character that follows an escape character and translates it.
 * The letters r, n, t, b and f become the matching control characters
 * (\r, \n, \t, \b, \f); any other value read is returned unchanged.
 *
 * @param c the escape character that was just read; its value is not inspected
 * @return the translated character, or the raw value returned by {@code in.read()}
 * @throws IOException on stream access error
 */
int readEscape(int c) throws IOException {
43+
// assume c is the escape char (normally a backslash)
44+
// the parameter is overwritten: from here on c is the character after the escape
c = in.read();
45+
switch (c) {
46+
case 'r':
47+
return '\r';
48+
case 'n':
49+
return '\n';
50+
case 't':
51+
return '\t';
52+
case 'b':
53+
return '\b';
54+
case 'f':
55+
return '\f';
56+
default:
57+
// not a recognised escape letter: hand back whatever was read, untouched
return c;
58+
}
59+
}
60+
61+
/**
 * Strips trailing whitespace from the buffer in place. A character is
 * treated as whitespace when {@link Character#isWhitespace(char)} says so.
 *
 * @param buffer the buffer to shorten; modified directly
 */
void trimTrailingSpaces(StringBuilder buffer) {
62+
int length = buffer.length();
63+
// walk backwards past every trailing whitespace character
while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
64+
length = length - 1;
65+
}
66+
// only touch the buffer when something actually has to be cut off
if (length != buffer.length()) {
67+
buffer.setLength(length);
68+
}
69+
}
70+
71+
/**
72+
 * Tests whether a character counts as whitespace for this lexer: it must
 * differ from the format's delimiter and satisfy
 * {@link Character#isWhitespace(char)} after being narrowed to a char.
 *
 * @param c the character to test
* @return true if the given char is a whitespace character
73+
*/
74+
boolean isWhitespace(int c) {
75+
return (c != format.getDelimiter()) && Character.isWhitespace((char) c);
76+
}
77+
78+
/**
79+
* Greedy - accepts \n, \r and \r\n
80+
* When the given character is \r and the next character in the stream is \n,
 * that \n is silently consumed from the reader as part of the same line ending.
81+
*
 * @param c the character to test
82+
* @return true if the given character is a line-terminator
 * @throws IOException on stream access error
83+
*/
84+
boolean isEndOfLine(int c) throws IOException {
85+
// check if we have \r\n...
86+
if (c == '\r' && in.lookAhead() == '\n') {
87+
// note: does not change c outside of this method !!
88+
// reading here consumes the \n so the caller does not see it as a second line ending
c = in.read();
89+
}
90+
return (c == '\n' || c == '\r');
91+
}
92+
93+
/**
94+
 * @param c the value to test
* @return true if the given character indicates end of file
95+
*/
96+
boolean isEndOfFile(int c) {
97+
return c == ExtendedBufferedReader.END_OF_STREAM;
98+
}
99+
100+
/**
 * Returns the next token; each concrete lexer supplies the implementation.
 *
 * @param reusableToken a token instance handed in by the caller
 *        (presumably filled in and returned so that no new token is
 *        allocated per call - confirm against the concrete lexer)
 * @return the next token found
 * @throws IOException on stream access error
 */
abstract Token nextToken(Token reusableToken) throws IOException;
101+
}

0 commit comments

Comments
 (0)