Skip to content

Commit a07c9f7

Browse files
committed
Improve escape tests
Fix bug in readEscape() - was not handling EOF
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1306890 13f79535-47bb-0310-9956-ffa450edef68
1 parent 5c9697c commit a07c9f7

2 files changed

Lines changed: 46 additions & 5 deletions

File tree

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ int getLineNumber() {
6060
return in.getLineNumber();
6161
}
6262

63+
// TODO escape handling needs more work
6364
int readEscape() throws IOException {
6465
// assume c is the escape char (normally a backslash)
6566
int c = in.read();
@@ -74,6 +75,8 @@ int readEscape() throws IOException {
7475
return '\b';
7576
case 'f':
7677
return '\f';
78+
case ExtendedBufferedReader.END_OF_STREAM:
79+
throw new IOException("EOF whilst processing escape sequence");
7780
default:
7881
return c;
7982
}

src/test/java/org/apache/commons/csv/CSVLexerTest.java

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -147,28 +147,66 @@ public void testNextToken2EmptyLines() throws IOException {
147147

148148
}
149149

150-
// simple token with escaping
150+
// simple token with escaping not enabled
151151
@Test
152152
public void testNextToken3() throws IOException {
153153
/* file: a,\,,b
154154
* \,,
155155
*/
156-
String code = "a,\\,,b\n\\,,";
157-
CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#');
156+
String code = "a,\\,,b\\\n\\,,";
157+
CSVFormat format = CSVFormat.DEFAULT;
158+
assertFalse(format.isEscaping());
158159
Lexer parser = getLexer(code, format);
159160

160161
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
161162
// an unquoted single backslash is not an escape char
162163
assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));
163164
assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));
164-
assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));
165+
assertTokenEquals(EORECORD, "b\\", parser.nextToken(new Token()));
165166
// an unquoted single backslash is not an escape char
166167
assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));
167168
assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));
168169
assertTokenEquals(EOF, "", parser.nextToken(new Token()));
169170
}
170171

171-
// encapsulator tokenizer (sinle line)
172+
// simple token with escaping enabled
173+
@Test
174+
public void testNextToken3Escaping() throws IOException {
175+
/* file: a,\,,b
176+
* \,,
177+
*/
178+
String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\n";
179+
CSVFormat format = CSVFormat.DEFAULT.withEscape('\\');
180+
assertTrue(format.isEscaping());
181+
Lexer parser = getLexer(code, format);
182+
183+
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
184+
assertTokenEquals(TOKEN, ",", parser.nextToken(new Token()));
185+
assertTokenEquals(EORECORD, "b\\", parser.nextToken(new Token()));
186+
assertTokenEquals(TOKEN, ",", parser.nextToken(new Token()));
187+
assertTokenEquals(TOKEN, "\nc", parser.nextToken(new Token()));
188+
assertTokenEquals(EOF, "d\n", parser.nextToken(new Token()));
189+
assertTokenEquals(EOF, "", parser.nextToken(new Token()));
190+
}
191+
192+
// escaping enabled, but the input ends right after the escape char (expects IOException)
193+
@Test
194+
public void testNextToken3BadEscaping() throws IOException {
195+
String code = "a,b,c\\";
196+
CSVFormat format = CSVFormat.DEFAULT.withEscape('\\');
197+
assertTrue(format.isEscaping());
198+
Lexer parser = getLexer(code, format);
199+
200+
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
201+
assertTokenEquals(TOKEN, "b", parser.nextToken(new Token()));
202+
try {
203+
Token tkn = parser.nextToken(new Token());
204+
fail("Expected IOE, found "+tkn);
205+
} catch (IOException e) {
206+
}
207+
}
208+
209+
// encapsulator tokenizer (single line)
172210
@Test
173211
public void testNextToken4() throws IOException {
174212
/* file: a,"foo",b

0 commit comments

Comments
 (0)