Skip to content

Commit 28441e6

Browse files
committed
Add CSVException that extends IOException thrown on invalid input
instead of IOException
1 parent 761a337 commit 28441e6

6 files changed

Lines changed: 89 additions & 33 deletions

File tree

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
<body>
4343
<release version="1.11.1" date="YYYY-MM-DD" description="Feature and bug fix release (Java 8 or above)">
4444
<!-- ADD -->
45+
<action type="add" dev="ggregory" due-to="Gary Gregory">Add CSVException that extends IOException thrown on invalid input instead of IOException.</action>
4546
<!-- FIX -->
4647
<action type="fix" dev="ggregory" due-to="Gary Gregory">Fix PMD issues for port to PMD 7.1.0.</action>
4748
<action type="fix" dev="ggregory" due-to="Dávid Szigecsán, Gary Gregory">Fix some Javadoc links #442.</action>
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.csv;
19+
20+
import java.io.IOException;
21+
import java.util.Formatter;
22+
import java.util.IllegalFormatException;
23+
24+
/**
25+
* Signals a CSV exception. For example, this exception is thrown when parsing invalid input.
26+
*
27+
* @since 1.12.0
28+
*/
29+
public class CSVException extends IOException {
30+
31+
private static final long serialVersionUID = 1L;
32+
33+
/**
34+
* Constructs a new instance whose detail message is the result of {@link String#format(String, Object...)} applied to the given arguments.
35+
*
36+
* @param format A {@link Formatter} format string used to build the exception message.
37+
* @param args Arguments referenced by the format specifiers in the format string; see {@link String#format(String, Object...)}.
38+
* @throws IllegalFormatException If the format string is invalid or incompatible with the given arguments; see {@link String#format(String, Object...)}.
39+
*/
40+
public CSVException(final String format, final Object... args) {
41+
super(String.format(format, args));
42+
}
43+
44+
}

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,6 +2032,7 @@ public boolean isQuoteCharacterSet() {
20322032
* @param reader the input stream
20332033
* @return a parser over a stream of {@link CSVRecord}s.
20342034
* @throws IOException If an I/O error occurs
2035+
* @throws CSVException Thrown on invalid input.
20352036
*/
20362037
public CSVParser parse(final Reader reader) throws IOException {
20372038
return new CSVParser(reader, this);

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,11 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
145145
final class CSVRecordIterator implements Iterator<CSVRecord> {
146146
private CSVRecord current;
147147

148+
/**
149+
* Gets the next record.
150+
*
151+
* @return the next record.
152+
*/
148153
private CSVRecord getNextRecord() {
149154
return Uncheck.get(CSVParser.this::nextRecord);
150155
}
@@ -221,6 +226,7 @@ private static final class Headers {
221226
* If the parameters of the format are inconsistent or if either file or format are null.
222227
* @throws IOException
223228
* If an I/O error occurs
229+
* @throws CSVException Thrown on invalid input.
224230
*/
225231
public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
226232
Objects.requireNonNull(file, "file");
@@ -246,6 +252,7 @@ public static CSVParser parse(final File file, final Charset charset, final CSVF
246252
* If the parameters of the format are inconsistent or if either reader or format are null.
247253
* @throws IOException
248254
* If there is a problem reading the header or skipping the first record
255+
* @throws CSVException Thrown on invalid input.
249256
* @since 1.5
250257
*/
251258
@SuppressWarnings("resource")
@@ -270,6 +277,7 @@ public static CSVParser parse(final InputStream inputStream, final Charset chars
270277
* If the parameters of the format are inconsistent or if either file or format are null.
271278
* @throws IOException
272279
* If an I/O error occurs
280+
* @throws CSVException Thrown on invalid input.
273281
* @since 1.5
274282
*/
275283
@SuppressWarnings("resource")
@@ -296,6 +304,7 @@ public static CSVParser parse(final Path path, final Charset charset, final CSVF
296304
* If the parameters of the format are inconsistent or if either reader or format are null.
297305
* @throws IOException
298306
* If there is a problem reading the header or skipping the first record
307+
* @throws CSVException Thrown on invalid input.
299308
* @since 1.5
300309
*/
301310
public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
@@ -314,6 +323,7 @@ public static CSVParser parse(final Reader reader, final CSVFormat format) throw
314323
* If the parameters of the format are inconsistent or if either string or format are null.
315324
* @throws IOException
316325
* If an I/O error occurs
326+
* @throws CSVException Thrown on invalid input.
317327
*/
318328
public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
319329
Objects.requireNonNull(string, "string");
@@ -341,6 +351,7 @@ public static CSVParser parse(final String string, final CSVFormat format) throw
341351
* If the parameters of the format are inconsistent or if either url, charset or format are null.
342352
* @throws IOException
343353
* If an I/O error occurs
354+
* @throws CSVException Thrown on invalid input.
344355
*/
345356
@SuppressWarnings("resource")
346357
public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
@@ -395,6 +406,7 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor
395406
* If the parameters of the format are inconsistent or if either reader or format are null.
396407
* @throws IOException
397408
* If there is a problem reading the header or skipping the first record
409+
* @throws CSVException Thrown on invalid input.
398410
*/
399411
public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
400412
this(reader, format, 0, 1);
@@ -420,6 +432,7 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
420432
* If the parameters of the format are inconsistent or if either the reader or format is null.
421433
* @throws IOException
422434
* If there is a problem reading the header or skipping the first record
435+
* @throws CSVException Thrown on invalid input.
423436
* @since 1.1
424437
*/
425438
@SuppressWarnings("resource")
@@ -465,6 +478,7 @@ private Map<String, Integer> createEmptyHeaderMap() {
465478
*
466479
* @return null if the format has no header.
467480
* @throws IOException if there is a problem reading the header or skipping the first record
481+
* @throws CSVException Thrown on invalid input.
468482
*/
469483
private Headers createHeaders() throws IOException {
470484
Map<String, Integer> hdrMap = null;
@@ -746,8 +760,8 @@ public Iterator<CSVRecord> iterator() {
746760
* Parses the next record from the current point in the stream.
747761
*
748762
* @return the record as an array of values, or {@code null} if the end of the stream has been reached
749-
* @throws IOException
750-
* on parse error or input read-failure
763+
* @throws IOException on parse error or input read-failure
764+
* @throws CSVException Thrown on invalid input.
751765
*/
752766
CSVRecord nextRecord() throws IOException {
753767
CSVRecord result = null;

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,10 @@ private char mapNullToDisabled(final Character c) {
207207
* A token corresponds to a term, a record change or an end-of-file indicator.
208208
* </p>
209209
*
210-
* @param token
211-
* an existing Token object to reuse. The caller is responsible for initializing the Token.
210+
* @param token an existing Token object to reuse. The caller is responsible for initializing the Token.
212211
* @return the next token found.
213-
* @throws IOException on stream access error.
212+
* @throws IOException on stream access error.
213+
* @throws CSVException Thrown on invalid input.
214214
*/
215215
Token nextToken(final Token token) throws IOException {
216216
// Get the last read char (required for empty line detection)
@@ -307,6 +307,7 @@ Token nextToken(final Token token) throws IOException {
307307
* @throws IOException
308308
* Thrown when in an invalid state: EOF before closing encapsulator or invalid character before
309309
* delimiter or EOL.
310+
* @throws CSVException Thrown on invalid input.
310311
*/
311312
private Token parseEncapsulatedToken(final Token token) throws IOException {
312313
token.isQuoted = true;
@@ -342,8 +343,8 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
342343
token.content.append((char) c);
343344
} else if (!Character.isWhitespace((char) c)) {
344345
// error invalid char between token and next delimiter
345-
throw new IOException(String.format("Invalid char between encapsulated token and delimiter at line: %,d, position: %,d",
346-
getCurrentLineNumber(), getCharacterPosition()));
346+
throw new CSVException("Invalid character between encapsulated token and delimiter at line: %,d, position: %,d",
347+
getCurrentLineNumber(), getCharacterPosition());
347348
}
348349
}
349350
}
@@ -356,8 +357,7 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
356357
return token;
357358
}
358359
// error condition (end of file before end of token)
359-
throw new IOException("(startline " + startLineNumber +
360-
") EOF reached before encapsulated token finished");
360+
throw new CSVException("(startline %,d) EOF reached before encapsulated token finished", startLineNumber);
361361
} else {
362362
// consume character
363363
token.content.append((char) c);
@@ -368,22 +368,20 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
368368
/**
369369
* Parses a simple token.
370370
* <p>
371-
* Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped
372-
* delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
371+
* Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped delimiters (as \, or \;). The token is finished
372+
* when one of the following conditions becomes true:
373373
* </p>
374374
* <ul>
375375
* <li>The end of line has been reached (EORECORD)</li>
376376
* <li>The end of stream has been reached (EOF)</li>
377377
* <li>An unescaped delimiter has been reached (TOKEN)</li>
378378
* </ul>
379379
*
380-
* @param token
381-
* the current token
382-
* @param ch
383-
* the current character
380+
* @param token the current token
381+
* @param ch the current character
384382
* @return the filled token
385-
* @throws IOException
386-
* on stream access error
383+
* @throws IOException on stream access error
384+
* @throws CSVException Thrown on invalid input.
387385
*/
388386
private Token parseSimpleToken(final Token token, int ch) throws IOException {
389387
// Faster to use while(true)+break than while(token.type == INVALID)
@@ -420,10 +418,9 @@ private Token parseSimpleToken(final Token token, int ch) throws IOException {
420418
/**
421419
* Appends the next escaped character to the token's content.
422420
*
423-
* @param token
424-
* the current token
425-
* @throws IOException
426-
* on stream access error
421+
* @param token the current token
422+
* @throws IOException on stream access error
423+
* @throws CSVException Thrown on invalid input.
427424
*/
428425
private void appendNextEscapedCharacterToToken(final Token token) throws IOException {
429426
if (isEscapeDelimiter()) {
@@ -467,15 +464,12 @@ boolean readEndOfLine(int ch) throws IOException {
467464

468465
// TODO escape handling needs more work
469466
/**
470-
* Handle an escape sequence.
471-
* The current character must be the escape character.
472-
* On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()}
473-
* on the input stream.
467+
* Handle an escape sequence. The current character must be the escape character. On return, the next character is available by calling
468+
* {@link ExtendedBufferedReader#getLastChar()} on the input stream.
474469
*
475-
* @return the unescaped character (as an int) or {@link IOUtils#EOF} if char following the escape is
476-
* invalid.
477-
* @throws IOException if there is a problem reading the stream or the end of stream is detected:
478-
* the escape character is not allowed at end of stream
470+
* @return the unescaped character (as an int) or {@link IOUtils#EOF} if char following the escape is invalid.
471+
* @throws IOException if there is a problem reading the stream or the end of stream is detected: the escape character is not allowed at end of stream
472+
* @throws CSVException Thrown on invalid input.
479473
*/
480474
int readEscape() throws IOException {
481475
// the escape char has just been read (normally a backslash)
@@ -498,7 +492,7 @@ int readEscape() throws IOException {
498492
case Constants.BACKSPACE: // TODO is this correct?
499493
return ch;
500494
case EOF:
501-
throw new IOException("EOF whilst processing escape sequence");
495+
throw new CSVException("EOF while processing escape sequence");
502496
default:
503497
// Now check for meta-characters
504498
if (isMetaChar(ch)) {

src/test/java/org/apache/commons/csv/CSVParserTest.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import static org.junit.jupiter.api.Assertions.assertNull;
2828
import static org.junit.jupiter.api.Assertions.assertThrows;
2929
import static org.junit.jupiter.api.Assertions.assertTrue;
30+
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
3031

3132
import java.io.File;
3233
import java.io.IOException;
@@ -1555,10 +1556,11 @@ public void testThrowExceptionWithLineAndPosition() throws IOException {
15551556
.setSkipHeaderRecord(true)
15561557
.build();
15571558
// @formatter:on
1558-
15591559
try (CSVParser csvParser = csvFormat.parse(stringReader)) {
1560-
final Exception exception = assertThrows(UncheckedIOException.class, csvParser::getRecords);
1561-
assertTrue(exception.getMessage().contains("Invalid char between encapsulated token and delimiter at line: 2, position: 94"));
1560+
final UncheckedIOException exception = assertThrows(UncheckedIOException.class, csvParser::getRecords);
1561+
assertInstanceOf(CSVException.class, exception.getCause());
1562+
assertTrue(exception.getMessage().contains("Invalid character between encapsulated token and delimiter at line: 2, position: 94"),
1563+
exception::getMessage);
15621564
}
15631565
}
15641566

0 commit comments

Comments
 (0)