1919import java .io .InputStream ;
2020import java .io .InputStreamReader ;
2121import java .io .Reader ;
22- import java .util .Vector ;
22+ import java .util .ArrayList ;
2323
2424
2525/**
@@ -63,29 +63,41 @@ public class CSVParser {
6363 protected static final int TT_EOF = 1 ;
6464 /** Token with content when end of a line is reached. */
6565 protected static final int TT_EORECORD = 2 ;
66+
67+ /** Immutable empty String array. */
68+ private static final String [] EMPTY_STRING_ARRAY = new String [0 ];
6669
6770 // the input stream
6871 private ExtendedBufferedReader in ;
6972
7073 private CSVStrategy strategy ;
7174
75+ // the following objects are shared to reduce garbage
76+ /** A record buffer for getLine(). Grows as necessary and is reused. */
77+ private ArrayList record = new ArrayList ();
78+ private Token reusableToken = new Token ();
79+ private CharBuffer wsBuf = new CharBuffer ();
80+ private CharBuffer code = new CharBuffer (4 );
81+
82+
7283 /**
7384 * Token is an internal token representation.
7485 *
7586 * It is used as contract between the lexer and the parser.
7687 */
7788 class Token {
7889 /** Token type, see TT_xxx constants. */
79- int type ;
90+ int type = TT_INVALID ;
8091 /** The content buffer. */
81- StringBuffer content ;
92+ CharBuffer content = new CharBuffer ( INITIAL_TOKEN_LENGTH ) ;
8293 /** Token ready flag: indicates a valid token with content (ready for the parser). */
8394 boolean isReady ;
84- /** Initializes an empty token. */
85- Token () {
86- content = new StringBuffer (INITIAL_TOKEN_LENGTH );
87- type = TT_INVALID ;
88- isReady = false ;
95+
96+ Token reset () {
97+ content .clear ();
98+ type = TT_INVALID ;
99+ isReady = false ;
100+ return this ;
89101 }
90102 }
91103
@@ -160,7 +172,7 @@ public CSVParser(Reader input, char delimiter, char encapsulator, char commentSt
160172 * @throws IOException on parse error or input read-failure
161173 */
162174 public String [][] getAllValues () throws IOException {
163- Vector records = new Vector ();
175+ ArrayList records = new ArrayList ();
164176 String [] values ;
165177 String [][] ret = null ;
166178 while ((values = getLine ()) != null ) {
@@ -211,35 +223,35 @@ public String nextValue() throws IOException {
211223 * @throws IOException on parse error or input read-failure
212224 */
213225 public String [] getLine () throws IOException {
214- Vector record = new Vector ();
215- String [] ret = new String [0 ];
216- Token tkn ;
217- while ((tkn = nextToken ()).type == TT_TOKEN ) {
218- record .add (tkn .content .toString ());
219- }
220- // did we reached eorecord or eof ?
221- switch (tkn .type ) {
222- case TT_EORECORD :
223- record .add (tkn .content .toString ());
224- break ;
225- case TT_EOF :
226- if (tkn .isReady ) {
227- record .add (tkn .content .toString ());
228- } else {
229- ret = null ;
226+ String [] ret = EMPTY_STRING_ARRAY ;
227+ record .clear ();
228+ while (true ) {
229+ reusableToken .reset ();
230+ nextToken (reusableToken );
231+ switch (reusableToken .type ) {
232+ case TT_TOKEN :
233+ record .add (reusableToken .content .toString ());
234+ break ;
235+ case TT_EORECORD :
236+ record .add (reusableToken .content .toString ());
237+ break ;
238+ case TT_EOF :
239+ if (reusableToken .isReady ) {
240+ record .add (reusableToken .content .toString ());
241+ } else {
242+ ret = null ;
243+ }
244+ break ;
245+ case TT_INVALID :
246+ default :
247+ // error: throw IOException
248+ throw new IOException ("(line " + getLineNumber () + ") invalid parse sequence" );
249+ // unreachable: break;
230250 }
231- break ;
232- case TT_INVALID :
233- default :
234- // error: throw IOException
235- throw new IOException (
236- "(line " + getLineNumber ()
237- + ") invalid parse sequence" );
238- // unreachable: break;
251+ if (reusableToken .type != TT_TOKEN ) break ;
239252 }
240- if (record .size () > 0 ) {
241- ret = new String [record .size ()];
242- record .toArray (ret );
253+ if (!record .isEmpty ()) {
254+ ret = (String []) record .toArray (new String [record .size ()]);
243255 }
244256 return ret ;
245257 }
@@ -260,18 +272,26 @@ public int getLineNumber() {
260272 // the lexer(s)
261273 // ======================================================
262274
275+ /**
276+ * Convenience method for <code>nextToken(new Token())</code>.
277+ */
278+ protected Token nextToken () throws IOException {
279+ return nextToken (new Token ());
280+ }
281+
263282 /**
264283 * Returns the next token.
265284 *
266285 * A token corresponds to a term, a record change or an
267286 * end-of-file indicator.
268287 *
288+ * @param tkn an existing Token object to reuse. The caller is responsible for initializing the
289+ * Token.
269290 * @return the next token found
270291 * @throws IOException on stream access error
271292 */
272- protected Token nextToken () throws IOException {
273- Token tkn = new Token ();
274- StringBuffer wsBuf = new StringBuffer ();
293+ protected Token nextToken (Token tkn ) throws IOException {
294+ wsBuf .clear (); // reuse
275295
276296 // get the last read char (required for empty line detection)
277297 int lastChar = in .readAgain ();
@@ -321,29 +341,29 @@ protected Token nextToken() throws IOException {
321341 if (!strategy .isCommentingDisabled () && c == strategy .getCommentStart ()) {
322342 // ignore everything till end of line and continue (incr linecount)
323343 in .readLine ();
324- tkn = nextToken ();
344+ tkn = nextToken (tkn . reset () );
325345 } else if (c == strategy .getDelimiter ()) {
326346 // empty token return TT_TOKEN("")
327347 tkn .type = TT_TOKEN ;
328348 tkn .isReady = true ;
329349 } else if (eol ) {
330350 // empty token return TT_EORECORD("")
331- tkn .content .append ("" );
351+ //noop: tkn.content.append("");
332352 tkn .type = TT_EORECORD ;
333353 tkn .isReady = true ;
334354 } else if (c == strategy .getEncapsulator ()) {
335355 // consume encapsulated token
336356 encapsulatedTokenLexer (tkn , c );
337357 } else if (isEndOfFile (c )) {
338358 // end of file return TT_EOF()
339- tkn .content .append ("" );
359+ //noop: tkn.content.append("");
340360 tkn .type = TT_EOF ;
341361 tkn .isReady = true ;
342362 } else {
343363 // next token must be a simple token
344364 // add removed blanks when not ignoring whitespace chars...
345365 if (!strategy .getIgnoreLeadingWhitespaces ()) {
346- tkn .content .append (wsBuf . toString () );
366+ tkn .content .append (wsBuf );
347367 }
348368 simpleTokenLexer (tkn , c );
349369 }
@@ -370,7 +390,7 @@ protected Token nextToken() throws IOException {
370390 * @throws IOException on stream access error
371391 */
372392 private Token simpleTokenLexer (Token tkn , int c ) throws IOException {
373- StringBuffer wsBuf = new StringBuffer ();
393+ wsBuf . clear ();
374394 while (!tkn .isReady ) {
375395 if (isEndOfLine (c )) {
376396 // end of record
@@ -396,9 +416,8 @@ private Token simpleTokenLexer(Token tkn, int c) throws IOException {
396416 } else {
397417 // prepend whitespaces (if we have)
398418 if (wsBuf .length () > 0 ) {
399- // for J2SDK 1.3 compatibility we use toString()
400- tkn .content .append (wsBuf .toString ());
401- wsBuf .delete (0 , wsBuf .length ());
419+ tkn .content .append (wsBuf );
420+ wsBuf .clear ();
402421 }
403422 tkn .content .append ((char ) c );
404423 }
@@ -508,7 +527,7 @@ protected int unicodeEscapeLexer(int c) throws IOException {
508527 int ret = 0 ;
509528 // ignore 'u' (assume c==\ now) and read 4 hex digits
510529 c = in .read ();
511- StringBuffer code = new StringBuffer ( 4 );
530+ code . clear ( );
512531 try {
513532 for (int i = 0 ; i < 4 ; i ++) {
514533 c = in .read ();
0 commit comments