8000 [CSV-182] Allow some printing operations directly from CSVFormat. Add… · abester/commons-csv@4a6af45 · GitHub
Skip to content

Commit 4a6af45

Browse files
committed
[CSV-182] Allow some printing operations directly from CSVFormat. Adds APIs to CSVFormat so update version from 1.3.1-SNAPSHOT to 1.4-SNAPSHOT a la semver. This commit refactors a small bit of guts code from CSVPrinter to CSVFormat.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1743430 13f79535-47bb-0310-9956-ffa450edef68
1 parent 21cb8b4 commit 4a6af45

4 files changed

Lines changed: 267 additions & 212 deletions

File tree

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ limitations under the License.
2424
</parent>
2525
<groupId>org.apache.commons</groupId>
2626
<artifactId>commons-csv</artifactId>
27-
<version>1.3.1-SNAPSHOT</version>
27+
<version>1.4-SNAPSHOT</version>
2828
<name>Apache Commons CSV</name>
2929
<url>http://commons.apache.org/proper/commons-csv/</url>
3030
<description>

src/changes/changes.xml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@
3838
<title>Release Notes</title>
3939
</properties>
4040
<body>
41-
<release version="1.3.1" date="2016-MM-DD" description="Feature and bug fix release">
41+
<release version="1.4" date="2016-MM-DD" description="Feature and bug fix release">
4242
<action issue="CSV-181" type="update" dev="ggregory" due-to="Gary Gregory">Make CSVPrinter.print(Object) GC-free.</action>
43+
<action issue="CSV-182" type="update" dev="ggregory" due-to="Gary Gregory">Allow some printing operations directly from CSVFormat.</action>
4344
</release>
4445
<release version="1.3" date="2016-05-09" description="Feature and bug fix release">
4546
<action issue="CSV-179" type="add" dev="britter">Add shortcut method for using first record as header to CSVFormat</action>

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 260 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919

2020
import static org.apache.commons.csv.Constants.BACKSLASH;
2121
import static org.apache.commons.csv.Constants.COMMA;
22+
import static org.apache.commons.csv.Constants.COMMENT;
2223
import static org.apache.commons.csv.Constants.CR;
2324
import static org.apache.commons.csv.Constants.CRLF;
2425
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
2526
import static org.apache.commons.csv.Constants.LF;
2627
import static org.apache.commons.csv.Constants.PIPE;
28+
import static org.apache.commons.csv.Constants.SP;
2729
import static org.apache.commons.csv.Constants.TAB;
2830

2931
import java.io.IOException;
@@ -499,7 +501,8 @@ public static CSVFormat valueOf(final String format) {
499501
* TODO
500502
* @param trim
501503
* TODO
502-
* @param trailingDelimiter TODO
504+
* @param trailingDelimiter
505+
* TODO
503506
* @throws IllegalArgumentException
504507
* if the delimiter is a line break character
505508
*/
@@ -860,6 +863,242 @@ public CSVPrinter print(final Appendable out) throws IOException {
860863
return new CSVPrinter(out, this);
861864
}
862865

866+
/**
867+
* Prints the string as the next value on the line. The value will be escaped or encapsulated as needed. Useful when
868+
* one wants to avoid creating CSVPrinters.
869+
*
870+
* @param value
871+
* value to be output.
872+
* @param out
873+
* where to print the value
874+
* @param newRecord
875+
* is this a new record
876+
* @throws IOException
877+
* If an I/O error occurs
878+
* @since 1.4
879+
*/
880+
public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException {
881+
// null values are considered empty
882+
// Only call CharSequence.toString() if you have to, helps GC-free use cases.
883+
CharSequence charSequence;
884+
if (value == null) {
885+
charSequence = nullString == null ? Constants.EMPTY : nullString;
886+
} else {
887+
charSequence = value instanceof CharSequence ? (CharSequence) value : value.toString();
888+
}
889+
charSequence = getTrim() ? trim(charSequence) : charSequence;
890+
this.print(value, charSequence, 0, charSequence.length(), out, newRecord);
891+
}
892+
893+
private void print(final Object object, final CharSequence value, final int offset, final int len,
894+
final Appendable out, final boolean newRecord) throws IOException {
895+
if (!newRecord) {
896+
out.append(getDelimiter());
897+
}
898+
if (object == null) {
899+
out.append(value);
900+
} else if (isQuoteCharacterSet()) {
901+
// the original object is needed so can check for Number
902+
printAndQuote(object, value, offset, len, out, newRecord);
903+
} else if (isEscapeCharacterSet()) {
904+
printAndEscape(value, offset, len, out);
905+
} else {
906+
out.append(value, offset, offset + len);
907+
}
908+
}
909+
910+
/*
911+
* Note: must only be called if escaping is enabled, otherwise will generate NPE
912+
*/
913+
private void printAndEscape(final CharSequence value, final int offset, final int len, final Appendable out)
914+
throws IOException {
915+
int start = offset;
916+
int pos = offset;
917+
final int end = offset + len;
918+
919+
final char delim = getDelimiter();
920+
final char escape = getEscapeCharacter().charValue();
921+
922+
while (pos < end) {
923+
char c = value.charAt(pos);
924+
if (c == CR || c == LF || c == delim || c == escape) {
925+
// write out segment up until this char
926+
if (pos > start) {
927+
out.append(value, start, pos);
928+
}
929+
if (c == LF) {
930+
c = 'n';
931+
} else if (c == CR) {
932+
c = 'r';
933+
}
934+
935+
out.append(escape);
936+
out.append(c);
937+
938+
start = pos + 1; // start on the current char after this one
939+
}
940+
941+
pos++;
942+
}
943+
944+
// write last segment
945+
if (pos > start) {
946+
out.append(value, start, pos);
947+
}
948+
}
949+
950+
/*
951+
* Note: must only be called if quoting is enabled, otherwise will generate NPE
952+
*/
953+
// the original object is needed so can check for Number
954+
private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len,
955+
final Appendable out, final boolean newRecord) throws IOException {
956+
boolean quote = false;
957+
int start = offset;
958+
int pos = offset;
959+
final int end = offset + len;
960+
961+
final char delimChar = getDelimiter();
962+
final char quoteChar = getQuoteCharacter().charValue();
963+
964+
QuoteMode quoteModePolicy = getQuoteMode();
965+
if (quoteModePolicy == null) {
966+
quoteModePolicy = QuoteMode.MINIMAL;
967+
}
968+
switch (quoteModePolicy) {
969+
case ALL:
970+
quote = true;
971+
break;
972+
case NON_NUMERIC:
973+
quote = !(object instanceof Number);
974+
break;
975+
case NONE:
976+
// Use the existing escaping code
977+
printAndEscape(value, offset, len, out);
978+
return;
979+
case MINIMAL:
980+
if (len <= 0) {
981+
// always quote an empty token that is the first
982+
// on the line, as it may be the only thing on the
983+
// line. If it were not quoted in that case,
984+
// an empty line has no tokens.
985+
if (newRecord) {
986+
quote = true;
987+
}
988+
} else {
989+
char c = value.charAt(pos);
990+
991+
// TODO where did this rule come from?
992+
if (newRecord && (c < '0' || c > '9' && c < 'A' || c > 'Z' && c < 'a' || c > 'z')) {
993+
quote = true;
994+
} else if (c <= COMMENT) {
995+
// Some other chars at the start of a value caused the parser to fail, so for now
996+
// encapsulate if we start in anything less than '#'. We are being conservative
997+
// by including the default comment char too.
998+
quote = true;
999+
} else {
1000+
while (pos < end) {
1001+
c = value.charAt(pos);
1002+
if (c == LF || c == CR || c == quoteChar || c == delimChar) {
1003+
quote = true;
1004+
break;
1005+
}
1006+
pos++;
1007+
}
1008+
1009+
if (!quote) {
1010+
pos = end - 1;
1011+
c = value.charAt(pos);
1012+
// Some other chars at the end caused the parser to fail, so for now
1013+
// encapsulate if we end in anything less than ' '
1014+
if (c <= SP) {
1015+
quote = true;
1016+
}
1017+
}
1018+
}
1019+
}
1020+
1021+
if (!quote) {
1022+
// no encapsulation needed - write out the original value
1023+
out.append(value, start, end);
1024+
return;
1025+
}
1026+
break;
1027+
default:
1028+
throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy);
1029+
}
1030+
1031+
if (!quote) {
1032+
// no encapsulation needed - write out the original value
1033+
out.append(value, start, end);
1034+
return;
1035+
}
1036+
1037+
// we hit something that needed encapsulation
1038+
out.append(quoteChar);
1039+
1040+
// Pick up where we left off: pos should be positioned on the first character that caused
1041+
// the need for encapsulation.
1042+
while (pos < end) {
1043+
final char c = value.charAt(pos);
1044+
if (c == quoteChar) {
1045+
// write out the chunk up until this point
1046+
1047+
// add 1 to the length to write out the encapsulator also
1048+
out.append(value, start, pos + 1);
1049+
// put the next starting position on the encapsulator so we will
1050+
// write it out again with the next string (effectively doubling it)
1051+
start = pos;
1052+
}
1053+
pos++;
1054+
}
1055+
1056+
// write the last segment
1057+
out.append(value, start, pos);
1058+
out.append(quoteChar);
1059+
}
1060+
1061+
/**
1062+
* Outputs the record separator.
1063+
*
1064+
* @param out
1065+
* where to write
1066+
*
1067+
* @throws IOException
1068+
* If an I/O error occurs
1069+
* @since 1.4
1070+
*/
1071+
public void println(final Appendable out) throws IOException {
1072+
if (getTrailingDelimiter()) {
1073+
out.append(getDelimiter());
1074+
}
1075+
if (recordSeparator != null) {
1076+
out.append(recordSeparator);
1077+
}
1078+
}
1079+
1080+
/**
1081+
* Prints the given values a single record of delimiter separated values followed by the record separator.
1082+
*
1083+
* <p>
1084+
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
1085+
* separator to the output after printing the record, so there is no need to call {@link #println(Appendable)}.
1086+
* </p>
1087+
*
1088+
* @param out where to write
1089+
* @param values
1090+
* values to output.
1091+
* @throws IOException
1092+
* If an I/O error occurs
1093+
* @since 1.4
1094+
*/
1095+
public void printRecord(final Appendable out, final Object... values) throws IOException {
1096+
for (int i = 0; i < values.length; i++) {
1097+
print(values[i], out, i == 0);
1098+
}
1099+
println(out);
1100+
}
1101+
8631102
@Override
8641103
public String toString() {
8651104
final StringBuilder sb = new StringBuilder();
@@ -917,6 +1156,23 @@ private String[] toStringArray(final Object[] values) {
9171156
return strings;
9181157
}
9191158

1159+
private CharSequence trim(final CharSequence charSequence) {
1160+
if (charSequence instanceof String) {
1161+
return ((String) charSequence).trim();
1162+
}
1163+
final int count = charSequence.length();
1164+
int len = count;
1165+
int pos = 0;
1166+
1167+
while ((pos < len) && (charSequence.charAt(pos) <= ' ')) {
1168+
pos++;
1169+
}
1170+
while ((pos < len) && (charSequence.charAt(len - 1) <= ' ')) {
1171+
len--;
1172+
}
1173+
return (pos > 0) || (len < count) ? charSequence.subSequence(pos, len) : charSequence;
1174+
}
1175+
9201176
/**
9211177
* Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary.
9221178
*
@@ -1083,6 +1339,7 @@ public CSVFormat withEscape(final Character escape) {
10831339
* <p>
10841340
* Calling this method is equivalent to calling:
10851341
* </p>
1342+
*
10861343
* <pre>
10871344
* CSVFormat format = aFormat.withHeader().withSkipHeaderRecord();
10881345
* </pre>
@@ -1114,8 +1371,8 @@ public CSVFormat withFirstRecordAsHeader() {
11141371
* </p>
11151372
*
11161373
* @param headerEnum
1117-
* the enum defining the header, {@code null} if disabled, empty if parsed automatically, user
1118-
* specified otherwise.
1374+
* the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified
1375+
* otherwise.
11191376
*
11201377
* @return A new CSVFormat that is equal to this but with the specified header
11211378
* @see #withHeader(String...)

0 commit comments

Comments
 (0)