|
19 | 19 |
|
20 | 20 | import static org.apache.commons.csv.Constants.BACKSLASH; |
21 | 21 | import static org.apache.commons.csv.Constants.COMMA; |
| 22 | +import static org.apache.commons.csv.Constants.COMMENT; |
22 | 23 | import static org.apache.commons.csv.Constants.CR; |
23 | 24 | import static org.apache.commons.csv.Constants.CRLF; |
24 | 25 | import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR; |
25 | 26 | import static org.apache.commons.csv.Constants.LF; |
26 | 27 | import static org.apache.commons.csv.Constants.PIPE; |
| 28 | +import static org.apache.commons.csv.Constants.SP; |
27 | 29 | import static org.apache.commons.csv.Constants.TAB; |
28 | 30 |
|
29 | 31 | import java.io.IOException; |
@@ -499,7 +501,8 @@ public static CSVFormat valueOf(final String format) { |
499 | 501 | * TODO |
500 | 502 | * @param trim |
501 | 503 | * TODO |
502 | | - * @param trailingDelimiter TODO |
| 504 | + * @param trailingDelimiter |
| 505 | + * TODO |
503 | 506 | * @throws IllegalArgumentException |
504 | 507 | * if the delimiter is a line break character |
505 | 508 | */ |
@@ -860,6 +863,242 @@ public CSVPrinter print(final Appendable out) throws IOException { |
860 | 863 | return new CSVPrinter(out, this); |
861 | 864 | } |
862 | 865 |
|
| 866 | + /** |
| 867 | + * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed. Useful when |
| 868 | + * one wants to avoid creating CSVPrinters. |
| 869 | + * |
| 870 | + * @param value |
| 871 | + * value to be output. |
| 872 | + * @param out |
| 873 | + * where to print the value |
| 874 | + * @param newRecord |
| 875 | + * is this a new record |
| 876 | + * @throws IOException |
| 877 | + * If an I/O error occurs |
| 878 | + * @since 1.4 |
| 879 | + */ |
| 880 | + public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { |
| 881 | + // null values are considered empty |
| 882 | + // Only call CharSequence.toString() if you have to, helps GC-free use cases. |
| 883 | + CharSequence charSequence; |
| 884 | + if (value == null) { |
| 885 | + charSequence = nullString == null ? Constants.EMPTY : nullString; |
| 886 | + } else { |
| 887 | + charSequence = value instanceof CharSequence ? (CharSequence) value : value.toString(); |
| 888 | + } |
| 889 | + charSequence = getTrim() ? trim(charSequence) : charSequence; |
| 890 | + this.print(value, charSequence, 0, charSequence.length(), out, newRecord); |
| 891 | + } |
| 892 | + |
| 893 | + private void print(final Object object, final CharSequence value, final int offset, final int len, |
| 894 | + final Appendable out, final boolean newRecord) throws IOException { |
| 895 | + if (!newRecord) { |
| 896 | + out.append(getDelimiter()); |
| 897 | + } |
| 898 | + if (object == null) { |
| 899 | + out.append(value); |
| 900 | + } else if (isQuoteCharacterSet()) { |
| 901 | + // the original object is needed so can check for Number |
| 902 | + printAndQuote(object, value, offset, len, out, newRecord); |
| 903 | + } else if (isEscapeCharacterSet()) { |
| 904 | + printAndEscape(value, offset, len, out); |
| 905 | + } else { |
| 906 | + out.append(value, offset, offset + len); |
| 907 | + } |
| 908 | + } |
| 909 | + |
| 910 | + /* |
| 911 | + * Note: must only be called if escaping is enabled, otherwise will generate NPE |
| 912 | + */ |
| 913 | + private void printAndEscape(final CharSequence value, final int offset, final int len, final Appendable out) |
| 914 | + throws IOException { |
| 915 | + int start = offset; |
| 916 | + int pos = offset; |
| 917 | + final int end = offset + len; |
| 918 | + |
| 919 | + final char delim = getDelimiter(); |
| 920 | + final char escape = getEscapeCharacter().charValue(); |
| 921 | + |
| 922 | + while (pos < end) { |
| 923 | + char c = value.charAt(pos); |
| 924 | + if (c == CR || c == LF || c == delim || c == escape) { |
| 925 | + // write out segment up until this char |
| 926 | + if (pos > start) { |
| 927 | + out.append(value, start, pos); |
| 928 | + } |
| 929 | + if (c == LF) { |
| 930 | + c = 'n'; |
| 931 | + } else if (c == CR) { |
| 932 | + c = 'r'; |
| 933 | + } |
| 934 | + |
| 935 | + out.append(escape); |
| 936 | + out.append(c); |
| 937 | + |
| 938 | + start = pos + 1; // start on the current char after this one |
| 939 | + } |
| 940 | + |
| 941 | + pos++; |
| 942 | + } |
| 943 | + |
| 944 | + // write last segment |
| 945 | + if (pos > start) { |
| 946 | + out.append(value, start, pos); |
| 947 | + } |
| 948 | + } |
| 949 | + |
| 950 | + /* |
| 951 | + * Note: must only be called if quoting is enabled, otherwise will generate NPE |
| 952 | + */ |
| 953 | + // the original object is needed so can check for Number |
| 954 | + private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len, |
| 955 | + final Appendable out, final boolean newRecord) throws IOException { |
| 956 | + boolean quote = false; |
| 957 | + int start = offset; |
| 958 | + int pos = offset; |
| 959 | + final int end = offset + len; |
| 960 | + |
| 961 | + final char delimChar = getDelimiter(); |
| 962 | + final char quoteChar = getQuoteCharacter().charValue(); |
| 963 | + |
| 964 | + QuoteMode quoteModePolicy = getQuoteMode(); |
| 965 | + if (quoteModePolicy == null) { |
| 966 | + quoteModePolicy = QuoteMode.MINIMAL; |
| 967 | + } |
| 968 | + switch (quoteModePolicy) { |
| 969 | + case ALL: |
| 970 | + quote = true; |
| 971 | + break; |
| 972 | + case NON_NUMERIC: |
| 973 | + quote = !(object instanceof Number); |
| 974 | + break; |
| 975 | + case NONE: |
| 976 | + // Use the existing escaping code |
| 977 | + printAndEscape(value, offset, len, out); |
| 978 | + return; |
| 979 | + case MINIMAL: |
| 980 | + if (len <= 0) { |
| 981 | + // always quote an empty token that is the first |
| 982 | + // on the line, as it may be the only thing on the |
| 983 | + // line. If it were not quoted in that case, |
| 984 | + // an empty line has no tokens. |
| 985 | + if (newRecord) { |
| 986 | + quote = true; |
| 987 | + } |
| 988 | + } else { |
| 989 | + char c = value.charAt(pos); |
| 990 | + |
| 991 | + // TODO where did this rule come from? |
| 992 | + if (newRecord && (c < '0' || c > '9' && c < 'A' || c > 'Z' && c < 'a' || c > 'z')) { |
| 993 | + quote = true; |
| 994 | + } else if (c <= COMMENT) { |
| 995 | + // Some other chars at the start of a value caused the parser to fail, so for now |
| 996 | + // encapsulate if we start in anything less than '#'. We are being conservative |
| 997 | + // by including the default comment char too. |
| 998 | + quote = true; |
| 999 | + } else { |
| 1000 | + while (pos < end) { |
| 1001 | + c = value.charAt(pos); |
| 1002 | + if (c == LF || c == CR || c == quoteChar || c == delimChar) { |
| 1003 | + quote = true; |
| 1004 | + break; |
| 1005 | + } |
| 1006 | + pos++; |
| 1007 | + } |
| 1008 | + |
| 1009 | + if (!quote) { |
| 1010 | + pos = end - 1; |
| 1011 | + c = value.charAt(pos); |
| 1012 | + // Some other chars at the end caused the parser to fail, so for now |
| 1013 | + // encapsulate if we end in anything less than ' ' |
| 1014 | + if (c <= SP) { |
| 1015 | + quote = true; |
| 1016 | + } |
| 1017 | + } |
| 1018 | + } |
| 1019 | + } |
| 1020 | + |
| 1021 | + if (!quote) { |
| 1022 | + // no encapsulation needed - write out the original value |
| 1023 | + out.append(value, start, end); |
| 1024 | + return; |
| 1025 | + } |
| 1026 | + break; |
| 1027 | + default: |
| 1028 | + throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); |
| 1029 | + } |
| 1030 | + |
| 1031 | + if (!quote) { |
| 1032 | + // no encapsulation needed - write out the original value |
| 1033 | + out.append(value, start, end); |
| 1034 | + return; |
| 1035 | + } |
| 1036 | + |
| 1037 | + // we hit something that needed encapsulation |
| 1038 | + out.append(quoteChar); |
| 1039 | + |
| 1040 | + // Pick up where we left off: pos should be positioned on the first character that caused |
| 1041 | + // the need for encapsulation. |
| 1042 | + while (pos < end) { |
| 1043 | + final char c = value.charAt(pos); |
| 1044 | + if (c == quoteChar) { |
| 1045 | + // write out the chunk up until this point |
| 1046 | + |
| 1047 | + // add 1 to the length to write out the encapsulator also |
| 1048 | + out.append(value, start, pos + 1); |
| 1049 | + // put the next starting position on the encapsulator so we will |
| 1050 | + // write it out again with the next string (effectively doubling it) |
| 1051 | + start = pos; |
| 1052 | + } |
| 1053 | + pos++; |
| 1054 | + } |
| 1055 | + |
| 1056 | + // write the last segment |
| 1057 | + out.append(value, start, pos); |
| 1058 | + out.append(quoteChar); |
| 1059 | + } |
| 1060 | + |
| 1061 | + /** |
| 1062 | + * Outputs the record separator. |
| 1063 | + * |
| 1064 | + * @param out |
| 1065 | + * where to write |
| 1066 | + * |
| 1067 | + * @throws IOException |
| 1068 | + * If an I/O error occurs |
| 1069 | + * @since 1.4 |
| 1070 | + */ |
| 1071 | + public void println(final Appendable out) throws IOException { |
| 1072 | + if (getTrailingDelimiter()) { |
| 1073 | + out.append(getDelimiter()); |
| 1074 | + } |
| 1075 | + if (recordSeparator != null) { |
| 1076 | + out.append(recordSeparator); |
| 1077 | + } |
| 1078 | + } |
| 1079 | + |
| 1080 | + /** |
| 1081 | + * Prints the given values a single record of delimiter separated values followed by the record separator. |
| 1082 | + * |
| 1083 | + * <p> |
| 1084 | + * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record |
| 1085 | + * separator to the output after printing the record, so there is no need to call {@link #println(Appendable)}. |
| 1086 | + * </p> |
| 1087 | + * |
| 1088 | + * @param out where to write |
| 1089 | + * @param values |
| 1090 | + * values to output. |
| 1091 | + * @throws IOException |
| 1092 | + * If an I/O error occurs |
| 1093 | + * @since 1.4 |
| 1094 | + */ |
| 1095 | + public void printRecord(final Appendable out, final Object... values) throws IOException { |
| 1096 | + for (int i = 0; i < values.length; i++) { |
| 1097 | + print(values[i], out, i == 0); |
| 1098 | + } |
| 1099 | + println(out); |
| 1100 | + } |
| 1101 | + |
863 | 1102 | @Override |
864 | 1103 | public String toString() { |
865 | 1104 | final StringBuilder sb = new StringBuilder(); |
@@ -917,6 +1156,23 @@ private String[] toStringArray(final Object[] values) { |
917 | 1156 | return strings; |
918 | 1157 | } |
919 | 1158 |
|
| 1159 | + private CharSequence trim(final CharSequence charSequence) { |
| 1160 | + if (charSequence instanceof String) { |
| 1161 | + return ((String) charSequence).trim(); |
| 1162 | + } |
| 1163 | + final int count = charSequence.length(); |
| 1164 | + int len = count; |
| 1165 | + int pos = 0; |
| 1166 | + |
| 1167 | + while ((pos < len) && (charSequence.charAt(pos) <= ' ')) { |
| 1168 | + pos++; |
| 1169 | + } |
| 1170 | + while ((pos < len) && (charSequence.charAt(len - 1) <= ' ')) { |
| 1171 | + len--; |
| 1172 | + } |
| 1173 | + return (pos > 0) || (len < count) ? charSequence.subSequence(pos, len) : charSequence; |
| 1174 | + } |
| 1175 | + |
920 | 1176 | /** |
921 | 1177 | * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary. |
922 | 1178 | * |
@@ -1083,6 +1339,7 @@ public CSVFormat withEscape(final Character escape) { |
1083 | 1339 | * <p> |
1084 | 1340 | * Calling this method is equivalent to calling: |
1085 | 1341 | * </p> |
| 1342 | + * |
1086 | 1343 | * <pre> |
1087 | 1344 | * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord(); |
1088 | 1345 | * </pre> |
@@ -1114,8 +1371,8 @@ public CSVFormat withFirstRecordAsHeader() { |
1114 | 1371 | * </p> |
1115 | 1372 | * |
1116 | 1373 | * @param headerEnum |
1117 | | - * the enum defining the header, {@code null} if disabled, empty if parsed automatically, user |
1118 | | - * specified otherwise. |
| 1374 | + * the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified |
| 1375 | + * otherwise. |
1119 | 1376 | * |
1120 | 1377 | * @return A new CSVFormat that is equal to this but with the specified header |
1121 | 1378 | * @see #withHeader(String...) |
|
0 commit comments