001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.binary;
019
020 import java.io.UnsupportedEncodingException;
021
022 import org.apache.commons.codec.CharEncoding;
023
024 /**
025 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are specified in <a
026 * href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
027 *
028 * @see CharEncoding
029 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
030 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
031 * @version $Id: StringUtils.java 950460 2010-06-02 09:43:02Z sebb $
032 * @since 1.4
033 */
034 public class StringUtils {
035
036 /**
037 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
038 * byte array.
039 *
040 * @param string
041 * the String to encode, may be <code>null</code>
042 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
043 * @throws IllegalStateException
044 * Thrown when the charset is missing, which should be never according the the Java specification.
045 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
046 * @see #getBytesUnchecked(String, String)
047 */
048 public static byte[] getBytesIso8859_1(String string) {
049 return StringUtils.getBytesUnchecked(string, CharEncoding.ISO_8859_1);
050 }
051
052 /**
053 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
054 * array.
055 *
056 * @param string
057 * the String to encode, may be <code>null</code>
058 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
059 * @throws IllegalStateException
060 * Thrown when the charset is missing, which should be never according the the Java specification.
061 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
062 * @see #getBytesUnchecked(String, String)
063 */
064 public static byte[] getBytesUsAscii(String string) {
065 return StringUtils.getBytesUnchecked(string, CharEncoding.US_ASCII);
066 }
067
068 /**
069 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
070 * array.
071 *
072 * @param string
073 * the String to encode, may be <code>null</code>
074 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
075 * @throws IllegalStateException
076 * Thrown when the charset is missing, which should be never according the the Java specification.
077 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
078 * @see #getBytesUnchecked(String, String)
079 */
080 public static byte[] getBytesUtf16(String string) {
081 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16);
082 }
083
084 /**
085 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
086 * array.
087 *
088 * @param string
089 * the String to encode, may be <code>null</code>
090 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
091 * @throws IllegalStateException
092 * Thrown when the charset is missing, which should be never according the the Java specification.
093 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
094 * @see #getBytesUnchecked(String, String)
095 */
096 public static byte[] getBytesUtf16Be(String string) {
097 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16BE);
098 }
099
100 /**
101 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
102 * array.
103 *
104 * @param string
105 * the String to encode, may be <code>null</code>
106 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
107 * @throws IllegalStateException
108 * Thrown when the charset is missing, which should be never according the the Java specification.
109 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
110 * @see #getBytesUnchecked(String, String)
111 */
112 public static byte[] getBytesUtf16Le(String string) {
113 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16LE);
114 }
115
116 /**
117 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
118 * array.
119 *
120 * @param string
121 * the String to encode, may be <code>null</code>
122 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
123 * @throws IllegalStateException
124 * Thrown when the charset is missing, which should be never according the the Java specification.
125 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
126 * @see #getBytesUnchecked(String, String)
127 */
128 public static byte[] getBytesUtf8(String string) {
129 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_8);
130 }
131
132 /**
133 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
134 * array.
135 * <p>
136 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
137 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
138 * </p>
139 *
140 * @param string
141 * the String to encode, may be <code>null</code>
142 * @param charsetName
143 * The name of a required {@link java.nio.charset.Charset}
144 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
145 * @throws IllegalStateException
146 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
147 * required charset name.
148 * @see CharEncoding
149 * @see String#getBytes(String)
150 */
151 public static byte[] getBytesUnchecked(String string, String charsetName) {
152 if (string == null) {
153 return null;
154 }
155 try {
156 return string.getBytes(charsetName);
157 } catch (UnsupportedEncodingException e) {
158 throw StringUtils.newIllegalStateException(charsetName, e);
159 }
160 }
161
162 private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
163 return new IllegalStateException(charsetName + ": " + e);
164 }
165
166 /**
167 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
168 * <p>
169 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
170 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
171 * </p>
172 *
173 * @param bytes
174 * The bytes to be decoded into characters, may be <code>null</code>
175 * @param charsetName
176 * The name of a required {@link java.nio.charset.Charset}
177 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
178 * or <code>null</code> if the input byte arrray was <code>null</code>.
179 * @throws IllegalStateException
180 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
181 * required charset name.
182 * @see CharEncoding
183 * @see String#String(byte[], String)
184 */
185 public static String newString(byte[] bytes, String charsetName) {
186 if (bytes == null) {
187 return null;
188 }
189 try {
190 return new String(bytes, charsetName);
191 } catch (UnsupportedEncodingException e) {
192 throw StringUtils.newIllegalStateException(charsetName, e);
193 }
194 }
195
196 /**
197 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
198 *
199 * @param bytes
200 * The bytes to be decoded into characters, may be <code>null</code>
201 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset,
202 * or <code>null</code> if the input byte array was <code>null</code>.
203 * @throws IllegalStateException
204 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
205 * charset is required.
206 */
207 public static String newStringIso8859_1(byte[] bytes) {
208 return StringUtils.newString(bytes, CharEncoding.ISO_8859_1);
209 }
210
211 /**
212 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
213 *
214 * @param bytes
215 * The bytes to be decoded into characters
216 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
217 * or <code>null</code> if the input byte array was <code>null</code>.
218 * @throws IllegalStateException
219 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
220 * charset is required.
221 */
222 public static String newStringUsAscii(byte[] bytes) {
223 return StringUtils.newString(bytes, CharEncoding.US_ASCII);
224 }
225
226 /**
227 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
228 *
229 * @param bytes
230 * The bytes to be decoded into characters
231 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
232 * or <code>null</code> if the input byte array was <code>null</code>.
233 * @throws IllegalStateException
234 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
235 * charset is required.
236 */
237 public static String newStringUtf16(byte[] bytes) {
238 return StringUtils.newString(bytes, CharEncoding.UTF_16);
239 }
240
241 /**
242 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
243 *
244 * @param bytes
245 * The bytes to be decoded into characters
246 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
247 * or <code>null</code> if the input byte array was <code>null</code>.
248 * @throws IllegalStateException
249 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
250 * charset is required.
251 */
252 public static String newStringUtf16Be(byte[] bytes) {
253 return StringUtils.newString(bytes, CharEncoding.UTF_16BE);
254 }
255
256 /**
257 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
258 *
259 * @param bytes
260 * The bytes to be decoded into characters
261 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
262 * or <code>null</code> if the input byte array was <code>null</code>.
263 * @throws IllegalStateException
264 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
265 * charset is required.
266 */
267 public static String newStringUtf16Le(byte[] bytes) {
268 return StringUtils.newString(bytes, CharEncoding.UTF_16LE);
269 }
270
271 /**
272 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
273 *
274 * @param bytes
275 * The bytes to be decoded into characters
276 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
277 * or <code>null</code> if the input byte array was <code>null</code>.
278 * @throws IllegalStateException
279 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
280 * charset is required.
281 */
282 public static String newStringUtf8(byte[] bytes) {
283 return StringUtils.newString(bytes, CharEncoding.UTF_8);
284 }
285
286 }