Coverage Report - ca.uhn.hl7v2.hoh.util.repackage.Base64
 
Classes in this File Line Coverage Branch Coverage Complexity
Base64
60%
133/219
41%
66/159
2.712
Base64$CharEncoding
0%
0/1
N/A
2.712
Base64$Charsets
66%
6/9
0%
0/4
2.712
Base64$Context
100%
2/2
N/A
2.712
Base64$StringUtils
17%
5/28
25%
2/8
2.712
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *      http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package ca.uhn.hl7v2.hoh.util.repackage;
 19  
 
 20  
 import java.io.UnsupportedEncodingException;
 21  
 import java.math.BigInteger;
 22  
 import java.nio.charset.Charset;
 23  
 import java.nio.charset.UnsupportedCharsetException;
 24  
 
 25  
 /**
 26  
  * Provides Base64 encoding and decoding as defined by <a
 27  
  * href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
 28  
  * 
 29  
  * <p>
 30  
  * This class implements section <cite>6.8. Base64
 31  
  * Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose Internet
 32  
  * Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by
 33  
  * Freed and Borenstein.
 34  
  * </p>
 35  
  * <p>
 36  
  * The class can be parameterized in the following manner with various
 37  
  * constructors:
 38  
  * <ul>
 39  
  * <li>URL-safe mode: Default off.</li>
 40  
  * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will
 41  
  * still essentially end up being multiples of 4 in the encoded data.</li>
 42  
  * <li>Line separator: Default is CRLF ("\r\n")</li>
 43  
  * </ul>
 44  
  * </p>
 45  
  * <p>
 46  
  * Since this class operates directly on byte streams, and not character
 47  
  * streams, it is hard-coded to only encode/decode character encodings which are
 48  
  * compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8,
 49  
  * etc).
 50  
  * </p>
 51  
  * <p>
 52  
  * This class is thread-safe.
 53  
  * </p>
 54  
  * 
 55  
  * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
 56  
  * @author Note that this class has been repackaged from Apache Commons-Codec
 57  
  *         and is distributed under the terms of the Apache Software License,
 58  
  *         version 2.0
 59  
  */
 60  
 public class Base64 {
 61  
 
 62  
         public static void main(String[] args) {
 63  
                 
 64  0
                 System.out.println("basic " + encodeBase64String("cgta:d@3r$@TTg2446yhhh2h4".getBytes()));
 65  
                 
 66  0
         }
 67  
         
 68  
         /**
 69  
          * BASE64 characters are 6 bits in length. They are formed by taking a block
 70  
          * of 3 octets to form a 24-bit string, which is converted into 4 BASE64
 71  
          * characters.
 72  
          */
 73  
         private static final int BITS_PER_ENCODED_BYTE = 6;
 74  
         private static final int BYTES_PER_UNENCODED_BLOCK = 3;
 75  
         private static final int BYTES_PER_ENCODED_BLOCK = 4;
 76  
 
 77  
         /**
 78  
          * Chunk separator per RFC 2045 section 2.1.
 79  
          * 
 80  
          * <p>
 81  
          * N.B. The next major release may break compatibility and make this field
 82  
          * private.
 83  
          * </p>
 84  
          * 
 85  
          * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section
 86  
          *      2.1</a>
 87  
          */
 88  5
         static final byte[] CHUNK_SEPARATOR = { '\r', '\n' };
 89  
 
 90  
         /**
 91  
          * This array is a lookup table that translates 6-bit positive integer index
 92  
          * values into their "Base64 Alphabet" equivalents as specified in Table 1
 93  
          * of RFC 2045.
 94  
          * 
 95  
          * Thanks to "commons" project in ws.apache.org for this code.
 96  
          * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 97  
          */
 98  5
         private static final byte[] STANDARD_ENCODE_TABLE = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
 99  
                         's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' };
 100  
 
 101  
         /**
 102  
          * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and /
 103  
          * changed to - and _ to make the encoded Base64 results more URL-SAFE. This
 104  
          * table is only used when the Base64's mode is set to URL-SAFE.
 105  
          */
 106  5
         private static final byte[] URL_SAFE_ENCODE_TABLE = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
 107  
                         's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' };
 108  
 
 109  
         /**
 110  
          * This array is a lookup table that translates Unicode characters drawn
 111  
          * from the "Base64 Alphabet" (as specified in Table 1 of RFC 2045) into
 112  
          * their 6-bit positive integer equivalents. Characters that are not in the
 113  
          * Base64 alphabet but fall within the bounds of the array are translated to
 114  
          * -1.
 115  
          * 
 116  
          * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This
 117  
          * means decoder seamlessly handles both URL_SAFE and STANDARD base64. (The
 118  
          * encoder, on the other hand, needs to know ahead of time what to emit).
 119  
          * 
 120  
          * Thanks to "commons" project in ws.apache.org for this code.
 121  
          * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 122  
          */
 123  5
         private static final byte[] DECODE_TABLE = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
 124  
                         61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 };
 125  
 
 126  
         /**
 127  
          * Base64 uses 6-bit fields.
 128  
          */
 129  
         /** Mask used to extract 6 bits, used when encoding */
 130  
         private static final int MASK_6BITS = 0x3f;
 131  
 
 132  
         // The static final fields above are used for the original static byte[]
 133  
         // methods on Base64.
 134  
         // The private member fields below are used with the new streaming approach,
 135  
         // which requires
 136  
         // some state be preserved between calls of encode() and decode().
 137  
 
 138  
         /**
 139  
          * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE
 140  
          * above remains static because it is able to decode both STANDARD and
 141  
          * URL_SAFE streams, but the encodeTable must be a member variable so we can
 142  
          * switch between the two modes.
 143  
          */
 144  
         private byte[] encodeTable;
 145  
 
 146  
         // Only one decode table currently; keep for consistency with Base32 code
 147  495
         private final byte[] decodeTable = DECODE_TABLE;
 148  
 
 149  
         /**
 150  
          * Line separator for encoding. Not used when decoding. Only used if
 151  
          * lineLength > 0.
 152  
          */
 153  
         private byte[] lineSeparator;
 154  
 
 155  
         /**
 156  
          * Convenience variable to help us determine when our buffer is going to run
 157  
          * out of room and needs resizing.
 158  
          * <code>decodeSize = 3 + lineSeparator.length;</code>
 159  
          */
 160  
         private int decodeSize;
 161  
 
 162  
         /**
 163  
          * Convenience variable to help us determine when our buffer is going to run
 164  
          * out of room and needs resizing.
 165  
          * <code>encodeSize = 4 + lineSeparator.length;</code>
 166  
          */
 167  
         private int encodeSize;
 168  
 
 169  
         /**
 170  
          * Creates a Base64 codec used for decoding (all modes) and encoding in
 171  
          * URL-unsafe mode.
 172  
          * <p>
 173  
          * When encoding the line length is 0 (no chunking), and the encoding table
 174  
          * is STANDARD_ENCODE_TABLE.
 175  
          * </p>
 176  
          * 
 177  
          * <p>
 178  
          * When decoding all variants are supported.
 179  
          * </p>
 180  
          */
 181  
         public Base64() {
 182  265
                 this(0);
 183  265
         }
 184  
 
 185  
         /**
 186  
          * Creates a Base64 codec used for decoding (all modes) and encoding in the
 187  
          * given URL-safe mode.
 188  
          * <p>
 189  
          * When encoding the line length is 76, the line separator is CRLF, and the
 190  
          * encoding table is selected by {@code urlSafe}.
 191  
          * </p>
 192  
          * 
 193  
          * <p>
 194  
          * When decoding all variants are supported.
 195  
          * </p>
 196  
          * 
 197  
          * @param urlSafe
 198  
          *            if {@code true}, URL-safe encoding is used. In most cases this
 199  
          *            should be set to {@code false}.
 200  
          * @since 1.4
 201  
          */
 202  
         public Base64(boolean urlSafe) {
 203  0
                 this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
 204  0
         }
 205  
 
 206  
         /**
 207  
          * Creates a Base64 codec used for decoding (all modes) and encoding in
 208  
          * URL-unsafe mode.
 209  
          * <p>
 210  
          * When encoding the line length is given in the constructor, the line
 211  
          * separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE.
 212  
          * </p>
 213  
          * <p>
 214  
          * Line lengths that aren't multiples of 4 will still essentially end up
 215  
          * being multiples of 4 in the encoded data.
 216  
          * </p>
 217  
          * <p>
 218  
          * When decoding all variants are supported.
 219  
          * </p>
 220  
          * 
 221  
          * @param lineLength
 222  
          *            Each line of encoded data will be at most of the given length
 223  
          *            (rounded down to nearest multiple of 4). If lineLength <= 0,
 224  
          *            then the output will not be divided into lines (chunks).
 225  
          *            Ignored when decoding.
 226  
          * @since 1.4
 227  
          */
 228  
         public Base64(int lineLength) {
 229  265
                 this(lineLength, CHUNK_SEPARATOR);
 230  265
         }
 231  
 
 232  
         /**
 233  
          * Creates a Base64 codec used for decoding (all modes) and encoding in
 234  
          * URL-unsafe mode.
 235  
          * <p>
 236  
          * When encoding the line length and line separator are given in the
 237  
          * constructor, and the encoding table is STANDARD_ENCODE_TABLE.
 238  
          * </p>
 239  
          * <p>
 240  
          * Line lengths that aren't multiples of 4 will still essentially end up
 241  
          * being multiples of 4 in the encoded data.
 242  
          * </p>
 243  
          * <p>
 244  
          * When decoding all variants are supported.
 245  
          * </p>
 246  
          * 
 247  
          * @param lineLength
 248  
          *            Each line of encoded data will be at most of the given length
 249  
          *            (rounded down to nearest multiple of 4). If lineLength <= 0,
 250  
          *            then the output will not be divided into lines (chunks).
 251  
          *            Ignored when decoding.
 252  
          * @param lineSeparator
 253  
          *            Each line of encoded data will end with this sequence of
 254  
          *            bytes.
 255  
          * @throws IllegalArgumentException
 256  
          *             Thrown when the provided lineSeparator included some base64
 257  
          *             characters.
 258  
          * @since 1.4
 259  
          */
 260  
         public Base64(int lineLength, byte[] lineSeparator) {
 261  265
                 this(lineLength, lineSeparator, false);
 262  265
         }
 263  
 
 264  
         /**
 265  
          * Creates a Base64 codec used for decoding (all modes) and encoding in
 266  
          * the given URL-safe mode.
 267  
          * <p>
 268  
          * When encoding the line length and line separator are given in the
 269  
          * constructor, and the encoding table is selected by {@code urlSafe}.
 270  
          * </p>
 271  
          * <p>
 272  
          * Line lengths that aren't multiples of 4 will still essentially end up
 273  
          * being multiples of 4 in the encoded data.
 274  
          * </p>
 275  
          * <p>
 276  
          * When decoding all variants are supported.
 277  
          * </p>
 278  
          * 
 279  
          * @param lineLength
 280  
          *            Each line of encoded data will be at most of the given length
 281  
          *            (rounded down to nearest multiple of 4). If lineLength <= 0,
 282  
          *            then the output will not be divided into lines (chunks).
 283  
          *            Ignored when decoding.
 284  
          * @param lineSeparator
 285  
          *            Each line of encoded data will end with this sequence of
 286  
          *            bytes.
 287  
          * @param urlSafe
 288  
          *            Instead of emitting '+' and '/' we emit '-' and '_'
 289  
          *            respectively. urlSafe is only applied to encode operations.
 290  
          *            Decoding seamlessly handles both modes.
 291  
          * @throws IllegalArgumentException
 292  
          *             The provided lineSeparator included some base64 characters.
 293  
          *             That's not going to work!
 294  
          * @since 1.4
 295  
          */
 296  
         public Base64(int lineLength, byte[] lineSeparator, boolean urlSafe) {
 297  495
                 this(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, lineSeparator == null ? 0 : lineSeparator.length);
 298  
                 // TODO could be simplified if there is no requirement to reject invalid
 299  
                 // line sep when length <=0
 300  
                 // @see test case Base64Test.testConstructors()
 301  495
                 if (lineSeparator != null) {
 302  495
                         if (containsAlphabetOrPad(lineSeparator)) {
 303  0
                                 String sep = StringUtils.newStringUtf8(lineSeparator);
 304  0
                                 throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
 305  
                         }
 306  495
                         if (lineLength > 0) { // null line-sep forces no chunking rather
 307  
                                                                         // than throwing IAE
 308  0
                                 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
 309  0
                                 this.lineSeparator = new byte[lineSeparator.length];
 310  0
                                 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
 311  
                         } else {
 312  495
                                 this.encodeSize = BYTES_PER_ENCODED_BLOCK;
 313  495
                                 this.lineSeparator = null;
 314  
                         }
 315  
                 } else {
 316  0
                         this.encodeSize = BYTES_PER_ENCODED_BLOCK;
 317  0
                         this.lineSeparator = null;
 318  
                 }
 319  495
                 this.decodeSize = this.encodeSize - 1;
 320  495
                 this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
 321  495
         }
 322  
 
 323  
         /**
 324  
          * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
 325  
          * 
 326  
          * @return true if we're in URL-SAFE mode, false otherwise.
 327  
          * @since 1.4
 328  
          */
 329  
         public boolean isUrlSafe() {
 330  0
                 return this.encodeTable == URL_SAFE_ENCODE_TABLE;
 331  
         }
 332  
 
 333  
         /**
 334  
          * <p>
 335  
          * Encodes all of the provided data, starting at inPos, for inAvail bytes.
 336  
          * Must be called at least twice: once with the data to encode, and once
 337  
          * with inAvail set to "-1" to alert encoder that EOF has been reached, so
 338  
          * flush last remaining bytes (if not multiple of 3).
 339  
          * </p>
 340  
          * <p>
 341  
          * Thanks to "commons" project in ws.apache.org for the bitwise operations,
 342  
          * and general approach.
 343  
          * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 344  
          * </p>
 345  
          * 
 346  
          * @param in
 347  
          *            byte[] array of binary data to base64 encode.
 348  
          * @param inPos
 349  
          *            Position to start reading data from.
 350  
          * @param inAvail
 351  
          *            Amount of bytes available from input for encoding.
 352  
          * @param context
 353  
          *            the context to be used
 354  
          */
 355  
         void encode(byte[] in, int inPos, int inAvail, Context context) {
 356  460
                 if (context.eof) {
 357  0
                         return;
 358  
                 }
 359  
                 // inAvail < 0 is how we're informed of EOF in the underlying data we're
 360  
                 // encoding.
 361  460
                 if (inAvail < 0) {
 362  230
                         context.eof = true;
 363  230
                         if (0 == context.modulus && lineLength == 0) {
 364  155
                                 return; // no leftovers to process and not using chunking
 365  
                         }
 366  75
                         ensureBufferSize(encodeSize, context);
 367  75
                         int savedPos = context.pos;
 368  75
                         switch (context.modulus) { // 0-2
 369  
                         case 1: // 8 bits = 6 + 2
 370  0
                                 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS]; // top
 371  
                                                                                                                                                                                                                 // 6
 372  
                                                                                                                                                                                                                 // bits
 373  0
                                 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS]; // remaining
 374  
                                                                                                                                                                                                                 // 2
 375  
                                 // URL-SAFE skips the padding to further reduce size.
 376  0
                                 if (encodeTable == STANDARD_ENCODE_TABLE) {
 377  0
                                         context.buffer[context.pos++] = PAD;
 378  0
                                         context.buffer[context.pos++] = PAD;
 379  
                                 }
 380  
                                 break;
 381  
 
 382  
                         case 2: // 16 bits = 6 + 6 + 4
 383  75
                                 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS];
 384  75
                                 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS];
 385  75
                                 context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS];
 386  
                                 // URL-SAFE skips the padding to further reduce size.
 387  75
                                 if (encodeTable == STANDARD_ENCODE_TABLE) {
 388  75
                                         context.buffer[context.pos++] = PAD;
 389  
                                 }
 390  
                                 break;
 391  
                         }
 392  75
                         context.currentLinePos += context.pos - savedPos; // keep track of
 393  
                                                                                                                                 // current line
 394  
                                                                                                                                 // position
 395  
                         // if currentLinePos == 0 we are at the start of a line, so don't add
 396  
                         // CRLF
 397  75
                         if (lineLength > 0 && context.currentLinePos > 0) {
 398  0
                                 System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length);
 399  0
                                 context.pos += lineSeparator.length;
 400  
                         }
 401  75
                 } else {
 402  39830
                         for (int i = 0; i < inAvail; i++) {
 403  39600
                                 ensureBufferSize(encodeSize, context);
 404  39600
                                 context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
 405  39600
                                 int b = in[inPos++];
 406  39600
                                 if (b < 0) {
 407  12068
                                         b += 256;
 408  
                                 }
 409  39600
                                 context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
 410  39600
                                 if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to
 411  
                                                                                         // extract
 412  13150
                                         context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS];
 413  13150
                                         context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS];
 414  13150
                                         context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS];
 415  13150
                                         context.buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
 416  13150
                                         context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
 417  13150
                                         if (lineLength > 0 && lineLength <= context.currentLinePos) {
 418  0
                                                 System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length);
 419  0
                                                 context.pos += lineSeparator.length;
 420  0
                                                 context.currentLinePos = 0;
 421  
                                         }
 422  
                                 }
 423  
                         }
 424  
                 }
 425  305
         }
 426  
 
 427  
         /**
 428  
          * <p>
 429  
          * Decodes all of the provided data, starting at inPos, for inAvail bytes.
 430  
          * Should be called at least twice: once with the data to decode, and once
 431  
          * with inAvail set to "-1" to alert decoder that EOF has been reached. The
 432  
          * "-1" call is not necessary when decoding, but it doesn't hurt, either.
 433  
          * </p>
 434  
          * <p>
 435  
          * Ignores all non-base64 characters. This is how chunked (e.g. 76
 436  
          * character) data is handled, since CR and LF are silently ignored, but has
 437  
          * implications for other bytes, too. This method subscribes to the
 438  
          * garbage-in, garbage-out philosophy: it will not check the provided data
 439  
          * for validity.
 440  
          * </p>
 441  
          * <p>
 442  
          * Thanks to "commons" project in ws.apache.org for the bitwise operations,
 443  
          * and general approach.
 444  
          * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 445  
          * </p>
 446  
          * 
 447  
          * @param in
 448  
          *            byte[] array of ascii data to base64 decode.
 449  
          * @param inPos
 450  
          *            Position to start reading data from.
 451  
          * @param inAvail
 452  
          *            Amount of bytes available from input for decoding.
 453  
          * @param context
 454  
          *            the context to be used
 455  
          */
 456  
         void decode(byte[] in, int inPos, int inAvail, Context context) {
 457  530
                 if (context.eof) {
 458  125
                         return;
 459  
                 }
 460  405
                 if (inAvail < 0) {
 461  140
                         context.eof = true;
 462  
                 }
 463  34240
                 for (int i = 0; i < inAvail; i++) {
 464  33960
                         ensureBufferSize(decodeSize, context);
 465  33960
                         byte b = in[inPos++];
 466  33960
                         if (b == PAD) {
 467  
                                 // We're done.
 468  125
                                 context.eof = true;
 469  125
                                 break;
 470  
                         } else {
 471  33835
                                 if (b >= 0 && b < DECODE_TABLE.length) {
 472  33835
                                         int result = DECODE_TABLE[b];
 473  33835
                                         if (result >= 0) {
 474  33835
                                                 context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
 475  33835
                                                 context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
 476  33835
                                                 if (context.modulus == 0) {
 477  8365
                                                         context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS);
 478  8365
                                                         context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
 479  8365
                                                         context.buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
 480  
                                                 }
 481  
                                         }
 482  
                                 }
 483  
                         }
 484  
                 }
 485  
 
 486  
                 // Two forms of EOF as far as base64 decoder is concerned: actual
 487  
                 // EOF (-1) and first time '=' character is encountered in stream.
 488  
                 // This approach makes the '=' padding characters completely optional.
 489  405
                 if (context.eof && context.modulus != 0) {
 490  125
                         ensureBufferSize(decodeSize, context);
 491  
 
 492  
                         // We have some spare bits remaining
 493  
                         // Output all whole multiples of 8 bits and ignore the rest
 494  125
                         switch (context.modulus) {
 495  
                         // case 1: // 6 bits - ignore entirely
 496  
                         // break;
 497  
                         case 2: // 12 bits = 8 + 4
 498  0
                                 context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the
 499  
                                                                                                                                         // extra 4
 500  
                                                                                                                                         // bits
 501  0
                                 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
 502  0
                                 break;
 503  
                         case 3: // 18 bits = 8 + 8 + 2
 504  125
                                 context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
 505  125
                                 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
 506  125
                                 context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
 507  
                                 break;
 508  
                         }
 509  
                 }
 510  405
         }
 511  
 
 512  
         /**
 513  
          * Tests a given byte array to see if it contains only valid characters
 514  
          * within the Base64 alphabet. Currently the method treats whitespace as
 515  
          * valid.
 516  
          * 
 517  
          * @param arrayOctet
 518  
          *            byte array to test
 519  
          * @return {@code true} if all bytes are valid characters in the Base64
 520  
          *         alphabet or if the byte array is empty; {@code false}, otherwise
 521  
          * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
 522  
          */
 523  
         @Deprecated
 524  
         public static boolean isArrayByteBase64(byte[] arrayOctet) {
 525  0
                 return isBase64(arrayOctet);
 526  
         }
 527  
 
 528  
         /**
 529  
          * Returns whether or not the <code>octet</code> is in the base 64 alphabet.
 530  
          * 
 531  
          * @param octet
 532  
          *            The value to test
 533  
          * @return {@code true} if the value is defined in the base 64 alphabet,
 534  
          *         {@code false} otherwise.
 535  
          * @since 1.4
 536  
          */
 537  
         public static boolean isBase64(byte octet) {
 538  0
                 return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
 539  
         }
 540  
 
 541  
         /**
 542  
          * Tests a given String to see if it contains only valid characters within
 543  
          * the Base64 alphabet. Currently the method treats whitespace as valid.
 544  
          * 
 545  
          * @param base64
 546  
          *            String to test
 547  
          * @return {@code true} if all characters in the String are valid characters
 548  
          *         in the Base64 alphabet or if the String is empty; {@code false},
 549  
          *         otherwise
 550  
          * @since 1.5
 551  
          */
 552  
         public static boolean isBase64(String base64) {
 553  0
                 return isBase64(StringUtils.getBytesUtf8(base64));
 554  
         }
 555  
 
 556  
         /**
 557  
          * Tests a given byte array to see if it contains only valid characters
 558  
          * within the Base64 alphabet. Currently the method treats whitespace as
 559  
          * valid.
 560  
          * 
 561  
          * @param arrayOctet
 562  
          *            byte array to test
 563  
          * @return {@code true} if all bytes are valid characters in the Base64
 564  
          *         alphabet or if the byte array is empty; {@code false}, otherwise
 565  
          * @since 1.5
 566  
          */
 567  
         public static boolean isBase64(byte[] arrayOctet) {
 568  0
                 for (int i = 0; i < arrayOctet.length; i++) {
 569  0
                         if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) {
 570  0
                                 return false;
 571  
                         }
 572  
                 }
 573  0
                 return true;
 574  
         }
 575  
 
 576  
         /**
 577  
          * Encodes binary data using the base64 algorithm but does not chunk the
 578  
          * output.
 579  
          * 
 580  
          * @param binaryData
 581  
          *            binary data to encode
 582  
          * @return byte[] containing Base64 characters in their UTF-8
 583  
          *         representation.
 584  
          */
 585  
         public static byte[] encodeBase64(byte[] binaryData) {
 586  0
                 return encodeBase64(binaryData, false);
 587  
         }
 588  
 
 589  
         /**
 590  
          * Encodes binary data using the base64 algorithm but does not chunk the
 591  
          * output.
 592  
          * 
 593  
          * NOTE: We changed the behaviour of this method from multi-line chunking
 594  
          * (commons-codec-1.4) to single-line non-chunking (commons-codec-1.5).
 595  
          * 
 596  
          * @param binaryData
 597  
          *            binary data to encode
 598  
          * @return String containing Base64 characters.
 599  
          * @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not).
 600  
          */
 601  
         public static String encodeBase64String(byte[] binaryData) {
 602  230
                 return StringUtils.newStringUtf8(encodeBase64(binaryData, false));
 603  
         }
 604  
 
 605  
         /**
 606  
          * Encodes binary data using a URL-safe variation of the base64 algorithm
 607  
          * but does not chunk the output. The url-safe variation emits - and _
 608  
          * instead of + and / characters.
 609  
          * 
 610  
          * @param binaryData
 611  
          *            binary data to encode
 612  
          * @return byte[] containing Base64 characters in their UTF-8
 613  
          *         representation.
 614  
          * @since 1.4
 615  
          */
 616  
         public static byte[] encodeBase64URLSafe(byte[] binaryData) {
 617  0
                 return encodeBase64(binaryData, false, true);
 618  
         }
 619  
 
 620  
         /**
 621  
          * Encodes binary data using a URL-safe variation of the base64 algorithm
 622  
          * but does not chunk the output. The url-safe variation emits - and _
 623  
          * instead of + and / characters.
 624  
          * 
 625  
          * @param binaryData
 626  
          *            binary data to encode
 627  
          * @return String containing Base64 characters
 628  
          * @since 1.4
 629  
          */
 630  
         public static String encodeBase64URLSafeString(byte[] binaryData) {
 631  0
                 return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true));
 632  
         }
 633  
 
 634  
         /**
 635  
          * Encodes binary data using the base64 algorithm and chunks the encoded
 636  
          * output into 76 character blocks
 637  
          * 
 638  
          * @param binaryData
 639  
          *            binary data to encode
 640  
          * @return Base64 characters chunked in 76 character blocks
 641  
          */
 642  
         public static byte[] encodeBase64Chunked(byte[] binaryData) {
 643  0
                 return encodeBase64(binaryData, true);
 644  
         }
 645  
 
 646  
         /**
 647  
          * Encodes binary data using the base64 algorithm, optionally chunking the
 648  
          * output into 76 character blocks.
 649  
          * 
 650  
          * @param binaryData
 651  
          *            Array containing binary data to encode.
 652  
          * @param isChunked
 653  
          *            if {@code true} this encoder will chunk the base64 output into
 654  
          *            76 character blocks
 655  
          * @return Base64-encoded data.
 656  
          * @throws IllegalArgumentException
 657  
          *             Thrown when the input array needs an output array bigger than
 658  
          *             {@link Integer#MAX_VALUE}
 659  
          */
 660  
         public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
 661  230
                 return encodeBase64(binaryData, isChunked, false);
 662  
         }
 663  
 
 664  
         /**
 665  
          * Encodes binary data using the base64 algorithm, optionally chunking the
 666  
          * output into 76 character blocks.
 667  
          * 
 668  
          * @param binaryData
 669  
          *            Array containing binary data to encode.
 670  
          * @param isChunked
 671  
          *            if {@code true} this encoder will chunk the base64 output into
 672  
          *            76 character blocks
 673  
          * @param urlSafe
 674  
          *            if {@code true} this encoder will emit - and _ instead of the
 675  
          *            usual + and / characters.
 676  
          * @return Base64-encoded data.
 677  
          * @throws IllegalArgumentException
 678  
          *             Thrown when the input array needs an output array bigger than
 679  
          *             {@link Integer#MAX_VALUE}
 680  
          * @since 1.4
 681  
          */
 682  
         public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe) {
 683  230
                 return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
 684  
         }
 685  
 
 686  
         /**
 687  
          * Encodes binary data using the base64 algorithm, optionally chunking the
 688  
          * output into 76 character blocks.
 689  
          * 
 690  
          * @param binaryData
 691  
          *            Array containing binary data to encode.
 692  
          * @param isChunked
 693  
          *            if {@code true} this encoder will chunk the base64 output into
 694  
          *            76 character blocks
 695  
          * @param urlSafe
 696  
          *            if {@code true} this encoder will emit - and _ instead of the
 697  
          *            usual + and / characters.
 698  
          * @param maxResultSize
 699  
          *            The maximum result size to accept.
 700  
          * @return Base64-encoded data.
 701  
          * @throws IllegalArgumentException
 702  
          *             Thrown when the input array needs an output array bigger than
 703  
          *             maxResultSize
 704  
          * @since 1.4
 705  
          */
 706  
         public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe, int maxResultSize) {
 707  230
                 if (binaryData == null || binaryData.length == 0) {
 708  0
                         return binaryData;
 709  
                 }
 710  
 
 711  
                 // Create this so can use the super-class method
 712  
                 // Also ensures that the same roundings are performed by the ctor and
 713  
                 // the code
 714  230
                 Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
 715  230
                 long len = b64.getEncodedLength(binaryData);
 716  230
                 if (len > maxResultSize) {
 717  0
                         throw new IllegalArgumentException("Input array too big, the output array would be bigger (" + len + ") than the specified maximum size of " + maxResultSize);
 718  
                 }
 719  
 
 720  230
                 return b64.encode(binaryData);
 721  
         }
 722  
 
 723  
         /**
 724  
          * Decodes a Base64 String into octets
 725  
          * 
 726  
          * @param base64String
 727  
          *            String containing Base64 data
 728  
          * @return Array containing decoded data.
 729  
          * @since 1.4
 730  
          */
 731  
         public static byte[] decodeBase64(String base64String) {
 732  265
                 return new Base64().decode(base64String);
 733  
         }
 734  
 
 735  
         /**
 736  
          * Decodes Base64 data into octets
 737  
          * 
 738  
          * @param base64Data
 739  
          *            Byte array containing Base64 data
 740  
          * @return Array containing decoded data.
 741  
          */
 742  
         public static byte[] decodeBase64(byte[] base64Data) {
 743  0
                 return new Base64().decode(base64Data);
 744  
         }
 745  
 
 746  
         // Implementation of the Encoder Interface
 747  
 
 748  
         // Implementation of integer encoding used for crypto
 749  
         /**
 750  
          * Decodes a byte64-encoded integer according to crypto standards such as
 751  
          * W3C's XML-Signature
 752  
          * 
 753  
          * @param pArray
 754  
          *            a byte array containing base64 character data
 755  
          * @return A BigInteger
 756  
          * @since 1.4
 757  
          */
 758  
         public static BigInteger decodeInteger(byte[] pArray) {
 759  0
                 return new BigInteger(1, decodeBase64(pArray));
 760  
         }
 761  
 
 762  
         /**
 763  
          * Encodes to a byte64-encoded integer according to crypto standards such as
 764  
          * W3C's XML-Signature
 765  
          * 
 766  
          * @param bigInt
 767  
          *            a BigInteger
 768  
          * @return A byte array containing base64 character data
 769  
          * @throws NullPointerException
 770  
          *             if null is passed in
 771  
          * @since 1.4
 772  
          */
 773  
         public static byte[] encodeInteger(BigInteger bigInt) {
 774  0
                 if (bigInt == null) {
 775  0
                         throw new NullPointerException("encodeInteger called with null parameter");
 776  
                 }
 777  0
                 return encodeBase64(toIntegerBytes(bigInt), false);
 778  
         }
 779  
 
 780  
         /**
 781  
          * Returns a byte-array representation of a <code>BigInteger</code> without
 782  
          * sign bit.
 783  
          * 
 784  
          * @param bigInt
 785  
          *            <code>BigInteger</code> to be converted
 786  
          * @return a byte array representation of the BigInteger parameter
 787  
          */
 788  
         static byte[] toIntegerBytes(BigInteger bigInt) {
 789  0
                 int bitlen = bigInt.bitLength();
 790  
                 // round bitlen
 791  0
                 bitlen = ((bitlen + 7) >> 3) << 3;
 792  0
                 byte[] bigBytes = bigInt.toByteArray();
 793  
 
 794  0
                 if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) {
 795  0
                         return bigBytes;
 796  
                 }
 797  
                 // set up params for copying everything but sign bit
 798  0
                 int startSrc = 0;
 799  0
                 int len = bigBytes.length;
 800  
 
 801  
                 // if bigInt is exactly byte-aligned, just skip signbit in copy
 802  0
                 if ((bigInt.bitLength() % 8) == 0) {
 803  0
                         startSrc = 1;
 804  0
                         len--;
 805  
                 }
 806  0
                 int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
 807  0
                 byte[] resizedBytes = new byte[bitlen / 8];
 808  0
                 System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
 809  0
                 return resizedBytes;
 810  
         }
 811  
 
 812  
         /**
 813  
          * Returns whether or not the <code>octet</code> is in the Base32 alphabet.
 814  
          * 
 815  
          * @param octet
 816  
          *            The value to test
 817  
          * @return {@code true} if the value is defined in the the Base32 alphabet
 818  
          *         {@code false} otherwise.
 819  
          */
 820  
         protected boolean isInAlphabet(byte octet) {
 821  990
                 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
 822  
         }
 823  
 
 824  
         /**
 825  
          * Holds thread context so classes can be thread-safe.
 826  
          * 
 827  
          * This class is not itself thread-safe; each thread must allocate its own
 828  
          * copy.
 829  
          * 
 830  
          * @since 1.7
 831  
          */
 832  
         static class Context {
 833  
 
 834  
                 /**
 835  
                  * Place holder for the bytes we're dealing with for our based logic.
 836  
                  * Bitwise operations store and extract the encoding or decoding from
 837  
                  * this variable.
 838  
                  */
 839  
                 int ibitWorkArea;
 840  
 
 841  
                 /**
 842  
                  * Place holder for the bytes we're dealing with for our based logic.
 843  
                  * Bitwise operations store and extract the encoding or decoding from
 844  
                  * this variable.
 845  
                  */
 846  
                 long lbitWorkArea;
 847  
 
 848  
                 /**
 849  
                  * Buffer for streaming.
 850  
                  */
 851  
                 byte[] buffer;
 852  
 
 853  
                 /**
 854  
                  * Position where next character should be written in the buffer.
 855  
                  */
 856  
                 int pos;
 857  
 
 858  
                 /**
 859  
                  * Position where next character should be read from the buffer.
 860  
                  */
 861  
                 int readPos;
 862  
 
 863  
                 /**
 864  
                  * Boolean flag to indicate the EOF has been reached. Once EOF has been
 865  
                  * reached, this object becomes useless, and must be thrown away.
 866  
                  */
 867  
                 boolean eof;
 868  
 
 869  
                 /**
 870  
                  * Variable tracks how many characters have been written to the current
 871  
                  * line. Only used when encoding. We use it to make sure each encoded
 872  
                  * line never goes beyond lineLength (if lineLength > 0).
 873  
                  */
 874  
                 int currentLinePos;
 875  
 
 876  
                 /**
 877  
                  * Writes to the buffer only occur after every 3/5 reads when encoding,
 878  
                  * and every 4/8 reads when decoding. This variable helps track that.
 879  
                  */
 880  
                 int modulus;
 881  
 
 882  495
                 Context() {
 883  495
                 }
 884  
         }
 885  
 
 886  
         /**
 887  
          * EOF
 888  
          * 
 889  
          * @since 1.7
 890  
          */
 891  
         static final int EOF = -1;
 892  
 
 893  
         /**
 894  
          * MIME chunk size per RFC 2045 section 6.8.
 895  
          * 
 896  
          * <p>
 897  
          * The {@value} character limit does not count the trailing CRLF, but counts
 898  
          * all other characters, including any equal signs.
 899  
          * </p>
 900  
          * 
 901  
          * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section
 902  
          *      6.8</a>
 903  
          */
 904  
         public static final int MIME_CHUNK_SIZE = 76;
 905  
 
 906  
         /**
 907  
          * PEM chunk size per RFC 1421 section 4.3.2.4.
 908  
          * 
 909  
          * <p>
 910  
          * The {@value} character limit does not count the trailing CRLF, but counts
 911  
          * all other characters, including any equal signs.
 912  
          * </p>
 913  
          * 
 914  
          * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section
 915  
          *      4.3.2.4</a>
 916  
          */
 917  
         public static final int PEM_CHUNK_SIZE = 64;
 918  
 
 919  
         private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
 920  
 
 921  
         /**
 922  
          * Defines the default buffer size - currently {@value} - must be large
 923  
          * enough for at least one encoded block+separator
 924  
          */
 925  
         private static final int DEFAULT_BUFFER_SIZE = 8192;
 926  
 
 927  
         /** Mask used to extract 8 bits, used in decoding bytes */
 928  
         protected static final int MASK_8BITS = 0xff;
 929  
 
 930  
         /**
 931  
          * Byte used to pad output.
 932  
          */
 933  
         protected static final byte PAD_DEFAULT = '='; // Allow static access to
 934  
                                                                                                         // default
 935  
 
 936  495
         protected final byte PAD = PAD_DEFAULT; // instance variable just in case it
 937  
                                                                                         // needs to vary later
 938  
 
 939  
         /**
 940  
          * Number of bytes in each full block of unencoded data, e.g. 3 for Base64
 941  
          * and 5 for Base32
 942  
          */
 943  
         private final int unencodedBlockSize;
 944  
 
 945  
         /**
 946  
          * Number of bytes in each full block of encoded data, e.g. 4 for Base64 and
 947  
          * 8 for Base32
 948  
          */
 949  
         private final int encodedBlockSize;
 950  
 
 951  
         /**
 952  
          * Chunksize for encoding. Not used when decoding. A value of zero or less
 953  
          * implies no chunking of the encoded data. Rounded down to nearest multiple
 954  
          * of encodedBlockSize.
 955  
          */
 956  
         protected int lineLength;
 957  
 
 958  
         /**
 959  
          * Size of chunk separator. Not used unless {@link #lineLength} > 0.
 960  
          */
 961  
         private int chunkSeparatorLength;
 962  
 
 963  
         /**
 964  
          * Note <code>lineLength</code> is rounded down to the nearest multiple of
 965  
          * {@link #encodedBlockSize} If <code>chunkSeparatorLength</code> is zero,
 966  
          * then chunking is disabled.
 967  
          * 
 968  
          * @param unencodedBlockSize
 969  
          *            the size of an unencoded block (e.g. Base64 = 3)
 970  
          * @param encodedBlockSize
 971  
          *            the size of an encoded block (e.g. Base64 = 4)
 972  
          * @param lineLength
 973  
          *            if &gt; 0, use chunking with a length <code>lineLength</code>
 974  
          * @param chunkSeparatorLength
 975  
          *            the chunk separator length, if relevant
 976  
          */
 977  495
         protected Base64(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength) {
 978  495
                 this.unencodedBlockSize = unencodedBlockSize;
 979  495
                 this.encodedBlockSize = encodedBlockSize;
 980  495
                 this.lineLength = (lineLength > 0 && chunkSeparatorLength > 0) ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
 981  495
                 this.chunkSeparatorLength = chunkSeparatorLength;
 982  495
         }
 983  
 
 984  
         /**
 985  
          * Returns true if this object has buffered data for reading.
 986  
          * 
 987  
          * @param context
 988  
          *            the context to be used
 989  
          * @return true if there is data still available for reading.
 990  
          */
 991  
         boolean hasData(Context context) { // package protected for access from I/O
 992  
                                                                                 // streams
 993  0
                 return context.buffer != null;
 994  
         }
 995  
 
 996  
         /**
 997  
          * Returns the amount of buffered data available for reading.
 998  
          * 
 999  
          * @param context
 1000  
          *            the context to be used
 1001  
          * @return The amount of buffered data available for reading.
 1002  
          */
 1003  
         int available(Context context) { // package protected for access from I/O
 1004  
                                                                                 // streams
 1005  495
                 return context.buffer != null ? context.pos - context.readPos : 0;
 1006  
         }
 1007  
 
 1008  
         /**
 1009  
          * Get the default buffer size. Can be overridden.
 1010  
          * 
 1011  
          * @return {@link #DEFAULT_BUFFER_SIZE}
 1012  
          */
 1013  
         protected int getDefaultBufferSize() {
 1014  495
                 return DEFAULT_BUFFER_SIZE;
 1015  
         }
 1016  
 
 1017  
         /**
 1018  
          * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
 1019  
          * 
 1020  
          * @param context
 1021  
          *            the context to be used
 1022  
          */
 1023  
         private void resizeBuffer(Context context) {
 1024  495
                 if (context.buffer == null) {
 1025  495
                         context.buffer = new byte[getDefaultBufferSize()];
 1026  495
                         context.pos = 0;
 1027  495
                         context.readPos = 0;
 1028  
                 } else {
 1029  0
                         byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
 1030  0
                         System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
 1031  0
                         context.buffer = b;
 1032  
                 }
 1033  495
         }
 1034  
 
 1035  
         /**
 1036  
          * Ensure that the buffer has room for <code>size</code> bytes
 1037  
          * 
 1038  
          * @param size
 1039  
          *            minimum spare space required
 1040  
          * @param context
 1041  
          *            the context to be used
 1042  
          */
 1043  
         protected void ensureBufferSize(int size, Context context) {
 1044  73760
                 if ((context.buffer == null) || (context.buffer.length < context.pos + size)) {
 1045  495
                         resizeBuffer(context);
 1046  
                 }
 1047  73760
         }
 1048  
 
 1049  
         /**
 1050  
          * Extracts buffered data into the provided byte[] array, starting at
 1051  
          * position bPos, up to a maximum of bAvail bytes. Returns how many bytes
 1052  
          * were actually extracted.
 1053  
          * 
 1054  
          * @param b
 1055  
          *            byte[] array to extract the buffered data into.
 1056  
          * @param bPos
 1057  
          *            position in byte[] array to start extraction at.
 1058  
          * @param bAvail
 1059  
          *            amount of bytes we're allowed to extract. We may extract fewer
 1060  
          *            (if fewer are available).
 1061  
          * @param context
 1062  
          *            the context to be used
 1063  
          * @return The number of bytes successfully extracted into the provided
 1064  
          *         byte[] array.
 1065  
          */
 1066  
         int readResults(byte[] b, int bPos, int bAvail, Context context) { // package
 1067  
                                                                                                                                                 // protected
 1068  
                                                                                                                                                 // for
 1069  
                                                                                                                                                 // access
 1070  
                                                                                                                                                 // from
 1071  
                                                                                                                                                 // I/O
 1072  
                                                                                                                                                 // streams
 1073  495
                 if (context.buffer != null) {
 1074  495
                         int len = Math.min(available(context), bAvail);
 1075  495
                         System.arraycopy(context.buffer, context.readPos, b, bPos, len);
 1076  495
                         context.readPos += len;
 1077  495
                         if (context.readPos >= context.pos) {
 1078  495
                                 context.buffer = null; // so hasData() will return false, and
 1079  
                                                                                 // this method can return -1
 1080  
                         }
 1081  495
                         return len;
 1082  
                 }
 1083  0
                 return context.eof ? EOF : 0;
 1084  
         }
 1085  
 
 1086  
         /**
 1087  
          * Checks if a byte value is whitespace or not. Whitespace is taken to mean:
 1088  
          * space, tab, CR, LF
 1089  
          * 
 1090  
          * @param byteToCheck
 1091  
          *            the byte to check
 1092  
          * @return true if byte is whitespace, false otherwise
 1093  
          */
 1094  
         protected static boolean isWhiteSpace(byte byteToCheck) {
 1095  0
                 switch (byteToCheck) {
 1096  
                 case ' ':
 1097  
                 case '\n':
 1098  
                 case '\r':
 1099  
                 case '\t':
 1100  0
                         return true;
 1101  
                 default:
 1102  0
                         return false;
 1103  
                 }
 1104  
         }
 1105  
 
 1106  
         /**
 1107  
          * Encodes an Object using the Base-N algorithm. This method is provided in
 1108  
          * order to satisfy the requirements of the Encoder interface, and will
 1109  
          * throw an EncoderException if the supplied object is not of type byte[].
 1110  
          * 
 1111  
          * @param obj
 1112  
          *            Object to encode
 1113  
          * @return An object (of type byte[]) containing the Base-N encoded data
 1114  
          *         which corresponds to the byte[] supplied.
 1115  
          * @throws EncoderException
 1116  
          *             if the parameter supplied is not of type byte[]
 1117  
          */
 1118  
         public Object encode(Object obj) throws Exception {
 1119  0
                 if (!(obj instanceof byte[])) {
 1120  0
                         throw new Exception("Parameter supplied to Base-N encode is not a byte[]");
 1121  
                 }
 1122  0
                 return encode((byte[]) obj);
 1123  
         }
 1124  
 
 1125  
         /**
 1126  
          * Encodes a byte[] containing binary data, into a String containing
 1127  
          * characters in the Base-N alphabet. Uses UTF8 encoding.
 1128  
          * 
 1129  
          * @param pArray
 1130  
          *            a byte array containing binary data
 1131  
          * @return A String containing only Base-N character data
 1132  
          */
 1133  
         public String encodeToString(byte[] pArray) {
 1134  0
                 return StringUtils.newStringUtf8(encode(pArray));
 1135  
         }
 1136  
 
 1137  
         /**
 1138  
          * Encodes a byte[] containing binary data, into a String containing
 1139  
          * characters in the appropriate alphabet. Uses UTF8 encoding.
 1140  
          * 
 1141  
          * @param pArray
 1142  
          *            a byte array containing binary data
 1143  
          * @return String containing only character data in the appropriate
 1144  
          *         alphabet.
 1145  
          */
 1146  
         public String encodeAsString(byte[] pArray) {
 1147  0
                 return StringUtils.newStringUtf8(encode(pArray));
 1148  
         }
 1149  
 
 1150  
         /**
 1151  
          * Decodes an Object using the Base-N algorithm. This method is provided in
 1152  
          * order to satisfy the requirements of the Decoder interface, and will
 1153  
          * throw a DecoderException if the supplied object is not of type byte[] or
 1154  
          * String.
 1155  
          * 
 1156  
          * @param obj
 1157  
          *            Object to decode
 1158  
          * @return An object (of type byte[]) containing the binary data which
 1159  
          *         corresponds to the byte[] or String supplied.
 1160  
          * @throws DecoderException
 1161  
          *             if the parameter supplied is not of type byte[]
 1162  
          */
 1163  
         public Object decode(Object obj) throws Exception {
 1164  0
                 if (obj instanceof byte[]) {
 1165  0
                         return decode((byte[]) obj);
 1166  0
                 } else if (obj instanceof String) {
 1167  0
                         return decode((String) obj);
 1168  
                 } else {
 1169  0
                         throw new Exception("Parameter supplied to Base-N decode is not a byte[] or a String");
 1170  
                 }
 1171  
         }
 1172  
 
 1173  
         /**
 1174  
          * Decodes a String containing characters in the Base-N alphabet.
 1175  
          * 
 1176  
          * @param pArray
 1177  
          *            A String containing Base-N character data
 1178  
          * @return a byte array containing binary data
 1179  
          */
 1180  
         public byte[] decode(String pArray) {
 1181  265
                 return decode(StringUtils.getBytesUtf8(pArray));
 1182  
         }
 1183  
 
 1184  
         /**
 1185  
          * Decodes a byte[] containing characters in the Base-N alphabet.
 1186  
          * 
 1187  
          * @param pArray
 1188  
          *            A byte array containing Base-N character data
 1189  
          * @return a byte array containing binary data
 1190  
          */
 1191  
         public byte[] decode(byte[] pArray) {
 1192  265
                 Context context = new Context();
 1193  265
                 if (pArray == null || pArray.length == 0) {
 1194  0
                         return pArray;
 1195  
                 }
 1196  265
                 decode(pArray, 0, pArray.length, context);
 1197  265
                 decode(pArray, 0, EOF, context); // Notify decoder of EOF.
 1198  265
                 byte[] result = new byte[context.pos];
 1199  265
                 readResults(result, 0, result.length, context);
 1200  265
                 return result;
 1201  
         }
 1202  
 
 1203  
         /**
 1204  
          * Encodes a byte[] containing binary data, into a byte[] containing
 1205  
          * characters in the alphabet.
 1206  
          * 
 1207  
          * @param pArray
 1208  
          *            a byte array containing binary data
 1209  
          * @return A byte array containing only the basen alphabetic character data
 1210  
          */
 1211  
         public byte[] encode(byte[] pArray) {
 1212  230
                 Context context = new Context();
 1213  230
                 if (pArray == null || pArray.length == 0) {
 1214  0
                         return pArray;
 1215  
                 }
 1216  230
                 encode(pArray, 0, pArray.length, context);
 1217  230
                 encode(pArray, 0, EOF, context); // Notify encoder of EOF.
 1218  230
                 byte[] buf = new byte[context.pos - context.readPos];
 1219  230
                 readResults(buf, 0, buf.length, context);
 1220  230
                 return buf;
 1221  
         }
 1222  
 
 1223  
         /**
 1224  
          * Tests a given byte array to see if it contains only valid characters
 1225  
          * within the alphabet. The method optionally treats whitespace and pad as
 1226  
          * valid.
 1227  
          * 
 1228  
          * @param arrayOctet
 1229  
          *            byte array to test
 1230  
          * @param allowWSPad
 1231  
          *            if {@code true}, then whitespace and PAD are also allowed
 1232  
          * 
 1233  
          * @return {@code true} if all bytes are valid characters in the alphabet or
 1234  
          *         if the byte array is empty; {@code false}, otherwise
 1235  
          */
 1236  
         public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) {
 1237  0
                 for (int i = 0; i < arrayOctet.length; i++) {
 1238  0
                         if (!isInAlphabet(arrayOctet[i]) && (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) {
 1239  0
                                 return false;
 1240  
                         }
 1241  
                 }
 1242  0
                 return true;
 1243  
         }
 1244  
 
 1245  
         /**
 1246  
          * Tests a given String to see if it contains only valid characters within
 1247  
          * the alphabet. The method treats whitespace and PAD as valid.
 1248  
          * 
 1249  
          * @param basen
 1250  
          *            String to test
 1251  
          * @return {@code true} if all characters in the String are valid characters
 1252  
          *         in the alphabet or if the String is empty; {@code false},
 1253  
          *         otherwise
 1254  
          * @see #isInAlphabet(byte[], boolean)
 1255  
          */
 1256  
         public boolean isInAlphabet(String basen) {
 1257  0
                 return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
 1258  
         }
 1259  
 
 1260  
         /**
 1261  
          * Tests a given byte array to see if it contains any characters within the
 1262  
          * alphabet or PAD.
 1263  
          * 
 1264  
          * Intended for use in checking line-ending arrays
 1265  
          * 
 1266  
          * @param arrayOctet
 1267  
          *            byte array to test
 1268  
          * @return {@code true} if any byte is a valid character in the alphabet or
 1269  
          *         PAD; {@code false} otherwise
 1270  
          */
 1271  
         protected boolean containsAlphabetOrPad(byte[] arrayOctet) {
 1272  495
                 if (arrayOctet == null) {
 1273  0
                         return false;
 1274  
                 }
 1275  1485
                 for (byte element : arrayOctet) {
 1276  990
                         if (PAD == element || isInAlphabet(element)) {
 1277  0
                                 return true;
 1278  
                         }
 1279  
                 }
 1280  495
                 return false;
 1281  
         }
 1282  
 
 1283  
         /**
 1284  
          * Calculates the amount of space needed to encode the supplied array.
 1285  
          * 
 1286  
          * @param pArray
 1287  
          *            byte[] array which will later be encoded
 1288  
          * 
 1289  
          * @return amount of space needed to encoded the supplied array. Returns a
 1290  
          *         long since a max-len array will require > Integer.MAX_VALUE
 1291  
          */
 1292  
         public long getEncodedLength(byte[] pArray) {
 1293  
                 // Calculate non-chunked size - rounded up to allow for padding
 1294  
                 // cast to long is needed to avoid possibility of overflow
 1295  230
                 long len = ((pArray.length + unencodedBlockSize - 1) / unencodedBlockSize) * (long) encodedBlockSize;
 1296  230
                 if (lineLength > 0) { // We're using chunking
 1297  
                         // Round up to nearest multiple
 1298  0
                         len += ((len + lineLength - 1) / lineLength) * chunkSeparatorLength;
 1299  
                 }
 1300  230
                 return len;
 1301  
         }
 1302  
 
 1303  
         /**
 1304  
          * Character encoding names required of every implementation of the Java
 1305  
          * platform.
 1306  
          * 
 1307  
          * From the Java documentation <a href=
 1308  
          * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
 1309  
          * >Standard charsets</a>:
 1310  
          * <p>
 1311  
          * <cite>Every implementation of the Java platform is required to support
 1312  
          * the following character encodings. Consult the release documentation for
 1313  
          * your implementation to see if any other encodings are supported. Consult
 1314  
          * the release documentation for your implementation to see if any other
 1315  
          * encodings are supported. </cite>
 1316  
          * </p>
 1317  
          * 
 1318  
          * <ul>
 1319  
          * <li><code>US-ASCII</code><br/>
 1320  
          * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the
 1321  
          * Unicode character set.</li>
 1322  
          * <li><code>ISO-8859-1</code><br/>
 1323  
          * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 1324  
          * <li><code>UTF-8</code><br/>
 1325  
          * Eight-bit Unicode Transformation Format.</li>
 1326  
          * <li><code>UTF-16BE</code><br/>
 1327  
          * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 1328  
          * <li><code>UTF-16LE</code><br/>
 1329  
          * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 1330  
          * <li><code>UTF-16</code><br/>
 1331  
          * Sixteen-bit Unicode Transformation Format, byte order specified by a
 1332  
          * mandatory initial byte-order mark (either order accepted on input,
 1333  
          * big-endian used on output.)</li>
 1334  
          * </ul>
 1335  
          * 
 1336  
          * This perhaps would best belong in the [lang] project. Even if a similar
 1337  
          * interface is defined in [lang], it is not foreseen that [codec] would be
 1338  
          * made to depend on [lang].
 1339  
          * 
 1340  
          * <p>
 1341  
          * This class is immutable and thread-safe.
 1342  
          * </p>
 1343  
          * 
 1344  
          * @see <a
 1345  
          *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1346  
          *      charsets</a>
 1347  
          * @since 1.4
 1348  
          * @version $Id$
 1349  
          */
 1350  0
         public class CharEncoding {
 1351  
                 /**
 1352  
                  * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. </p>
 1353  
                  * <p>
 1354  
                  * Every implementation of the Java platform is required to support this
 1355  
                  * character encoding.
 1356  
                  * </p>
 1357  
                  * 
 1358  
                  * @see <a
 1359  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1360  
                  *      charsets</a>
 1361  
                  */
 1362  
                 public static final String ISO_8859_1 = "ISO-8859-1";
 1363  
 
 1364  
                 /**
 1365  
                  * <p>
 1366  
                  * Seven-bit ASCII, also known as ISO646-US, also known as the Basic
 1367  
                  * Latin block of the Unicode character set.
 1368  
                  * </p>
 1369  
                  * <p>
 1370  
                  * Every implementation of the Java platform is required to support this
 1371  
                  * character encoding.
 1372  
                  * </p>
 1373  
                  * 
 1374  
                  * @see <a
 1375  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1376  
                  *      charsets</a>
 1377  
                  */
 1378  
                 public static final String US_ASCII = "US-ASCII";
 1379  
 
 1380  
                 /**
 1381  
                  * <p>
 1382  
                  * Sixteen-bit Unicode Transformation Format, The byte order specified
 1383  
                  * by a mandatory initial byte-order mark (either order accepted on
 1384  
                  * input, big-endian used on output)
 1385  
                  * </p>
 1386  
                  * <p>
 1387  
                  * Every implementation of the Java platform is required to support this
 1388  
                  * character encoding.
 1389  
                  * </p>
 1390  
                  * 
 1391  
                  * @see <a
 1392  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1393  
                  *      charsets</a>
 1394  
                  */
 1395  
                 public static final String UTF_16 = "UTF-16";
 1396  
 
 1397  
                 /**
 1398  
                  * <p>
 1399  
                  * Sixteen-bit Unicode Transformation Format, big-endian byte order.
 1400  
                  * </p>
 1401  
                  * <p>
 1402  
                  * Every implementation of the Java platform is required to support this
 1403  
                  * character encoding.
 1404  
                  * </p>
 1405  
                  * 
 1406  
                  * @see <a
 1407  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1408  
                  *      charsets</a>
 1409  
                  */
 1410  
                 public static final String UTF_16BE = "UTF-16BE";
 1411  
 
 1412  
                 /**
 1413  
                  * <p>
 1414  
                  * Sixteen-bit Unicode Transformation Format, little-endian byte order.
 1415  
                  * </p>
 1416  
                  * <p>
 1417  
                  * Every implementation of the Java platform is required to support this
 1418  
                  * character encoding.
 1419  
                  * </p>
 1420  
                  * 
 1421  
                  * @see <a
 1422  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1423  
                  *      charsets</a>
 1424  
                  */
 1425  
                 public static final String UTF_16LE = "UTF-16LE";
 1426  
 
 1427  
                 /**
 1428  
                  * <p>
 1429  
                  * Eight-bit Unicode Transformation Format.
 1430  
                  * </p>
 1431  
                  * <p>
 1432  
                  * Every implementation of the Java platform is required to support this
 1433  
                  * character encoding.
 1434  
                  * </p>
 1435  
                  * 
 1436  
                  * @see <a
 1437  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1438  
                  *      charsets</a>
 1439  
                  */
 1440  
                 public static final String UTF_8 = "UTF-8";
 1441  
         }
 1442  
 
 1443  
         /**
 1444  
          * Charsets required of every implementation of the Java platform.
 1445  
          * 
 1446  
          * From the Java documentation <a href=
 1447  
          * "http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
 1448  
          * >Standard charsets</a>:
 1449  
          * <p>
 1450  
          * <cite>Every implementation of the Java platform is required to support
 1451  
          * the following character encodings. Consult the release documentation for
 1452  
          * your implementation to see if any other encodings are supported. Consult
 1453  
          * the release documentation for your implementation to see if any other
 1454  
          * encodings are supported. </cite>
 1455  
          * </p>
 1456  
          * 
 1457  
          * <ul>
 1458  
          * <li><code>US-ASCII</code><br/>
 1459  
          * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the
 1460  
          * Unicode character set.</li>
 1461  
          * <li><code>ISO-8859-1</code><br/>
 1462  
          * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 1463  
          * <li><code>UTF-8</code><br/>
 1464  
          * Eight-bit Unicode Transformation Format.</li>
 1465  
          * <li><code>UTF-16BE</code><br/>
 1466  
          * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 1467  
          * <li><code>UTF-16LE</code><br/>
 1468  
          * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 1469  
          * <li><code>UTF-16</code><br/>
 1470  
          * Sixteen-bit Unicode Transformation Format, byte order specified by a
 1471  
          * mandatory initial byte-order mark (either order accepted on input,
 1472  
          * big-endian used on output.)</li>
 1473  
          * </ul>
 1474  
          * 
 1475  
          * This perhaps would best belong in the Commons Lang project. Even if a
 1476  
          * similar class is defined in Commons Lang, it is not foreseen that Commons
 1477  
          * Codec would be made to depend on Commons Lang.
 1478  
          * 
 1479  
          * <p>
 1480  
          * This class is immutable and thread-safe.
 1481  
          * </p>
 1482  
          * 
 1483  
          * @see <a
 1484  
          *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1485  
          *      charsets</a>
 1486  
          * @since 1.7
 1487  
          * @version $Id: CharEncoding.java 1173287 2011-09-20 18:16:19Z ggregory $
 1488  
          */
 1489  0
         public static class Charsets {
 1490  
 
 1491  
                 //
 1492  
                 // This class should only contain Charset instances for required
 1493  
                 // encodings. This guarantees that it will load correctly and
 1494  
                 // without delay on all Java platforms.
 1495  
                 //
 1496  
 
 1497  
                 /**
 1498  
                  * Returns the given Charset or the default Charset if the given Charset
 1499  
                  * is null.
 1500  
                  * 
 1501  
                  * @param charset
 1502  
                  *            A charset or null.
 1503  
                  * @return the given Charset or the default Charset if the given Charset
 1504  
                  *         is null
 1505  
                  */
 1506  
                 public static Charset toCharset(Charset charset) {
 1507  0
                         return charset == null ? Charset.defaultCharset() : charset;
 1508  
                 }
 1509  
 
 1510  
                 /**
 1511  
                  * Returns a Charset for the named charset. If the name is null, return
 1512  
                  * the default Charset.
 1513  
                  * 
 1514  
                  * @param charset
 1515  
                  *            The name of the requested charset, may be null.
 1516  
                  * @return a Charset for the named charset
 1517  
                  * @throws UnsupportedCharsetException
 1518  
                  *             If the named charset is unavailable
 1519  
                  */
 1520  
                 public static Charset toCharset(String charset) {
 1521  0
                         return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
 1522  
                 }
 1523  
 
 1524  
                 /**
 1525  
                  * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. </p>
 1526  
                  * <p>
 1527  
                  * Every implementation of the Java platform is required to support this
 1528  
                  * character encoding.
 1529  
                  * </p>
 1530  
                  * 
 1531  
                  * @see <a
 1532  
                  *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1533  
                  *      charsets</a>
 1534  
                  */
 1535  5
                 public static final Charset ISO_8859_1 = Charset.forName(CharEncoding.ISO_8859_1);
 1536  
 
 1537  
                 /**
 1538  
                  * <p>
 1539  
                  * Seven-bit ASCII, also known as ISO646-US, also known as the Basic
 1540  
                  * Latin block of the Unicode character set.
 1541  
                  * </p>
 1542  
                  * <p>
 1543  
                  * Every implementation of the Java platform is required to support this
 1544  
                  * character encoding.
 1545  
                  * </p>
 1546  
                  * 
 1547  
                  * @see <a
 1548  
                  *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1549  
                  *      charsets</a>
 1550  
                  */
 1551  5
                 public static final Charset US_ASCII = Charset.forName(CharEncoding.US_ASCII);
 1552  
 
 1553  
                 /**
 1554  
                  * <p>
 1555  
                  * Sixteen-bit Unicode Transformation Format, The byte order specified
 1556  
                  * by a mandatory initial byte-order mark (either order accepted on
 1557  
                  * input, big-endian used on output)
 1558  
                  * </p>
 1559  
                  * <p>
 1560  
                  * Every implementation of the Java platform is required to support this
 1561  
                  * character encoding.
 1562  
                  * </p>
 1563  
                  * 
 1564  
                  * @see <a
 1565  
                  *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1566  
                  *      charsets</a>
 1567  
                  */
 1568  5
                 public static final Charset UTF_16 = Charset.forName(CharEncoding.UTF_16);
 1569  
 
 1570  
                 /**
 1571  
                  * <p>
 1572  
                  * Sixteen-bit Unicode Transformation Format, big-endian byte order.
 1573  
                  * </p>
 1574  
                  * <p>
 1575  
                  * Every implementation of the Java platform is required to support this
 1576  
                  * character encoding.
 1577  
                  * </p>
 1578  
                  * 
 1579  
                  * @see <a
 1580  
                  *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1581  
                  *      charsets</a>
 1582  
                  */
 1583  5
                 public static final Charset UTF_16BE = Charset.forName(CharEncoding.UTF_16BE);
 1584  
 
 1585  
                 /**
 1586  
                  * <p>
 1587  
                  * Sixteen-bit Unicode Transformation Format, little-endian byte order.
 1588  
                  * </p>
 1589  
                  * <p>
 1590  
                  * Every implementation of the Java platform is required to support this
 1591  
                  * character encoding.
 1592  
                  * </p>
 1593  
                  * 
 1594  
                  * @see <a
 1595  
                  *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1596  
                  *      charsets</a>
 1597  
                  */
 1598  5
                 public static final Charset UTF_16LE = Charset.forName(CharEncoding.UTF_16LE);
 1599  
 
 1600  
                 /**
 1601  
                  * <p>
 1602  
                  * Eight-bit Unicode Transformation Format.
 1603  
                  * </p>
 1604  
                  * <p>
 1605  
                  * Every implementation of the Java platform is required to support this
 1606  
                  * character encoding.
 1607  
                  * </p>
 1608  
                  * 
 1609  
                  * @see <a
 1610  
                  *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1611  
                  *      charsets</a>
 1612  
                  */
 1613  5
                 public static final Charset UTF_8 = Charset.forName(CharEncoding.UTF_8);
 1614  
         }
 1615  
 
 1616  
         /**
 1617  
          * Converts String to and from bytes using the encodings required by the
 1618  
          * Java specification. These encodings are specified in <a href=
 1619  
          * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
 1620  
          * >Standard charsets</a>
 1621  
          * 
 1622  
          * <p>
 1623  
          * This class is immutable and thread-safe.
 1624  
          * </p>
 1625  
          * 
 1626  
          * @see CharEncoding
 1627  
          * @see <a
 1628  
          *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1629  
          *      charsets</a>
 1630  
          * @version $Id$
 1631  
          * @since 1.4
 1632  
          */
 1633  0
         public static class StringUtils {
 1634  
 
 1635  
                 /**
 1636  
                  * Calls {@link String#getBytes(Charset)}
 1637  
                  * 
 1638  
                  * @param string
 1639  
                  *            The string to encode (if null, return null).
 1640  
                  * @param charset
 1641  
                  *            The {@link Charset} to encode the {@code String}
 1642  
                  * @return the encoded bytes
 1643  
                  */
 1644  
                 private static byte[] getBytes(String string, Charset charset) {
 1645  265
                         if (string == null) {
 1646  0
                                 return null;
 1647  
                         }
 1648  265
                         return string.getBytes(charset);
 1649  
                 }
 1650  
 
 1651  
                 /**
 1652  
                  * Encodes the given string into a sequence of bytes using the
 1653  
                  * ISO-8859-1 charset, storing the result into a new byte array.
 1654  
                  * 
 1655  
                  * @param string
 1656  
                  *            the String to encode, may be {@code null}
 1657  
                  * @return encoded bytes, or {@code null} if the input string was
 1658  
                  *         {@code null}
 1659  
                  * @throws NullPointerException
 1660  
                  *             Thrown if {@link Charsets#ISO_8859_1} is not initialized,
 1661  
                  *             which should never happen since it is required by the
 1662  
                  *             Java platform specification.
 1663  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1664  
                  *        UnsupportedEncodingException
 1665  
                  * @see <a
 1666  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1667  
                  *      charsets</a>
 1668  
                  * @see #getBytesUnchecked(String, String)
 1669  
                  */
 1670  
                 public static byte[] getBytesIso8859_1(String string) {
 1671  0
                         return getBytes(string, Charsets.ISO_8859_1);
 1672  
                 }
 1673  
 
 1674  
                 /**
 1675  
                  * Encodes the given string into a sequence of bytes using the named
 1676  
                  * charset, storing the result into a new byte array.
 1677  
                  * <p>
 1678  
                  * This method catches {@link UnsupportedEncodingException} and rethrows
 1679  
                  * it as {@link IllegalStateException}, which should never happen for a
 1680  
                  * required charset name. Use this method when the encoding is required
 1681  
                  * to be in the JRE.
 1682  
                  * </p>
 1683  
                  * 
 1684  
                  * @param string
 1685  
                  *            the String to encode, may be {@code null}
 1686  
                  * @param charsetName
 1687  
                  *            The name of a required {@link java.nio.charset.Charset}
 1688  
                  * @return encoded bytes, or {@code null} if the input string was
 1689  
                  *         {@code null}
 1690  
                  * @throws IllegalStateException
 1691  
                  *             Thrown when a {@link UnsupportedEncodingException} is
 1692  
                  *             caught, which should never happen for a required charset
 1693  
                  *             name.
 1694  
                  * @see CharEncoding
 1695  
                  * @see String#getBytes(String)
 1696  
                  */
 1697  
                 public static byte[] getBytesUnchecked(String string, String charsetName) {
 1698  0
                         if (string == null) {
 1699  0
                                 return null;
 1700  
                         }
 1701  
                         try {
 1702  0
                                 return string.getBytes(charsetName);
 1703  0
                         } catch (UnsupportedEncodingException e) {
 1704  0
                                 throw StringUtils.newIllegalStateException(charsetName, e);
 1705  
                         }
 1706  
                 }
 1707  
 
 1708  
                 /**
 1709  
                  * Encodes the given string into a sequence of bytes using the US-ASCII
 1710  
                  * charset, storing the result into a new byte array.
 1711  
                  * 
 1712  
                  * @param string
 1713  
                  *            the String to encode, may be {@code null}
 1714  
                  * @return encoded bytes, or {@code null} if the input string was
 1715  
                  *         {@code null}
 1716  
                  * @throws NullPointerException
 1717  
                  *             Thrown if {@link Charsets#US_ASCII} is not initialized,
 1718  
                  *             which should never happen since it is required by the
 1719  
                  *             Java platform specification.
 1720  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1721  
                  *        UnsupportedEncodingException
 1722  
                  * @see <a
 1723  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1724  
                  *      charsets</a>
 1725  
                  * @see #getBytesUnchecked(String, String)
 1726  
                  */
 1727  
                 public static byte[] getBytesUsAscii(String string) {
 1728  0
                         return getBytes(string, Charsets.US_ASCII);
 1729  
                 }
 1730  
 
 1731  
                 /**
 1732  
                  * Encodes the given string into a sequence of bytes using the UTF-16
 1733  
                  * charset, storing the result into a new byte array.
 1734  
                  * 
 1735  
                  * @param string
 1736  
                  *            the String to encode, may be {@code null}
 1737  
                  * @return encoded bytes, or {@code null} if the input string was
 1738  
                  *         {@code null}
 1739  
                  * @throws NullPointerException
 1740  
                  *             Thrown if {@link Charsets#UTF_16} is not initialized,
 1741  
                  *             which should never happen since it is required by the
 1742  
                  *             Java platform specification.
 1743  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1744  
                  *        UnsupportedEncodingException
 1745  
                  * @see <a
 1746  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1747  
                  *      charsets</a>
 1748  
                  * @see #getBytesUnchecked(String, String)
 1749  
                  */
 1750  
                 public static byte[] getBytesUtf16(String string) {
 1751  0
                         return getBytes(string, Charsets.UTF_16);
 1752  
                 }
 1753  
 
 1754  
                 /**
 1755  
                  * Encodes the given string into a sequence of bytes using the UTF-16BE
 1756  
                  * charset, storing the result into a new byte array.
 1757  
                  * 
 1758  
                  * @param string
 1759  
                  *            the String to encode, may be {@code null}
 1760  
                  * @return encoded bytes, or {@code null} if the input string was
 1761  
                  *         {@code null}
 1762  
                  * @throws NullPointerException
 1763  
                  *             Thrown if {@link Charsets#UTF_16BE} is not initialized,
 1764  
                  *             which should never happen since it is required by the
 1765  
                  *             Java platform specification.
 1766  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1767  
                  *        UnsupportedEncodingException
 1768  
                  * @see <a
 1769  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1770  
                  *      charsets</a>
 1771  
                  * @see #getBytesUnchecked(String, String)
 1772  
                  */
 1773  
                 public static byte[] getBytesUtf16Be(String string) {
 1774  0
                         return getBytes(string, Charsets.UTF_16BE);
 1775  
                 }
 1776  
 
 1777  
                 /**
 1778  
                  * Encodes the given string into a sequence of bytes using the UTF-16LE
 1779  
                  * charset, storing the result into a new byte array.
 1780  
                  * 
 1781  
                  * @param string
 1782  
                  *            the String to encode, may be {@code null}
 1783  
                  * @return encoded bytes, or {@code null} if the input string was
 1784  
                  *         {@code null}
 1785  
                  * @throws NullPointerException
 1786  
                  *             Thrown if {@link Charsets#UTF_16LE} is not initialized,
 1787  
                  *             which should never happen since it is required by the
 1788  
                  *             Java platform specification.
 1789  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1790  
                  *        UnsupportedEncodingException
 1791  
                  * @see <a
 1792  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1793  
                  *      charsets</a>
 1794  
                  * @see #getBytesUnchecked(String, String)
 1795  
                  */
 1796  
                 public static byte[] getBytesUtf16Le(String string) {
 1797  0
                         return getBytes(string, Charsets.UTF_16LE);
 1798  
                 }
 1799  
 
 1800  
                 /**
 1801  
                  * Encodes the given string into a sequence of bytes using the UTF-8
 1802  
                  * charset, storing the result into a new byte array.
 1803  
                  * 
 1804  
                  * @param string
 1805  
                  *            the String to encode, may be {@code null}
 1806  
                  * @return encoded bytes, or {@code null} if the input string was
 1807  
                  *         {@code null}
 1808  
                  * @throws NullPointerException
 1809  
                  *             Thrown if {@link Charsets#UTF_8} is not initialized,
 1810  
                  *             which should never happen since it is required by the
 1811  
                  *             Java platform specification.
 1812  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1813  
                  *        UnsupportedEncodingException
 1814  
                  * @see <a
 1815  
                  *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 1816  
                  *      charsets</a>
 1817  
                  * @see #getBytesUnchecked(String, String)
 1818  
                  */
 1819  
                 public static byte[] getBytesUtf8(String string) {
 1820  265
                         return getBytes(string, Charsets.UTF_8);
 1821  
                 }
 1822  
 
 1823  
                 private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
 1824  0
                         return new IllegalStateException(charsetName + ": " + e);
 1825  
                 }
 1826  
 
 1827  
                 /**
 1828  
                  * Constructs a new <code>String</code> by decoding the specified array
 1829  
                  * of bytes using the given charset.
 1830  
                  * 
 1831  
                  * @param bytes
 1832  
                  *            The bytes to be decoded into characters
 1833  
                  * @param charset
 1834  
                  *            The {@link Charset} to encode the {@code String}
 1835  
                  * @return A new <code>String</code> decoded from the specified array of
 1836  
                  *         bytes using the given charset, or {@code null} if the input
 1837  
                  *         byte array was {@code null}.
 1838  
                  * @throws NullPointerException
 1839  
                  *             Thrown if {@link Charsets#UTF_8} is not initialized,
 1840  
                  *             which should never happen since it is required by the
 1841  
                  *             Java platform specification.
 1842  
                  */
 1843  
                 private static String newString(byte[] bytes, Charset charset) {
 1844  230
                         return bytes == null ? null : new String(bytes, charset);
 1845  
                 }
 1846  
 
 1847  
                 /**
 1848  
                  * Constructs a new <code>String</code> by decoding the specified array
 1849  
                  * of bytes using the given charset.
 1850  
                  * <p>
 1851  
                  * This method catches {@link UnsupportedEncodingException} and
 1852  
                  * re-throws it as {@link IllegalStateException}, which should never
 1853  
                  * happen for a required charset name. Use this method when the encoding
 1854  
                  * is required to be in the JRE.
 1855  
                  * </p>
 1856  
                  * 
 1857  
                  * @param bytes
 1858  
                  *            The bytes to be decoded into characters, may be
 1859  
                  *            {@code null}
 1860  
                  * @param charsetName
 1861  
                  *            The name of a required {@link java.nio.charset.Charset}
 1862  
                  * @return A new <code>String</code> decoded from the specified array of
 1863  
                  *         bytes using the given charset, or {@code null} if the input
 1864  
                  *         byte array was {@code null}.
 1865  
                  * @throws IllegalStateException
 1866  
                  *             Thrown when a {@link UnsupportedEncodingException} is
 1867  
                  *             caught, which should never happen for a required charset
 1868  
                  *             name.
 1869  
                  * @see CharEncoding
 1870  
                  * @see String#String(byte[], String)
 1871  
                  */
 1872  
                 public static String newString(byte[] bytes, String charsetName) {
 1873  0
                         if (bytes == null) {
 1874  0
                                 return null;
 1875  
                         }
 1876  
                         try {
 1877  0
                                 return new String(bytes, charsetName);
 1878  0
                         } catch (UnsupportedEncodingException e) {
 1879  0
                                 throw StringUtils.newIllegalStateException(charsetName, e);
 1880  
                         }
 1881  
                 }
 1882  
 
 1883  
                 /**
 1884  
                  * Constructs a new <code>String</code> by decoding the specified array
 1885  
                  * of bytes using the ISO-8859-1 charset.
 1886  
                  * 
 1887  
                  * @param bytes
 1888  
                  *            The bytes to be decoded into characters, may be
 1889  
                  *            {@code null}
 1890  
                  * @return A new <code>String</code> decoded from the specified array of
 1891  
                  *         bytes using the ISO-8859-1 charset, or {@code null} if the
 1892  
                  *         input byte array was {@code null}.
 1893  
                  * @throws NullPointerException
 1894  
                  *             Thrown if {@link Charsets#ISO_8859_1} is not initialized,
 1895  
                  *             which should never happen since it is required by the
 1896  
                  *             Java platform specification.
 1897  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1898  
                  *        UnsupportedEncodingException
 1899  
                  */
 1900  
                 public static String newStringIso8859_1(byte[] bytes) {
 1901  0
                         return new String(bytes, Charsets.ISO_8859_1);
 1902  
                 }
 1903  
 
 1904  
                 /**
 1905  
                  * Constructs a new <code>String</code> by decoding the specified array
 1906  
                  * of bytes using the US-ASCII charset.
 1907  
                  * 
 1908  
                  * @param bytes
 1909  
                  *            The bytes to be decoded into characters
 1910  
                  * @return A new <code>String</code> decoded from the specified array of
 1911  
                  *         bytes using the US-ASCII charset, or {@code null} if the
 1912  
                  *         input byte array was {@code null}.
 1913  
                  * @throws NullPointerException
 1914  
                  *             Thrown if {@link Charsets#US_ASCII} is not initialized,
 1915  
                  *             which should never happen since it is required by the
 1916  
                  *             Java platform specification.
 1917  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1918  
                  *        UnsupportedEncodingException
 1919  
                  */
 1920  
                 public static String newStringUsAscii(byte[] bytes) {
 1921  0
                         return new String(bytes, Charsets.US_ASCII);
 1922  
                 }
 1923  
 
 1924  
                 /**
 1925  
                  * Constructs a new <code>String</code> by decoding the specified array
 1926  
                  * of bytes using the UTF-16 charset.
 1927  
                  * 
 1928  
                  * @param bytes
 1929  
                  *            The bytes to be decoded into characters
 1930  
                  * @return A new <code>String</code> decoded from the specified array of
 1931  
                  *         bytes using the UTF-16 charset or {@code null} if the input
 1932  
                  *         byte array was {@code null}.
 1933  
                  * @throws NullPointerException
 1934  
                  *             Thrown if {@link Charsets#UTF_16} is not initialized,
 1935  
                  *             which should never happen since it is required by the
 1936  
                  *             Java platform specification.
 1937  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1938  
                  *        UnsupportedEncodingException
 1939  
                  */
 1940  
                 public static String newStringUtf16(byte[] bytes) {
 1941  0
                         return new String(bytes, Charsets.UTF_16);
 1942  
                 }
 1943  
 
 1944  
                 /**
 1945  
                  * Constructs a new <code>String</code> by decoding the specified array
 1946  
                  * of bytes using the UTF-16BE charset.
 1947  
                  * 
 1948  
                  * @param bytes
 1949  
                  *            The bytes to be decoded into characters
 1950  
                  * @return A new <code>String</code> decoded from the specified array of
 1951  
                  *         bytes using the UTF-16BE charset, or {@code null} if the
 1952  
                  *         input byte array was {@code null}.
 1953  
                  * @throws NullPointerException
 1954  
                  *             Thrown if {@link Charsets#UTF_16BE} is not initialized,
 1955  
                  *             which should never happen since it is required by the
 1956  
                  *             Java platform specification.
 1957  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1958  
                  *        UnsupportedEncodingException
 1959  
                  */
 1960  
                 public static String newStringUtf16Be(byte[] bytes) {
 1961  0
                         return new String(bytes, Charsets.UTF_16BE);
 1962  
                 }
 1963  
 
 1964  
                 /**
 1965  
                  * Constructs a new <code>String</code> by decoding the specified array
 1966  
                  * of bytes using the UTF-16LE charset.
 1967  
                  * 
 1968  
                  * @param bytes
 1969  
                  *            The bytes to be decoded into characters
 1970  
                  * @return A new <code>String</code> decoded from the specified array of
 1971  
                  *         bytes using the UTF-16LE charset, or {@code null} if the
 1972  
                  *         input byte array was {@code null}.
 1973  
                  * @throws NullPointerException
 1974  
                  *             Thrown if {@link Charsets#UTF_16LE} is not initialized,
 1975  
                  *             which should never happen since it is required by the
 1976  
                  *             Java platform specification.
 1977  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1978  
                  *        UnsupportedEncodingException
 1979  
                  */
 1980  
                 public static String newStringUtf16Le(byte[] bytes) {
 1981  0
                         return new String(bytes, Charsets.UTF_16LE);
 1982  
                 }
 1983  
 
 1984  
                 /**
 1985  
                  * Constructs a new <code>String</code> by decoding the specified array
 1986  
                  * of bytes using the UTF-8 charset.
 1987  
                  * 
 1988  
                  * @param bytes
 1989  
                  *            The bytes to be decoded into characters
 1990  
                  * @return A new <code>String</code> decoded from the specified array of
 1991  
                  *         bytes using the UTF-8 charset, or {@code null} if the input
 1992  
                  *         byte array was {@code null}.
 1993  
                  * @throws NullPointerException
 1994  
                  *             Thrown if {@link Charsets#UTF_8} is not initialized,
 1995  
                  *             which should never happen since it is required by the
 1996  
                  *             Java platform specification.
 1997  
                  * @since As of 1.7, throws {@link NullPointerException} instead of
 1998  
                  *        UnsupportedEncodingException
 1999  
                  */
 2000  
                 public static String newStringUtf8(byte[] bytes) {
 2001  230
                         return newString(bytes, Charsets.UTF_8);
 2002  
                 }
 2003  
 
 2004  
         }
 2005  
 
 2006  
 }