001package com.randomnoun.common;
002
003/* (c) 2013 randomnoun. All Rights Reserved. This work is licensed under a
004 * BSD Simplified License. (http://www.randomnoun.com/bsd-simplified.html)
005 */
006import java.io.ByteArrayOutputStream;
007import java.io.File;
008import java.io.FileInputStream;
009import java.io.IOException;
010import java.io.Reader;
011import java.io.UnsupportedEncodingException;
012import java.security.MessageDigest;
013import java.security.NoSuchAlgorithmException;
014import java.text.Collator;
015import java.text.ParseException;
016import java.text.SimpleDateFormat;
017import java.util.ArrayList;
018import java.util.BitSet;
019import java.util.Comparator;
020import java.util.Date;
021import java.util.Iterator;
022import java.util.List;
023import java.util.Map;
024import java.util.regex.Matcher;
025import java.util.regex.Pattern;
026
027/** Text utility functions
028 *
029 * @author knoxg
030 */
031public class Text {
032    
033    /** Used to prevent massive debug dumps. See {@link #getDisplayString(String, String)} */
034    private static final int MAX_STRING_OUTPUT_CHARS = 300;
035
036        /** Left-justification constant for use in the {@link #pad(String, int, int)} method */
037        public static final int JUSTIFICATION_LEFT = 0;
038        
039        /** Center-justification constant for use in the {@link #pad(String, int, int)} method */
040        public static final int JUSTIFICATION_CENTER = 1;
041
042        /** Right-justification constant for use in the {@link #pad(String, int, int)} method */
043        public static final int JUSTIFICATION_RIGHT = 2;
044        /** Case-insensitive pattern matching the text <code>&lt;/script</code>; the <code>/script</code> part is captured as group 1 */
045        public static Pattern scriptPattern = Pattern.compile("<(/script)", Pattern.CASE_INSENSITIVE);
046
047    /** Returns true if the supplied string is null or the empty string, false otherwise
048     *
049     * @param text The string to test
050     * @return true if the supplied string is null or the empty string, false otherwise
051     */
052    public static boolean isBlank(String text) {
053        return (text == null || text.equals(""));
054    }
055
056    /** Returns true if the supplied string is non-null and only contains numeric characters
057     *
058     * @param text The string to test
059     * @return true if the supplied string is non-null and only contains numeric characters
060     */
061    public static boolean isNumeric(String text) {
062        if (text == null) {
063            return false;
064        }
065        char ch;
066        for (int i = 0; i < text.length(); i++) {
067            ch = text.charAt(i);
068            if (ch < '0' || ch > '9') {
069                return false;
070            }
071        }
072        return true;
073    }
074
075    /** Returns true if the supplied string is non-null and only contains numeric characters
076    * or a single decimal point. The value can have a leading negative ('-') symbol.
077    * 
078    * @param text The string to test
079    * @return true if the supplied string is non-null and only contains numeric characters,
080    *   which may contain a '.' character in there somewhere.
081    */
082   public static boolean isNumericDecimal(String text) {
083       if (text == null) {
084           return false;
085       }
086       boolean seenPoint = false; // existential quandary there for you
087       char ch;
088       int len = text.length();
089       for (int i = 0; i < len; i++) {
090           ch = text.charAt(i);
091           if (ch=='.') {
092                   if (seenPoint) { return false; }
093                   seenPoint = true;
094           } else if (ch == '-' && i == 0) {
095                   // leading negative symbol OK
096                   if (len == 1) {
097                           // but not if it's the only character in the string
098                           return false;
099                   }
100           } else if (ch < '0' || ch > '9') {
101               return false;
102           }
103       }
104       return true;
105   }
106
107     /** Returns true if the supplied string is non-null and only contains numeric characters
108     * or a single decimal point. The value can have a leading negative ('-') symbol.
109     * 
110     * This version allows exponents ("E+nn" or "E-nn") to the end of the value.
111     * 
112     * @param text The string to test
113     * @return true if the supplied string is non-null and only contains numeric characters,
114     *  which may contain a '.' character in there somewhere.
115     */
116    public static boolean isNumericDecimalExp(String text) {
117        if (text == null) {
118          return false;
119        }
120            boolean seenPoint = false; // existential quandary there for you
121            int expPos = -1;           // position of the 'E' character
122            char ch;
123            for (int i = 0; i < text.length(); i++) {
124                ch = text.charAt(i);
125                if (ch=='E') {
126                        if (expPos != -1) { return false; }
127                        expPos = i;
128                } else if (ch=='.' && expPos == -1) {
129                if (seenPoint) { return false; }
130                seenPoint = true;
131                } else if ((ch == '+' || ch == '-') && i == expPos + 1) {
132                        // + or - directly after 'E' OK
133            } else if (ch == '-' && i == 0) {
134                // leading negative symbol OK
135            } else if (ch < '0' || ch > '9') {
136                return false;
137            }
138            }
139            return true;
140    }
141
142
143    /** Ensures that a string returned from a browser (on any platform) conforms
144     * to unix line-EOF conventions. Any instances of consecutive CRs (<code>0xD</code>) 
145     * and LFs (<code>0xA</code>) in a string will be reduced to a series of CRs (the number of CRs will be the
146     * maximum number of CRs or LFs found in a row).  
147     * 
148     * @param input the input string
149     * 
150     * @return the canonicalised string, as described above
151     */
152    public static String reduceNewlines(String input) {
153        StringBuilder sb = new StringBuilder();
154        int len = input.length();
155        int crCount = 0;
156                int lfCount = 0;
157                boolean insertNewline = false;
158                char ch;
159        for (int i=0; i<len; i++) {
160                ch = input.charAt(i);
161                if (ch == (char) 0xA) {
162                        lfCount ++; insertNewline = true;
163                } else if (ch == (char) 0xD) {
164                        crCount ++; insertNewline = true;
165                } else if (insertNewline) {
166                                for (int j=0; j<Math.max(lfCount, crCount); j++) {
167                                        sb.append((char) 0xA);
168                                }
169                                insertNewline = false; lfCount=0; crCount=0;
170                                sb.append(ch);
171                } else {
172                        sb.append(ch);
173                }
174        }
175        if (insertNewline) {
176                        for (int j=0; j<Math.max(lfCount, crCount); j++) {
177                                sb.append((char) 0xA);
178                        }
179        }
180        
181        return sb.toString();
182    }
183
184
185    /**
186     * Returns the HTML-escaped form of a string. The <code>&amp;</code>,
187     * <code>&lt;</code>, <code>&gt;</code>, and <code>"</code> characters are converted to
188     * <code>&amp;amp;</code>, <code>&amp;lt;</code>, <code>&amp;gt;</code>, and
189     * <code>&amp;quot;</code> respectively.
190     * 
191     * <p>Characters in the unicode control code blocks ( apart from \t, \n and \r ) are converted to &amp;xfffd;
192     * <p>Characters outside of the ASCII printable range are converted into &amp;xnnnn; form
193     *
194     * @param string the string to convert
195     *
196     * @return the HTML-escaped form of the string
197     */
198    static public String escapeHtml(String string) {
199        if (string == null) {
200            return "";
201        }
202        char c;
203        String hex;
204        StringBuilder sb = new StringBuilder(string.length());
205        for (int i = 0; i < string.length(); i++) {
206            c = string.charAt(i);
207            // check for illegal characters
208            switch (c) {
209                case '&':
210                    sb.append("&amp;");
211                    break;
212                case '<':
213                    sb.append("&lt;");
214                    break;
215                case '>':
216                    sb.append("&gt;");
217                    break;
218                case '\"':
219                    // interestingly, &quote; (with the e) works fine for HTML display,
220                    // but not inside hidden field values
221                    sb.append("&quot;");
222                    break;
223                default:
224                        // 'illegal characters' according to ESAPI. 7f to 9f are control characters in unicode 
225                        if ( ( c <= 0x1f && c != '\t' && c != '\n' && c != '\r' ) || ( c >= 0x7f && c <= 0x9f ) ) {
226                                sb.append("&#xfffd;"); // REPLACEMENT_HEX in ESAPI's HtmlEntityCodec
227                        } else if ( c > 0x1f && c <= 0x7f ) {
228                                // safe printable
229                                sb.append(c);
230                        } else {
231                                // ESAPI didn't have the else block above, which was causing it escape everything 
232                                hex = getHexForNonAlphanumeric(c);
233                                sb.append("&#x" + hex + ";");
234                        }
235                        
236            }
237        }
238
239        return sb.toString();
240    }
241
242    /**
243     * Returns a regex-escaped form of a string. That is, the pattern 
244     * returned by this method, if compiled into a regex, will match
245     * the supplied string exactly. 
246     *
247     * @param string the string to convert
248     *
249     * @return the HTML-escaped form of the string
250     */
251    static public String escapeRegex(String string) {
252        if (string == null) {
253            return "";
254        }
255
256        char c;
257        StringBuilder sb = new StringBuilder(string.length());
258
259        for (int i = 0; i < string.length(); i++) {
260            c = string.charAt(i);
261
262            switch (c) {
263                case '.':
264                case '+': // intentional fall-through
265                case '?': // intentional fall-through
266                case '\\': // intentional fall-through
267                case '{': // intentional fall-through
268                case '}': // intentional fall-through
269                case '[': // intentional fall-through
270                case ']': // intentional fall-through
271                case '^': // intentional fall-through
272                case '$': // intentional fall-through
273                case '(': // intentional fall-through
274                case '|': // intentional fall-through
275                case ')': // intentional fall-through
276                        sb.append("\\");
277                    sb.append(c);
278                    break;
279                default:
280                    sb.append(c);
281            }
282        }
283
284        return sb.toString();
285    }
286    
287    
288    /**
289     * Returns the csv-escaped form of a string. A csv-escaped string is
290     * used when writing to a CSV (comma-separated-value) file. It ensures
291     * that commas included within a string are quoted. We use the Microsoft-Excel
292     * quoting rules, so that our CSV files can be imported into that. These rules
293     * (derived from experimentation) are:
294     *
295     * <ul>
296     * <li>Strings without commas (,) inverted commas ("), or newlines (\n) are returned as-is.
297     * <li>Otherwise, the string is surrounded by inverted commas, and any
298     *   inverted commas within the string are doubled-up (i.e. '"' becomes '""').
299     * <li>A value that starts with any of "=", "@", "+" or "-" has a leading single apostrophe added
300     *   to prevent the value being evaluated in Excel. The leading quote is visible to the user when the
301     *   csv is opened, which may mean that it will have to be removed when roundtripping data.
302     *   This may complicate things if the user actually wants a leading single quote in their CSV value.   
303     * </ul>
304     *
305     * <p>Embedded newlines are inserted as-is, as per Excel. This will require
306     * some care whilst parsing if we want to be able to read these files.
307     *
308     * @param string the string to convert
309     *
310     * @return the csv-escaped form of the string
311     */
312    static public String escapeCsv(String string) {
313        if (string == null) {
314            return "";
315        }
316
317        boolean quoted = false;
318        // from https://www.contextis.com/en/blog/comma-separated-vulnerabilities
319        // prefix cells that start with ‘=’ , '@', '+' or '-' with an apostrophe 
320        // This will ensure that the cell isn’t interpreted as a formula, and as a bonus in Microsoft Excel the apostrophe itself will not be displayed.
321        if (string.startsWith("=") || 
322          string.startsWith("@")) {
323            // prefix the string with an a single quote char to escape it
324            string = "'" + string;
325            quoted = true; // not sure need to quote here, but doesn't hurt
326        } else if ((string.startsWith("+") || string.startsWith("-")) && 
327                (string.length() == 1 || !Text.isNumericDecimalExp(string))) {
328                // numbers can legitimately start with '+' or '-' but anything else should be escaped
329                string = "'" + string;
330            quoted = true; 
331        }
332
333        
334        if (string.indexOf(',') == -1 && string.indexOf('"') == -1 && string.indexOf('\n') == -1 && !quoted) {
335                return string;
336        }
337        string = Text.replaceString(string, "\"", "\"\"");
338        string = "\"" + string + "\"";
339
340        return string;
341    }
342
343    /** Given a csv-encoded string (as produced by the rules in {@link #escapeCsv(String)},
344     *  produces a List of Strings which represent the individual values in the string.
345     *  Note that this method is *not* equivalent to calling <code>Arrays.asList(astring.split(","))</code>.
346     *
347     * <p>Setting the whitespaceSensitive parameter to false allows leading and trailing
348     * whitespace in *non-quoted* values to be removed, e.g. if the input string <code>text</code> is:
349     *
350     * <pre class="code">
351     * abc,def,  ghi, j k ,"lmn"," op "," q,r","""hello""", "another"
352     * </pre>
353     *
354     * then <code>parseCsv(text, <b>false</b>)</code> will return the strings:
355     * <pre class="code">
356     * abc
357     * def
358     * ghi
359     * j k
360     * lmn
361     *  op        <i>(this String has one leading space, and a trailing space after 'p')</i>
362     *  q,r       <i>(this String has one leading space)</i>
363     * "hello"
364     * another
365     * </pre>
366     *
367     * and <code>parseCsv(text, <b>true</b>)</code> would throw a ParseException (since the
368     * final element is a quoted value, but begins with a space).
369     *
370     * If the <code>, "another"</code> text is removed, however, then
371     * <code>parseCsv(text, true)</code> would return the following:
372     *
373     * and <code>parseCsv(text, true)</code> will return the string
374     * <pre>
375     * abc
376     * def
377     *   ghi      <i>(this String has two leading spaces)</i>
378     *  j k       <i>(this String has one leading space and a trailing space after the 'k' character)</i>
379     * lmn
380     *  op        <i>(this String has one leading space, and a trailing space after 'p')</i>
381     *  q,r       <i>(this String has one leading space)</i>
382     * "hello"
383     * </pre>
384     *
385     * <p>Most applications would want to use the 'whiteSpaceSensitive=false' form of this function, since
386     * (a) less chance of a ParseException, and (b) it's what an end-user would normally
387     * expect. This can be performed by calling the {@link #parseCsv(String)} method.
388     *
389     * <p>Whitespace is determined by using the <code>Character.isSpaceChar()</code> method,
390     * which is Unicode-aware.
391     *
392     * @param text   The CSV-encoded string to parse
393     * @param whitespaceSensitive   If set to true, will trim leading and trailing whitespace in *non-quoted* values.
394     *
395     * @return a List of Strings. The returned List is guaranteed to always contain at least one element.
396     *
397     * @throws NullPointerException if the text passed to this method is null
398     * @throws ParseException if a quoted value contains leading whitespace before the
399     *  opening quote, or after the trailing quote.
400     * @throws ParseException if a quoted value has a start quote, but no end quote, or
401     *   if a value has additional text after a quoted value (before the next comma or EOL).
402     */
403    static public List<String> parseCsv(String text, boolean whitespaceSensitive)
404        throws ParseException {
405        if (text == null) {
406            throw new NullPointerException("null text");
407        }
408
409        // parse state: 
410        //   0=searching for new value (at start of line or after comma) 
411        //   1=consuming non-quoted values
412        //   2=consuming quoted value
413        //   3=consumed first quote within a quoted value (may be termining quote or a "" sequence)
414        //   4=consuming whitespace up to next comma/EOL (after quoted value, not whitespaceSensitive)
415        int parseState = 0;
416        int length = text.length();
417        String element;
418        List<String> result = new ArrayList<String>();
419        char ch;
420        StringBuilder buffer = new StringBuilder();
421
422        for (int pos = 0; pos < length; pos++) {
423            ch = text.charAt(pos);
424
425            // System.out.println("pos " + pos + ", state=" + parseState + ", nextchar=" + ch + ", buf=" + buffer);
426            switch (parseState) {
427                case 0:
428                    if (Character.isSpaceChar(ch)) {
429                        if (whitespaceSensitive) {
430                            buffer.append(ch);
431                            parseState = 1;
432                        } else {
433                            // ignore
434                        }
435                    } else if (ch == '"') {
436                        parseState = 2;
437                    } else if (ch == ',') {
438                        result.add(""); // add an empty element; state remains unchanged
439                    } else {
440                        buffer.append(ch);
441                        parseState = 1;
442                    }
443                    break;
444                case 1:
445                    if (ch == ',') {
446                        element = buffer.toString();
447                        if (!whitespaceSensitive) {
448                            element = element.trim();
449                        }
450                        result.add(element);
451                        buffer.setLength(0);
452                        parseState = 0;
453                    } else {
454                        buffer.append(ch);
455                    }
456                    break;
457                case 2:
458                    if (ch == '"') {
459                        parseState = 3;
460                    } else {
461                        buffer.append(ch);
462                    }
463                    break;
464                case 3:
465                    if (ch == '"') {
466                        buffer.append('"');
467                        parseState = 2;
468                    } else if (ch == ',') {
469                        result.add(buffer.toString());
470                        buffer.setLength(0);
471                        parseState = 0;
472                    } else if (Character.isSpaceChar(ch)) {
473                        if (whitespaceSensitive) {
474                            throw new ParseException("Cannot have trailing whitespace after close quote character", pos);
475                        }
476                        parseState = 4;
477                    } else {
478                        throw new ParseException("Cannot have trailing data after close quote character", pos);
479                    }
480                    break;
481                case 4:
482                    if (Character.isSpaceChar(ch)) {
483                        // consume and ignore
484                    } else if (ch == ',') {
485                        result.add(buffer.toString());
486                        buffer.setLength(0);
487                        parseState = 0;
488                    } else {
489                        throw new ParseException("Cannot have trailing data after close quote character", pos);
490                    }
491                    break;
492                    
493                default:
494                    throw new IllegalStateException("Illegal state '" + parseState + "' in parseCsv");
495            }
496        }
497
498        // if state is 2, we are in the middle of a quoted value
499        if (parseState == 2) {
500            throw new ParseException("Missing endquote in csv text", length);
501        }
502
503        // otherwise we still need to add what's left in the buffer into the result list
504        element = buffer.toString();
505        if (parseState == 1 && !whitespaceSensitive) {
506            element = element.trim();
507        }
508        result.add(element);
509        return result;
510    }
511    
512    @FunctionalInterface
513    public interface CsvLineReader { // doesn't extend Supplier<T> as it throws exceptions
514        /** Returns the next logical line in the CSV ( quoted values can contain newlines )  
515         * 
516         * @return
517         * @throws ParseException
518         * @throws IOException
519         */
520        List<String> readLine() throws ParseException, IOException;
521    }
522    
523    // same as parseCsv(String, whitespaceSensitive) but can handle newlines in quotes by supplying a Reader
524    // the returned object will return a List<String> or null if EOF is reached
525    // ParseExceptions are wrapped in something, probably
526    static public CsvLineReader parseCsv(Reader r, boolean whitespaceSensitive) {
527        if (r == null) {
528            throw new NullPointerException("null reader");
529        }
530        return new CsvLineReader() {
531                // eof if we actually read eof or encouner a parse exception ( cannot recover )
532                boolean isAtStart = true; // for backwards compatibility with Text.parseCsv(""), first readLine() is never null
533                boolean isEOF = false;
534                        @Override
535                        public List<String> readLine() throws ParseException, IOException {
536                                if (isEOF) { return null; }
537                                
538                                // parse state: 
539                        //   0=searching for new value (at start of line or after comma) 
540                        //   1=consuming non-quoted values
541                        //   2=consuming quoted value
542                        //   3=consumed first quote within a quoted value (may be termining quote or a "" sequence)
543                        //   4=consuming whitespace up to next comma/EOL (after quoted value, not whitespaceSensitive)
544                        int parseState = 0;
545                        // int length = text.length();
546                        String element;
547                        List<String> result = new ArrayList<String>();
548                        char ch;
549                        StringBuilder buffer = new StringBuilder();
550                        int intChar = r.read();
551                        int pos = 1;
552                        if (intChar == -1 && !isAtStart) {
553                                isEOF = true;
554                                return null;
555                        }
556
557                        // @TODO better CRLF handling
558                        isAtStart = false;
559                        while (intChar != -1) {
560                            ch = (char) intChar;
561
562                            // System.out.println("pos " + pos + ", state=" + parseState + ", nextchar=" + ch + ", buf=" + buffer);
563                            switch (parseState) {
564                                case 0:
565                                        if (ch == '\n') {
566                                                // return result so far
567                                                element = buffer.toString();
568                                                result.add(buffer.toString());
569                                                return result;
570                                        } else if (Character.isSpaceChar(ch)) {
571                                        if (whitespaceSensitive) {
572                                            buffer.append(ch);
573                                            parseState = 1;
574                                        } else {
575                                            // ignore
576                                        }
577                                    } else if (ch == '"') {
578                                        parseState = 2;
579                                    } else if (ch == ',') {
580                                        result.add(""); // add an empty element; state remains unchanged
581                                    } else {
582                                        buffer.append(ch);
583                                        parseState = 1;
584                                    }
585                                    break;
586                                case 1:
587                                        if (ch == '\n') {
588                                                // return result so far
589                                                element = buffer.toString();
590                                                if (!whitespaceSensitive) {
591                                                    element = element.trim();
592                                                }
593                                                result.add(buffer.toString());
594                                                return result;
595                                        } else if (ch == ',') {
596                                        element = buffer.toString();
597                                        if (!whitespaceSensitive) {
598                                            element = element.trim();
599                                        }
600                                        result.add(element);
601                                        buffer.setLength(0);
602                                        parseState = 0;
603                                    } else {
604                                        buffer.append(ch);
605                                    }
606                                    break;
607                                case 2:
608                                    if (ch == '"') {
609                                        parseState = 3;
610                                    } else {
611                                        buffer.append(ch);
612                                    }
613                                    break;
614                                case 3:
615                                        if (ch == '\n') {
616                                        result.add(buffer.toString());
617                                        buffer.setLength(0);
618                                        parseState = 0;
619                                        return result;
620                                        } else if (ch == '"') {
621                                        buffer.append('"');
622                                        parseState = 2;
623                                    } else if (ch == ',') {
624                                        result.add(buffer.toString());
625                                        buffer.setLength(0);
626                                        parseState = 0;
627                                    } else if (Character.isSpaceChar(ch)) {
628                                        if (whitespaceSensitive) {
629                                                isEOF = true;
630                                            throw new ParseException("Cannot have trailing whitespace after close quote character", pos);
631                                        }
632                                        parseState = 4;
633                                    } else {
634                                        isEOF = true;
635                                        throw new ParseException("Cannot have trailing data after close quote character", pos);
636                                    }
637                                    break;
638                                case 4:
639                                        if (ch == '\n') {
640                                                // return result so far
641                                                result.add(buffer.toString());
642                                                return result;
643                                        } else if (Character.isSpaceChar(ch)) {
644                                        // consume and ignore
645                                    } else if (ch == ',') {
646                                        result.add(buffer.toString());
647                                        buffer.setLength(0);
648                                        parseState = 0;
649                                    } else {
650                                        isEOF = true;
651                                        throw new ParseException("Cannot have trailing data after close quote character", pos);
652                                    }
653                                    break;
654                                    
655                                default:
656                                    throw new IllegalStateException("Illegal state '" + parseState + "' in parseCsv");
657                            }
658                            
659                                intChar = r.read();
660                                pos++;
661                        }
662                        isEOF = true;
663
664                        // if state is 2, we are in the middle of a quoted value
665                        if (parseState == 2) {
666                            throw new ParseException("Missing endquote in csv text", pos);
667                        }
668
669                        // otherwise we still need to add what's left in the buffer into the result list
670                        element = buffer.toString();
671                        if (parseState == 1 && !whitespaceSensitive) {
672                            element = element.trim();
673                        }
674                        result.add(element);
675                        return result;
676                        }
677        };
678    }
679
    /**
     * Parses a CSV-encoded string using whitespace-insensitive parsing.
     * Equivalent to <code>parseCsv(text, false);</code>; refer to the documentation
     * for that method for more details.
     *
     * @see #parseCsv(String, boolean)
     *
     * @param text the CSV-encoded string to parse
     * 
     * @return a List of Strings. The returned List is guaranteed to always contain at least one element.
     *
     * @throws NullPointerException if the text passed to this method is null.
     * @throws ParseException see {@link #parseCsv(String, boolean)} for details.
     */
    static public List<String> parseCsv(String text)
        throws ParseException {
        return Text.parseCsv(text, false);
    }
697
    /** Returns a java-escaped string. Replaces each '"' with '\"'.
     *
     * <p>Only the double-quote character is escaped; backslashes, newlines and
     * non-ASCII characters are passed through unchanged. Since this is predominantly
     * used in the query builder, unicode sequences (SWIFT is ASCII) and newlines are
     * not handled (although this may be necessary later for multiline textboxes).
     *
     * @param string the string to escape; a null value is returned as null by
     *   {@link #replaceString(String, String, String)}
     *
     * @return The java-escaped version of the string
     */
    public static String escapeJava(String string) {
        return Text.replaceString(string, "\"", "\\\"");
    }
709
710    /** Returns a javascript string. The characters <code>'</code>,
711     * <code>"</code> and <code>\</code> are converted into their Unicode equivalents,
712     *
713     * <p>Non-printable characters are converted into unicode equivalents
714     **
715     * <p>Newlines are now replaced with "\n" 
716     *
717     * @return The java-escaped version of the string
718     */
719    public static String escapeJavascript(String string) {
720        // backslashes are always escaped
721        //string = Text.replaceString(string, "\\", "\\u005C");
722        //string = Text.replaceString(string, "\"", "\\u0022");
723        //string = Text.replaceString(string, "'", "\\u0027");
724                //string = Text.replaceString(string, "\n", "\\n");
725        StringBuilder sb = new StringBuilder(string.length());
726                for (int i = 0; i<string.length(); i++) {
727                        char ch = string.charAt(i);
728                        if (ch=='\n') {
729                           sb.append("\\n");    
730                        } else if (ch=='\\' || ch=='"' || ch=='\'' || ch<32 || ch>126) {
731                                String hex = Integer.toString(ch, 16);
732                                sb.append("\\u" + "0000".substring(0, 4-hex.length()) + hex);
733                        } else {
734                                sb.append(ch);
735                        }
736                }
737                return scriptPattern.matcher(sb.toString()).replaceAll("\\\\u003C$1");
738        // return sb.toString();
739    }
740
741
742    /** Returns a javascript string. The characters <code>'</code>,
743     * <code>"</code> and <code>\</code> are converted into their Unicode equivalents,
744     *
745     * <p>Non-printable characters are converted into unicode equivalents
746     *
747     * @deprecated use {@link #escapeJavascript(String)} instead
748     * 
749     * @return The java-escaped version of the string
750     */
751    public static String escapeJavascript2(String string) {
752        // this method only exists for backwards-compatability
753        string = reduceNewlines(string);  // canonicalise CRLFs
754        return escapeJavascript(string);
755    }
756
757    
758    /** Unescapes a java-escaped string. Replaces '\"' with '"',
759     * '\\u0022' with '"', '\\u0027' with ''', '\\u005C' with '\'.
760     *
761     * <p>Since this is predominantly used in the query builder, I am not worrying about
762     * unicode sequences (SWIFT is ASCII) or newlines (although this may be necessary later)
763     * for multiline textboxes
764     *
765     * @return The java-escaped version of the string
766     */
767    public static String unescapeJava(String string) {
768        string = Text.replaceString(string, "\\\"", "\"");
769        string = Text.replaceString(string, "\\u0022", "\"");
770        string = Text.replaceString(string, "\\u0027", "'");
771        string = Text.replaceString(string, "\\u005C", "\\");
772        return string;
773    }
774
775    /** Returns a python string, escaped so that it can be enclosed in a single-quoted string. 
776     * 
777     * <p>The characters <code>'</code>,
778     * <code>"</code> and <code>\</code> are converted into their Unicode equivalents,
779     *
780     * <p>Non-printable characters are converted into unicode equivalents
781     *
782     * @return The python-escaped version of the string
783     */
784    public static String escapePython(String string) {
785        // pretty much the same as Text.escapeJavascript2(), without the reduceNewLines, which probably shouldn't be there anyway
786        string = Text.replaceString(string, "\\", "\\u005C");
787        string = Text.replaceString(string, "\"", "\\u0022");
788        string = Text.replaceString(string, "'", "\\u0027");
789                string = Text.replaceString(string, "\n", "\\n");
790                StringBuilder sb = new StringBuilder(string.length());
791                for (int i = 0; i<string.length(); i++) {
792                        char ch = string.charAt(i);
793                        if (ch>=32 && ch<=126) {
794                                sb.append(ch);
795                        } else {
796                                String hex = Integer.toString(ch, 16);
797                                sb.append("\\u" + "0000".substring(0, 4-hex.length()) + hex);
798                        }
799                }
800        return sb.toString();
801        // return string;
802    }
803    
804    /** Escape a filename or path component. 
805     * Characters that typically have special meanings in paths (":", "/", "\") are escaped with a preceding "\" character.
806     * 
807     * Does not escape glob characters ( "*" or "?" ). 
808     * Do not use this method to escape a full file path; when escaping a file path, escape each path component separately and then join 
809     * the components with "/" characters ( see {@link #createEscapedPath(String[])} ). 
810     * 
811     * @param string the filename or path component to escape
812     * 
813     * @return the escaped form of the filename (or path component)
814     */
815    // Does not escape DOS special filenames ( "NUL", "CON", "LPT1" etc ). Remember those ? Of course you do.
816    public static String escapePathComponent(String string) {
817        string = Text.replaceString(string, "\\", "\\\\");
818        string = Text.replaceString(string, "/", "\\/");
819        string = Text.replaceString(string, ":", "\\:");
820        return string;
821    }
822    
823    /** Unescape a filename or path component. 
824     * The escape sequences "\\" , "\:" and "\/" are converted to "\", ":" and "/" respectively.
825     * All other escape sequences will raise an IllegalArgumentException 
826     *  
827     * <p>See {@link #splitEscapedPath(String)} to split an escaped path into components. 
828     *  
829     * @param pathComponent the filename or path component to unescape
830     * 
831     * @return the unescaped form of the filename or path component
832     * 
833     * @throws IllegalArgumentException if an unexpected escape is encountered, or the escape is unclosed
834     */
835    public static String unescapePathComponent(String pathComponent) {
836        if (pathComponent == null) {
837            return null;
838        }
839        char c;
840        boolean inEscape = false;
841        StringBuilder sb = new StringBuilder(pathComponent.length());
842        for (int i = 0; i < pathComponent.length(); i++) {
843            c = pathComponent.charAt(i);
844            if (inEscape) {
845                switch (c) {
846                        case '\\': 
847                    case '/': // intentional fall-through
848                    case ':': // intentional fall-through
849                        sb.append(c);
850                        break;
851                    default:
852                        throw new IllegalArgumentException("Unexpected escape '\\" + c + "' in filename");
853                }
854                inEscape = false;
855            } else {
856                switch (c) {
857                        case '\\': 
858                                inEscape = true;
859                                break;
860                        default:
861                                sb.append(c);
862                }
863            }
864        }
865        if (inEscape) {
866                throw new IllegalArgumentException("Unclosed escape in filename");
867        }
868        return sb.toString();
869    }
870
871    // need to escape the \ in a regex ( \\ ) in a String ( \\\\ )
872    private static Pattern splitPathPattern = Pattern.compile("(?<!\\\\)/"); 
873    
874        /** Split a path, but allow forward slashes in path components if they're escaped by a preceding '\' character.
875     * Individual path components returned by this method will be unescaped.
876     *
877     * <pre>
878     * splitPath(null) = NPE
879     * splitPath("") = [ "" ]
880     * splitPath("abc") = [ "abc" ]
881     * splitPath("abc/def/ghi") = [ "abc", "def", "ghi" ]
882     * splitPath("abc\\/def/ghi") = [ "abc/def", "ghi" ]
883     * </pre>
884     * 
885     * <p>Opposite of {@link #createEscapedPath(String[])}
886     */
887    public static String[] splitEscapedPath(String escapedPath) {
888        String[] result = splitPathPattern.split(escapedPath);
889        for (int i=0; i<result.length; i++) {
890                result[i] = Text.unescapePathComponent(result[i]);
891        }
892        return result;
893    }
894    
895    /** Escapes the components of a path String, returning an escaped full path String.
896     * Each path component is escaped with {@link #escapePathComponent(String)} and then joined using '/' characters.
897     * 
898     * <p>Opposite of {@link #splitEscapedPath(String)}.
899     * 
900     * @param pathComponents the filename components
901     * @return an escaped path
902     */
903    public static String createEscapedPath(String[] pathComponents) {
904        String result = null;
905        if (pathComponents.length == 0) { 
906                throw new IllegalArgumentException("empty pathComponents"); 
907        }
908        for (String c : pathComponents) {
909                if (c==null) { 
910                        throw new NullPointerException("null pathComponent"); 
911                }
912                if (result == null) {
913                        result = escapePathComponent(c);
914                } else {
915                        result = result + "/" + escapePathComponent(c); 
916                }
917        }
918        return result;
919    }
920    
921    // escapeCss from ESAPI 2.0.1
922    private static final String[] esapi_hex = new String[256];
923        static {
924                for ( char c = 0; c < 0xFF; c++ ) {
925                        if ( c >= 0x30 && c <= 0x39 || c >= 0x41 && c <= 0x5A || c >= 0x61 && c <= 0x7A ) {
926                                esapi_hex[c] = null;
927                        } else {
928                                esapi_hex[c] = toHex(c).intern();
929                        }
930                }
931        }
932        private static String toHex(char c) {
933                return Integer.toHexString(c);
934        }
935        private static String getHexForNonAlphanumeric(char c) {
936                if(c<0xFF) {return esapi_hex[c]; }
937                return toHex(c);
938        }
939    private static String encodeCssCharacter(Character c) {
940                String hex = getHexForNonAlphanumeric(c);
941                if ( hex == null ) { return "" + c; }
942        return "\\" + hex + " ";
943    }
944
945    /**
946     * Returns the CSS-escaped form of a string. 
947     * 
948     * <p>Characters outside of the printable ASCII range are converted to \nnnn form
949     *
950     * @param input the string to convert
951     *
952     * @return the HTML-escaped form of the string
953     */
954    public static String escapeCss(String input) {
955        if (input == null) { return ""; }
956        StringBuilder sb = new StringBuilder();
957                for (int i = 0; i < input.length(); i++) {
958                        char c = input.charAt(i);
959                        sb.append(encodeCssCharacter(c));
960                }
961                return sb.toString();           
962    }
963
964
965    
966    
    /** Returns the given string; but will truncate it to MAX_STRING_OUTPUT_CHARS
     *  (300 characters).
     *  If it exceeds this length, a message is appended expressing how many
     *  characters were truncated. Strings with the key of 'exception' are
     *  not truncated (in order to display full stack traces when these occur).
     *  Any keys that contain the text 'password', 'Password', 'credential' or
     *  'Credential' will be returned as eight asterisks.
     *
     * <p>This method is used in the debug JSP when dumping properties to the user,
     *  in order to prevent inordinately verbose output.
     *
     * <p>Delegates to {@link #getDisplayString(String, String, int)}.
     *
     *  @param key The key of the string we wish to display
     *  @param string The string value
     *  @return A (possibly truncated) version of this string
     */
    public static String getDisplayString(String key, String string) {
        return getDisplayString(key, string, MAX_STRING_OUTPUT_CHARS);
    }
984
985    /** Returns the given string; but will truncate it to MAX_STRING_OUTPUT_CHARS.
986     *  If it exceeds this length, a message is appended expressing how many
987     *  characters were truncated. Strings with the key of 'exception' are
988     *  not truncated (in order to display full stack traces when these occur).
989     *  Any keys that contain the text 'password', 'Password', 'credential' or
990     *  'Credential' will be returned as eight asterisks.
991     *
992     * <p>This method is used in the debug JSP when dumping properties to the user,
993     *  in order to prevent inordinately verbose output.
994     *
995     *  @param key The key of the string we wish to display
996     *  @param string The string value
997     *  @param maxChars The maximum number of characters to display
998     *  
999     *  @return A (possibly truncated) version of this string
1000     */
1001    public static String getDisplayString(String key, String string, int maxChars) {
1002        if (string == null) {
1003            string = "(null)";
1004        }
1005
1006        if ("exception".equals(key)) {
1007            return string;
1008        }
1009
1010        if (key.indexOf("password") >= 0 || key.indexOf("Password") >= 0 || key.indexOf("credential") >= 0 || key.indexOf("Credential") >= 0) {
1011            return "********";
1012        }
1013
1014        if (string.length() <= maxChars) {
1015            return string;
1016        } else {
1017            return string.substring(0, maxChars) + "... (" + (string.length() - maxChars) + " more characters truncated)";
1018        }
1019    }
1020
1021    /** Utility function to return a default if the supplied string is null.
1022     *  Shorthand for <code>(strText==null) ? strDefaultText : strText;</code>
1023     *
1024     * @return strText is strText is not null, otherwise strDefaultText
1025     */
1026    public static String strDefault(String strText, String strDefaultText) {
1027        return (strText == null) ? strDefaultText : strText;
1028    }
1029
    /** Return a string composed of a series of strings, separated with the specified delimiter.
     * Elements are not quoted. Delegates to 
     * {@link #joinWithLast(String[], boolean, String, String)}, using the same delimiter
     * between every pair of elements.
     *
     * @param elements The array of elements to join
     * @param delimiter The delimiter to join each string with
     *
     * @return the joined string; the empty string if elements is empty
     *
     * @throws NullPointerException if elements or delimiter is null
     */
    public static String join(String[] elements, String delimiter) {
        return joinWithLast(elements, false, delimiter, delimiter);
    }
1040
    /** Return a string composed of a series of strings, separated with the specified delimiter.
     * Elements are not quoted. Delegates to 
     * {@link #joinWithLast(Iterable, boolean, String, String)}, using the same delimiter
     * between every pair of elements.
     *
     * @param elements A Collection or Iterable of the elements to join
     * @param delimiter The delimiter to join each string with
     *
     * @return the joined string; the empty string if elements has no elements
     *
     * @throws NullPointerException if elements or delimiter is null
     */
    public static String join(Iterable<?> elements, String delimiter) {
        return joinWithLast(elements, false, delimiter, delimiter);
    }
1051    
1052    /** Return a string composed of a series of strings, separated with the specified delimiter.
1053    * Each element is contained in single quotes. The final delimeter can be set to a different
1054    * value, to produce text in the form <code>"'a', 'b' or 'c'"</code> or <code>"'a', 'b' and 'c'"</code>. 
1055    *
1056    * <p>There is no special handling of values containing quotes; see {@link #escapeCsv(String)} 
1057    *
1058    * @param elements The array of elements to join
1059    * @param isQuoted If true, each element is surrounded by single quotes
1060    * @param delimiter The delimiter to join each string with
1061    * @param lastDelimiter The delimiter to join the second-last and last elements
1062    *
1063    * @throws NullPointerException if elements or delimiter is null
1064    */
1065   public static String joinWithLast(String[] elements, boolean isQuoted, String delimiter, String lastDelimiter) {
1066           StringBuilder sb = new StringBuilder();
1067       if (elements == null) {
1068           throw new NullPointerException("null elements");
1069       }
1070       if (delimiter == null) {
1071           throw new NullPointerException("null delimiter");
1072       }
1073       if (lastDelimiter == null) {
1074           throw new NullPointerException("null lastDelimiter");
1075       }
1076       int len = elements.length;
1077       if (len == 0) {
1078           return "";
1079       }
1080
1081       for (int i = 0; i < len - 1; i++) {
1082           if (isQuoted) { sb.append("'"); }
1083           sb.append(elements[i]);
1084           if (isQuoted) { sb.append("'"); }
1085           if (i == len - 2) { sb.append(lastDelimiter); } else { sb.append(delimiter); }
1086       }
1087       if (isQuoted) { sb.append("'"); }
1088       sb.append(elements[len - 1]);
1089       if (isQuoted) { sb.append("'"); }
1090       return sb.toString();
1091   }
1092
1093   /** Return a string composed of a series of strings, separated with the specified delimiter
1094    *
1095    * <p>There is no special handling of values containing quotes; see {@link #escapeCsv(String)} 
1096    *
1097    * @param elements A Collection or Iterable containing the elements to join
1098    * @param isQuoted If true, each element is surrounded by single quotes
1099    * @param delimiter The delimiter to join each string with
1100    * @param lastDelimiter The delimiter to join the second-last and last elements
1101    *
1102    * @throws NullPointerException if elements or delimiter is null
1103    *
1104    * @see #join(String[], String)
1105    */
1106   public static String joinWithLast(Iterable<?> elements, boolean isQuoted, String delimiter, String lastDelimiter) {
1107        StringBuilder sb = new StringBuilder();
1108       if (elements == null) {
1109           throw new NullPointerException("null elements");
1110       }
1111       if (delimiter == null) {
1112           throw new NullPointerException("null delimiter");
1113       }
1114       if (lastDelimiter == null) {
1115           throw new NullPointerException("null lastDelimiter");
1116       }
1117       Iterator<?> i = elements.iterator();
1118       if (!i.hasNext()) { return ""; } 
1119       
1120       Object thisEl = i.next();
1121       while (i.hasNext()) {
1122           Object nextEl = i.next();
1123           if (isQuoted) { sb.append("'"); }
1124           sb.append(thisEl);
1125           if (isQuoted) { sb.append("'"); }
1126           if (i.hasNext()) {
1127               sb.append(delimiter);
1128           } else {
1129                   sb.append(lastDelimiter);
1130           }
1131           thisEl = nextEl;
1132       }
1133       if (isQuoted) { sb.append("'"); }
1134       sb.append(thisEl);
1135       if (isQuoted) { sb.append("'"); }
1136       
1137       return sb.toString();
1138   }
1139    
1140    
1141    
1142
1143    /*
1144     * efficient search & replace ... stolen from Usenet:
1145     * http://groups.google.co.uk/groups?hl=en&lr=&selm=memo.19990629182431.344B%40none.crap
1146     */
1147
1148    /**
1149     * An efficient search &amp; replace routine. Replaces all instances of
1150     * searchString within str with replaceString.
1151     *
1152     * @param originalString The string to search
1153     * @param searchString The string to search for
1154     * @param replaceString The string to replace it with
1155     *
1156     */
1157    public static String replaceString(String originalString, String searchString, String replaceString) {
1158        if (replaceString == null) {
1159            return originalString;
1160        }
1161
1162        if (searchString == null) {
1163            return originalString;
1164        }
1165
1166        if (originalString == null) {
1167            return null;
1168        }
1169
1170        int loc = originalString.indexOf(searchString);
1171
1172        if (loc == -1) {
1173            return originalString;
1174        }
1175
1176        char[] src = originalString.toCharArray();
1177        int n = searchString.length();
1178        int m = originalString.length();
1179        StringBuilder buf = new StringBuilder(m + replaceString.length() - n);
1180        int start = 0;
1181
1182        do {
1183            if (loc > start) {
1184                buf.append(src, start, loc - start);
1185            }
1186
1187            buf.append(replaceString);
1188            start = loc + n;
1189            loc = originalString.indexOf(searchString, start);
1190        } while (loc > 0);
1191
1192        if (start < m) {
1193            buf.append(src, start, m - start);
1194        }
1195
1196        return buf.toString();
1197    }
1198
1199    /**
1200     * Reads a file, and returns its contents in a String
1201     *
1202     * @param filename The file to read
1203     *
1204     * @return The contents of the string,
1205     *
1206     * @throws IOException A problem occurred whilst attempting to read the string
1207     */
1208    public static String getFileContents(String filename)
1209        throws IOException {
1210        File file = new File(filename);
1211        FileInputStream fis = new FileInputStream(file);
1212        byte[] data = new byte[(int) file.length()];
1213        int len = fis.read(data);
1214        fis.close();
1215        if (len < file.length()) {
1216            /* this should never happen -- file has changed underneath us */
1217            throw new IOException("Buffer read != size of file");
1218        }
1219
1220        return new String(data);
1221    }
1222
    /**
     * Reads a file, and returns its contents in a String. Identical to calling
     * <code>getFileContents(file.getCanonicalPath())</code>.
     *
     * @param file The file to read
     *
     * @return The contents of the file
     *
     * @throws IOException A problem occurred whilst resolving the canonical path
     *   of the file, or whilst attempting to read it
     */
        public static String getFileContents(File file) throws IOException {
                return getFileContents(file.getCanonicalPath());
        }
1237    
1238    
1239    /**
1240     * Prefixes every lines supplied with a given indent. e.g.
1241     * <code>indent("\t", "abcd\nefgh")</code> would return "\tabcd\n\tefgh". If the
1242     * string ends in a newline, then the return value also ends with a newline.
1243     *
1244     * @param indentString   The characters to indent with. Usually spaces or tabs,
1245     *   but could be something like a timestamp.
1246     * @param originalString The string to indent.
1247     * @return The originalString, with every line (as separated by the newline
1248     *   character) prefixed with indentString.
1249     */
1250    static public String indent(String indentString, String originalString) {
1251        String allButLastChar;
1252        if (originalString == null || indentString == null) {
1253            throw new NullPointerException();
1254        }
1255        if (originalString.equals("")) {
1256            return indentString;
1257        }
1258        allButLastChar = originalString.substring(0, originalString.length() - 1);
1259        return indentString + replaceString(allButLastChar, "\n", "\n" + indentString) + originalString.substring(originalString.length() - 1);
1260    }
1261    
1262    /** Ensure that a string is padded with spaces so that it meets the 
1263     * required length. If the input string exceeds this length, this it 
1264     * is returned unchanged
1265     * 
1266     * @param inputString the string to pad
1267     * @param length the desired length
1268     * @param justification a JUSTIFICATION_* constant defining whether left or 
1269     *   right justification is required.
1270     * 
1271     * @return a padded string. 
1272     */
1273    static public String pad(String inputString, int length, int justification) {
1274        // @TODO not terribly efficient, but who cares
1275        switch (justification) {
1276                case JUSTIFICATION_LEFT:
1277                        while (inputString.length() < length) { 
1278                                inputString = inputString + " ";
1279                        }
1280                        break;
1281
1282                        case JUSTIFICATION_RIGHT:
1283                                while (inputString.length() < length) { 
1284                                        inputString = " " + inputString;
1285                                }
1286                                break;
1287                        
1288                        case JUSTIFICATION_CENTER:
1289                                while (inputString.length() < length) { 
1290                                        inputString = inputString + " ";
1291                                        if (inputString.length() < length) {
1292                                                inputString = " " + inputString;
1293                                        }
1294                                }
1295                                break;
1296        }
1297        return inputString;
1298    }
1299
1300    /** Given a period-separated list of components (e.g. variable references ("a.b.c") or classnames),
1301     *  returns the last component. For example,
1302     *  getLastComponent("com.randomnoun.common.util.Text") will return "Text".
1303     *
1304     *  <p>If component is null, this function returns null.
1305     *  <p>If component contains no periods, this function returns the original string.
1306     *
1307     *  @param string The string to retrieve the last component from
1308     */
1309    static public String getLastComponent(String string) {
1310        if (string == null) {
1311            return null;
1312        }
1313        if (string.indexOf('.') == -1) {
1314            return string;
1315        }
1316        return string.substring(string.lastIndexOf('.') + 1);
1317    }
1318
1319    /** Escape this supplied string so it can represent a 'name' or 'value' component
1320     * on a HTTP queryString. This generally involves escaping special characters into %xx
1321     * form. Note that this only works for US-ASCII data.
1322     *
1323     */
1324    public static String escapeQueryString(String unescapedQueryString) {
1325        // default encoding
1326        byte[] data = encodeUrl(allowed_within_query, unescapedQueryString.getBytes());
1327
1328        try {
1329            return new String(data, "US-ASCII");
1330        } catch (UnsupportedEncodingException e) {
1331            throw new RuntimeException("encodeQueryString() requires ASCII support");
1332        }
1333    }
1334
1335    /**
1336     * Encodes an array of bytes into an array of URL safe 7-bit
1337     * characters. Unsafe characters are escaped.
1338     *
1339     * @param urlsafe bitset of characters deemed URL safe
1340     * @param bytes array of bytes to convert to URL safe characters
1341     * @return array of bytes containing URL safe characters
1342     */
1343    private static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) {
1344        if (bytes == null) {
1345            return null;
1346        }
1347
1348        if (urlsafe == null) {
1349            throw new NullPointerException("null urlsafe");
1350        }
1351
1352        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1353
1354        for (int i = 0; i < bytes.length; i++) {
1355            int b = bytes[i];
1356
1357            if (b < 0) {
1358                b = 256 + b;
1359            }
1360
1361            if (urlsafe.get(b)) {
1362                if (b == ' ') {
1363                    b = '+';
1364                }
1365
1366                buffer.write(b);
1367            } else {
1368                buffer.write('%');
1369
1370                char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
1371                char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
1372
1373                buffer.write(hex1);
1374                buffer.write(hex2);
1375            }
1376        }
1377
1378        return buffer.toByteArray();
1379    }
1380
    /**
     * Encodes a string into Base64 format.
     * No blanks or line breaks are inserted.
     *
     * <p>The string is converted to bytes using the platform's default charset 
     * before being encoded by {@link #encodeBase64(byte[])}.
     *
     * @param s  a String to be encoded.
     * @return   A String with the Base64 encoded data.
     */
    public static String encodeBase64(String s) {
        return new String(encodeBase64(s.getBytes()));
    }
1390
1391    /**
1392     * Encodes a byte array into Base64 format.
1393     * No blanks or line breaks are inserted.
1394     * @param in  an array containing the data bytes to be encoded.
1395     * @return    A character array with the Base64 encoded data.
1396     */
1397    public static char[] encodeBase64(byte[] in) {
1398        int iLen = in.length;
1399        int oDataLen = (iLen * 4 + 2) / 3; // output length without padding
1400        int oLen = ((iLen + 2) / 3) * 4; // output length including padding
1401        char[] out = new char[oLen];
1402        int ip = 0;
1403        int op = 0;
1404
1405        while (ip < iLen) {
1406            int i0 = in[ip++] & 0xff;
1407            int i1 = ip < iLen ? in[ip++] & 0xff : 0;
1408            int i2 = ip < iLen ? in[ip++] & 0xff : 0;
1409            int o0 = i0 >>> 2;
1410            int o1 = ((i0 & 3) << 4) | (i1 >>> 4);
1411            int o2 = ((i1 & 0xf) << 2) | (i2 >>> 6);
1412            int o3 = i2 & 0x3F;
1413            out[op++] = map1[o0];
1414            out[op++] = map1[o1];
1415            out[op] = op < oDataLen ? map1[o2] : '=';
1416            op++;
1417            out[op] = op < oDataLen ? map1[o3] : '=';
1418            op++;
1419        }
1420        return out;
1421    }
1422
1423        /** Used by {@link #parseData(String) to parse dates generated in Codec output.
1424         * (These dates are generated using the standard Java .toString() method, which
1425         * probably changes depending on the VM's locale, which I'm going to ignore for 
1426         * the time being).
1427         */
1428        static class DateParser {
1429                
1430                /** Parse a date generated by Date.toString() into a Date object
1431                 * 
1432                 * @param dateString a string representation of a date
1433                 * @return a Date representation of a date
1434                 */
1435                public static Date valueOf(String dateString) {
1436                        SimpleDateFormat sdf = new SimpleDateFormat("EEE MMM dd hh:mm:ss z yyyy");
1437                        try {
1438                                return sdf.parse(dateString);
1439                        } catch (ParseException pe) {
1440                                throw (IllegalArgumentException) new IllegalArgumentException("Invalid date '" + dateString + "'").initCause(pe);
1441                        }
1442                }
1443        }
1444
    // ---------------------- Generous characters for each component validation
    // -- not much of this is used in this class, so I should shorten these definitions, 
    // but you never know, I might use it later, so it's here for the time being.
    // 
    // compiled from
    //  org.apache.commons.httpclient.util.URIUtil
    //  org.apache.commons.codec.net.URLCodec
    //  org.apache.commons.httpclient.util.EncodingUtil
    //  org.apache.commons.httpclient.URI
    //
    // trust me... just calling escapeQueryString() is *so* much easier.
    //
    // Each BitSet below is indexed by character code; a set bit means the character
    // belongs to that class. The sets start out empty and are filled in by the
    // static initializer of this class.
    private static final BitSet percent = new BitSet(256); // escape % as %25
    private static final BitSet digit = new BitSet(256); // 0-9
    private static final BitSet alpha = new BitSet(256); // lowalpha | upalpha
    private static final BitSet alphanum = new BitSet(256); // alpha | digit
    private static final BitSet hex = new BitSet(256); // digit | a-f | A-F
    private static final BitSet escaped = new BitSet(256); // "%" hex hex (stored as the characters percent | hex)
    private static final BitSet mark = new BitSet(256); // -_.!~*'()
    private static final BitSet unreserved = new BitSet(256); // alphanum | mark (URI allowed, no purpose)

    private static final BitSet reserved = new BitSet(256); // ;/?:@&=+$,
    private static final BitSet uric = new BitSet(256); // reserved | unreserved | escaped

    private static final BitSet allowed_query = new BitSet(256); // uric - %
    private static final BitSet allowed_within_query = new BitSet(256); // allowed_query - reserved

    /** Mapping table from a 6-bit value (0-63) to its Base64 character; filled in by the static initializer */
    private static char[] map1 = new char[64];
1475
1476    // NB: www-form-encoding appears to be alpha | numeric | -_.* ( + space) 
1477    static {
1478        percent.set('%');
1479
1480        for (int i = '0'; i <= '9'; i++) {
1481            digit.set(i);
1482        }
1483
1484        for (int i = 'a'; i <= 'z'; i++) {
1485            alpha.set(i);
1486        }
1487
1488        for (int i = 'A'; i <= 'Z'; i++) {
1489            alpha.set(i);
1490        }
1491
1492        alphanum.or(alpha);
1493        alphanum.or(digit);
1494        hex.or(digit);
1495
1496        for (int i = 'a'; i <= 'f'; i++) {
1497            hex.set(i);
1498        }
1499
1500        for (int i = 'A'; i <= 'F'; i++) {
1501            hex.set(i);
1502        }
1503
1504        escaped.or(percent);
1505        escaped.or(hex);
1506        mark.set('-');
1507        mark.set('_');
1508        mark.set('.');
1509        mark.set('!');
1510        mark.set('~');
1511        mark.set('*');
1512        mark.set('\'');
1513        mark.set('(');
1514        mark.set(')');
1515        reserved.set(';');
1516        reserved.set('/');
1517        reserved.set('?');
1518        reserved.set(':');
1519        reserved.set('@');
1520        reserved.set('&');
1521        reserved.set('=');
1522        reserved.set('+');
1523        reserved.set('$');
1524        reserved.set(',');
1525        unreserved.or(alphanum);
1526        unreserved.or(mark);
1527        uric.or(reserved);
1528        uric.or(unreserved);
1529        uric.or(escaped);
1530        allowed_query.or(uric);
1531        allowed_query.clear('%');
1532        allowed_within_query.or(allowed_query);
1533        allowed_within_query.andNot(reserved);
1534
1535
1536        // excluded 'reserved'                       
1537        // create map1 array
1538        int i = 0;
1539        for (char c = 'A'; c <= 'Z'; c++) {
1540            map1[i++] = c;
1541        }
1542        for (char c = 'a'; c <= 'z'; c++) {
1543            map1[i++] = c;
1544        }
1545        for (char c = '0'; c <= '9'; c++) {
1546            map1[i++] = c;
1547        }
1548        map1[i++] = '+';
1549        map1[i++] = '/';
1550        
1551    }
1552    
1553    
1554
1555    /**
1556     * Returns a comparator that compares contained numbers based on their numeric values and compares other parts
1557     * using the current locale's order rules.
1558     * <p>For example in German locale this will be a comparator that handles umlauts correctly and ignores
1559     * upper/lower case differences.</p>
1560     *
1561     * @return <p>A string comparator that uses the current locale's order rules and handles embedded numbers
1562     *         correctly.</p>
1563     */
1564    public static Comparator<String> getNaturalComparator() {
1565        final Collator collator = Collator.getInstance();
1566        return new Comparator<String>() {
1567            public int compare(String o1, String o2) {
1568                return compareNatural(collator, o1, o2);
1569            }
1570        };
1571    }    
1572
1573    /**
1574     * <p>Compares two strings using the current locale's rules and comparing contained numbers based on their numeric
1575     * values.</p>
1576     * <p>This is probably the best default comparison to use.</p>
1577     * <p>If you know that the texts to be compared are in a certain language that differs from the default locale's
1578     * langage, then get a collator for the desired locale ({@link java.text.Collator#getInstance(java.util.Locale)})
1579     * and pass it to {@link #compareNatural(java.text.Collator, String, String)}</p>
1580     *
1581     * @param s first string
1582     * @param t second string
1583     * @return zero iff <code>s</code> and <code>t</code> are equal,
1584     *         a value less than zero iff <code>s</code> lexicographically precedes <code>t</code>
1585     *         and a value larger than zero iff <code>s</code> lexicographically follows <code>t</code>
1586     */
1587    public static int compareNatural(Collator collator, String s, String t) {
1588        return compareNatural(s, t, false, collator);
1589    }
1590
1591
    /** Natural compare operation. Stolen from 
     * http://www.eekboom.com/java/compareNatural/src/com/eekboom/utils/Strings.java
     * (source file is under BSD license). 
     *
     * <p>Both strings are walked in parallel, one "subword" at a time. When both
     * strings are positioned on a digit, the two digit runs are compared as numbers:
     * leading zeros are skipped, a run with more remaining digits is the larger one,
     * and runs of the same length are ordered by their first differing digit.
     * Otherwise the text is compared with the collator, or character by character
     * if no collator was supplied.
     *
     * <p>Notes on the result:
     * <ul>
     * <li>Two digit runs that consist only of zeros are treated as equal whatever
     *     their length, so e.g. "a0" and "a00" compare as 0.
     * <li>If the strings end on numerically equal digit runs, the run with fewer
     *     leading zeros sorts first.
     * <li>The magnitude of a non-zero result is not meaningful (it may be a character
     *     difference or a collator result); only the sign is.
     * </ul>
     * 
     * @param s             first string
     * @param t             second string
     * @param caseSensitive if true, characters that differ only in case are treated as
     *                      different; if false they are treated as equal - will be ignored
     *                      if a collator is given
     * @param collator      used to compare subwords that aren't numbers - if null, characters will be compared
     *                      individually based on their Unicode value
     * @return zero if <code>s</code> and <code>t</code> are considered equal,
     *         a value less than zero if <code>s</code> sorts before <code>t</code>
     *         and a value larger than zero if <code>s</code> sorts after <code>t</code>
     */
    private static int compareNatural(String s, String t, boolean caseSensitive, Collator collator) {
        int sIndex = 0;
        int tIndex = 0;

        int sLength = s.length();
        int tLength = t.length();

        while(true) {
            // both character indices are after a subword (or at zero)

            // Check if one string is at end
            if(sIndex == sLength && tIndex == tLength) {
                return 0;
            }
            if(sIndex == sLength) {
                return -1;
            }
            if(tIndex == tLength) {
                return 1;
            }

            // Compare sub word
            char sChar = s.charAt(sIndex);
            char tChar = t.charAt(tIndex);

            boolean sCharIsDigit = Character.isDigit(sChar);
            boolean tCharIsDigit = Character.isDigit(tChar);

            if(sCharIsDigit && tCharIsDigit) {
                // Compare numbers

                // skip leading 0s
                int sLeadingZeroCount = 0;
                while(sChar == '0') {
                    ++sLeadingZeroCount;
                    ++sIndex;
                    if(sIndex == sLength) {
                        break;
                    }
                    sChar = s.charAt(sIndex);
                }
                int tLeadingZeroCount = 0;
                while(tChar == '0') {
                    ++tLeadingZeroCount;
                    ++tIndex;
                    if(tIndex == tLength) {
                        break;
                    }
                    tChar = t.charAt(tIndex);
                }
                // a run is "all zero" if nothing but zeros was found before the end of
                // the string or the next non-digit
                boolean sAllZero = sIndex == sLength || !Character.isDigit(sChar);
                boolean tAllZero = tIndex == tLength || !Character.isDigit(tChar);
                if(sAllZero && tAllZero) {
                    // both numbers are zero: equal, move on to whatever follows them
                    continue;
                }
                if(sAllZero && !tAllZero) {
                    return -1;
                }
                if(tAllZero) {
                    return 1;
                }

                // both runs now start on a non-zero digit; diff remembers the first
                // differing digit, which only decides the result if the two runs
                // turn out to have the same length
                int diff = 0;
                do {
                    if(diff == 0) {
                        diff = sChar - tChar;
                    }
                    ++sIndex;
                    ++tIndex;
                    if(sIndex == sLength && tIndex == tLength) {
                        // same digits to the very end: fewer leading zeros sorts first
                        return diff != 0 ? diff : sLeadingZeroCount - tLeadingZeroCount;
                    }
                    if(sIndex == sLength) {
                        if(diff == 0) {
                            return -1;
                        }
                        // t still has digits => t is the longer (larger) number
                        return Character.isDigit(t.charAt(tIndex)) ? -1 : diff;
                    }
                    if(tIndex == tLength) {
                        if(diff == 0) {
                            return 1;
                        }
                        // s still has digits => s is the longer (larger) number
                        return Character.isDigit(s.charAt(sIndex)) ? 1 : diff;
                    }
                    sChar = s.charAt(sIndex);
                    tChar = t.charAt(tIndex);
                    sCharIsDigit = Character.isDigit(sChar);
                    tCharIsDigit = Character.isDigit(tChar);
                    if(!sCharIsDigit && !tCharIsDigit) {
                        // both number sub words have the same length
                        if(diff != 0) {
                            return diff;
                        }
                        break;
                    }
                    // exactly one run has ended: the shorter number is the smaller one
                    if(!sCharIsDigit) {
                        return -1;
                    }
                    if(!tCharIsDigit) {
                        return 1;
                    }
                } while(true);
            }
            else {
                // Compare words
                if(collator != null) {
                    // To use the collator the whole subwords have to be compared - character-by-character comparision
                    // is not possible. So find the two subwords first
                    int aw = sIndex;
                    int bw = tIndex;
                    // do/while: the current character is always consumed, even if it is a
                    // digit (which happens when only one of the two strings is on a digit)
                    do {
                        ++sIndex;
                    } while(sIndex < sLength && !Character.isDigit(s.charAt(sIndex)));
                    do {
                        ++tIndex;
                    } while(tIndex < tLength && !Character.isDigit(t.charAt(tIndex)));

                    String as = s.substring(aw, sIndex);
                    String bs = t.substring(bw, tIndex);
                    int subwordResult = collator.compare(as, bs);
                    if(subwordResult != 0) {
                        return subwordResult;
                    }
                }
                else {
                    // No collator specified. All characters should be ascii only. Compare character-by-character.
                    do {
                        if(sChar != tChar) {
                            if(caseSensitive) {
                                return sChar - tChar;
                            }
                            // case-insensitive: the characters count as different only if
                            // they still differ after both upper- and lowercasing
                            sChar = Character.toUpperCase(sChar);
                            tChar = Character.toUpperCase(tChar);
                            if(sChar != tChar) {
                                sChar = Character.toLowerCase(sChar);
                                tChar = Character.toLowerCase(tChar);
                                if(sChar != tChar) {
                                    return sChar - tChar;
                                }
                            }
                        }
                        ++sIndex;
                        ++tIndex;
                        if(sIndex == sLength && tIndex == tLength) {
                            return 0;
                        }
                        if(sIndex == sLength) {
                            return -1;
                        }
                        if(tIndex == tLength) {
                            return 1;
                        }
                        sChar = s.charAt(sIndex);
                        tChar = t.charAt(tIndex);
                        sCharIsDigit = Character.isDigit(sChar);
                        tCharIsDigit = Character.isDigit(tChar);
                    // stop as soon as either string reaches a digit; the outer loop
                    // decides how to handle it
                    } while(!sCharIsDigit && !tCharIsDigit);
                }
            }
        }
    }
1766
1767
1768        // taken from the W3C Jigsaw server sourcecode; class org.w3c.jigsaw.http.Request#unescape(String)
1769        /**
1770         * Unescape a HTTP escaped string
1771         * @param s The string to be unescaped
1772         * @return the unescaped string.
1773         */
1774        public static String unescapeQueryString (String s) {
1775                StringBuilder sbuf = new StringBuilder() ;
1776                int len  = s.length() ;
1777                int ch = -1 ;
1778                for (int i = 0 ; i < len ; i++) {
1779                        switch (ch = s.charAt(i)) {
1780                                case '%':
1781                                        if (i < len - 2) {
1782                                                // @TODO check to see how illegal escapes are treated
1783                                                // e.g. "%nothex"
1784                                                ch = s.charAt (++i) ;
1785                                                int hb = (Character.isDigit ((char) ch) 
1786                                                          ? ch - '0'
1787                                                          : 10+Character.toLowerCase ((char) ch)-'a') & 0xF ;
1788                                                ch = s.charAt (++i) ;
1789                                                int lb = (Character.isDigit ((char) ch)
1790                                                          ? ch - '0'
1791                                                          : 10+Character.toLowerCase ((char) ch)-'a') & 0xF ;
1792                                                sbuf.append ((char) ((hb << 4) | lb)) ;
1793                                        } else {
1794                                                sbuf.append ('%');  // hit EOL, just leave as is
1795                                        }
1796                                        break ;
1797                                case '+':
1798                                        sbuf.append (' ') ;
1799                                        break ;
1800                                default:
1801                                        sbuf.append ((char) ch) ;
1802                        }
1803                }
1804                return sbuf.toString() ;
1805        }
1806        
1807        /** Returns the largest common prefix between two other strings; e.g. 
1808         * getCommonPrefix("abcsomething", "abcsometharg") would be "abcsometh".
1809         * 
1810         * @param string1 String number one
1811         * @param string2 String number two
1812         * 
1813         * @return the large common prefix between the two strings
1814         * 
1815         * @throws NullPointerException is string1 or string2 is null
1816         */
1817        public static String getCommonPrefix(String string1, String string2) {
1818                if (string1==null) { throw new NullPointerException("null string1"); }
1819                if (string2==null) { throw new NullPointerException("null string2"); }
1820                int c = 0;
1821                int maxLen = Math.min(string1.length(), string2.length());              
1822                
1823                while (c < maxLen && string1.charAt(c)==string2.charAt(c)) {
1824                        c++;
1825                }
1826                return string1.substring(0, c);
1827        }
1828
1829        /** Uppercases the first character of a string.
1830     * 
1831     * @param text text to modify
1832     * 
1833     * @return the supplied text, with the first character converted to uppercase.
1834     */
1835    static public String toFirstUpper(String text) {
1836        return Character.toUpperCase(text.charAt(0)) + text.substring(1); 
1837    }
1838
1839
1840        /** Lowercases the first character of a string.
1841     * 
1842     * @param text text to modify
1843     * 
1844     * @return the supplied text, with the first character converted to lowercase.
1845     */
1846    static public String toFirstLower(String text) {
1847        return Character.toLowerCase(text.charAt(0)) + text.substring(1); 
1848    }
1849
1850        
1851
1852    
1853    /** Number of character edits between two strings; taken from  
1854         * http://www.merriampark.com/ldjava.htm. There's a version in commongs-lang,
1855         * apparently, but according to the comments on that page, it uses O(n^2) memory,
1856         * which can't be good.
1857         * 
1858         * @param s string 1
1859         * @param t string 2
1860         *  
1861         * @return the smallest number of edits required to convert s into t 
1862         */
1863        public static int getLevenshteinDistance (String s, String t) {
1864                  if (s == null || t == null) {
1865                    throw new IllegalArgumentException("Strings must not be null");
1866                  }
1867                                
1868                  /*
1869                    The difference between this impl. and the previous is that, rather 
1870                     than creating and retaining a matrix of size s.length()+1 by t.length()+1, 
1871                     we maintain two single-dimensional arrays of length s.length()+1.  The first, d,
1872                     is the 'current working' distance array that maintains the newest distance cost
1873                     counts as we iterate through the characters of String s.  Each time we increment
1874                     the index of String t we are comparing, d is copied to p, the second int[].  Doing so
1875                     allows us to retain the previous cost counts as required by the algorithm (taking 
1876                     the minimum of the cost count to the left, up one, and diagonally up and to the left
1877                     of the current cost count being calculated).  (Note that the arrays aren't really 
1878                     copied anymore, just switched...this is clearly much better than cloning an array 
1879                     or doing a System.arraycopy() each time  through the outer loop.)
1880
1881                     Effectively, the difference between the two implementations is this one does not 
1882                     cause an out of memory condition when calculating the LD over two very large strings.              
1883                  */            
1884                                
1885                  int n = s.length(); // length of s
1886                  int m = t.length(); // length of t
1887                                
1888                  if (n == 0) {
1889                    return m;
1890                  } else if (m == 0) {
1891                    return n;
1892                  }
1893
1894                  int p[] = new int[n+1]; //'previous' cost array, horizontally
1895                  int d[] = new int[n+1]; // cost array, horizontally
1896                  int _d[]; //placeholder to assist in swapping p and d
1897
1898                  // indexes into strings s and t
1899                  int i; // iterates through s
1900                  int j; // iterates through t
1901
1902                  char t_j; // jth character of t
1903
1904                  int cost; // cost
1905
1906                  for (i = 0; i<=n; i++) {
1907                     p[i] = i;
1908                  }
1909                                
1910                  for (j = 1; j<=m; j++) {
1911                     t_j = t.charAt(j-1);
1912                     d[0] = j;
1913                                
1914                     for (i=1; i<=n; i++) {
1915                        cost = s.charAt(i-1)==t_j ? 0 : 1;
1916                        // minimum of cell to the left+1, to the top+1, diagonally left and up +cost                            
1917                        d[i] = Math.min(Math.min(d[i-1]+1, p[i]+1),  p[i-1]+cost);  
1918                     }
1919
1920                     // copy current distance counts to 'previous row' distance counts
1921                     _d = p;
1922                     p = d;
1923                     d = _d;
1924                  } 
1925                                
1926                  // our last action in the above loop was to switch d and p, so p now 
1927                  // actually has the most recent cost counts
1928                  return p[n];
1929        }
1930    
1931        /** Return the md5 hash of a string
1932     * 
1933     * @param text text to hash
1934     * 
1935     * @return a hex-encoded version of the MD5 hash
1936     * 
1937     * @throws IllegalStateException if the java installation in use doesn't know 
1938     *   about MD5
1939     */
1940    static public String getMD5(String text) {
1941        try{
1942                MessageDigest algorithm = MessageDigest.getInstance("MD5");
1943                algorithm.reset();
1944                // algorithm.update(defaultBytes);
1945                algorithm.update(text.getBytes());
1946                byte messageDigest[] = algorithm.digest();
1947                    
1948                StringBuilder hexString = new StringBuilder();
1949                for (int i=0;i<messageDigest.length;i++) {
1950                        hexString.append(Integer.toHexString(0xFF & messageDigest[i]));
1951                }
1952                return hexString.toString();
1953        } catch (NoSuchAlgorithmException nsae) {
1954                throw (IllegalStateException) new IllegalStateException("Unknown algorithm 'MD5'").initCause(nsae);
1955        }
1956    }
1957    
1958    /** Returns a string composed of the supplied text, repeated 0 or more times 
1959     * 
1960     * @param text text to repeat
1961     * @param count number of repetitions
1962     * 
1963     * @return the repeated text
1964     */
1965    static public String repeat(String text, int count) {
1966        StringBuffer sb = new StringBuffer();
1967        for (int i=0; i<count; i++) {
1968                sb.append(text);
1969        }
1970        return sb.toString();
1971    }
1972    
1973    
1974        /** Perform ${xxxx}-style substitution of placeholders in text. Placeholders without 
1975         * values will be left as-is.
1976         * 
1977         * <p>For example, gives the set of variables:
1978         * <ul>
1979         * <li>abc = def
1980         * </ul>
1981         * 
1982         * <p>then the result of <code>substituteParameters("xxxx${abc}yyyy${def}zzzz")</code>
1983         * will be "xxxxdefyyyy${def}zzzz"
1984         * 
1985         * <p><code>$</code> followed by any other character will be left as-is. 
1986         * 
1987         * @param variables a set of variable names and values, used in the substitution 
1988         * @param text the text to be substituted.
1989         * 
1990         * @return text, with placeholders replaced with values in the variables parameter
1991         */
1992        public static String substitutePlaceholders(Map<?, ?> variables, String text) {
1993                // escaped version of (\$\{.*?\}|[^$]+|\$.)
1994                Pattern p = Pattern.compile("(\\$\\{.*?\\}|[^$]+|\\$)"); // modified regex
1995                Matcher m = p.matcher(text);
1996                String result = "";
1997                while (m.find()) {
1998                        String token = m.group(1);
1999                        if (token.startsWith("${") && token.endsWith("}")) {
2000                                Object value = variables.get(token.substring(2, token.length()-1));
2001                                if (value == null) {
2002                                        result = result + token;
2003                                } else {
2004                                        result = result + value.toString();
2005                                }
2006                        } else {
2007                                result = result + token;
2008                        }
2009                }
2010                return result;
2011        }
2012
2013        
2014}