View Javadoc
1   package com.randomnoun.common;
2   
3   /* (c) 2013 randomnoun. All Rights Reserved. This work is licensed under a
4    * BSD Simplified License. (http://www.randomnoun.com/bsd-simplified.html)
5    */
6   import java.io.ByteArrayOutputStream;
7   import java.io.File;
8   import java.io.FileInputStream;
9   import java.io.IOException;
10  import java.io.Reader;
11  import java.io.UnsupportedEncodingException;
12  import java.security.MessageDigest;
13  import java.security.NoSuchAlgorithmException;
14  import java.text.Collator;
15  import java.text.ParseException;
16  import java.text.SimpleDateFormat;
17  import java.util.ArrayList;
18  import java.util.BitSet;
19  import java.util.Comparator;
20  import java.util.Date;
21  import java.util.Iterator;
22  import java.util.List;
23  import java.util.Map;
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  /** Text utility functions
28   *
29   * @author knoxg
30   */
31  public class Text {
32      
    /** Used to prevent massive debug dumps. See {@link #getDisplayString(String, String)} */
    private static final int MAX_STRING_OUTPUT_CHARS = 300;

	/** Left-justification constant for use in the {@link #pad(String, int, int)} method */
	public static final int JUSTIFICATION_LEFT = 0;
	
	/** Center-justification constant for use in the {@link #pad(String, int, int)} method */
	public static final int JUSTIFICATION_CENTER = 1;

	/** Right-justification constant for use in the {@link #pad(String, int, int)} method */
	public static final int JUSTIFICATION_RIGHT = 2;
	
	/** Case-insensitive pattern matching the text <code>&lt;/script</code>; the <code>/script</code>
	 * part is captured in group 1. Used by {@link #escapeJavascript(String)} to rewrite the leading
	 * <code>&lt;</code> of that sequence.
	 */
	public static Pattern scriptPattern = Pattern.compile("<(/script)", Pattern.CASE_INSENSITIVE);
46  
47      /** Returns true if the supplied string is null or the empty string, false otherwise
48       *
49       * @param text The string to test
50       * @return true if the supplied string is null or the empty string, false otherwise
51       */
52      public static boolean isBlank(String text) {
53          return (text == null || text.equals(""));
54      }
55  
56      /** Returns true if the supplied string is non-null and only contains numeric characters
57       *
58       * @param text The string to test
59       * @return true if the supplied string is non-null and only contains numeric characters
60       */
61      public static boolean isNumeric(String text) {
62          if (text == null) {
63              return false;
64          }
65          char ch;
66          for (int i = 0; i < text.length(); i++) {
67              ch = text.charAt(i);
68              if (ch < '0' || ch > '9') {
69                  return false;
70              }
71          }
72          return true;
73      }
74  
75      /** Returns true if the supplied string is non-null and only contains numeric characters
76      * or a single decimal point. The value can have a leading negative ('-') symbol.
77      * 
78      * @param text The string to test
79      * @return true if the supplied string is non-null and only contains numeric characters,
80      *   which may contain a '.' character in there somewhere.
81      */
82     public static boolean isNumericDecimal(String text) {
83         if (text == null) {
84             return false;
85         }
86         boolean seenPoint = false; // existential quandary there for you
87         char ch;
88         int len = text.length();
89         for (int i = 0; i < len; i++) {
90             ch = text.charAt(i);
91             if (ch=='.') {
92          	   if (seenPoint) { return false; }
93          	   seenPoint = true;
94             } else if (ch == '-' && i == 0) {
95          	   // leading negative symbol OK
96          	   if (len == 1) {
97          		   // but not if it's the only character in the string
98          		   return false;
99          	   }
100            } else if (ch < '0' || ch > '9') {
101                return false;
102            }
103        }
104        return true;
105    }
106 
107      /** Returns true if the supplied string is non-null and only contains numeric characters
108      * or a single decimal point. The value can have a leading negative ('-') symbol.
109      * 
110      * This version allows exponents ("E+nn" or "E-nn") to the end of the value.
111      * 
112      * @param text The string to test
113      * @return true if the supplied string is non-null and only contains numeric characters,
114      *  which may contain a '.' character in there somewhere.
115      */
116     public static boolean isNumericDecimalExp(String text) {
117         if (text == null) {
118           return false;
119         }
120 	    boolean seenPoint = false; // existential quandary there for you
121 	    int expPos = -1;           // position of the 'E' character
122 	    char ch;
123 	    for (int i = 0; i < text.length(); i++) {
124 	    	ch = text.charAt(i);
125 	    	if (ch=='E') {
126 	    		if (expPos != -1) { return false; }
127 	    		expPos = i;
128 	    	} else if (ch=='.' && expPos == -1) {
129             	if (seenPoint) { return false; }
130             	seenPoint = true;
131 	    	} else if ((ch == '+' || ch == '-') && i == expPos + 1) {
132 	    		// + or - directly after 'E' OK
133             } else if (ch == '-' && i == 0) {
134             	// leading negative symbol OK
135             } else if (ch < '0' || ch > '9') {
136             	return false;
137             }
138 	    }
139 	    return true;
140     }
141 
142 
143     /** Ensures that a string returned from a browser (on any platform) conforms
144      * to unix line-EOF conventions. Any instances of consecutive CRs (<code>0xD</code>) 
145      * and LFs (<code>0xA</code>) in a string will be reduced to a series of CRs (the number of CRs will be the
146      * maximum number of CRs or LFs found in a row).  
147      * 
148      * @param input the input string
149      * 
150      * @return the canonicalised string, as described above
151      */
152     public static String reduceNewlines(String input) {
153     	StringBuilder sb = new StringBuilder();
154     	int len = input.length();
155     	int crCount = 0;
156 		int lfCount = 0;
157 		boolean insertNewline = false;
158 		char ch;
159     	for (int i=0; i<len; i++) {
160     		ch = input.charAt(i);
161     		if (ch == (char) 0xA) {
162     			lfCount ++; insertNewline = true;
163     		} else if (ch == (char) 0xD) {
164     			crCount ++; insertNewline = true;
165     		} else if (insertNewline) {
166 				for (int j=0; j<Math.max(lfCount, crCount); j++) {
167 					sb.append((char) 0xA);
168 				}
169 				insertNewline = false; lfCount=0; crCount=0;
170 				sb.append(ch);
171     		} else {
172     			sb.append(ch);
173     		}
174     	}
175     	if (insertNewline) {
176 			for (int j=0; j<Math.max(lfCount, crCount); j++) {
177 				sb.append((char) 0xA);
178 			}
179     	}
180     	
181     	return sb.toString();
182     }
183 
184 
185     /**
186      * Returns the HTML-escaped form of a string. The <code>&amp;</code>,
187      * <code>&lt;</code>, <code>&gt;</code>, and <code>"</code> characters are converted to
188      * <code>&amp;amp;</code>, <code>&amp;lt;</code>, <code>&amp;gt;</code>, and
189      * <code>&amp;quot;</code> respectively.
190      * 
191      * <p>Characters in the unicode control code blocks ( apart from \t, \n and \r ) are converted to &amp;xfffd;
192      * <p>Characters outside of the ASCII printable range are converted into &amp;xnnnn; form
193      *
194      * @param string the string to convert
195      *
196      * @return the HTML-escaped form of the string
197      */
198     static public String escapeHtml(String string) {
199         if (string == null) {
200             return "";
201         }
202         char c;
203         String hex;
204         StringBuilder sb = new StringBuilder(string.length());
205         for (int i = 0; i < string.length(); i++) {
206             c = string.charAt(i);
207             // check for illegal characters
208             switch (c) {
209                 case '&':
210                     sb.append("&amp;");
211                     break;
212                 case '<':
213                     sb.append("&lt;");
214                     break;
215                 case '>':
216                     sb.append("&gt;");
217                     break;
218                 case '\"':
219                     // interestingly, &quote; (with the e) works fine for HTML display,
220                     // but not inside hidden field values
221                     sb.append("&quot;");
222                     break;
223                 default:
224                 	// 'illegal characters' according to ESAPI. 7f to 9f are control characters in unicode 
225             		if ( ( c <= 0x1f && c != '\t' && c != '\n' && c != '\r' ) || ( c >= 0x7f && c <= 0x9f ) ) {
226             			sb.append("&#xfffd;"); // REPLACEMENT_HEX in ESAPI's HtmlEntityCodec
227             		} else if ( c > 0x1f && c <= 0x7f ) {
228             			// safe printable
229             			sb.append(c);
230             		} else {
231             			// ESAPI didn't have the else block above, which was causing it escape everything 
232             			hex = getHexForNonAlphanumeric(c);
233             			sb.append("&#x" + hex + ";");
234             		}
235                 	
236             }
237         }
238 
239         return sb.toString();
240     }
241 
242     /**
243      * Returns a regex-escaped form of a string. That is, the pattern 
244      * returned by this method, if compiled into a regex, will match
245      * the supplied string exactly. 
246      *
247      * @param string the string to convert
248      *
249      * @return the HTML-escaped form of the string
250      */
251     static public String escapeRegex(String string) {
252         if (string == null) {
253             return "";
254         }
255 
256         char c;
257         StringBuilder sb = new StringBuilder(string.length());
258 
259         for (int i = 0; i < string.length(); i++) {
260             c = string.charAt(i);
261 
262             switch (c) {
263                 case '.':
264                 case '+': // intentional fall-through
265                 case '?': // intentional fall-through
266                 case '\\': // intentional fall-through
267                 case '{': // intentional fall-through
268                 case '}': // intentional fall-through
269                 case '[': // intentional fall-through
270                 case ']': // intentional fall-through
271                 case '^': // intentional fall-through
272                 case '$': // intentional fall-through
273                 case '(': // intentional fall-through
274                 case '|': // intentional fall-through
275                 case ')': // intentional fall-through
276                 	sb.append("\\");
277                     sb.append(c);
278                     break;
279                 default:
280                     sb.append(c);
281             }
282         }
283 
284         return sb.toString();
285     }
286     
287     
288     /**
289      * Returns the csv-escaped form of a string. A csv-escaped string is
290      * used when writing to a CSV (comma-separated-value) file. It ensures
291      * that commas included within a string are quoted. We use the Microsoft-Excel
292      * quoting rules, so that our CSV files can be imported into that. These rules
293      * (derived from experimentation) are:
294      *
295      * <ul>
296      * <li>Strings without commas (,) inverted commas ("), or newlines (\n) are returned as-is.
297      * <li>Otherwise, the string is surrounded by inverted commas, and any
298      *   inverted commas within the string are doubled-up (i.e. '"' becomes '""').
299      * <li>A value that starts with any of "=", "@", "+" or "-" has a leading single apostrophe added
300      *   to prevent the value being evaluated in Excel. The leading quote is visible to the user when the
301      *   csv is opened, which may mean that it will have to be removed when roundtripping data.
302      *   This may complicate things if the user actually wants a leading single quote in their CSV value.   
303      * </ul>
304      *
305      * <p>Embedded newlines are inserted as-is, as per Excel. This will require
306      * some care whilst parsing if we want to be able to read these files.
307      *
308      * @param string the string to convert
309      *
310      * @return the csv-escaped form of the string
311      */
312     static public String escapeCsv(String string) {
313         if (string == null) {
314             return "";
315         }
316 
317         boolean quoted = false;
318         // from https://www.contextis.com/en/blog/comma-separated-vulnerabilities
319         // prefix cells that start with ‘=’ , '@', '+' or '-' with an apostrophe 
320         // This will ensure that the cell isn’t interpreted as a formula, and as a bonus in Microsoft Excel the apostrophe itself will not be displayed.
321         if (string.startsWith("=") || 
322           string.startsWith("@")) {
323             // prefix the string with an a single quote char to escape it
324             string = "'" + string;
325             quoted = true; // not sure need to quote here, but doesn't hurt
326         } else if ((string.startsWith("+") || string.startsWith("-")) && 
327         	(string.length() == 1 || !Text.isNumericDecimalExp(string))) {
328         	// numbers can legitimately start with '+' or '-' but anything else should be escaped
329         	string = "'" + string;
330             quoted = true; 
331         }
332 
333         
334         if (string.indexOf(',') == -1 && string.indexOf('"') == -1 && string.indexOf('\n') == -1 && !quoted) {
335         	return string;
336         }
337         string = Text.replaceString(string, "\"", "\"\"");
338         string = "\"" + string + "\"";
339 
340         return string;
341     }
342 
343     /** Given a csv-encoded string (as produced by the rules in {@link #escapeCsv(String)},
344      *  produces a List of Strings which represent the individual values in the string.
345      *  Note that this method is *not* equivalent to calling <code>Arrays.asList(astring.split(","))</code>.
346      *
347      * <p>Setting the whitespaceSensitive parameter to false allows leading and trailing
348      * whitespace in *non-quoted* values to be removed, e.g. if the input string <code>text</code> is:
349      *
350      * <pre class="code">
351      * abc,def,  ghi, j k ,"lmn"," op "," q,r","""hello""", "another"
352      * </pre>
353      *
354      * then <code>parseCsv(text, <b>false</b>)</code> will return the strings:
355      * <pre class="code">
356      * abc
357      * def
358      * ghi
359      * j k
360      * lmn
361      *  op        <i>(this String has one leading space, and a trailing space after 'p')</i>
362      *  q,r       <i>(this String has one leading space)</i>
363      * "hello"
364      * another
365      * </pre>
366      *
367      * and <code>parseCsv(text, <b>true</b>)</code> would throw a ParseException (since the
368      * final element is a quoted value, but begins with a space).
369      *
370      * If the <code>, "another"</code> text is removed, however, then
371      * <code>parseCsv(text, true)</code> would return the following:
372      *
373      * and <code>parseCsv(text, true)</code> will return the string
374      * <pre>
375      * abc
376      * def
377      *   ghi      <i>(this String has two leading spaces)</i>
378      *  j k       <i>(this String has one leading space and a trailing space after the 'k' character)</i>
379      * lmn
380      *  op        <i>(this String has one leading space, and a trailing space after 'p')</i>
381      *  q,r       <i>(this String has one leading space)</i>
382      * "hello"
383      * </pre>
384      *
385      * <p>Most applications would want to use the 'whiteSpaceSensitive=false' form of this function, since
386      * (a) less chance of a ParseException, and (b) it's what an end-user would normally
387      * expect. This can be performed by calling the {@link #parseCsv(String)} method.
388      *
389      * <p>Whitespace is determined by using the <code>Character.isSpaceChar()</code> method,
390      * which is Unicode-aware.
391      *
392      * @param text   The CSV-encoded string to parse
393      * @param whitespaceSensitive   If set to true, will trim leading and trailing whitespace in *non-quoted* values.
394      *
395      * @return a List of Strings. The returned List is guaranteed to always contain at least one element.
396      *
397      * @throws NullPointerException if the text passed to this method is null
398      * @throws ParseException if a quoted value contains leading whitespace before the
399      *  opening quote, or after the trailing quote.
400      * @throws ParseException if a quoted value has a start quote, but no end quote, or
401      *   if a value has additional text after a quoted value (before the next comma or EOL).
402      */
403     static public List<String> parseCsv(String text, boolean whitespaceSensitive)
404         throws ParseException {
405         if (text == null) {
406             throw new NullPointerException("null text");
407         }
408 
409         // parse state: 
410         //   0=searching for new value (at start of line or after comma) 
411         //   1=consuming non-quoted values
412         //   2=consuming quoted value
413         //   3=consumed first quote within a quoted value (may be termining quote or a "" sequence)
414         //   4=consuming whitespace up to next comma/EOL (after quoted value, not whitespaceSensitive)
415         int parseState = 0;
416         int length = text.length();
417         String element;
418         List<String> result = new ArrayList<String>();
419         char ch;
420         StringBuilder buffer = new StringBuilder();
421 
422         for (int pos = 0; pos < length; pos++) {
423             ch = text.charAt(pos);
424 
425             // System.out.println("pos " + pos + ", state=" + parseState + ", nextchar=" + ch + ", buf=" + buffer);
426             switch (parseState) {
427                 case 0:
428                     if (Character.isSpaceChar(ch)) {
429                         if (whitespaceSensitive) {
430                             buffer.append(ch);
431                             parseState = 1;
432                         } else {
433                             // ignore
434                         }
435                     } else if (ch == '"') {
436                         parseState = 2;
437                     } else if (ch == ',') {
438                     	result.add(""); // add an empty element; state remains unchanged
439                     } else {
440                         buffer.append(ch);
441                         parseState = 1;
442                     }
443                     break;
444                 case 1:
445                     if (ch == ',') {
446                         element = buffer.toString();
447                         if (!whitespaceSensitive) {
448                             element = element.trim();
449                         }
450                         result.add(element);
451                         buffer.setLength(0);
452                         parseState = 0;
453                     } else {
454                         buffer.append(ch);
455                     }
456                     break;
457                 case 2:
458                     if (ch == '"') {
459                         parseState = 3;
460                     } else {
461                         buffer.append(ch);
462                     }
463                     break;
464                 case 3:
465                     if (ch == '"') {
466                         buffer.append('"');
467                         parseState = 2;
468                     } else if (ch == ',') {
469                         result.add(buffer.toString());
470                         buffer.setLength(0);
471                         parseState = 0;
472                     } else if (Character.isSpaceChar(ch)) {
473                         if (whitespaceSensitive) {
474                             throw new ParseException("Cannot have trailing whitespace after close quote character", pos);
475                         }
476                         parseState = 4;
477                     } else {
478                         throw new ParseException("Cannot have trailing data after close quote character", pos);
479                     }
480                     break;
481                 case 4:
482                     if (Character.isSpaceChar(ch)) {
483                         // consume and ignore
484                     } else if (ch == ',') {
485                         result.add(buffer.toString());
486                         buffer.setLength(0);
487                         parseState = 0;
488                     } else {
489                         throw new ParseException("Cannot have trailing data after close quote character", pos);
490                     }
491                     break;
492                     
493                 default:
494                     throw new IllegalStateException("Illegal state '" + parseState + "' in parseCsv");
495             }
496         }
497 
498         // if state is 2, we are in the middle of a quoted value
499         if (parseState == 2) {
500             throw new ParseException("Missing endquote in csv text", length);
501         }
502 
503         // otherwise we still need to add what's left in the buffer into the result list
504         element = buffer.toString();
505         if (parseState == 1 && !whitespaceSensitive) {
506             element = element.trim();
507         }
508         result.add(element);
509         return result;
510     }
511     
512     @FunctionalInterface
513     public interface CsvLineReader { // doesn't extend Supplier<T> as it throws exceptions
514     	/** Returns the next logical line in the CSV ( quoted values can contain newlines )  
515     	 * 
516     	 * @return
517     	 * @throws ParseException
518     	 * @throws IOException
519     	 */
520         List<String> readLine() throws ParseException, IOException;
521     }
522     
523     // same as parseCsv(String, whitespaceSensitive) but can handle newlines in quotes by supplying a Reader
524     // the returned object will return a List<String> or null if EOF is reached
525     // ParseExceptions are wrapped in something, probably
526     static public CsvLineReader parseCsv(Reader r, boolean whitespaceSensitive) {
527         if (r == null) {
528             throw new NullPointerException("null reader");
529         }
530     	return new CsvLineReader() {
531     		// eof if we actually read eof or encouner a parse exception ( cannot recover )
532     		boolean isAtStart = true; // for backwards compatibility with Text.parseCsv(""), first readLine() is never null
533     		boolean isEOF = false;
534 			@Override
535 			public List<String> readLine() throws ParseException, IOException {
536 				if (isEOF) { return null; }
537 				
538 				// parse state: 
539 		        //   0=searching for new value (at start of line or after comma) 
540 		        //   1=consuming non-quoted values
541 		        //   2=consuming quoted value
542 		        //   3=consumed first quote within a quoted value (may be termining quote or a "" sequence)
543 		        //   4=consuming whitespace up to next comma/EOL (after quoted value, not whitespaceSensitive)
544 		        int parseState = 0;
545 		        // int length = text.length();
546 		        String element;
547 		        List<String> result = new ArrayList<String>();
548 		        char ch;
549 		        StringBuilder buffer = new StringBuilder();
550 		        int intChar = r.read();
551 		        int pos = 1;
552 		        if (intChar == -1 && !isAtStart) {
553 		        	isEOF = true;
554 		        	return null;
555 		        }
556 
557 		        // @TODO better CRLF handling
558 		        isAtStart = false;
559 		        while (intChar != -1) {
560 		            ch = (char) intChar;
561 
562 		            // System.out.println("pos " + pos + ", state=" + parseState + ", nextchar=" + ch + ", buf=" + buffer);
563 		            switch (parseState) {
564 		                case 0:
565 		                	if (ch == '\n') {
566 		                		// return result so far
567 		                		element = buffer.toString();
568 		        		        result.add(buffer.toString());
569 		                		return result;
570 		                	} else if (Character.isSpaceChar(ch)) {
571 		                        if (whitespaceSensitive) {
572 		                            buffer.append(ch);
573 		                            parseState = 1;
574 		                        } else {
575 		                            // ignore
576 		                        }
577 		                    } else if (ch == '"') {
578 		                        parseState = 2;
579 		                    } else if (ch == ',') {
580 		                    	result.add(""); // add an empty element; state remains unchanged
581 		                    } else {
582 		                        buffer.append(ch);
583 		                        parseState = 1;
584 		                    }
585 		                    break;
586 		                case 1:
587 		                	if (ch == '\n') {
588 		                		// return result so far
589 		                		element = buffer.toString();
590 		        		        if (!whitespaceSensitive) {
591 		        		            element = element.trim();
592 		        		        }
593 		        		        result.add(buffer.toString());
594 		                		return result;
595 		                	} else if (ch == ',') {
596 		                        element = buffer.toString();
597 		                        if (!whitespaceSensitive) {
598 		                            element = element.trim();
599 		                        }
600 		                        result.add(element);
601 		                        buffer.setLength(0);
602 		                        parseState = 0;
603 		                    } else {
604 		                        buffer.append(ch);
605 		                    }
606 		                    break;
607 		                case 2:
608 		                    if (ch == '"') {
609 		                        parseState = 3;
610 		                    } else {
611 		                        buffer.append(ch);
612 		                    }
613 		                    break;
614 		                case 3:
615 		                	if (ch == '\n') {
616 		                        result.add(buffer.toString());
617 		                        buffer.setLength(0);
618 		                        parseState = 0;
619 		                        return result;
620 		                	} else if (ch == '"') {
621 		                        buffer.append('"');
622 		                        parseState = 2;
623 		                    } else if (ch == ',') {
624 		                        result.add(buffer.toString());
625 		                        buffer.setLength(0);
626 		                        parseState = 0;
627 		                    } else if (Character.isSpaceChar(ch)) {
628 		                        if (whitespaceSensitive) {
629 		                        	isEOF = true;
630 		                            throw new ParseException("Cannot have trailing whitespace after close quote character", pos);
631 		                        }
632 		                        parseState = 4;
633 		                    } else {
634 		                    	isEOF = true;
635 		                        throw new ParseException("Cannot have trailing data after close quote character", pos);
636 		                    }
637 		                    break;
638 		                case 4:
639 		                	if (ch == '\n') {
640 		                		// return result so far
641 		                		result.add(buffer.toString());
642 		                		return result;
643 		                	} else if (Character.isSpaceChar(ch)) {
644 		                        // consume and ignore
645 		                    } else if (ch == ',') {
646 		                        result.add(buffer.toString());
647 		                        buffer.setLength(0);
648 		                        parseState = 0;
649 		                    } else {
650 		                    	isEOF = true;
651 		                        throw new ParseException("Cannot have trailing data after close quote character", pos);
652 		                    }
653 		                    break;
654 		                    
655 		                default:
656 		                    throw new IllegalStateException("Illegal state '" + parseState + "' in parseCsv");
657 		            }
658 		            
659 			        intChar = r.read();
660 			        pos++;
661 		        }
662 		        isEOF = true;
663 
664 		        // if state is 2, we are in the middle of a quoted value
665 		        if (parseState == 2) {
666 		            throw new ParseException("Missing endquote in csv text", pos);
667 		        }
668 
669 		        // otherwise we still need to add what's left in the buffer into the result list
670 		        element = buffer.toString();
671 		        if (parseState == 1 && !whitespaceSensitive) {
672 		            element = element.trim();
673 		        }
674 		        result.add(element);
675 		        return result;
676 			}
677     	};
678     }
679 
680     /**
681      * Equivalent to <code>parseCsv(text, false);</code> (i.e. whitespace-insensitive parsing).
682      * Refer to the documentation for that method for more details.
683      *
684      * @see #parseCsv(String, boolean)
685      *
686      * @param text he CSV-encoded string to parse
687      * 
688      * @return a List of Strings. The returned List is guaranteed to always contain at least one element.
689      *
690      * @throws NullPointerException if the text passed to this method is null.
691      * @throws ParseException see {@link #parseCsv(String, boolean)} for details.
692      */
693     static public List<String> parseCsv(String text)
694         throws ParseException {
695         return Text.parseCsv(text, false);
696     }
697 
698     /** Returns a java-escaped string. Replaces '"' with '\"'.
699      *
700      * <p>Since this is predominantly used in the query builder, I am not worrying about
701      * unicode sequences (SWIFT is ASCII) or newlines (although this may be necessary later)
702      * for multiline textboxes
703      *
704      * @return The java-escaped version of the string
705      */
706     public static String escapeJava(String string) {
707         return Text.replaceString(string, "\"", "\\\"");
708     }
709 
710     /** Returns a javascript string. The characters <code>'</code>,
711      * <code>"</code> and <code>\</code> are converted into their Unicode equivalents,
712      *
713      * <p>Non-printable characters are converted into unicode equivalents
714      **
715      * <p>Newlines are now replaced with "\n" 
716      *
717      * @return The java-escaped version of the string
718      */
719     public static String escapeJavascript(String string) {
720         // backslashes are always escaped
721         //string = Text.replaceString(string, "\\", "\\u005C");
722         //string = Text.replaceString(string, "\"", "\\u0022");
723         //string = Text.replaceString(string, "'", "\\u0027");
724 		//string = Text.replaceString(string, "\n", "\\n");
725     	StringBuilder sb = new StringBuilder(string.length());
726 		for (int i = 0; i<string.length(); i++) {
727 			char ch = string.charAt(i);
728 			if (ch=='\n') {
729 			   sb.append("\\n");	
730 			} else if (ch=='\\' || ch=='"' || ch=='\'' || ch<32 || ch>126) {
731 				String hex = Integer.toString(ch, 16);
732 				sb.append("\\u" + "0000".substring(0, 4-hex.length()) + hex);
733 			} else {
734 				sb.append(ch);
735 			}
736 		}
737 		return scriptPattern.matcher(sb.toString()).replaceAll("\\\\u003C$1");
738         // return sb.toString();
739     }
740 
741 
742     /** Returns a javascript string. The characters <code>'</code>,
743      * <code>"</code> and <code>\</code> are converted into their Unicode equivalents,
744      *
745      * <p>Non-printable characters are converted into unicode equivalents
746      *
747      * @deprecated use {@link #escapeJavascript(String)} instead
748      * 
749      * @return The java-escaped version of the string
750      */
751     public static String escapeJavascript2(String string) {
752     	// this method only exists for backwards-compatability
753         string = reduceNewlines(string);  // canonicalise CRLFs
754     	return escapeJavascript(string);
755     }
756 
757     
758     /** Unescapes a java-escaped string. Replaces '\"' with '"',
759      * '\\u0022' with '"', '\\u0027' with ''', '\\u005C' with '\'.
760      *
761      * <p>Since this is predominantly used in the query builder, I am not worrying about
762      * unicode sequences (SWIFT is ASCII) or newlines (although this may be necessary later)
763      * for multiline textboxes
764      *
765      * @return The java-escaped version of the string
766      */
767     public static String unescapeJava(String string) {
768         string = Text.replaceString(string, "\\\"", "\"");
769         string = Text.replaceString(string, "\\u0022", "\"");
770         string = Text.replaceString(string, "\\u0027", "'");
771         string = Text.replaceString(string, "\\u005C", "\\");
772         return string;
773     }
774 
775     /** Returns a python string, escaped so that it can be enclosed in a single-quoted string. 
776      * 
777      * <p>The characters <code>'</code>,
778      * <code>"</code> and <code>\</code> are converted into their Unicode equivalents,
779      *
780      * <p>Non-printable characters are converted into unicode equivalents
781      *
782      * @return The python-escaped version of the string
783      */
784     public static String escapePython(String string) {
785     	// pretty much the same as Text.escapeJavascript2(), without the reduceNewLines, which probably shouldn't be there anyway
786     	string = Text.replaceString(string, "\\", "\\u005C");
787         string = Text.replaceString(string, "\"", "\\u0022");
788         string = Text.replaceString(string, "'", "\\u0027");
789 		string = Text.replaceString(string, "\n", "\\n");
790 		StringBuilder sb = new StringBuilder(string.length());
791 		for (int i = 0; i<string.length(); i++) {
792 			char ch = string.charAt(i);
793 			if (ch>=32 && ch<=126) {
794 				sb.append(ch);
795 			} else {
796 				String hex = Integer.toString(ch, 16);
797 				sb.append("\\u" + "0000".substring(0, 4-hex.length()) + hex);
798 			}
799 		}
800         return sb.toString();
801         // return string;
802     }
803     
804     /** Escape a filename or path component. 
805      * Characters that typically have special meanings in paths (":", "/", "\") are escaped with a preceding "\" character.
806      * 
807      * Does not escape glob characters ( "*" or "?" ). 
808      * Do not use this method to escape a full file path; when escaping a file path, escape each path component separately and then join 
809      * the components with "/" characters ( see {@link #createEscapedPath(String[])} ). 
810      * 
811      * @param string the filename or path component to escape
812      * 
813      * @return the escaped form of the filename (or path component)
814      */
815     // Does not escape DOS special filenames ( "NUL", "CON", "LPT1" etc ). Remember those ? Of course you do.
816     public static String escapePathComponent(String string) {
817     	string = Text.replaceString(string, "\\", "\\\\");
818     	string = Text.replaceString(string, "/", "\\/");
819     	string = Text.replaceString(string, ":", "\\:");
820     	return string;
821     }
822     
823     /** Unescape a filename or path component. 
824      * The escape sequences "\\" , "\:" and "\/" are converted to "\", ":" and "/" respectively.
825      * All other escape sequences will raise an IllegalArgumentException 
826      *  
827      * <p>See {@link #splitEscapedPath(String)} to split an escaped path into components. 
828      *  
829      * @param pathComponent the filename or path component to unescape
830      * 
831      * @return the unescaped form of the filename or path component
832      * 
833      * @throws IllegalArgumentException if an unexpected escape is encountered, or the escape is unclosed
834      */
835     public static String unescapePathComponent(String pathComponent) {
836     	if (pathComponent == null) {
837             return null;
838         }
839         char c;
840         boolean inEscape = false;
841         StringBuilder sb = new StringBuilder(pathComponent.length());
842         for (int i = 0; i < pathComponent.length(); i++) {
843             c = pathComponent.charAt(i);
844             if (inEscape) {
845                 switch (c) {
846                 	case '\\': 
847                     case '/': // intentional fall-through
848                     case ':': // intentional fall-through
849                     	sb.append(c);
850                         break;
851                     default:
852                     	throw new IllegalArgumentException("Unexpected escape '\\" + c + "' in filename");
853                 }
854                 inEscape = false;
855             } else {
856                 switch (c) {
857 	                case '\\': 
858 	                	inEscape = true;
859 	                	break;
860 	                default:
861 	                	sb.append(c);
862                 }
863             }
864         }
865         if (inEscape) {
866         	throw new IllegalArgumentException("Unclosed escape in filename");
867         }
868         return sb.toString();
869     }
870 
871     // need to escape the \ in a regex ( \\ ) in a String ( \\\\ )
872     private static Pattern splitPathPattern = Pattern.compile("(?<!\\\\)/"); 
873     
874 	/** Split a path, but allow forward slashes in path components if they're escaped by a preceding '\' character.
875      * Individual path components returned by this method will be unescaped.
876      *
877      * <pre>
878      * splitPath(null) = NPE
879      * splitPath("") = [ "" ]
880      * splitPath("abc") = [ "abc" ]
881      * splitPath("abc/def/ghi") = [ "abc", "def", "ghi" ]
882      * splitPath("abc\\/def/ghi") = [ "abc/def", "ghi" ]
883      * </pre>
884      * 
885      * <p>Opposite of {@link #createEscapedPath(String[])}
886      */
887     public static String[] splitEscapedPath(String escapedPath) {
888     	String[] result = splitPathPattern.split(escapedPath);
889     	for (int i=0; i<result.length; i++) {
890     		result[i] = Text.unescapePathComponent(result[i]);
891     	}
892     	return result;
893     }
894     
895     /** Escapes the components of a path String, returning an escaped full path String.
896      * Each path component is escaped with {@link #escapePathComponent(String)} and then joined using '/' characters.
897      * 
898      * <p>Opposite of {@link #splitEscapedPath(String)}.
899      * 
900      * @param pathComponents the filename components
901      * @return an escaped path
902      */
903     public static String createEscapedPath(String[] pathComponents) {
904     	String result = null;
905     	if (pathComponents.length == 0) { 
906     		throw new IllegalArgumentException("empty pathComponents"); 
907     	}
908     	for (String c : pathComponents) {
909     		if (c==null) { 
910     			throw new NullPointerException("null pathComponent"); 
911     		}
912     		if (result == null) {
913     			result = escapePathComponent(c);
914     		} else {
915     			result = result + "/" + escapePathComponent(c); 
916     		}
917     	}
918     	return result;
919     }
920     
921     // escapeCss from ESAPI 2.0.1
922     private static final String[] esapi_hex = new String[256];
923 	static {
924 		for ( char c = 0; c < 0xFF; c++ ) {
925 			if ( c >= 0x30 && c <= 0x39 || c >= 0x41 && c <= 0x5A || c >= 0x61 && c <= 0x7A ) {
926 				esapi_hex[c] = null;
927 			} else {
928 				esapi_hex[c] = toHex(c).intern();
929 			}
930 		}
931 	}
932 	private static String toHex(char c) {
933 		return Integer.toHexString(c);
934 	}
935 	private static String getHexForNonAlphanumeric(char c) {
936 		if(c<0xFF) {return esapi_hex[c]; }
937 		return toHex(c);
938 	}
939     private static String encodeCssCharacter(Character c) {
940 		String hex = getHexForNonAlphanumeric(c);
941 		if ( hex == null ) { return "" + c; }
942         return "\\" + hex + " ";
943     }
944 
945     /**
946      * Returns the CSS-escaped form of a string. 
947      * 
948      * <p>Characters outside of the printable ASCII range are converted to \nnnn form
949      *
950      * @param input the string to convert
951      *
952      * @return the HTML-escaped form of the string
953      */
954     public static String escapeCss(String input) {
955     	if (input == null) { return ""; }
956     	StringBuilder sb = new StringBuilder();
957 		for (int i = 0; i < input.length(); i++) {
958 			char c = input.charAt(i);
959 			sb.append(encodeCssCharacter(c));
960 		}
961 		return sb.toString();    	
962     }
963 
964 
965     
966     
967     /** Returns the given string; but will truncate it to MAX_STRING_OUTPUT_CHARS.
968      *  If it exceeds this length, a message is appended expressing how many
969      *  characters were truncated. Strings with the key of 'exception' are
970      *  not truncated (in order to display full stack traces when these occur).
971      *  Any keys that contain the text 'password', 'Password', 'credential' or
972      *  'Credential' will be returned as eight asterisks.
973      *
974      * <p>This method is used in the debug JSP when dumping properties to the user,
975      *  in order to prevent inordinately verbose output.
976      *
977      *  @param key The key of the string we wish to display
978      *  @param string The string value
979      *  @return A (possibly truncated) version of this string
980      */
981     public static String getDisplayString(String key, String string) {
982         return getDisplayString(key, string, MAX_STRING_OUTPUT_CHARS);
983     }
984 
985     /** Returns the given string; but will truncate it to MAX_STRING_OUTPUT_CHARS.
986      *  If it exceeds this length, a message is appended expressing how many
987      *  characters were truncated. Strings with the key of 'exception' are
988      *  not truncated (in order to display full stack traces when these occur).
989      *  Any keys that contain the text 'password', 'Password', 'credential' or
990      *  'Credential' will be returned as eight asterisks.
991      *
992      * <p>This method is used in the debug JSP when dumping properties to the user,
993      *  in order to prevent inordinately verbose output.
994      *
995      *  @param key The key of the string we wish to display
996      *  @param string The string value
997      *  @param maxChars The maximum number of characters to display
998      *  
999      *  @return A (possibly truncated) version of this string
1000      */
1001     public static String getDisplayString(String key, String string, int maxChars) {
1002         if (string == null) {
1003             string = "(null)";
1004         }
1005 
1006         if ("exception".equals(key)) {
1007             return string;
1008         }
1009 
1010         if (key.indexOf("password") >= 0 || key.indexOf("Password") >= 0 || key.indexOf("credential") >= 0 || key.indexOf("Credential") >= 0) {
1011             return "********";
1012         }
1013 
1014         if (string.length() <= maxChars) {
1015             return string;
1016         } else {
1017             return string.substring(0, maxChars) + "... (" + (string.length() - maxChars) + " more characters truncated)";
1018         }
1019     }
1020 
1021     /** Utility function to return a default if the supplied string is null.
1022      *  Shorthand for <code>(strText==null) ? strDefaultText : strText;</code>
1023      *
1024      * @return strText is strText is not null, otherwise strDefaultText
1025      */
1026     public static String strDefault(String strText, String strDefaultText) {
1027         return (strText == null) ? strDefaultText : strText;
1028     }
1029 
1030     /** Return a string composed of a series of strings, separated with the specified delimiter
1031      *
1032      * @param elements The array of elements to join
1033      * @return delimiter The delimiter to join each string with
1034      *
1035      * @throws NullPointerException if elements or delimiter is null
1036      */
1037     public static String join(String[] elements, String delimiter) {
1038     	return joinWithLast(elements, false, delimiter, delimiter);
1039     }
1040 
1041     /** Return a string composed of a series of strings, separated with the specified delimiter
1042      *
1043      * @param elements A Collection or Iterable of the elements to join
1044      * @return delimiter The delimiter to join each string with
1045      *
1046      * @throws NullPointerException if elements or delimiter is null
1047      */
1048     public static String join(Iterable<?> elements, String delimiter) {
1049     	return joinWithLast(elements, false, delimiter, delimiter);
1050     }
1051     
1052     /** Return a string composed of a series of strings, separated with the specified delimiter.
1053     * Each element is contained in single quotes. The final delimeter can be set to a different
1054     * value, to produce text in the form <code>"'a', 'b' or 'c'"</code> or <code>"'a', 'b' and 'c'"</code>. 
1055     *
1056     * <p>There is no special handling of values containing quotes; see {@link #escapeCsv(String)} 
1057     *
1058     * @param elements The array of elements to join
1059     * @param isQuoted If true, each element is surrounded by single quotes
1060     * @param delimiter The delimiter to join each string with
1061     * @param lastDelimiter The delimiter to join the second-last and last elements
1062     *
1063     * @throws NullPointerException if elements or delimiter is null
1064     */
1065    public static String joinWithLast(String[] elements, boolean isQuoted, String delimiter, String lastDelimiter) {
1066    	   StringBuilder sb = new StringBuilder();
1067        if (elements == null) {
1068            throw new NullPointerException("null elements");
1069        }
1070        if (delimiter == null) {
1071            throw new NullPointerException("null delimiter");
1072        }
1073        if (lastDelimiter == null) {
1074            throw new NullPointerException("null lastDelimiter");
1075        }
1076        int len = elements.length;
1077        if (len == 0) {
1078            return "";
1079        }
1080 
1081        for (int i = 0; i < len - 1; i++) {
1082     	   if (isQuoted) { sb.append("'"); }
1083            sb.append(elements[i]);
1084            if (isQuoted) { sb.append("'"); }
1085            if (i == len - 2) { sb.append(lastDelimiter); } else { sb.append(delimiter); }
1086        }
1087        if (isQuoted) { sb.append("'"); }
1088        sb.append(elements[len - 1]);
1089        if (isQuoted) { sb.append("'"); }
1090        return sb.toString();
1091    }
1092 
1093    /** Return a string composed of a series of strings, separated with the specified delimiter
1094     *
1095     * <p>There is no special handling of values containing quotes; see {@link #escapeCsv(String)} 
1096     *
1097     * @param elements A Collection or Iterable containing the elements to join
1098     * @param isQuoted If true, each element is surrounded by single quotes
1099     * @param delimiter The delimiter to join each string with
1100     * @param lastDelimiter The delimiter to join the second-last and last elements
1101     *
1102     * @throws NullPointerException if elements or delimiter is null
1103     *
1104     * @see #join(String[], String)
1105     */
1106    public static String joinWithLast(Iterable<?> elements, boolean isQuoted, String delimiter, String lastDelimiter) {
1107    	StringBuilder sb = new StringBuilder();
1108        if (elements == null) {
1109            throw new NullPointerException("null elements");
1110        }
1111        if (delimiter == null) {
1112            throw new NullPointerException("null delimiter");
1113        }
1114        if (lastDelimiter == null) {
1115            throw new NullPointerException("null lastDelimiter");
1116        }
1117        Iterator<?> i = elements.iterator();
1118        if (!i.hasNext()) { return ""; } 
1119        
1120        Object thisEl = i.next();
1121        while (i.hasNext()) {
1122     	   Object nextEl = i.next();
1123     	   if (isQuoted) { sb.append("'"); }
1124            sb.append(thisEl);
1125            if (isQuoted) { sb.append("'"); }
1126            if (i.hasNext()) {
1127                sb.append(delimiter);
1128            } else {
1129         	   sb.append(lastDelimiter);
1130            }
1131            thisEl = nextEl;
1132        }
1133        if (isQuoted) { sb.append("'"); }
1134        sb.append(thisEl);
1135        if (isQuoted) { sb.append("'"); }
1136        
1137        return sb.toString();
1138    }
1139     
1140     
1141     
1142 
1143     /*
1144      * efficient search & replace ... stolen from Usenet:
1145      * http://groups.google.co.uk/groups?hl=en&lr=&selm=memo.19990629182431.344B%40none.crap
1146      */
1147 
1148     /**
1149      * An efficient search &amp; replace routine. Replaces all instances of
1150      * searchString within str with replaceString.
1151      *
1152      * @param originalString The string to search
1153      * @param searchString The string to search for
1154      * @param replaceString The string to replace it with
1155      *
1156      */
1157     public static String replaceString(String originalString, String searchString, String replaceString) {
1158         if (replaceString == null) {
1159             return originalString;
1160         }
1161 
1162         if (searchString == null) {
1163             return originalString;
1164         }
1165 
1166         if (originalString == null) {
1167             return null;
1168         }
1169 
1170         int loc = originalString.indexOf(searchString);
1171 
1172         if (loc == -1) {
1173             return originalString;
1174         }
1175 
1176         char[] src = originalString.toCharArray();
1177         int n = searchString.length();
1178         int m = originalString.length();
1179         StringBuilder buf = new StringBuilder(m + replaceString.length() - n);
1180         int start = 0;
1181 
1182         do {
1183             if (loc > start) {
1184                 buf.append(src, start, loc - start);
1185             }
1186 
1187             buf.append(replaceString);
1188             start = loc + n;
1189             loc = originalString.indexOf(searchString, start);
1190         } while (loc > 0);
1191 
1192         if (start < m) {
1193             buf.append(src, start, m - start);
1194         }
1195 
1196         return buf.toString();
1197     }
1198 
1199     /**
1200      * Reads a file, and returns its contents in a String
1201      *
1202      * @param filename The file to read
1203      *
1204      * @return The contents of the string,
1205      *
1206      * @throws IOException A problem occurred whilst attempting to read the string
1207      */
1208     public static String getFileContents(String filename)
1209         throws IOException {
1210         File file = new File(filename);
1211         FileInputStream fis = new FileInputStream(file);
1212         byte[] data = new byte[(int) file.length()];
1213         int len = fis.read(data);
1214         fis.close();
1215         if (len < file.length()) {
1216             /* this should never happen -- file has changed underneath us */
1217             throw new IOException("Buffer read != size of file");
1218         }
1219 
1220         return new String(data);
1221     }
1222 
1223     /**
1224      * Reads a file, and returns its contents in a String. Identical to calling
1225      * <code>getFileContents(projectFile.getCanonicalPath())</code>.
1226      *
1227      * @param file The file to read
1228      *
1229      * @return The contents of the string,
1230      * @throws IOException 
1231      *
1232      * @throws IOException A problem occurred whilst attempting to read the string
1233      */
1234 	public static String getFileContents(File file) throws IOException {
1235 		return getFileContents(file.getCanonicalPath());
1236 	}
1237     
1238     
1239     /**
1240      * Prefixes every lines supplied with a given indent. e.g.
1241      * <code>indent("\t", "abcd\nefgh")</code> would return "\tabcd\n\tefgh". If the
1242      * string ends in a newline, then the return value also ends with a newline.
1243      *
1244      * @param indentString   The characters to indent with. Usually spaces or tabs,
1245      *   but could be something like a timestamp.
1246      * @param originalString The string to indent.
1247      * @return The originalString, with every line (as separated by the newline
1248      *   character) prefixed with indentString.
1249      */
1250     static public String indent(String indentString, String originalString) {
1251         String allButLastChar;
1252         if (originalString == null || indentString == null) {
1253             throw new NullPointerException();
1254         }
1255         if (originalString.equals("")) {
1256             return indentString;
1257         }
1258         allButLastChar = originalString.substring(0, originalString.length() - 1);
1259         return indentString + replaceString(allButLastChar, "\n", "\n" + indentString) + originalString.substring(originalString.length() - 1);
1260     }
1261     
1262     /** Ensure that a string is padded with spaces so that it meets the 
1263      * required length. If the input string exceeds this length, this it 
1264      * is returned unchanged
1265      * 
1266      * @param inputString the string to pad
1267      * @param length the desired length
1268      * @param justification a JUSTIFICATION_* constant defining whether left or 
1269      *   right justification is required.
1270      * 
1271      * @return a padded string. 
1272      */
1273     static public String pad(String inputString, int length, int justification) {
1274     	// @TODO not terribly efficient, but who cares
1275     	switch (justification) {
1276     		case JUSTIFICATION_LEFT:
1277     			while (inputString.length() < length) { 
1278     				inputString = inputString + " ";
1279     			}
1280     			break;
1281 
1282 			case JUSTIFICATION_RIGHT:
1283 				while (inputString.length() < length) { 
1284 					inputString = " " + inputString;
1285 				}
1286 				break;
1287     		 	
1288 			case JUSTIFICATION_CENTER:
1289 				while (inputString.length() < length) { 
1290 					inputString = inputString + " ";
1291 					if (inputString.length() < length) {
1292 						inputString = " " + inputString;
1293 					}
1294 				}
1295 				break;
1296     	}
1297     	return inputString;
1298     }
1299 
1300     /** Given a period-separated list of components (e.g. variable references ("a.b.c") or classnames),
1301      *  returns the last component. For example,
1302      *  getLastComponent("com.randomnoun.common.util.Text") will return "Text".
1303      *
1304      *  <p>If component is null, this function returns null.
1305      *  <p>If component contains no periods, this function returns the original string.
1306      *
1307      *  @param string The string to retrieve the last component from
1308      */
1309     static public String getLastComponent(String string) {
1310         if (string == null) {
1311             return null;
1312         }
1313         if (string.indexOf('.') == -1) {
1314             return string;
1315         }
1316         return string.substring(string.lastIndexOf('.') + 1);
1317     }
1318 
1319     /** Escape this supplied string so it can represent a 'name' or 'value' component
1320      * on a HTTP queryString. This generally involves escaping special characters into %xx
1321      * form. Note that this only works for US-ASCII data.
1322      *
1323      */
1324     public static String escapeQueryString(String unescapedQueryString) {
1325         // default encoding
1326         byte[] data = encodeUrl(allowed_within_query, unescapedQueryString.getBytes());
1327 
1328         try {
1329             return new String(data, "US-ASCII");
1330         } catch (UnsupportedEncodingException e) {
1331             throw new RuntimeException("encodeQueryString() requires ASCII support");
1332         }
1333     }
1334 
1335     /**
1336      * Encodes an array of bytes into an array of URL safe 7-bit
1337      * characters. Unsafe characters are escaped.
1338      *
1339      * @param urlsafe bitset of characters deemed URL safe
1340      * @param bytes array of bytes to convert to URL safe characters
1341      * @return array of bytes containing URL safe characters
1342      */
1343     private static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) {
1344         if (bytes == null) {
1345             return null;
1346         }
1347 
1348         if (urlsafe == null) {
1349             throw new NullPointerException("null urlsafe");
1350         }
1351 
1352         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
1353 
1354         for (int i = 0; i < bytes.length; i++) {
1355             int b = bytes[i];
1356 
1357             if (b < 0) {
1358                 b = 256 + b;
1359             }
1360 
1361             if (urlsafe.get(b)) {
1362                 if (b == ' ') {
1363                     b = '+';
1364                 }
1365 
1366                 buffer.write(b);
1367             } else {
1368                 buffer.write('%');
1369 
1370                 char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
1371                 char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
1372 
1373                 buffer.write(hex1);
1374                 buffer.write(hex2);
1375             }
1376         }
1377 
1378         return buffer.toByteArray();
1379     }
1380 
1381     /**
1382      * Encodes a string into Base64 format.
1383      * No blanks or line breaks are inserted.
1384      * @param s  a String to be encoded.
1385      * @return   A String with the Base64 encoded data.
1386      */
1387     public static String encodeBase64(String s) {
1388         return new String(encodeBase64(s.getBytes()));
1389     }
1390 
1391     /**
1392      * Encodes a byte array into Base64 format.
1393      * No blanks or line breaks are inserted.
1394      * @param in  an array containing the data bytes to be encoded.
1395      * @return    A character array with the Base64 encoded data.
1396      */
1397     public static char[] encodeBase64(byte[] in) {
1398         int iLen = in.length;
1399         int oDataLen = (iLen * 4 + 2) / 3; // output length without padding
1400         int oLen = ((iLen + 2) / 3) * 4; // output length including padding
1401         char[] out = new char[oLen];
1402         int ip = 0;
1403         int op = 0;
1404 
1405         while (ip < iLen) {
1406             int i0 = in[ip++] & 0xff;
1407             int i1 = ip < iLen ? in[ip++] & 0xff : 0;
1408             int i2 = ip < iLen ? in[ip++] & 0xff : 0;
1409             int o0 = i0 >>> 2;
1410             int o1 = ((i0 & 3) << 4) | (i1 >>> 4);
1411             int o2 = ((i1 & 0xf) << 2) | (i2 >>> 6);
1412             int o3 = i2 & 0x3F;
1413             out[op++] = map1[o0];
1414             out[op++] = map1[o1];
1415             out[op] = op < oDataLen ? map1[o2] : '=';
1416             op++;
1417             out[op] = op < oDataLen ? map1[o3] : '=';
1418             op++;
1419         }
1420         return out;
1421     }
1422 
1423 	/** Used by {@link #parseData(String) to parse dates generated in Codec output.
1424 	 * (These dates are generated using the standard Java .toString() method, which
1425 	 * probably changes depending on the VM's locale, which I'm going to ignore for 
1426 	 * the time being).
1427 	 */
1428 	static class DateParser {
1429 		
1430 		/** Parse a date generated by Date.toString() into a Date object
1431 		 * 
1432 		 * @param dateString a string representation of a date
1433 		 * @return a Date representation of a date
1434 		 */
1435 		public static Date valueOf(String dateString) {
1436 			SimpleDateFormat sdf = new SimpleDateFormat("EEE MMM dd hh:mm:ss z yyyy");
1437 			try {
1438 				return sdf.parse(dateString);
1439 			} catch (ParseException pe) {
1440 				throw (IllegalArgumentException) new IllegalArgumentException("Invalid date '" + dateString + "'").initCause(pe);
1441 			}
1442 		}
1443 	}
1444 
    // ---------------------- Generous characters for each component validation
    // -- not much of this is used in this class, so I should shorten these definitions, 
    // but you never know, I might use it later, so it's here for the time being.
    // 
    // compiled from
    //  org.apache.commons.httpclient.util.URIUtil
    //  org.apache.commons.codec.net.URLCodec
    //  org.apache.commons.httpclient.util.EncodingUtil
    //  org.apache.commons.httpclient.URI
    //
    // trust me... just calling escapeQueryString() is *so* much easier.
    //
    // Each BitSet is indexed by character code; the sets are populated by the static 
    // initialiser that follows these declarations.
    private static final BitSet percent = new BitSet(256); // escape % as %25
    private static final BitSet digit = new BitSet(256); // 0-9
    private static final BitSet alpha = new BitSet(256); // lowalpha | upalpha
    private static final BitSet alphanum = new BitSet(256); // alpha | digit
    private static final BitSet hex = new BitSet(256); // digit | a-f | A-F
    private static final BitSet escaped = new BitSet(256); // "%" hex hex
    private static final BitSet mark = new BitSet(256); // -_.!~*'()

    // alphanum | mark (URI allowed, no purpose)
    private static final BitSet unreserved = new BitSet(256);

    private static final BitSet reserved = new BitSet(256); // ;/?:@&=+$,

    // reserved | unreserved | escaped
    private static final BitSet uric = new BitSet(256);

    private static final BitSet allowed_query = new BitSet(256); // uric - %

    // allowed_query - reserved; the set of bytes that escapeQueryString() leaves unescaped
    private static final BitSet allowed_within_query = new BitSet(256);

    /** Mapping table from 6-bit nibble to Base64 characters */
    private static char[] map1 = new char[64];
1475 
1476     // NB: www-form-encoding appears to be alpha | numeric | -_.* ( + space) 
1477     static {
1478         percent.set('%');
1479 
1480         for (int i = '0'; i <= '9'; i++) {
1481             digit.set(i);
1482         }
1483 
1484         for (int i = 'a'; i <= 'z'; i++) {
1485             alpha.set(i);
1486         }
1487 
1488         for (int i = 'A'; i <= 'Z'; i++) {
1489             alpha.set(i);
1490         }
1491 
1492         alphanum.or(alpha);
1493         alphanum.or(digit);
1494         hex.or(digit);
1495 
1496         for (int i = 'a'; i <= 'f'; i++) {
1497             hex.set(i);
1498         }
1499 
1500         for (int i = 'A'; i <= 'F'; i++) {
1501             hex.set(i);
1502         }
1503 
1504         escaped.or(percent);
1505         escaped.or(hex);
1506         mark.set('-');
1507         mark.set('_');
1508         mark.set('.');
1509         mark.set('!');
1510         mark.set('~');
1511         mark.set('*');
1512         mark.set('\'');
1513         mark.set('(');
1514         mark.set(')');
1515         reserved.set(';');
1516         reserved.set('/');
1517         reserved.set('?');
1518         reserved.set(':');
1519         reserved.set('@');
1520         reserved.set('&');
1521         reserved.set('=');
1522         reserved.set('+');
1523         reserved.set('$');
1524         reserved.set(',');
1525         unreserved.or(alphanum);
1526         unreserved.or(mark);
1527         uric.or(reserved);
1528         uric.or(unreserved);
1529         uric.or(escaped);
1530         allowed_query.or(uric);
1531         allowed_query.clear('%');
1532         allowed_within_query.or(allowed_query);
1533         allowed_within_query.andNot(reserved);
1534 
1535 
1536         // excluded 'reserved'                       
1537         // create map1 array
1538         int i = 0;
1539         for (char c = 'A'; c <= 'Z'; c++) {
1540             map1[i++] = c;
1541         }
1542         for (char c = 'a'; c <= 'z'; c++) {
1543             map1[i++] = c;
1544         }
1545         for (char c = '0'; c <= '9'; c++) {
1546             map1[i++] = c;
1547         }
1548         map1[i++] = '+';
1549         map1[i++] = '/';
1550         
1551     }
1552     
1553     
1554 
1555     /**
1556      * Returns a comparator that compares contained numbers based on their numeric values and compares other parts
1557      * using the current locale's order rules.
1558      * <p>For example in German locale this will be a comparator that handles umlauts correctly and ignores
1559      * upper/lower case differences.</p>
1560      *
1561      * @return <p>A string comparator that uses the current locale's order rules and handles embedded numbers
1562      *         correctly.</p>
1563      */
1564     public static Comparator<String> getNaturalComparator() {
1565         final Collator collator = Collator.getInstance();
1566         return new Comparator<String>() {
1567             public int compare(String o1, String o2) {
1568                 return compareNatural(collator, o1, o2);
1569             }
1570         };
1571     }    
1572 
1573     /**
1574      * <p>Compares two strings using the current locale's rules and comparing contained numbers based on their numeric
1575      * values.</p>
1576      * <p>This is probably the best default comparison to use.</p>
1577      * <p>If you know that the texts to be compared are in a certain language that differs from the default locale's
1578      * langage, then get a collator for the desired locale ({@link java.text.Collator#getInstance(java.util.Locale)})
1579      * and pass it to {@link #compareNatural(java.text.Collator, String, String)}</p>
1580      *
1581      * @param s first string
1582      * @param t second string
1583      * @return zero iff <code>s</code> and <code>t</code> are equal,
1584      *         a value less than zero iff <code>s</code> lexicographically precedes <code>t</code>
1585      *         and a value larger than zero iff <code>s</code> lexicographically follows <code>t</code>
1586      */
1587     public static int compareNatural(Collator collator, String s, String t) {
1588         return compareNatural(s, t, false, collator);
1589     }
1590 
1591 
1592     /** Natural compare operation: runs of digits are compared by numeric value, other text via the collator
1593      * (or character by character when no collator is given). Stolen from 
1594      * http://www.eekboom.com/java/compareNatural/src/com/eekboom/utils/Strings.java (source file is under BSD license). 
1595      * 
1596      * @param s             first string
1597      * @param t             second string
1598      * @param caseSensitive if false, characters differing in case only are treated as equal - will be ignored if a collator is given
1599      * @param collator      used to compare subwords that aren't numbers - if null, characters will be compared
1600      *                      individually based on their Unicode value
1601      * @return zero if <code>s</code> and <code>t</code> compare as equal (numbers differing only in leading zeros can compare as equal),
1602      *         a value less than zero iff <code>s</code> lexicographically precedes <code>t</code>
1603      *         and a value larger than zero iff <code>s</code> lexicographically follows <code>t</code>
1604      */
1605     private static int compareNatural(String s, String t, boolean caseSensitive, Collator collator) {
1606         int sIndex = 0;
1607         int tIndex = 0;
1608 
1609         int sLength = s.length();
1610         int tLength = t.length();
1611 
1612         while(true) {
1613             // both character indices are after a subword (or at zero)
1614 
1615             // Check if one string is at end
1616             if(sIndex == sLength && tIndex == tLength) {
1617                 return 0;
1618             }
1619             if(sIndex == sLength) {
1620                 return -1; // s is a proper prefix of t
1621             }
1622             if(tIndex == tLength) {
1623                 return 1; // t is a proper prefix of s
1624             }
1625 
1626             // Compare sub word
1627             char sChar = s.charAt(sIndex);
1628             char tChar = t.charAt(tIndex);
1629 
1630             boolean sCharIsDigit = Character.isDigit(sChar);
1631             boolean tCharIsDigit = Character.isDigit(tChar);
1632 
1633             if(sCharIsDigit && tCharIsDigit) {
1634                 // Compare numbers
1635 
1636                 // skip leading 0s
1637                 int sLeadingZeroCount = 0;
1638                 while(sChar == '0') {
1639                     ++sLeadingZeroCount;
1640                     ++sIndex;
1641                     if(sIndex == sLength) {
1642                         break;
1643                     }
1644                     sChar = s.charAt(sIndex);
1645                 }
1646                 int tLeadingZeroCount = 0;
1647                 while(tChar == '0') {
1648                     ++tLeadingZeroCount;
1649                     ++tIndex;
1650                     if(tIndex == tLength) {
1651                         break;
1652                     }
1653                     tChar = t.charAt(tIndex);
1654                 }
1655                 boolean sAllZero = sIndex == sLength || !Character.isDigit(sChar); // the number in s consisted of zeros only
1656                 boolean tAllZero = tIndex == tLength || !Character.isDigit(tChar); // the number in t consisted of zeros only
1657                 if(sAllZero && tAllZero) {
1658                     continue; // both numbers are zero: treated as equal, the zero counts are not compared
1659                 }
1660                 if(sAllZero && !tAllZero) {
1661                     return -1;
1662                 }
1663                 if(tAllZero) {
1664                     return 1;
1665                 }
1666 
1667                 int diff = 0; // difference of the first pair of digits that differ; only decisive if both numbers have the same length
1668                 do {
1669                     if(diff == 0) {
1670                         diff = sChar - tChar;
1671                     }
1672                     ++sIndex;
1673                     ++tIndex;
1674                     if(sIndex == sLength && tIndex == tLength) {
1675                         return diff != 0 ? diff : sLeadingZeroCount - tLeadingZeroCount; // same value at end of both strings: leading-zero count breaks the tie
1676                     }
1677                     if(sIndex == sLength) {
1678                         if(diff == 0) {
1679                             return -1;
1680                         }
1681                         return Character.isDigit(t.charAt(tIndex)) ? -1 : diff; // t has more digits, so its number is larger
1682                     }
1683                     if(tIndex == tLength) {
1684                         if(diff == 0) {
1685                             return 1;
1686                         }
1687                         return Character.isDigit(s.charAt(sIndex)) ? 1 : diff; // s has more digits, so its number is larger
1688                     }
1689                     sChar = s.charAt(sIndex);
1690                     tChar = t.charAt(tIndex);
1691                     sCharIsDigit = Character.isDigit(sChar);
1692                     tCharIsDigit = Character.isDigit(tChar);
1693                     if(!sCharIsDigit && !tCharIsDigit) {
1694                         // both number sub words have the same length
1695                         if(diff != 0) {
1696                             return diff;
1697                         }
1698                         break; // same numeric value: carry on with the next subword (leading-zero counts are not consulted here)
1699                     }
1700                     if(!sCharIsDigit) {
1701                         return -1; // number in s is shorter, hence smaller
1702                     }
1703                     if(!tCharIsDigit) {
1704                         return 1; // number in t is shorter, hence smaller
1705                     }
1706                 } while(true);
1707             }
1708             else {
1709                 // Compare words
1710                 if(collator != null) {
1711                     // To use the collator the whole subwords have to be compared - character-by-character comparision
1712                     // is not possible. So find the two subwords first
1713                     int aw = sIndex;
1714                     int bw = tIndex;
1715                     do {
1716                         ++sIndex; // always consumes the first character, even when it is a digit (only one side starts with a digit here)
1717                     } while(sIndex < sLength && !Character.isDigit(s.charAt(sIndex)));
1718                     do {
1719                         ++tIndex;
1720                     } while(tIndex < tLength && !Character.isDigit(t.charAt(tIndex)));
1721 
1722                     String as = s.substring(aw, sIndex);
1723                     String bs = t.substring(bw, tIndex);
1724                     int subwordResult = collator.compare(as, bs);
1725                     if(subwordResult != 0) {
1726                         return subwordResult;
1727                     }
1728                 }
1729                 else {
1730                     // No collator specified. All characters should be ascii only. Compare character-by-character.
1731                     do {
1732                         if(sChar != tChar) {
1733                             if(caseSensitive) {
1734                                 return sChar - tChar;
1735                             }
1736                             sChar = Character.toUpperCase(sChar); // case-insensitive: compare upper-cased, then lower-cased forms
1737                             tChar = Character.toUpperCase(tChar);
1738                             if(sChar != tChar) {
1739                                 sChar = Character.toLowerCase(sChar);
1740                                 tChar = Character.toLowerCase(tChar);
1741                                 if(sChar != tChar) {
1742                                     return sChar - tChar;
1743                                 }
1744                             }
1745                         }
1746                         ++sIndex;
1747                         ++tIndex;
1748                         if(sIndex == sLength && tIndex == tLength) {
1749                             return 0;
1750                         }
1751                         if(sIndex == sLength) {
1752                             return -1;
1753                         }
1754                         if(tIndex == tLength) {
1755                             return 1;
1756                         }
1757                         sChar = s.charAt(sIndex);
1758                         tChar = t.charAt(tIndex);
1759                         sCharIsDigit = Character.isDigit(sChar);
1760                         tCharIsDigit = Character.isDigit(tChar);
1761                     } while(!sCharIsDigit && !tCharIsDigit); // stop as soon as either side reaches a digit
1762                 }
1763             }
1764         }
1765     }
1766 
1767 
1768 	// taken from the W3C Jigsaw server sourcecode; class org.w3c.jigsaw.http.Request#unescape(String)
1769 	/**
1770 	 * Unescape a HTTP escaped string
1771 	 * @param s The string to be unescaped
1772 	 * @return the unescaped string.
1773 	 */
1774 	public static String unescapeQueryString (String s) {
1775 		StringBuilder sbuf = new StringBuilder() ;
1776 		int len  = s.length() ;
1777 		int ch = -1 ;
1778 		for (int i = 0 ; i < len ; i++) {
1779 			switch (ch = s.charAt(i)) {
1780 				case '%':
1781 					if (i < len - 2) {
1782 						// @TODO check to see how illegal escapes are treated
1783 						// e.g. "%nothex"
1784 						ch = s.charAt (++i) ;
1785 						int hb = (Character.isDigit ((char) ch) 
1786 							  ? ch - '0'
1787 							  : 10+Character.toLowerCase ((char) ch)-'a') & 0xF ;
1788 						ch = s.charAt (++i) ;
1789 						int lb = (Character.isDigit ((char) ch)
1790 							  ? ch - '0'
1791 							  : 10+Character.toLowerCase ((char) ch)-'a') & 0xF ;
1792 						sbuf.append ((char) ((hb << 4) | lb)) ;
1793 					} else {
1794 						sbuf.append ('%');  // hit EOL, just leave as is
1795 					}
1796 					break ;
1797 				case '+':
1798 					sbuf.append (' ') ;
1799 					break ;
1800 				default:
1801 					sbuf.append ((char) ch) ;
1802 			}
1803 		}
1804 		return sbuf.toString() ;
1805 	}
1806 	
1807 	/** Returns the largest common prefix between two other strings; e.g. 
1808 	 * getCommonPrefix("abcsomething", "abcsometharg") would be "abcsometh".
1809 	 * 
1810 	 * @param string1 String number one
1811 	 * @param string2 String number two
1812 	 * 
1813 	 * @return the large common prefix between the two strings
1814 	 * 
1815 	 * @throws NullPointerException is string1 or string2 is null
1816 	 */
1817 	public static String getCommonPrefix(String string1, String string2) {
1818 		if (string1==null) { throw new NullPointerException("null string1"); }
1819 		if (string2==null) { throw new NullPointerException("null string2"); }
1820 		int c = 0;
1821 		int maxLen = Math.min(string1.length(), string2.length());		
1822 		
1823 		while (c < maxLen && string1.charAt(c)==string2.charAt(c)) {
1824 			c++;
1825 		}
1826 		return string1.substring(0, c);
1827 	}
1828 
1829 	/** Uppercases the first character of a string.
1830      * 
1831      * @param text text to modify
1832      * 
1833      * @return the supplied text, with the first character converted to uppercase.
1834      */
1835     static public String toFirstUpper(String text) {
1836     	return Character.toUpperCase(text.charAt(0)) + text.substring(1); 
1837     }
1838 
1839 
1840 	/** Lowercases the first character of a string.
1841      * 
1842      * @param text text to modify
1843      * 
1844      * @return the supplied text, with the first character converted to lowercase.
1845      */
1846     static public String toFirstLower(String text) {
1847     	return Character.toLowerCase(text.charAt(0)) + text.substring(1); 
1848     }
1849 
1850 	
1851 
1852     
1853     /** Number of character edits between two strings; taken from  
1854 	 * http://www.merriampark.com/ldjava.htm. There's a version in commongs-lang,
1855 	 * apparently, but according to the comments on that page, it uses O(n^2) memory,
1856 	 * which can't be good.
1857 	 * 
1858 	 * @param s string 1
1859 	 * @param t string 2
1860 	 *  
1861 	 * @return the smallest number of edits required to convert s into t 
1862 	 */
1863 	public static int getLevenshteinDistance (String s, String t) {
1864 		  if (s == null || t == null) {
1865 		    throw new IllegalArgumentException("Strings must not be null");
1866 		  }
1867 				
1868 		  /*
1869 		    The difference between this impl. and the previous is that, rather 
1870 		     than creating and retaining a matrix of size s.length()+1 by t.length()+1, 
1871 		     we maintain two single-dimensional arrays of length s.length()+1.  The first, d,
1872 		     is the 'current working' distance array that maintains the newest distance cost
1873 		     counts as we iterate through the characters of String s.  Each time we increment
1874 		     the index of String t we are comparing, d is copied to p, the second int[].  Doing so
1875 		     allows us to retain the previous cost counts as required by the algorithm (taking 
1876 		     the minimum of the cost count to the left, up one, and diagonally up and to the left
1877 		     of the current cost count being calculated).  (Note that the arrays aren't really 
1878 		     copied anymore, just switched...this is clearly much better than cloning an array 
1879 		     or doing a System.arraycopy() each time  through the outer loop.)
1880 
1881 		     Effectively, the difference between the two implementations is this one does not 
1882 		     cause an out of memory condition when calculating the LD over two very large strings.  		
1883 		  */		
1884 				
1885 		  int n = s.length(); // length of s
1886 		  int m = t.length(); // length of t
1887 				
1888 		  if (n == 0) {
1889 		    return m;
1890 		  } else if (m == 0) {
1891 		    return n;
1892 		  }
1893 
1894 		  int p[] = new int[n+1]; //'previous' cost array, horizontally
1895 		  int d[] = new int[n+1]; // cost array, horizontally
1896 		  int _d[]; //placeholder to assist in swapping p and d
1897 
1898 		  // indexes into strings s and t
1899 		  int i; // iterates through s
1900 		  int j; // iterates through t
1901 
1902 		  char t_j; // jth character of t
1903 
1904 		  int cost; // cost
1905 
1906 		  for (i = 0; i<=n; i++) {
1907 		     p[i] = i;
1908 		  }
1909 				
1910 		  for (j = 1; j<=m; j++) {
1911 		     t_j = t.charAt(j-1);
1912 		     d[0] = j;
1913 				
1914 		     for (i=1; i<=n; i++) {
1915 		        cost = s.charAt(i-1)==t_j ? 0 : 1;
1916 		        // minimum of cell to the left+1, to the top+1, diagonally left and up +cost				
1917 		        d[i] = Math.min(Math.min(d[i-1]+1, p[i]+1),  p[i-1]+cost);  
1918 		     }
1919 
1920 		     // copy current distance counts to 'previous row' distance counts
1921 		     _d = p;
1922 		     p = d;
1923 		     d = _d;
1924 		  } 
1925 				
1926 		  // our last action in the above loop was to switch d and p, so p now 
1927 		  // actually has the most recent cost counts
1928 		  return p[n];
1929 	}
1930     
1931 	/** Return the md5 hash of a string
1932      * 
1933      * @param text text to hash
1934      * 
1935      * @return a hex-encoded version of the MD5 hash
1936      * 
1937      * @throws IllegalStateException if the java installation in use doesn't know 
1938      *   about MD5
1939      */
1940     static public String getMD5(String text) {
1941     	try{
1942     		MessageDigest algorithm = MessageDigest.getInstance("MD5");
1943     		algorithm.reset();
1944     		// algorithm.update(defaultBytes);
1945     		algorithm.update(text.getBytes());
1946     		byte messageDigest[] = algorithm.digest();
1947     	            
1948     		StringBuilder hexString = new StringBuilder();
1949     		for (int i=0;i<messageDigest.length;i++) {
1950     			hexString.append(Integer.toHexString(0xFF & messageDigest[i]));
1951     		}
1952     		return hexString.toString();
1953     	} catch (NoSuchAlgorithmException nsae) {
1954     		throw (IllegalStateException) new IllegalStateException("Unknown algorithm 'MD5'").initCause(nsae);
1955     	}
1956     }
1957     
1958     /** Returns a string composed of the supplied text, repeated 0 or more times 
1959      * 
1960      * @param text text to repeat
1961      * @param count number of repetitions
1962      * 
1963      * @return the repeated text
1964      */
1965     static public String repeat(String text, int count) {
1966     	StringBuffer sb = new StringBuffer();
1967     	for (int i=0; i<count; i++) {
1968     		sb.append(text);
1969     	}
1970     	return sb.toString();
1971     }
1972     
1973     
1974 	/** Perform ${xxxx}-style substitution of placeholders in text. Placeholders without 
1975 	 * values will be left as-is.
1976 	 * 
1977 	 * <p>For example, gives the set of variables:
1978 	 * <ul>
1979 	 * <li>abc = def
1980 	 * </ul>
1981 	 * 
1982 	 * <p>then the result of <code>substituteParameters("xxxx${abc}yyyy${def}zzzz")</code>
1983 	 * will be "xxxxdefyyyy${def}zzzz"
1984 	 * 
1985 	 * <p><code>$</code> followed by any other character will be left as-is. 
1986 	 * 
1987 	 * @param variables a set of variable names and values, used in the substitution 
1988 	 * @param text the text to be substituted.
1989 	 * 
1990 	 * @return text, with placeholders replaced with values in the variables parameter
1991 	 */
1992 	public static String substitutePlaceholders(Map<?, ?> variables, String text) {
1993 		// escaped version of (\$\{.*?\}|[^$]+|\$.)
1994 		Pattern p = Pattern.compile("(\\$\\{.*?\\}|[^$]+|\\$)"); // modified regex
1995 		Matcher m = p.matcher(text);
1996 		String result = "";
1997 		while (m.find()) {
1998 			String token = m.group(1);
1999 			if (token.startsWith("${") && token.endsWith("}")) {
2000 				Object value = variables.get(token.substring(2, token.length()-1));
2001 				if (value == null) {
2002 					result = result + token;
2003 				} else {
2004 					result = result + value.toString();
2005 				}
2006 			} else {
2007 				result = result + token;
2008 			}
2009 		}
2010 		return result;
2011 	}
2012 
2013 	
2014 }