/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2002-2003 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution, if
 *    any, must include the following acknowledgement:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgement may appear in the software itself,
 *    if and wherever such third-party acknowledgements normally appear.
 *
 * 4. The names "The Jakarta Project", "Commons", and "Apache Software
 *    Foundation" must not be used to endorse or promote products derived
 *    from this software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache"
 *    nor may "Apache" appear in their names without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
package org.apache.commons.lang;

import java.io.IOException;
import java.io.Writer;

import org.apache.commons.lang.exception.NestableRuntimeException;

/**
 * <p>Escapes and unescapes <code>String</code>s for
 * Java, Java Script, HTML, XML, and SQL.</p>
 *
 * @author Apache Jakarta Turbine
 * @author GenerationJavaCore library
 * @author Purple Technology
 * @author Henri Yandell
 * @author Alexander Day Chaffee
 * @author Antony Riley
 * @author Helge Tesgaard
 * @author Sean Brown
 * @author Gary Gregory
 * @author Phil Steitz
 * @author Pete Gieser
 * @since 2.0
 * @version $Id: StringEscapeUtils.java,v 1.1 2012/08/30 16:24:42 marcin Exp $
 */
public class StringEscapeUtils {

    /**
     * <p><code>StringEscapeUtils</code> instances should NOT be constructed in
     * standard programming.</p>
     *
     * <p>Instead, the class should be used as:
     * <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public StringEscapeUtils() {
    }

    // Java and JavaScript
    //--------------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a <code>String</code> using Java String rules.</p>
     *
     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)</p>
     *
     * <p>So a tab becomes the characters <code>'\\'</code> and
     * <code>'t'</code>.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote must be escaped.</p>
     *
     * <p>Example:
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     * </p>
     *
     * @param str  String to escape values in, may be null
     * @return String with escaped values, <code>null</code> if null string input
     */
    public static String escapeJava(String str) {
        return escapeJavaStyleString(str, false);
    }

    /**
     * <p>Escapes the characters in a <code>String</code> using Java String rules to
     * a <code>Writer</code>.</p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * @see #escapeJava(java.lang.String)
     * @param out  Writer to write escaped string into
     * @param str  String to escape values in, may be null
     * @throws IllegalArgumentException if the Writer is <code>null</code>
     * @throws IOException if error occurs on underlying Writer
     */
    public static void escapeJava(Writer out, String str) throws IOException {
        escapeJavaStyleString(out, str, false);
    }

    /**
     * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
     *
     * <p>Escapes any values it finds into their JavaScript String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)</p>
     *
     * <p>So a tab becomes the characters <code>'\\'</code> and
     * <code>'t'</code>.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote must be escaped.</p>
     *
     * <p>Example:
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     * </p>
     *
     * @param str  String to escape values in, may be null
     * @return String with escaped values, <code>null</code> if null string input
     */
    public static String escapeJavaScript(String str) {
        return escapeJavaStyleString(str, true);
    }

    /**
     * <p>Escapes the characters in a <code>String</code> using JavaScript String rules
     * to a <code>Writer</code>.</p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * @see #escapeJavaScript(java.lang.String)
     * @param out  Writer to write escaped string into
     * @param str  String to escape values in, may be null
     * @throws IllegalArgumentException if the Writer is <code>null</code>
     * @throws IOException if error occurs on underlying Writer
     */
    public static void escapeJavaScript(Writer out, String str) throws IOException {
        escapeJavaStyleString(out, str, true);
    }

    /**
     * <p>Escapes a <code>String</code> into a new <code>String</code> using the
     * Java/JavaScript rules implemented by
     * {@link #escapeJavaStyleString(Writer, String, boolean)}.</p>
     *
     * @param str  String to escape values in, may be null
     * @param escapeSingleQuotes  whether a single quote is preceded by a backslash
     * @return the escaped String, <code>null</code> only if the input was <code>null</code>
     * @throws NestableRuntimeException if the in-memory writer unexpectedly
     *  reports an I/O failure
     */
    private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes) {
        if (str == null) {
            return null;
        }
        try {
            // Twice the input length leaves room for a moderate number of escapes.
            StringPrintWriter writer = new StringPrintWriter(str.length() * 2);
            escapeJavaStyleString(writer, str, escapeSingleQuotes);
            return writer.getString();
        } catch (IOException ioe) {
            // Not expected when writing to an in-memory writer. Fail loudly
            // rather than returning null, which the contract reserves for
            // null input.
            throw new NestableRuntimeException(
                "Unexpected I/O failure while escaping to an in-memory writer", ioe);
        }
    }

    /**
     * <p>Writes the escaped form of a <code>String</code> to a <code>Writer</code>.</p>
     *
     * <p>Characters above 0x7f and control characters without a short escape
     * are written as a backslash, a <code>u</code> and four upper case hex
     * digits. Backspace, newline, tab, form feed and carriage return use
     * their short escapes. Double quote and backslash are always preceded by
     * a backslash; single quote only when <code>escapeSingleQuote</code> is
     * set. Everything else is copied unchanged.</p>
     *
     * @param out  Writer to write escaped string into
     * @param str  String to escape values in, may be null (no effect)
     * @param escapeSingleQuote  whether a single quote is preceded by a backslash
     * @throws IllegalArgumentException if the Writer is <code>null</code>
     * @throws IOException if error occurs on underlying Writer
     */
    private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote)
            throws IOException {
        if (out == null) {
            throw new IllegalArgumentException("The Writer must not be null");
        }
        if (str == null) {
            return;
        }
        final int sz = str.length();
        for (int i = 0; i < sz; i++) {
            char ch = str.charAt(i);

            if (ch > 0x7f) {
                // outside 7-bit ASCII: always a unicode escape
                writeUnicodeEscape(out, ch);
                continue;
            }
            switch (ch) {
                case '\b':
                    out.write('\\');
                    out.write('b');
                    break;
                case '\n':
                    out.write('\\');
                    out.write('n');
                    break;
                case '\t':
                    out.write('\\');
                    out.write('t');
                    break;
                case '\f':
                    out.write('\\');
                    out.write('f');
                    break;
                case '\r':
                    out.write('\\');
                    out.write('r');
                    break;
                case '\'':
                    if (escapeSingleQuote) {
                        out.write('\\');
                    }
                    out.write('\'');
                    break;
                case '"':
                    out.write('\\');
                    out.write('"');
                    break;
                case '\\':
                    out.write('\\');
                    out.write('\\');
                    break;
                default:
                    if (ch < 32) {
                        // remaining control characters have no short form
                        writeUnicodeEscape(out, ch);
                    } else {
                        out.write(ch);
                    }
                    break;
            }
        }
    }

    /**
     * <p>Writes a character as a backslash, a <code>u</code> and exactly four
     * upper case hexadecimal digits, left-padded with zeros.</p>
     *
     * @param out  Writer to write to, not null
     * @param ch  the character to encode
     * @throws IOException if error occurs on underlying Writer
     */
    private static void writeUnicodeEscape(Writer out, char ch) throws IOException {
        String digits = hex(ch);
        StringBuffer escape = new StringBuffer(6);
        escape.append("\\u");
        for (int pad = digits.length(); pad < 4; pad++) {
            escape.append('0');
        }
        escape.append(digits);
        out.write(escape.toString());
    }

    /**
     * <p>Returns an upper case hexadecimal <code>String</code> for the given
     * character.</p>
     *
     * @param ch The character to convert.
     * @return An upper case hexadecimal <code>String</code>
     */
    private static String hex(char ch) {
        return Integer.toHexString(ch).toUpperCase();
    }

    /**
     * <p>Unescapes any Java literals found in the <code>String</code>.
     * For example, it will turn a sequence of <code>'\'</code> and
     * <code>'n'</code> into a newline character, unless the <code>'\'</code>
     * is preceded by another <code>'\'</code>.</p>
     *
     * @param str  the <code>String</code> to unescape, may be null
     * @return a new unescaped <code>String</code>, <code>null</code> if null string input
     * @throws NestableRuntimeException if a unicode escape does not consist of
     *  four hexadecimal digits, or if the in-memory writer unexpectedly
     *  reports an I/O failure
     */
    public static String unescapeJava(String str) {
        if (str == null) {
            return null;
        }
        try {
            StringPrintWriter writer = new StringPrintWriter(str.length());
            unescapeJava(writer, str);
            return writer.getString();
        } catch (IOException ioe) {
            // Not expected when writing to an in-memory writer. Fail loudly
            // rather than returning null, which the contract reserves for
            // null input.
            throw new NestableRuntimeException(
                "Unexpected I/O failure while unescaping to an in-memory writer", ioe);
        }
    }

    /**
     * <p>Unescapes any Java literals found in the <code>String</code> to a
     * <code>Writer</code>.</p>
     *
     * <p>For example, it will turn a sequence of <code>'\'</code> and
     * <code>'n'</code> into a newline character, unless the <code>'\'</code>
     * is preceded by another <code>'\'</code>.</p>
     *
     * <p>A backslash followed by a character that has no special meaning is
     * replaced by that character. A lone backslash at the end of the input,
     * and a unicode escape that is cut short by the end of the input, are
     * written out unchanged.</p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * @param out  the <code>Writer</code> used to output unescaped characters
     * @param str  the <code>String</code> to unescape, may be null
     * @throws IllegalArgumentException if the Writer is <code>null</code>
     * @throws NestableRuntimeException if a unicode escape does not consist of
     *  four hexadecimal digits
     * @throws IOException if error occurs on underlying Writer
     */
    public static void unescapeJava(Writer out, String str) throws IOException {
        if (out == null) {
            throw new IllegalArgumentException("The Writer must not be null");
        }
        if (str == null) {
            return;
        }
        final int sz = str.length();
        StringBuffer unicode = new StringBuffer(4);
        boolean hadSlash = false;
        boolean inUnicode = false;
        for (int i = 0; i < sz; i++) {
            char ch = str.charAt(i);
            if (inUnicode) {
                // collecting the four hex digits of a unicode escape
                unicode.append(ch);
                if (unicode.length() == 4) {
                    // unicode now contains the four hex digits
                    // which represent our unicode character
                    try {
                        out.write(parseUnicodeDigits(unicode.toString()));
                    } catch (NumberFormatException nfe) {
                        throw new NestableRuntimeException(
                            "Unable to parse unicode value: " + unicode, nfe);
                    }
                    unicode.setLength(0);
                    inUnicode = false;
                    hadSlash = false;
                }
                continue;
            }
            if (hadSlash) {
                // handle an escaped value
                hadSlash = false;
                switch (ch) {
                    case '\\':
                        out.write('\\');
                        break;
                    case '\'':
                        out.write('\'');
                        break;
                    case '\"':
                        out.write('"');
                        break;
                    case 'r':
                        out.write('\r');
                        break;
                    case 'f':
                        out.write('\f');
                        break;
                    case 't':
                        out.write('\t');
                        break;
                    case 'n':
                        out.write('\n');
                        break;
                    case 'b':
                        out.write('\b');
                        break;
                    case 'u':
                        // start of a unicode escape; the digits follow
                        inUnicode = true;
                        break;
                    default:
                        out.write(ch);
                        break;
                }
                continue;
            } else if (ch == '\\') {
                hadSlash = true;
                continue;
            }
            out.write(ch);
        }
        if (inUnicode) {
            // The input ended inside a unicode escape. Emit what was read
            // verbatim instead of silently dropping it, mirroring the
            // treatment of a trailing backslash below.
            out.write("\\u");
            out.write(unicode.toString());
        } else if (hadSlash) {
            // then we're in the weird case of a \ at the end of the
            // string, let's output it anyway.
            out.write('\\');
        }
    }

    /**
     * <p>Converts the four characters of a unicode escape into the character
     * they denote.</p>
     *
     * <p>Every character is checked to be a hexadecimal digit first, because
     * <code>Integer.parseInt</code> on its own would also accept a leading
     * sign and yield a value that is not a valid character code.</p>
     *
     * @param digits  the characters that followed the backslash and <code>u</code>
     * @return the decoded character
     * @throws NumberFormatException if any character is not a hexadecimal digit
     */
    private static char parseUnicodeDigits(String digits) {
        for (int i = 0; i < digits.length(); i++) {
            if (Character.digit(digits.charAt(i), 16) < 0) {
                throw new NumberFormatException("For input string: \"" + digits + "\"");
            }
        }
        return (char) Integer.parseInt(digits, 16);
    }

    /**
     * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
     *
     * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
     * into a newline character, unless the <code>'\'</code> is preceded by another
     * <code>'\'</code>.</p>
     *
     * @see #unescapeJava(String)
     * @param str  the <code>String</code> to unescape, may be null
     * @return A new unescaped <code>String</code>, <code>null</code> if null string input
     */
    public static String unescapeJavaScript(String str) {
        return unescapeJava(str);
    }

    /**
     * <p>Unescapes any JavaScript literals found in the <code>String</code> to a
     * <code>Writer</code>.</p>
     *
     * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
     * into a newline character, unless the <code>'\'</code> is preceded by another
     * <code>'\'</code>.</p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * @see #unescapeJava(Writer,String)
     * @param out  the <code>Writer</code> used to output unescaped characters
     * @param str  the <code>String</code> to unescape, may be null
     * @throws IllegalArgumentException if the Writer is <code>null</code>
     * @throws IOException if error occurs on underlying Writer
     */
    public static void unescapeJavaScript(Writer out, String str) throws IOException {
        unescapeJava(out, str);
    }

    // HTML and XML
    //--------------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
     *
     * <p>
     * For example: <code>"bread" &amp; "butter"</code> =&gt;
     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.</p>
     *
     * @param str  the <code>String</code> to escape, may be null
     * @return a new escaped <code>String</code>, <code>null</code> if null string input
     *
     * @see #unescapeHtml(String)
     * @see "ISO Entities"
     * @see "HTML 3.2 Character Entities for ISO Latin-1"
     * @see "HTML 4.0 Character entity references"
     * @see "HTML 4.01 Character References"
     * @see "HTML 4.01 Code positions"
     */
    public static String escapeHtml(String str) {
        if (str == null) {
            return null;
        }
        //todo: add a version that takes a Writer
        //todo: rewrite underlying method to use a Writer instead of a StringBuffer
        return Entities.HTML40.escape(str);
    }

    /**
     * <p>Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports HTML 4.0 entities.</p>
     *
     * <p>For example, the string <code>"&amp;lt;Fran&amp;ccedil;ais&amp;gt;"</code>
     * will become <code>"&lt;Fran&ccedil;ais&gt;"</code></p>
     *
     * <p>If an entity is unrecognized, it is left alone, and inserted
     * verbatim into the result string. e.g. <code>"&amp;gt;&amp;zzzz;x"</code> will
     * become <code>"&gt;&amp;zzzz;x"</code>.</p>
     *
     * @param str  the <code>String</code> to unescape, may be null
     * @return a new unescaped <code>String</code>, <code>null</code> if null string input
     * @see #escapeHtml(String)
     */
    public static String unescapeHtml(String str) {
        if (str == null) {
            return null;
        }
        return Entities.HTML40.unescape(str);
    }

    /**
     * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
     *
     * <p>For example: <code>"bread" &amp; "butter"</code> =&gt;
     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
     * </p>
     *
     * <p>Supports only the four basic XML entities (gt, lt, quot, amp).
     * Does not support DTDs or external entities.</p>
     *
     * @param str  the <code>String</code> to escape, may be null
     * @return a new escaped <code>String</code>, <code>null</code> if null string input
     * @see #unescapeXml(java.lang.String)
     */
    public static String escapeXml(String str) {
        if (str == null) {
            return null;
        }
        return Entities.XML.escape(str);
    }

    /**
     * <p>Unescapes a string containing XML entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes.</p>
     *
     * <p>Supports only the four basic XML entities (gt, lt, quot, amp).
     * Does not support DTDs or external entities.</p>
     *
     * @param str  the <code>String</code> to unescape, may be null
     * @return a new unescaped <code>String</code>, <code>null</code> if null string input
     * @see #escapeXml(String)
     */
    public static String unescapeXml(String str) {
        if (str == null) {
            return null;
        }
        return Entities.XML.unescape(str);
    }

    /**
     * <p>Escapes the characters in a <code>String</code> to be suitable to pass to
     * an SQL query.</p>
     *
     * <p>For example,
     * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
     *   StringEscapeUtils.escapeSql("McHale's Navy") +
     *   "'");</pre>
     * </p>
     *
     * <p>At present, this method only turns single-quotes into doubled single-quotes
     * (<code>"McHale's Navy"</code> =&gt; <code>"McHale''s Navy"</code>). It does not
     * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
     *
     * <p>Because only the single quote is handled, this method should not be
     * relied on as a defence against SQL injection; bind untrusted values
     * through <code>java.sql.PreparedStatement</code> parameters instead.</p>
     *
     * see http://www.jguru.com/faq/view.jsp?EID=8881
     * @param str  the string to escape, may be null
     * @return a new String, escaped for SQL, <code>null</code> if null string input
     */
    public static String escapeSql(String str) {
        if (str == null) {
            return null;
        }
        return StringUtils.replace(str, "'", "''");
    }

}