/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.lang; import java.io.IOException; import java.io.StringWriter; import java.io.Writer; import java.util.Locale; import org.apache.commons.lang.exception.NestableRuntimeException; import org.apache.commons.lang.text.StrBuilder; /** *
Escapes and unescapes String
s for
* Java, Java Script, HTML, XML, and SQL.
#ThreadSafe#
* @author Apache Software Foundation * @author Apache Jakarta Turbine * @author Purple Technology * @author Alexander Day Chaffee * @author Antony Riley * @author Helge Tesgaard * @author Sean Brown * @author Gary Gregory * @author Phil Steitz * @author Pete Gieser * @since 2.0 * @version $Id: StringEscapeUtils.java,v 1.2 2013/03/15 09:52:49 andreyb Exp $ */ public class StringEscapeUtils { private static final char CSV_DELIMITER = ','; private static final char CSV_QUOTE = '"'; private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; /** *StringEscapeUtils
instances should NOT be constructed in
* standard programming.
Instead, the class should be used as: *
StringEscapeUtils.escapeJava("foo");* *
This constructor is public to permit tools that require a JavaBean * instance to operate.
*/ public StringEscapeUtils() { super(); } // Java and JavaScript //-------------------------------------------------------------------------- /** *Escapes the characters in a String
using Java String rules.
Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
* *So a tab becomes the characters '\\'
and
* 't'
.
The only difference between Java strings and JavaScript strings * is that in JavaScript, a single quote must be escaped.
* *Example: *
* input string: He didn't say, "Stop!" * output string: He didn't say, \"Stop!\" ** * * @param str String to escape values in, may be null * @return String with escaped values,
null
if null string input
*/
public static String escapeJava(String str) {
    // Java flavour: neither single quotes nor forward slashes get a backslash.
    final boolean escapeSingleQuotes = false;
    final boolean escapeForwardSlash = false;
    return escapeJavaStyleString(str, escapeSingleQuotes, escapeForwardSlash);
}
/**
 * <p>Escapes the characters in a <code>String</code> using Java String rules
 * and writes the result to a <code>Writer</code>.</p>
 *
 * <p>A <code>null</code> string input has no effect.</p>
 *
 * @see #escapeJava(java.lang.String)
 * @param out  Writer to write the escaped string into
 * @param str  String to escape values in, may be null
 * @throws IllegalArgumentException if the Writer is <code>null</code>
 * @throws IOException if error occurs on underlying Writer
 */
public static void escapeJava(Writer out, String str) throws IOException {
    // Java flavour: neither single quotes nor forward slashes get a backslash.
    final boolean escapeSingleQuote = false;
    final boolean escapeForwardSlash = false;
    escapeJavaStyleString(out, str, escapeSingleQuote, escapeForwardSlash);
}
/**
 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
 *
 * <p>Escapes any values it finds into their JavaScript String form.
 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)</p>
 *
 * <p>So a tab becomes the characters <code>'\\'</code> and
 * <code>'t'</code>.</p>
 *
 * <p>Compared with {@link #escapeJava(String)}, this method additionally
 * escapes single quotes and forward slashes.</p>
 *
 * <p>Example:
 * <pre>
 * input string: He didn't say, "Stop!"
 * output string: He didn\'t say, \"Stop!\"
 * </pre>
 * </p>
 *
 * @param str  String to escape values in, may be null
 * @return String with escaped values, <code>null</code> if null string input
 */
public static String escapeJavaScript(String str) {
    // JavaScript flavour: single quotes and forward slashes are both escaped.
    final boolean escapeSingleQuotes = true;
    final boolean escapeForwardSlash = true;
    return escapeJavaStyleString(str, escapeSingleQuotes, escapeForwardSlash);
}
/**
 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules
 * and writes the result to a <code>Writer</code>.</p>
 *
 * <p>A <code>null</code> string input has no effect.</p>
 *
 * @see #escapeJavaScript(java.lang.String)
 * @param out  Writer to write the escaped string into
 * @param str  String to escape values in, may be null
 * @throws IllegalArgumentException if the Writer is <code>null</code>
 * @throws IOException if error occurs on underlying Writer
 */
public static void escapeJavaScript(Writer out, String str) throws IOException {
    // JavaScript flavour: single quotes and forward slashes are both escaped.
    final boolean escapeSingleQuote = true;
    final boolean escapeForwardSlash = true;
    escapeJavaStyleString(out, str, escapeSingleQuote, escapeForwardSlash);
}
/**
 * <p>String-returning worker shared by {@link #escapeJava(String)} and
 * {@link #escapeJavaScript(String)}.</p>
 *
 * <p>Escapes into an in-memory <code>StringWriter</code> and returns its contents.</p>
 *
 * @param str  String to escape values in, may be null
 * @param escapeSingleQuotes  escapes single quotes if <code>true</code>
 * @param escapeForwardSlash  escapes forward slashes if <code>true</code>
 * @return the escaped string, <code>null</code> if null string input
 */
private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes, boolean escapeForwardSlash) {
    if (str != null) {
        // Sized for the case where every character gains a leading backslash.
        final StringWriter escaped = new StringWriter(str.length() * 2);
        try {
            escapeJavaStyleString(escaped, str, escapeSingleQuotes, escapeForwardSlash);
        } catch (IOException ioe) {
            // this should never ever happen while writing to a StringWriter
            throw new UnhandledException(ioe);
        }
        return escaped.toString();
    }
    return null;
}
/**
 * <p>Writer-based worker shared by the <code>escapeJava</code> and
 * <code>escapeJavaScript</code> methods.</p>
 *
 * <p>Each character of <code>str</code> is handled as follows:</p>
 * <ul>
 *  <li>above <code>0x7f</code>: written as <code>\\u</code> followed by four
 *      upper case hex digits</li>
 *  <li>below 32: backspace, newline, tab, form feed and carriage return use
 *      their short forms (<code>\\b \\n \\t \\f \\r</code>); any other control
 *      character is written as <code>\\u00XX</code></li>
 *  <li>double quote and backslash: always preceded by a backslash</li>
 *  <li>single quote: preceded by a backslash only if
 *      <code>escapeSingleQuote</code> is set</li>
 *  <li>forward slash: preceded by a backslash only if
 *      <code>escapeForwardSlash</code> is set</li>
 *  <li>anything else: written unchanged</li>
 * </ul>
 *
 * @param out  writer to receive the escaped string
 * @param str  String to escape values in, may be null (nothing is written)
 * @param escapeSingleQuote  escapes single quotes if <code>true</code>
 * @param escapeForwardSlash  escapes forward slashes if <code>true</code>
 * @throws IllegalArgumentException if the Writer is <code>null</code>
 * @throws IOException if an IOException occurs
 */
private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote,
        boolean escapeForwardSlash) throws IOException {
    if (out == null) {
        throw new IllegalArgumentException("The Writer must not be null");
    }
    if (str == null) {
        return;
    }
    final int length = str.length();
    for (int pos = 0; pos < length; pos++) {
        final char c = str.charAt(pos);

        if (c > 0x7f) {
            // non-ASCII: left-pad the hex value to four digits
            if (c > 0xfff) {
                out.write("\\u" + hex(c));
            } else if (c > 0xff) {
                out.write("\\u0" + hex(c));
            } else {
                out.write("\\u00" + hex(c));
            }
            continue;
        }

        if (c < 32) {
            // control character: short form when one exists, else a unicode escape
            char shortForm = 0;
            switch (c) {
                case '\b' : shortForm = 'b'; break;
                case '\n' : shortForm = 'n'; break;
                case '\t' : shortForm = 't'; break;
                case '\f' : shortForm = 'f'; break;
                case '\r' : shortForm = 'r'; break;
                default : break;
            }
            if (shortForm != 0) {
                out.write('\\');
                out.write(shortForm);
            } else {
                out.write((c > 0xf ? "\\u00" : "\\u000") + hex(c));
            }
            continue;
        }

        // printable ASCII: decide whether a leading backslash is required
        final boolean needsBackslash = c == '"'
                || c == '\\'
                || (c == '\'' && escapeSingleQuote)
                || (c == '/' && escapeForwardSlash);
        if (needsBackslash) {
            out.write('\\');
        }
        out.write(c);
    }
}
/**
 * <p>Returns an upper case hexadecimal <code>String</code> for the given
 * character.</p>
 *
 * <p>The result is not zero-padded.</p>
 *
 * @param ch The character to convert.
 * @return An upper case hexadecimal <code>String</code>
 */
private static String hex(char ch) {
    final String lowerCaseHex = Integer.toHexString(ch);
    // Fixed locale keeps the upper-casing independent of the default locale.
    return lowerCaseHex.toUpperCase(Locale.ENGLISH);
}
/**
 * <p>Unescapes any Java literals found in the <code>String</code>.</p>
 *
 * <p>For example, it will turn a sequence of <code>'\'</code> and
 * <code>'n'</code> into a newline character, unless the <code>'\'</code>
 * is preceded by another <code>'\'</code>.</p>
 *
 * @param str  the <code>String</code> to unescape, may be null
 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
 */
public static String unescapeJava(String str) {
    if (str != null) {
        final StringWriter unescaped = new StringWriter(str.length());
        try {
            unescapeJava(unescaped, str);
        } catch (IOException ioe) {
            // this should never ever happen while writing to a StringWriter
            throw new UnhandledException(ioe);
        }
        return unescaped.toString();
    }
    return null;
}
/**
 * <p>Unescapes any Java literals found in the <code>String</code> to a
 * <code>Writer</code>.</p>
 *
 * <p>For example, it will turn a sequence of <code>'\'</code> and
 * <code>'n'</code> into a newline character, unless the <code>'\'</code>
 * is preceded by another <code>'\'</code>.</p>
 *
 * <p>Recognised sequences are <code>\\ \' \" \r \f \t \n \b</code> and
 * <code>\\uXXXX</code> (exactly four hex digits). A backslash followed by any
 * other character yields just that character. A lone backslash at the end of
 * the input is written as is, and a <code>\\u</code> escape that is cut short
 * by the end of the input is likewise written verbatim instead of being
 * dropped.</p>
 *
 * <p>A <code>null</code> string input has no effect.</p>
 *
 * @param out  the <code>Writer</code> used to output unescaped characters
 * @param str  the <code>String</code> to unescape, may be null
 * @throws IllegalArgumentException if the Writer is <code>null</code>
 * @throws NestableRuntimeException if the four characters following
 *  <code>\\u</code> cannot be parsed as a hexadecimal number
 * @throws IOException if error occurs on underlying Writer
 */
public static void unescapeJava(Writer out, String str) throws IOException {
    if (out == null) {
        throw new IllegalArgumentException("The Writer must not be null");
    }
    if (str == null) {
        return;
    }
    int sz = str.length();
    StrBuilder unicode = new StrBuilder(4);
    boolean hadSlash = false;
    boolean inUnicode = false;
    for (int i = 0; i < sz; i++) {
        char ch = str.charAt(i);
        if (inUnicode) {
            // collecting the four hex digits that follow "\\u"
            unicode.append(ch);
            if (unicode.length() == 4) {
                // unicode now contains the four hex digits
                // which represents our unicode character
                try {
                    int value = Integer.parseInt(unicode.toString(), 16);
                    out.write((char) value);
                    unicode.setLength(0);
                    inUnicode = false;
                    hadSlash = false;
                } catch (NumberFormatException nfe) {
                    throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe);
                }
            }
            continue;
        }
        if (hadSlash) {
            // handle an escaped value
            hadSlash = false;
            switch (ch) {
                case '\\':
                    out.write('\\');
                    break;
                case '\'':
                    out.write('\'');
                    break;
                case '\"':
                    out.write('"');
                    break;
                case 'r':
                    out.write('\r');
                    break;
                case 'f':
                    out.write('\f');
                    break;
                case 't':
                    out.write('\t');
                    break;
                case 'n':
                    out.write('\n');
                    break;
                case 'b':
                    out.write('\b');
                    break;
                case 'u':
                    // start of a unicode escape; digits are gathered above
                    inUnicode = true;
                    break;
                default :
                    // unknown escape: keep the character, drop the backslash
                    out.write(ch);
                    break;
            }
            continue;
        } else if (ch == '\\') {
            hadSlash = true;
            continue;
        }
        out.write(ch);
    }
    if (inUnicode) {
        // Input ended inside "\\uXXXX". Previously the partial escape vanished
        // from the output; emit it verbatim so no input is silently lost,
        // mirroring the trailing-backslash case below.
        out.write("\\u");
        out.write(unicode.toString());
    } else if (hadSlash) {
        // then we're in the weird case of a \ at the end of the
        // string, let's output it anyway.
        out.write('\\');
    }
}
/**
 * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
 *
 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
 * into a newline character, unless the <code>'\'</code> is preceded by another
 * <code>'\'</code>.</p>
 *
 * <p>This simply delegates to {@link #unescapeJava(String)}.</p>
 *
 * @see #unescapeJava(String)
 * @param str  the <code>String</code> to unescape, may be null
 * @return A new unescaped <code>String</code>, <code>null</code> if null string input
 */
public static String unescapeJavaScript(String str) {
    // JavaScript and Java literals are unescaped by the same routine.
    final String unescaped = unescapeJava(str);
    return unescaped;
}
/**
 * <p>Unescapes any JavaScript literals found in the <code>String</code> to a
 * <code>Writer</code>.</p>
 *
 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
 * into a newline character, unless the <code>'\'</code> is preceded by another
 * <code>'\'</code>.</p>
 *
 * <p>A <code>null</code> string input has no effect. This simply delegates to
 * {@link #unescapeJava(Writer,String)}.</p>
 *
 * @see #unescapeJava(Writer,String)
 * @param out  the <code>Writer</code> used to output unescaped characters
 * @param str  the <code>String</code> to unescape, may be null
 * @throws IllegalArgumentException if the Writer is <code>null</code>
 * @throws IOException if error occurs on underlying Writer
 */
public static void unescapeJavaScript(Writer out, String str) throws IOException {
    // JavaScript and Java literals are unescaped by the same routine.
    unescapeJava(out, str);
}
// HTML and XML
//--------------------------------------------------------------------------
/**
 * <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
 *
 * <p>
 * For example:
 * </p>
 * <p><code>"bread" &amp; "butter"</code></p>
 * becomes:
 * <p>
 * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
 * </p>
 *
 * <p>Supports all known HTML 4.0 entities, including funky accents.
 * Note that the commonly used apostrophe escape character (&amp;apos;)
 * is not a legal entity and so is not supported).</p>
 *
 * @param str  the <code>String</code> to escape, may be null
 * @return a new escaped <code>String</code>, <code>null</code> if null string input
 *
 * @see #unescapeHtml(String)
 * @see "ISO Entities"
 * @see "HTML 3.2 Character Entities for ISO Latin-1"
 * @see "HTML 4.0 Character entity references"
 * @see "HTML 4.01 Character References"
 * @see "HTML 4.01 Code positions"
 */
public static String escapeHtml(String str) {
    if (str == null) {
        return null;
    }
    // Leave headroom for entities, which are longer than the characters they replace.
    final int initialCapacity = (int) (str.length() * 1.5);
    final StringWriter escaped = new StringWriter(initialCapacity);
    try {
        escapeHtml(escaped, str);
    } catch (IOException ioe) {
        //should be impossible
        throw new UnhandledException(ioe);
    }
    return escaped.toString();
}
/**
 * <p>Escapes the characters in a <code>String</code> using HTML entities and writes
 * them to a <code>Writer</code>.</p>
 *
 * <p>
 * For example:
 * </p>
 * <code>"bread" &amp; "butter"</code>
 * <p>becomes:</p>
 * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
 *
 * <p>Supports all known HTML 4.0 entities, including funky accents.
 * Note that the commonly used apostrophe escape character (&amp;apos;)
 * is not a legal entity and so is not supported).</p>
 *
 * <p>The escaping itself is delegated to <code>Entities.HTML40</code>.</p>
 *
 * @param writer  the writer receiving the escaped string, not null
 * @param string  the <code>String</code> to escape, may be null (nothing is written)
 * @throws IllegalArgumentException if the writer is null
 * @throws IOException when <code>Writer</code> passed throws the exception from
 *                                       calls to the {@link Writer#write(int)} methods.
 *
 * @see #escapeHtml(String)
 * @see #unescapeHtml(String)
 * @see "ISO Entities"
 * @see "HTML 3.2 Character Entities for ISO Latin-1"
 * @see "HTML 4.0 Character entity references"
 * @see "HTML 4.01 Character References"
 * @see "HTML 4.01 Code positions"
 */
public static void escapeHtml(Writer writer, String string) throws IOException {
    if (writer == null ) {
        throw new IllegalArgumentException ("The Writer must not be null.");
    }
    if (string != null) {
        Entities.HTML40.escape(writer, string);
    }
}
//-----------------------------------------------------------------------
/**
 * <p>Unescapes a string containing entity escapes to a string
 * containing the actual Unicode characters corresponding to the
 * escapes. Supports HTML 4.0 entities.</p>
 *
 * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
 * will become "&lt;Fran&ccedil;ais&gt;"</p>
 *
 * <p>If an entity is unrecognized, it is left alone, and inserted
 * verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
 * become "&gt;&amp;zzzz;x".</p>
 *
 * @param str  the <code>String</code> to unescape, may be null
 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
 * @see #escapeHtml(Writer, String)
 */
public static String unescapeHtml(String str) {
    if (str == null) {
        return null;
    }
    final int initialCapacity = (int) (str.length() * 1.5);
    final StringWriter unescaped = new StringWriter(initialCapacity);
    try {
        unescapeHtml(unescaped, str);
    } catch (IOException ioe) {
        //should be impossible
        throw new UnhandledException(ioe);
    }
    return unescaped.toString();
}
/**
 * <p>Unescapes a string containing entity escapes and writes the actual
 * Unicode characters corresponding to the escapes to a <code>Writer</code>.
 * Supports HTML 4.0 entities.</p>
 *
 * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
 * will become "&lt;Fran&ccedil;ais&gt;"</p>
 *
 * <p>If an entity is unrecognized, it is left alone, and inserted
 * verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
 * become "&gt;&amp;zzzz;x".</p>
 *
 * <p>The unescaping itself is delegated to <code>Entities.HTML40</code>.</p>
 *
 * @param writer  the writer receiving the unescaped string, not null
 * @param string  the <code>String</code> to unescape, may be null (nothing is written)
 * @throws IllegalArgumentException if the writer is null
 * @throws IOException if an IOException occurs
 * @see #escapeHtml(String)
 */
public static void unescapeHtml(Writer writer, String string) throws IOException {
    if (writer == null ) {
        throw new IllegalArgumentException ("The Writer must not be null.");
    }
    if (string != null) {
        Entities.HTML40.unescape(writer, string);
    }
}
//-----------------------------------------------------------------------
/**
 * <p>Escapes the characters in a <code>String</code> using XML entities and
 * writes the result to a <code>Writer</code>.</p>
 *
 * <p>For example: <tt>"bread" &amp; "butter"</tt> =>
 * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
 * </p>
 *
 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
 * Does not support DTDs or external entities.</p>
 *
 * <p>Note that unicode characters greater than 0x7f are currently escaped to
 * a numerical equivalent. This may change in future releases.
 * (The escaping is delegated to <code>Entities.XML</code>; see that class for
 * the exact output format.)</p>
 *
 * @param writer  the writer receiving the escaped string, not null
 * @param str  the <code>String</code> to escape, may be null (nothing is written)
 * @throws IllegalArgumentException if the writer is null
 * @throws IOException if there is a problem writing
 * @see #unescapeXml(java.lang.String)
 */
public static void escapeXml(Writer writer, String str) throws IOException {
    if (writer == null ) {
        throw new IllegalArgumentException ("The Writer must not be null.");
    }
    if (str != null) {
        Entities.XML.escape(writer, str);
    }
}
/**
 * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
 *
 * <p>For example: <tt>"bread" &amp; "butter"</tt> =>
 * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
 * </p>
 *
 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
 * Does not support DTDs or external entities.</p>
 *
 * <p>Note that unicode characters greater than 0x7f are currently escaped to
 * a numerical equivalent. This may change in future releases.
 * (The escaping is delegated to <code>Entities.XML</code>; see that class for
 * the exact output format.)</p>
 *
 * @param str  the <code>String</code> to escape, may be null
 * @return a new escaped <code>String</code>, <code>null</code> if null string input
 * @see #unescapeXml(java.lang.String)
 */
public static String escapeXml(String str) {
    return str == null ? null : Entities.XML.escape(str);
}
//-----------------------------------------------------------------------
/**
 * <p>Unescapes a string containing XML entity escapes and writes the actual
 * Unicode characters corresponding to the escapes to a <code>Writer</code>.</p>
 *
 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
 * Does not support DTDs or external entities.</p>
 *
 * <p>Note that numerical unicode codes are unescaped to their respective
 * unicode characters. This may change in future releases.
 * (The unescaping is delegated to <code>Entities.XML</code>.)</p>
 *
 * @param writer  the writer receiving the unescaped string, not null
 * @param str  the <code>String</code> to unescape, may be null (nothing is written)
 * @throws IllegalArgumentException if the writer is null
 * @throws IOException if there is a problem writing
 * @see #escapeXml(String)
 */
public static void unescapeXml(Writer writer, String str) throws IOException {
    if (writer == null ) {
        throw new IllegalArgumentException ("The Writer must not be null.");
    }
    if (str != null) {
        Entities.XML.unescape(writer, str);
    }
}
/**
 * <p>Unescapes a string containing XML entity escapes to a string
 * containing the actual Unicode characters corresponding to the
 * escapes.</p>
 *
 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
 * Does not support DTDs or external entities.</p>
 *
 * <p>Note that numerical unicode codes are unescaped to their respective
 * unicode characters. This may change in future releases.
 * (The unescaping is delegated to <code>Entities.XML</code>.)</p>
 *
 * @param str  the <code>String</code> to unescape, may be null
 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
 * @see #escapeXml(String)
 */
public static String unescapeXml(String str) {
    return str == null ? null : Entities.XML.unescape(str);
}
//-----------------------------------------------------------------------
/**
 * <p>Escapes the characters in a <code>String</code> to be suitable to pass to
 * an SQL query.</p>
 *
 * <p>For example,
 * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
 *   StringEscapeUtils.escapeSql("McHale's Navy") +
 *   "'");</pre>
 * </p>
 *
 * <p>At present, this method only turns single-quotes into doubled single-quotes
 * (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does not
 * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
 *
 * <p>Because only single quotes are handled, this is not a general defence
 * against SQL injection; prefer parameterised statements where available.</p>
 *
 * @param str  the string to escape, may be null
 * @return a new String, escaped for SQL, <code>null</code> if null string input
 */
public static String escapeSql(String str) {
    return str == null ? null : StringUtils.replace(str, "'", "''");
}
//-----------------------------------------------------------------------
/**
 * <p>Returns a <code>String</code> value for a CSV column enclosed in double quotes,
 * if required.</p>
 *
 * <p>If the value contains a comma, newline (CR or LF) or double quote, then the
 *    String value is returned enclosed in double quotes.</p>
 *
 * <p>Any double quote characters in the value are escaped with another double quote.</p>
 *
 * <p>If the value does not contain a comma, newline or double quote, then the
 *    String value is returned unchanged.</p>
 *
 * see Wikipedia (Comma-separated values) and
 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
 *
 * @param str the input CSV column String, may be null
 * @return the input String, enclosed in double quotes if the value contains a comma,
 * newline or double quote, <code>null</code> if null string input
 * @since 2.4
 */
public static String escapeCsv(String str) {
    final boolean needsQuoting = !StringUtils.containsNone(str, CSV_SEARCH_CHARS);
    if (needsQuoting) {
        final StringWriter column = new StringWriter();
        try {
            escapeCsv(column, str);
        } catch (IOException ioe) {
            // this should never ever happen while writing to a StringWriter
            throw new UnhandledException(ioe);
        }
        return column.toString();
    }
    // nothing to escape: hand the input back untouched
    return str;
}
/**
* Writes a String
value for a CSV column enclosed in double quotes,
* if required.
If the value contains a comma, newline or double quote, then the * String value is written enclosed in double quotes.
* * *Any double quote characters in the value are escaped with another double quote.
* *If the value does not contain a comma, newline or double quote, then the * String value is written unchanged (null values are ignored).
* * * see Wikipedia and * RFC 4180. * * @param str the input CSV column String, may be null * @param out Writer to write input string to, enclosed in double quotes if it contains * a comma, newline or double quote * @throws IOException if error occurs on underlying Writer * @since 2.4 */ public static void escapeCsv(Writer out, String str) throws IOException { if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) { if (str != null) { out.write(str); } return; } out.write(CSV_QUOTE); for (int i = 0; i < str.length(); i++) { char c = str.charAt(i); if (c == CSV_QUOTE) { out.write(CSV_QUOTE); // escape double quote } out.write(c); } out.write(CSV_QUOTE); } /** *Returns a String
value for an unescaped CSV column.
If the value is enclosed in double quotes, and contains a comma, newline * or double quote, then quotes are removed. *
* *Any double quote escaped characters (a pair of double quotes) are unescaped * to just one double quote.
* *If the value is not enclosed in double quotes, or is and does not contain a * comma, newline or double quote, then the String value is returned unchanged.
* * * see Wikipedia and * RFC 4180. * * @param str the input CSV column String, may be null * @return the input String, with enclosing double quotes removed and embedded double * quotes unescaped,null
if null string input
* @since 2.4
*/
public static String unescapeCsv(String str) {
    if (str != null) {
        // Collect the Writer-based overload's output in memory.
        final StringWriter column = new StringWriter();
        try {
            unescapeCsv(column, str);
        } catch (IOException ioe) {
            // this should never ever happen while writing to a StringWriter
            throw new UnhandledException(ioe);
        }
        return column.toString();
    }
    return null;
}
/**
 * <p>Writes a <code>String</code> value for an unescaped CSV column to a
 * <code>Writer</code>.</p>
 *
 * <p>If the value is enclosed in double quotes, and contains a comma, newline
 *    or double quote, then quotes are removed.
 * </p>
 *
 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
 *    to just one double quote.</p>
 *
 * <p>If the value is not enclosed in double quotes, or is and does not contain a
 *    comma, newline or double quote, then the String value is written unchanged.</p>
 *
 * see Wikipedia (Comma-separated values) and
 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
 *
 * @param str the input CSV column String, may be null (nothing is written)
 * @param out Writer to write the input String to, with enclosing double quotes
 * removed and embedded double quotes unescaped
 * @throws IOException if error occurs on underlying Writer
 * @since 2.4
 */
public static void unescapeCsv(Writer out, String str) throws IOException {
    if (str == null) {
        return;
    }
    final int lastIndex = str.length() - 1;
    // Needs at least two characters to have both an opening and a closing quote.
    final boolean quoted = lastIndex >= 1
            && str.charAt(0) == CSV_QUOTE
            && str.charAt(lastIndex) == CSV_QUOTE;
    if (!quoted) {
        out.write(str);
        return;
    }

    // strip quotes
    final String inner = str.substring(1, lastIndex);

    // Only a quoted value that actually needed quoting is unescaped;
    // otherwise the original text (quotes included) is written unchanged.
    final String result;
    if (StringUtils.containsAny(inner, CSV_SEARCH_CHARS)) {
        // deal with escaped quotes; ie) ""
        result = StringUtils.replace(inner, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR);
    } else {
        result = str;
    }
    out.write(result);
}
}