diff --git a/src/main/java/org/apache/commons/text/StringEscapeUtils.java b/src/main/java/org/apache/commons/text/StringEscapeUtils.java
index 9720ddd723..7b9aefc5a3 100644
--- a/src/main/java/org/apache/commons/text/StringEscapeUtils.java
+++ b/src/main/java/org/apache/commons/text/StringEscapeUtils.java
@@ -202,7 +202,8 @@ public class StringEscapeUtils {
public static final CharSequenceTranslator ESCAPE_HTML3 =
new AggregateTranslator(
new LookupTranslator(EntityArrays.BASIC_ESCAPE),
- new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE)
+ new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE),
+ new LookupTranslator(EntityArrays.CP1252_ESCAPE) // NOTE(review): keys here lie outside ISO-8859-1 -- confirm HTML 3 consumers accept the emitted replacements
);
/**
@@ -216,6 +217,7 @@ public class StringEscapeUtils {
new AggregateTranslator(
new LookupTranslator(EntityArrays.BASIC_ESCAPE),
new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE),
+ new LookupTranslator(EntityArrays.CP1252_ESCAPE),
new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE)
);
@@ -317,6 +319,7 @@ public class StringEscapeUtils {
new AggregateTranslator(
new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
+ new LookupTranslator(EntityArrays.CP1252_UNESCAPE),
new NumericEntityUnescaper()
);
@@ -331,10 +334,10 @@ public class StringEscapeUtils {
new AggregateTranslator(
new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
+ new LookupTranslator(EntityArrays.CP1252_UNESCAPE),
new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE),
new NumericEntityUnescaper()
);
-
/**
* Translator object for unescaping escaped XML.
*
diff --git a/src/main/java/org/apache/commons/text/translate/EntityArrays.java b/src/main/java/org/apache/commons/text/translate/EntityArrays.java
index 0c36c6f1de..0c64734513 100644
--- a/src/main/java/org/apache/commons/text/translate/EntityArrays.java
+++ b/src/main/java/org/apache/commons/text/translate/EntityArrays.java
@@ -426,6 +426,52 @@ public class EntityArrays {
JAVA_CTRL_CHARS_UNESCAPE = Collections.unmodifiableMap(invert(JAVA_CTRL_CHARS_ESCAPE));
}
+ /**
+ * A {@code Map<CharSequence, CharSequence>} to escape the characters that CP-1252 (Windows-1252) adds to
+ * ISO-8859-1. CP-1252 is a superset of ISO-8859-1 that assigns printable characters to the byte values
+ * 128 to 159 (0x80 to 0x9F); this map holds only those extension characters, keyed by the corresponding
+ * Unicode character. This must be used with {@link #ISO8859_1_ESCAPE} to get all CP-1252 code points.
+ */
+ public static final Map CP1252_ESCAPE;
+ static {
+ // Entries are listed in CP-1252 byte order; each comment gives the byte value and the Unicode name.
+ // NOTE(review): as shown, every value is the same character as its key -- confirm that the named
+ // entities were not lost when this patch was rendered.
+ // NOTE(review): CP-1252 0x9E (U+017E, LATIN SMALL LETTER Z WITH CARON) has no entry -- confirm intended.
+ final Map initialMap = new HashMap<>();
+ initialMap.put("\u20AC", "€"); // 0x80 EURO SIGN
+ initialMap.put("\u201A", "‚"); // 0x82 SINGLE LOW-9 QUOTATION MARK
+ initialMap.put("\u0192", "ƒ"); // 0x83 LATIN SMALL LETTER F WITH HOOK (florin sign)
+ initialMap.put("\u201E", "„"); // 0x84 DOUBLE LOW-9 QUOTATION MARK
+ initialMap.put("\u2026", "…"); // 0x85 HORIZONTAL ELLIPSIS
+ initialMap.put("\u2020", "†"); // 0x86 DAGGER
+ initialMap.put("\u2021", "‡"); // 0x87 DOUBLE DAGGER
+ initialMap.put("\u02C6", "ˆ"); // 0x88 MODIFIER LETTER CIRCUMFLEX ACCENT
+ initialMap.put("\u2030", "‰"); // 0x89 PER MILLE SIGN
+ initialMap.put("\u0160", "Š"); // 0x8A LATIN CAPITAL LETTER S WITH CARON
+ initialMap.put("\u2039", "‹"); // 0x8B SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ initialMap.put("\u0152", "Œ"); // 0x8C LATIN CAPITAL LIGATURE OE
+ initialMap.put("\u017D", "Ž"); // 0x8E LATIN CAPITAL LETTER Z WITH CARON
+ initialMap.put("\u2018", "‘"); // 0x91 LEFT SINGLE QUOTATION MARK
+ initialMap.put("\u2019", "’"); // 0x92 RIGHT SINGLE QUOTATION MARK
+ initialMap.put("\u201C", "“"); // 0x93 LEFT DOUBLE QUOTATION MARK
+ initialMap.put("\u201D", "”"); // 0x94 RIGHT DOUBLE QUOTATION MARK
+ initialMap.put("\u2022", "•"); // 0x95 BULLET
+ initialMap.put("\u2013", "–"); // 0x96 EN DASH
+ initialMap.put("\u2014", "—"); // 0x97 EM DASH
+ initialMap.put("\u02DC", "˜"); // 0x98 SMALL TILDE
+ initialMap.put("\u2122", "™"); // 0x99 TRADE MARK SIGN
+ initialMap.put("\u0161", "š"); // 0x9A LATIN SMALL LETTER S WITH CARON
+ // The next two keys were previously swapped, so each character escaped to the other's replacement.
+ initialMap.put("\u203A", "›"); // 0x9B SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ initialMap.put("\u0153", "œ"); // 0x9C LATIN SMALL LIGATURE OE
+ initialMap.put("\u0178", "Ÿ"); // 0x9F LATIN CAPITAL LETTER Y WITH DIAERESIS
+ // Publish a read-only view so callers cannot alter the shared table.
+ CP1252_ESCAPE = Collections.unmodifiableMap(initialMap);
+ }
+
+ /**
+ * Inverse of {@link #CP1252_ESCAPE}: maps each escaped form back to the character it stands for.
+ * Built by inverting the escape map and exposed as an unmodifiable view.
+ */
+ public static final Map CP1252_UNESCAPE = Collections.unmodifiableMap(invert(CP1252_ESCAPE));
+
/**
* Used to invert an escape Map into an unescape Map.
* @param map Map<String, String> to be inverted
diff --git a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
index d0c6ef5288..865a3dcb1c 100644
--- a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
+++ b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
@@ -16,15 +16,7 @@
*/
package org.apache.commons.text;
-import static org.apache.commons.text.StringEscapeUtils.escapeXSI;
-import static org.apache.commons.text.StringEscapeUtils.unescapeXSI;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertNull;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
+import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.io.StringWriter;
@@ -35,7 +27,15 @@
import java.nio.file.Files;
import java.nio.file.Paths;
-import org.junit.jupiter.api.Test;
+import static org.apache.commons.text.StringEscapeUtils.escapeXSI;
+import static org.apache.commons.text.StringEscapeUtils.unescapeXSI;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
/**
* Unit tests for {@link StringEscapeUtils}.
@@ -49,12 +49,14 @@ public class StringEscapeUtilsTest {
private static final String FOO = "foo";
private static final String[][] HTML_ESCAPES = {
+ // message, expected, original
{"no escaping", "plain text", "plain text"},
{"no escaping", "plain text", "plain text"},
{"empty string", "", ""},
{"null", null, null},
{"ampersand", "bread & butter", "bread & butter"},
{"quotes", ""bread" & butter", "\"bread\" & butter"},
+ {"smart quotes", "“bread and circuses”", "\u201Cbread and circuses\u201d"},
{"final character only", "greater than >", "greater than >"},
{"first character only", "< less than", "< less than"},
{"apostrophe", "Huntington's chorea", "Huntington's chorea"},