Browse Source

SEC-1377: Extended HTML escaping functionality to take account of control characters, whitespace and to handle Unicode supplementary characters (surrogate pairs).

Luke Taylor 15 years ago
parent
commit
04447bdbf0

+ 31 - 9
web/src/main/java/org/springframework/security/web/util/TextEscapeUtils.java

@@ -1,9 +1,11 @@
 package org.springframework.security.web.util;
 
 /**
- * Utility for escaping characters in HTML strings.
+ * Internal utility for escaping characters in HTML strings.
  *
  * @author Luke Taylor
+ *
+ * @see http://www.owasp.org/index.php/How_to_perform_HTML_entity_encoding_in_Java
  */
 public abstract class TextEscapeUtils {
 
@@ -17,22 +19,42 @@ public abstract class TextEscapeUtils {
         for (int i=0; i < s.length(); i++) {
             char c = s.charAt(i);
 
-            if(c == '<') {
+            if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9') {
+                sb.append(c);
+            } else if(c == '<') {
                 sb.append("&lt;");
             } else if (c == '>') {
                 sb.append("&gt;");
-            } else if (c == '"') {
-                sb.append("&#034;");
-            } else if (c == '\'') {
-                sb.append("&#039;");
             } else if (c == '&') {
                 sb.append("&amp;");
-            } else {
-                sb.append(c);
+            } else if (Character.isWhitespace(c)) {
+                sb.append("&#").append((int)c).append(";");
+            } else if (Character.isISOControl(c)) {
+                // ignore control chars
+            } else if (Character.isHighSurrogate(c)) {
+                if (i + 1 >= s.length()) {
+                    // Unexpected end
+                    throw new IllegalArgumentException("Missing low surrogate character at end of string");
+                }
+                char low = s.charAt(i + 1);
+
+                if (!Character.isLowSurrogate(low)) {
+                    throw new IllegalArgumentException("Expected low surrogate character but found value = " + (int)low);
+                }
+
+                int codePoint = Character.toCodePoint(c, low);
+                if (Character.isDefined(codePoint)) {
+                    sb.append("&#").append(codePoint).append(";");
+                }
+                i++; // skip the next character as we have already dealt with it
+            } else if (Character.isLowSurrogate(c)) {
+                throw new IllegalArgumentException("Unexpected low surrogate character, value = " + (int)c);
+            } else if (Character.isDefined(c)) {
+                sb.append("&#").append((int) c).append(";");
             }
+            // Ignore anything else
         }
 
         return sb.toString();
     }
-
 }

+ 37 - 1
web/src/test/java/org/springframework/security/web/util/TextEscapeUtilsTests.java

@@ -7,9 +7,45 @@ import org.springframework.security.web.util.TextEscapeUtils;
 
 public class TextEscapeUtilsTests {
 
+    /**
+     * &amp;, &lt;, &gt;, &#34;, &#39 and&#32;(space) escaping
+     */
     @Test
     public void charactersAreEscapedCorrectly() {
-        assertEquals("a&lt;script&gt;&#034;&#039;", TextEscapeUtils.escapeEntities("a<script>\"'"));
+        assertEquals("&amp;&#32;a&lt;script&gt;&#34;&#39;", TextEscapeUtils.escapeEntities("& a<script>\"'"));
     }
 
+    @Test
+    public void nullOrEmptyStringIsHandled() throws Exception {
+        assertEquals("", TextEscapeUtils.escapeEntities(""));
+        assertNull(TextEscapeUtils.escapeEntities(null));
+    }
+
+    @Test(expected=IllegalArgumentException.class)
+    public void invalidLowSurrogateIsDetected() throws Exception {
+        TextEscapeUtils.escapeEntities("abc\uDCCCdef");
+    }
+
+    @Test(expected=IllegalArgumentException.class)
+    public void missingLowSurrogateIsDetected() throws Exception {
+        TextEscapeUtils.escapeEntities("abc\uD888a");
+    }
+
+    @Test(expected=IllegalArgumentException.class)
+    public void highSurrogateAtEndOfStringIsRejected() throws Exception {
+        TextEscapeUtils.escapeEntities("abc\uD888");
+    }
+
+    /**
+     * Delta char: &#66560;
+     */
+    @Test
+    public void validSurrogatePairIsAccepted() throws Exception {
+        assertEquals("abc&#66560;a", TextEscapeUtils.escapeEntities("abc\uD801\uDC00a"));
+    }
+
+    @Test
+    public void undefinedSurrogatePairIsIgnored() throws Exception {
+        assertEquals("abca", TextEscapeUtils.escapeEntities("abc\uD888\uDC00a"));
+    }
 }