diff --git a/rules/REQUEST-941-APPLICATION-ATTACK-XSS.conf b/rules/REQUEST-941-APPLICATION-ATTACK-XSS.conf
index 619dcad60..2a2afd0c4 100644
--- a/rules/REQUEST-941-APPLICATION-ATTACK-XSS.conf
+++ b/rules/REQUEST-941-APPLICATION-ATTACK-XSS.conf
@@ -530,12 +530,46 @@ SecRule REQUEST_COOKIES|!REQUEST_COOKIES:/__utm/|REQUEST_COOKIES_NAMES|ARGS_NAME
# US-ASCII encoding bypass listed on XSS filter evasion
# Reported by Mazin Ahmed
#
+# The evasion covered by this chain of rules is specific to web servers that deliver content in US-ASCII.
+# Only Apache Tomcat is known (according to the page linked above) to be vulnerable to this and probably has to be
+# misconfigured for this to happen.
+#
+# Since US-ASCII is a seven bit encoding, bit 8 is ignored. Consider the following ISO 8859-1 sequence:
+#
+# ¼script¾alert(¢XSS¢)¼/script¾
+#
+# A filter looking for tags will usually not match against this sequence because there are no angle brackets (< / >). However,
+# the characters where the brackets would be are ISO 8859-1 characters:
+# - ¼: 0x00BC
+# - ¾: 0x00BE
+# - ¢: 0x00A2
+#
+# And this is how the sequence looks in US-ASCII:
+#
+# <script>alert("XSS")</script>
+#
+# This enables an attacker to craft a string that will be delivered in a form that a browser will execute as script
+# while being ignored by input filters.
+#
+# This rule looks for a start tag sequence that looks like "<...>" (checks for hex and plain to be sure).
+# Because the bytes matched occur in many different languages encoded as multibyte characters (e.g. UTF-8)
+# (e.g. German umlauts, Russian characters), this isn't very helpful and can cause many false positives. We, therefore,
+# use a chained rule to also look for an end tag sequence that looks like "</...>". Only if the chained rule matches will
+# the request be blocked.
+#
+# This is of course still not perfect but should at least make it harder to hide most tags using this technique while
+# requiring very specific patterns in a language to match, which should get rid of most false positives.
+# These rules would, for example, not guard against an element without an end tag, e.g. "<img ...>".
+#
+# US-ASCII on Wikipedia: https://en.wikipedia.org/wiki/ASCII
+# ISO 8859-1 on Wikipedia: https://en.wikipedia.org/wiki/ISO/IEC_8859-1
SecRule REQUEST_COOKIES|!REQUEST_COOKIES:/__utm/|REQUEST_COOKIES_NAMES|ARGS_NAMES|ARGS|XML:/* "@rx \xbc[^\xbe>]*[\xbe>]|<[^\xbe]*\xbe" \
"id:941310,\
phase:2,\
block,\
capture,\
+ chain,\
t:none,t:lowercase,t:urlDecode,t:htmlEntityDecode,t:jsDecode,\
msg:'US-ASCII Malformed Encoding XSS Filter - Attack Detected',\
logdata:'Matched Data: %{TX.0} found within %{MATCHED_VAR_NAME}: %{MATCHED_VAR}',\
@@ -546,11 +580,13 @@ SecRule REQUEST_COOKIES|!REQUEST_COOKIES:/__utm/|REQUEST_COOKIES_NAMES|ARGS_NAME
tag:'paranoia-level/1',\
tag:'OWASP_CRS',\
tag:'capec/1000/152/242',\
- ctl:auditLogParts=+E,\
ver:'OWASP_CRS/3.4.0-dev',\
- severity:'CRITICAL',\
- setvar:'tx.xss_score=+%{tx.critical_anomaly_score}',\
- setvar:'tx.anomaly_score_pl1=+%{tx.critical_anomaly_score}'"
+ severity:'CRITICAL'"
+ SecRule REQUEST_COOKIES|!REQUEST_COOKIES:/__utm/|REQUEST_COOKIES_NAMES|ARGS_NAMES|ARGS|XML:/* "@rx (?:\xbc\s*/\s*[^\xbe>]*[\xbe>])|(?:<\s*/\s*[^\xbe]*\xbe)" \
+ "t:none,t:lowercase,t:urlDecode,t:htmlEntityDecode,t:jsDecode,\
+ ctl:auditLogParts=+E,\
+ setvar:'tx.xss_score=+%{tx.critical_anomaly_score}',\
+ setvar:'tx.anomaly_score_pl1=+%{tx.critical_anomaly_score}'"
#
# https://nedbatchelder.com/blog/200704/xss_with_utf7.html
diff --git a/tests/regression/tests/REQUEST-941-APPLICATION-ATTACK-XSS/941310.yaml b/tests/regression/tests/REQUEST-941-APPLICATION-ATTACK-XSS/941310.yaml
index be9b3ddc6..472ccb77a 100644
--- a/tests/regression/tests/REQUEST-941-APPLICATION-ATTACK-XSS/941310.yaml
+++ b/tests/regression/tests/REQUEST-941-APPLICATION-ATTACK-XSS/941310.yaml
@@ -41,6 +41,59 @@
log_contains: id "941310"
-
test_title: 941310-3
+ desc: Positive test using alternate utf-8
+ stages:
+ -
+ stage:
+ input:
+ dest_addr: 127.0.0.1
+ headers:
+ Host: localhost
+ Content-Type: "application/x-www-form-urlencoded; charset=us-ascii"
+ method: POST
+ port: 80
+ uri: /
+ data: var=\xd0\xbcscript\xd0\xbealert(\xc2\xa2XSS\xc2\xa2)\xd0\xbc/script\xd0\xbe
+ output:
+ log_contains: id "941310"
+ -
+ test_title: 941310-4
+ desc: Real world false positive for old rule with Russian utf-8 characters
+ stages:
+ -
+ stage:
+ input:
+ dest_addr: 127.0.0.1
+ headers:
+ Host: localhost
+ Content-Type: "application/x-www-form-urlencoded; charset=us-ascii"
+ method: POST
+ port: 80
+ uri: /
+ # Reported in https://github.com/coreruleset/coreruleset/issues/1942 as "абвгдеёжзийклмнопрстуфхцчшщъыэюя"
+ data: var=\xd0\xb0\xd0\xb1\xd0\xb2\xd0\xb3\xd0\xb4\xd0\xb5\xd1\x91\xd0\xb6\xd0\xb7\xd0\xb8\xd0\xb9\xd0\xba\xd0\xbb\xd0\xbc\xd0\xbd\xd0\xbe\xd0\xbf\xd1\x80\xd1\x81\xd1\x82\xd1\x83\xd1\x84\xd1\x85\xd1\x86\xd1\x87\xd1\x88\xd1\x89\xd1\x8a\xd1\x8b\xd1\x8d\xd1\x8e\xd1\x8f
+ output:
+ no_log_contains: id "941310"
+ -
+ test_title: 941310-5
+ desc: Real world false positive for old rule with German utf-8 characters
+ stages:
+ -
+ stage:
+ input:
+ dest_addr: 127.0.0.1
+ headers:
+ Host: localhost
+ Content-Type: "application/x-www-form-urlencoded; charset=us-ascii"
+ method: POST
+ port: 80
+ uri: /
+ # Reported in https://github.com/coreruleset/coreruleset/issues/1645 as "de_matten & sitzbez\xc3\xbcge > fu\xc3\x9fmatten_mt"
+ data: var=de_matten & sitzbez\xc3\x83\xc2\xbcge > fu\xc3\x83\xc2\x9fmatten_mt
+ output:
+ no_log_contains: id "941310"
+ -
+ test_title: 941310-6
desc: Negative test for opening tag
stages:
-
@@ -57,7 +110,7 @@
output:
no_log_contains: id "941310"
-
- test_title: 941310-4
+ test_title: 941310-7
desc: Negative test for closing tag
stages:
-
@@ -73,3 +126,73 @@
data: var=\xbe\xbe
output:
no_log_contains: id "941310"
+ -
+ test_title: 941310-8
+ desc: Negative for missing end tag, opening tag
+ stages:
+ -
+ stage:
+ input:
+ dest_addr: 127.0.0.1
+ headers:
+ Host: localhost
+ Content-Type: "application/x-www-form-urlencoded; charset=us-ascii"
+ method: POST
+ port: 80
+ uri: /
+ data: var=\xd0\xbcscript\xd0\xbealert(\xc2\xa2XSS\xc2\xa2)\xd0\xbc/script\xd0
+ output:
+ no_log_contains: id "941310"
+ -
+ test_title: 941310-9
+ desc: Negative for missing end tag, closing tag
+ stages:
+ -
+ stage:
+ input:
+ dest_addr: 127.0.0.1
+ headers:
+ Host: localhost
+ Content-Type: "application/x-www-form-urlencoded; charset=us-ascii"
+ method: POST
+ port: 80
+ uri: /
+ data: var=\xd0\xbcscript\xd0\xbealert(\xc2\xa2XSS\xc2\xa2)\xd0/script\xd0\xbe
+ output:
+ no_log_contains: id "941310"
+ -
+ test_title: 941310-10
+ desc: Negative using real world Russian example in utf-8
+ stages:
+ -
+ stage:
+ input:
+ dest_addr: 127.0.0.1
+ headers:
+ Host: localhost
+ Content-Type: "application/x-www-form-urlencoded; charset=us-ascii"
+ method: POST
+ port: 80
+ uri: /
+ # Reported in https://github.com/coreruleset/coreruleset/issues/1942 as "абвгдеёжзийклмнпрстуфхцчшщъыэюя"
+ data: var=\xd0\xb0\xd0\xb1\xd0\xb2\xd0\xb3\xd0\xb4\xd0\xb5\xd1\x91\xd0\xb6\xd0\xb7\xd0\xb8\xd0\xb9\xd0\xba\xd0\xbb\xd0\xbc\xd0\xbd\xd0\xbf\xd1\x80\xd1\x81\xd1\x82\xd1\x83\xd1\x84\xd1\x85\xd1\x86\xd1\x87\xd1\x88\xd1\x89\xd1\x8a\xd1\x8b\xd1\x8d\xd1\x8e\xd1\x8f
+ output:
+ no_log_contains: id "941310"
+ -
+ test_title: 941310-11
+ desc: Negative using real world Russian example in utf-8, variant
+ stages:
+ -
+ stage:
+ input:
+ dest_addr: 127.0.0.1
+ headers:
+ Host: localhost
+ Content-Type: "application/x-www-form-urlencoded; charset=us-ascii"
+ method: POST
+ port: 80
+ uri: /
+ # Reported in https://github.com/coreruleset/coreruleset/issues/1942 as "абвгдеёжзийклнопрстуфхцчшщъыэюя"
+ data: var=\xd0\xb0\xd0\xb1\xd0\xb2\xd0\xb3\xd0\xb4\xd0\xb5\xd1\x91\xd0\xb6\xd0\xb7\xd0\xb8\xd0\xb9\xd0\xba\xd0\xbb\xd0\xbd\xd0\xbe\xd0\xbf\xd1\x80\xd1\x81\xd1\x82\xd1\x83\xd1\x84\xd1\x85\xd1\x86\xd1\x87\xd1\x88\xd1\x89\xd1\x8a\xd1\x8b\xd1\x8d\xd1\x8e\xd1\x8f
+ output:
+ no_log_contains: id "941310"