From 9b2563597a9d9a68de4a0775afac3ab527c1f259 Mon Sep 17 00:00:00 2001 From: Eduardo Aguado Date: Fri, 15 Dec 2023 10:05:40 +0100 Subject: [PATCH 1/6] Fixes in parser to address bypass of the library and XSS attacks --- src/main/java/org/owasp/html/HtmlLexer.java | 14 ++++++++++- .../java/org/owasp/html/HtmlLexerTest.java | 25 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/owasp/html/HtmlLexer.java b/src/main/java/org/owasp/html/HtmlLexer.java index 00fcc7dc..119882ff 100644 --- a/src/main/java/org/owasp/html/HtmlLexer.java +++ b/src/main/java/org/owasp/html/HtmlLexer.java @@ -476,6 +476,7 @@ private static enum State { COMMENT, COMMENT_DASH, COMMENT_DASH_DASH, + COMMENT_DASH_AFTER_BANG, DIRECTIVE, DONE, BOGUS_COMMENT, @@ -640,17 +641,28 @@ && canonicalElementName(start + 2, end) case BANG: if ('-' == ch) { state = State.BANG_DASH; + } else if('>' == ch) { // is a valid html comment + state = State.DONE; + type = HtmlTokenType.COMMENT; } else { state = State.DIRECTIVE; } break; case BANG_DASH: if ('-' == ch) { - state = State.COMMENT; + state = State.COMMENT_DASH_AFTER_BANG; } else { state = State.DIRECTIVE; } break; + case COMMENT_DASH_AFTER_BANG: + if ('>' == ch) { // is a valid html comment + state = State.DONE; + type = HtmlTokenType.COMMENT; + }else{ + state = State.COMMENT; + } + break; case COMMENT: if ('-' == ch) { state = State.COMMENT_DASH; diff --git a/src/test/java/org/owasp/html/HtmlLexerTest.java b/src/test/java/org/owasp/html/HtmlLexerTest.java index fedf90d0..9718bd7b 100644 --- a/src/test/java/org/owasp/html/HtmlLexerTest.java +++ b/src/test/java/org/owasp/html/HtmlLexerTest.java @@ -121,6 +121,31 @@ public static final void testShortTags() { "TAGEND: >"); } + @Test + public static final void testCommentDeclarationWith0CommentsAndXss() throws Exception + { + //check https://datatracker.ietf.org/doc/html/rfc1866#section-3.2.5 + assertTokens("", + "COMMENT: ", + "TAGBEGIN: " + ); + } + + @Test + public static final void testCommentDeclarationWith0CommentsAndTag() throws Exception + { + assertTokens("", + "COMMENT: ", + "TAGBEGIN: " + ); + } + private static void lex(String input, Appendable out) throws Exception { HtmlLexer lexer = new HtmlLexer(input); int maxTypeLength = 0; From 1ae3478677ae48ccf5a0b2a8c7f18032e77fb243 Mon Sep 17 00:00:00 2001 From: "Aguado, Eduardo" Date: Fri, 22 Mar 2024 13:51:47 +0100 Subject: [PATCH 2/6] Fixes suggested by Mike Samuel --- src/main/java/org/owasp/html/HtmlLexer.java | 6 +++++- .../java/org/owasp/html/HtmlLexerTest.java | 18 ++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/owasp/html/HtmlLexer.java b/src/main/java/org/owasp/html/HtmlLexer.java index 119882ff..d74ca943 100644 --- a/src/main/java/org/owasp/html/HtmlLexer.java +++ b/src/main/java/org/owasp/html/HtmlLexer.java @@ -659,7 +659,9 @@ && canonicalElementName(start + 2, end) if ('>' == ch) { // is a valid html comment state = State.DONE; type = HtmlTokenType.COMMENT; - }else{ + } else if('-' == ch) { // is a valid html comment + state = State.COMMENT_DASH_AFTER_BANG; + } else { state = State.COMMENT; } break; @@ -677,6 +679,8 @@ && canonicalElementName(start + 2, end) if ('>' == ch) { state = State.DONE; type = HtmlTokenType.COMMENT; + } else if ('!' == ch) { // --!> is also valid closing sequence + state = State.COMMENT_DASH_DASH; } else if ('-' == ch) { state = State.COMMENT_DASH_DASH; } else { diff --git a/src/test/java/org/owasp/html/HtmlLexerTest.java b/src/test/java/org/owasp/html/HtmlLexerTest.java index 9718bd7b..0917f5a7 100644 --- a/src/test/java/org/owasp/html/HtmlLexerTest.java +++ b/src/test/java/org/owasp/html/HtmlLexerTest.java @@ -137,11 +137,25 @@ public static final void testCommentDeclarationWith0CommentsAndXss() throws Exce } @Test - public static final void testCommentDeclarationWith0CommentsAndTag() throws Exception + public static final void testAbruptClosingOfEmptyComment() throws Exception { - assertTokens("", + assertTokens("abc", "COMMENT: ", "TAGBEGIN: ", + "TEXT: a", + "COMMENT: ", + "TEXT: b", + "SERVERCODE: c" + ); + } + + @Test + public static final void testIncorrectlyClosedComment() throws Exception + { + assertTokens("", + "COMMENT: ", + "TAGBEGIN: " ); } From e8b68ef1e8befd4836a4d251f3b9bca7a23b6220 Mon Sep 17 00:00:00 2001 From: "Aguado, Eduardo" Date: Fri, 22 Mar 2024 13:54:44 +0100 Subject: [PATCH 3/6] Spacing --- src/main/java/org/owasp/html/HtmlLexer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/owasp/html/HtmlLexer.java b/src/main/java/org/owasp/html/HtmlLexer.java index d74ca943..4b6a79d4 100644 --- a/src/main/java/org/owasp/html/HtmlLexer.java +++ b/src/main/java/org/owasp/html/HtmlLexer.java @@ -659,7 +659,7 @@ && canonicalElementName(start + 2, end) if ('>' == ch) { // is a valid html comment state = State.DONE; type = HtmlTokenType.COMMENT; - } else if('-' == ch) { // is a valid html comment + } else if ('-' == ch) { // is a valid html comment state = State.COMMENT_DASH_AFTER_BANG; } else { state = State.COMMENT; From ce28bc9e7adcae094ef8736425130a2cb3141fb1 Mon Sep 17 00:00:00 2001 From: "Aguado, Eduardo" Date: Tue, 24 Sep 2024 11:49:50 +0200 Subject: [PATCH 4/6] Added new test --- .../src/test/java/org/owasp/html/HtmlLexerTest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java index be92022e..d3f781c9 100644 --- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java +++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java @@ -132,6 +132,19 @@ public static final void testCommentDeclarationWith0CommentsAndXss() throws Exce ); } + @Test + public static final void testTextEndingWithTagOpenAndBang() throws Exception + { + //taken from https://html.spec.whatwg.org/#comments + assertTokens("", + "COMMENT: ", + "TAGBEGIN: ", + "TAGBEGIN: " + ); + } + @Test public static final void testAbruptClosingOfEmptyComment() throws Exception { From 7408e06902618610d0b6caa21999da78abff1893 Mon Sep 17 00:00:00 2001 From: "Aguado, Eduardo" Date: Tue, 24 Sep 2024 12:23:20 +0200 Subject: [PATCH 5/6] Implemented change suggested by mikesamuel --- .../src/main/java/org/owasp/html/HtmlLexer.java | 17 +++++++++++++++-- .../test/java/org/owasp/html/HtmlLexerTest.java | 7 +++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java index a732fdab..55223938 100644 --- a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java +++ b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java @@ -476,6 +476,7 @@ private static enum State { COMMENT, COMMENT_DASH, COMMENT_DASH_DASH, + COMMENT_DASH_DASH_BANG, COMMENT_DASH_AFTER_BANG, DIRECTIVE, DONE, @@ -668,6 +669,8 @@ && canonicalElementName(start + 2, end) case COMMENT: if ('-' == ch) { state = State.COMMENT_DASH; + } else { + state = State.COMMENT; //TODO mirar si esto se puede quitar } break; case COMMENT_DASH: @@ -680,14 +683,24 @@ && canonicalElementName(start + 2, end) state = State.DONE; type = HtmlTokenType.COMMENT; } else if ('!' == ch) { // --!> is also valid closing sequence - state = State.COMMENT_DASH_DASH; + state = State.COMMENT_DASH_DASH_BANG; //orifinally COMMENT_DASH_DASH } else if ('-' == ch) { state = State.COMMENT_DASH_DASH; } else { state = State.COMMENT_DASH; } break; - case DIRECTIVE: + case COMMENT_DASH_DASH_BANG: + if ('>' == ch) { + state = State.DONE; + type = HtmlTokenType.COMMENT; + }else if ('-' == ch) { + state = State.COMMENT_DASH; + }else { + state = State.COMMENT; //TODO mirar si esto se puede quitar + } + break; + case DIRECTIVE: if ('>' == ch) { type = HtmlTokenType.DIRECTIVE; state = State.DONE; diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java index d3f781c9..c8ca186a 100644 --- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java +++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java @@ -145,6 +145,13 @@ public static final void testTextEndingWithTagOpenAndBang() throws Exception ); } + + public static final void testDashDashBangComment() throws Exception + { + assertTokens("", + "COMMENT: " + ); + } @Test public static final void testAbruptClosingOfEmptyComment() throws Exception { From ee341c992abf5cc4d566a0c2db388ced80242703 Mon Sep 17 00:00:00 2001 From: "Aguado, Eduardo" Date: Tue, 24 Sep 2024 12:28:24 +0200 Subject: [PATCH 6/6] Spacing --- .../main/java/org/owasp/html/HtmlLexer.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java index 55223938..52774c40 100644 --- a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java +++ b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java @@ -476,7 +476,7 @@ private static enum State { COMMENT, COMMENT_DASH, COMMENT_DASH_DASH, - COMMENT_DASH_DASH_BANG, + COMMENT_DASH_DASH_BANG, COMMENT_DASH_AFTER_BANG, DIRECTIVE, DONE, @@ -670,7 +670,7 @@ && canonicalElementName(start + 2, end) if ('-' == ch) { state = State.COMMENT_DASH; } else { - state = State.COMMENT; //TODO mirar si esto se puede quitar + state = State.COMMENT; } break; case COMMENT_DASH: @@ -683,7 +683,7 @@ && canonicalElementName(start + 2, end) state = State.DONE; type = HtmlTokenType.COMMENT; } else if ('!' == ch) { // --!> is also valid closing sequence - state = State.COMMENT_DASH_DASH_BANG; //orifinally COMMENT_DASH_DASH + state = State.COMMENT_DASH_DASH_BANG; } else if ('-' == ch) { state = State.COMMENT_DASH_DASH; } else { @@ -692,15 +692,15 @@ && canonicalElementName(start + 2, end) break; case COMMENT_DASH_DASH_BANG: if ('>' == ch) { - state = State.DONE; - type = HtmlTokenType.COMMENT; + state = State.DONE; + type = HtmlTokenType.COMMENT; }else if ('-' == ch) { - state = State.COMMENT_DASH; - }else { - state = State.COMMENT; //TODO mirar si esto se puede quitar - } - break; - case DIRECTIVE: + state = State.COMMENT_DASH; + }else { + state = State.COMMENT; + } + break; + case DIRECTIVE: if ('>' == ch) { type = HtmlTokenType.DIRECTIVE; state = State.DONE;