Skip to content

Commit b59888a

Browse files
Fix bugs in SQLScriptScanner with big String literals and PostgreSQL identifiers (as introduced by #7646) (#7818)
Co-authored-by: Eddú Meléndez Gonzales <eddu.melendez@gmail.com>
1 parent d80ce60 commit b59888a

File tree

2 files changed

+78
-7
lines changed

2 files changed

+78
-7
lines changed

modules/database-commons/src/main/java/org/testcontainers/ext/ScriptScanner.java

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,7 @@ class ScriptScanner {
2828

2929
private final Pattern whitespace = Pattern.compile("\\s+");
3030

31-
private final Pattern identifier = Pattern.compile("[a-z][a-z0-9_]*", Pattern.CASE_INSENSITIVE);
32-
33-
private final Pattern singleQuotedString = Pattern.compile("'(\\\\'|[^'])*'");
34-
35-
private final Pattern ansiQuotedString = Pattern.compile("\"(\\\\\"|[^\"])*\"");
31+
private final Pattern identifier = Pattern.compile("[a-z][a-z0-9_$]*", Pattern.CASE_INSENSITIVE);
3632

3733
private final Pattern dollarQuotedStringDelimiter = Pattern.compile("\\$\\w*\\$");
3834

@@ -54,7 +50,8 @@ private boolean matches(String substring) {
5450

5551
private boolean matches(Pattern regexp) {
5652
Matcher m = regexp.matcher(script);
57-
if (m.find(offset) && m.start() == offset) {
53+
m.region(offset, script.length());
54+
if (m.lookingAt()) {
5855
currentMatch = m.group();
5956
offset = m.end();
6057
return true;
@@ -99,6 +96,26 @@ private boolean matchesMultilineComment() {
9996
return false;
10097
}
10198

99+
/**
 * Tries to consume a quoted string that starts at the current {@code offset}.
 *
 * <p>The character at {@code offset} must be {@code quote}; the scan then runs
 * forward to the first occurrence of {@code quote} that is not preceded by a
 * backslash escape. On success {@code currentMatch} holds the literal including
 * both quote characters and {@code offset} points just past the closing quote.
 * On failure (different first character, or no closing quote before the end of
 * the script) neither {@code currentMatch} nor {@code offset} is modified.
 *
 * <p>NOTE(review): {@code script.charAt(offset)} is not bounds-checked here;
 * this assumes the caller only invokes the method while
 * {@code offset < script.length()} - confirm against the caller.
 *
 * @param quote the delimiter that both opens and closes the literal
 * @return {@code true} if a complete quoted string was consumed
 */
private boolean matchesQuotedString(final char quote) {
100+
if (script.charAt(offset) == quote) {
101+
// True when the previous character was a backslash, i.e. the current one is escaped.
boolean escaped = false;
102+
// Start right after the opening quote.
for (int i = offset + 1; i < script.length(); i++) {
103+
char c = script.charAt(i);
104+
if (escaped) {
105+
// Skip the escaped character (even a quote or another backslash) and clear the flag.
106+
escaped = false;
107+
} else if (c == '\\') {
108+
escaped = true;
109+
} else if (c == quote) {
110+
// Unescaped closing quote: record the whole literal, quotes included.
currentMatch = script.substring(offset, i + 1);
111+
offset = i + 1;
112+
return true;
113+
}
114+
}
115+
}
116+
// Either not a quote at the current offset, or the literal is unterminated.
return false;
117+
}
118+
102119
private boolean matchesDollarQuotedString() {
103120
//Matches $<tag>$ .... $<tag>$
104121
if (matches(dollarQuotedStringDelimiter)) {
@@ -124,7 +141,7 @@ Lexem next() {
124141
return Lexem.SEPARATOR;
125142
} else if (matchesSingleLineComment() || matchesMultilineComment()) {
126143
return Lexem.COMMENT;
127-
} else if (matches(singleQuotedString) || matches(ansiQuotedString) || matchesDollarQuotedString()) {
144+
} else if (matchesQuotedString('\'') || matchesQuotedString('"') || matchesDollarQuotedString()) {
128145
return Lexem.QUOTED_STRING;
129146
} else if (matches(identifier)) {
130147
return Lexem.IDENTIFIER;
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package org.testcontainers.ext;
2+
3+
import org.apache.commons.lang3.StringUtils;
4+
import org.junit.Test;
5+
6+
import java.util.regex.Pattern;
7+
8+
import static org.assertj.core.api.Assertions.assertThat;
9+
10+
/**
 * Tests for the token stream produced by {@link ScriptScanner}: very long
 * quoted literals, identifiers containing dollar signs next to dollar-quoted
 * strings, and backslash-escaped quotes inside quoted literals.
 */
public class ScriptScannerTest {
11+
12+
/**
 * A 10000-character double-quoted literal following a block comment must be
 * returned as a single QUOTED_STRING lexem.
 * NOTE(review): presumably a regression test for the earlier regex-based
 * string matching failing on large literals - confirm against the linked issue.
 */
@Test
13+
public void testHugeStringLiteral() {
14+
String script = "/* a comment */ \"" + StringUtils.repeat('~', 10000) + "\";";
15+
ScriptScanner scanner = scanner(script);
16+
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.COMMENT);
17+
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.WHITESPACE);
18+
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.QUOTED_STRING);
19+
// The match must contain the opening quote, only tildes, and the closing quote.
assertThat(scanner.getCurrentMatch()).matches(Pattern.compile("\"~+\""));
20+
}
21+
22+
/**
 * An identifier containing dollar signs must be scanned as one IDENTIFIER,
 * while a following {@code $a$ ... $a$} section is a QUOTED_STRING and the
 * trailing lone dollar sign is reported as OTHER.
 */
@Test
23+
public void testPgIdentifierWithDollarSigns() {
24+
ScriptScanner scanner = scanner(
25+
"this$is$a$valid$postgreSQL$identifier " +
26+
"$a$While this is a quoted string$a$$ --just followed by a dollar sign"
27+
);
28+
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.IDENTIFIER);
29+
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.WHITESPACE);
30+
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.QUOTED_STRING);
31+
// The extra '$' right after the closing $a$ delimiter is not part of the string.
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.OTHER);
32+
}
33+
34+
/**
 * Backslash-escaped quote characters must not terminate single-quoted or
 * double-quoted literals; each literal is matched in full, escapes included.
 */
@Test
35+
public void testQuotedLiterals() {
36+
ScriptScanner scanner = scanner("'this \\'is a literal' \"this \\\" is a literal\"");
37+
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.QUOTED_STRING);
38+
assertThat(scanner.getCurrentMatch()).isEqualTo("'this \\'is a literal'");
39+
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.WHITESPACE);
40+
assertThat(scanner.next()).isEqualTo(ScriptScanner.Lexem.QUOTED_STRING);
41+
assertThat(scanner.getCurrentMatch()).isEqualTo("\"this \\\" is a literal\"");
42+
}
43+
44+
/**
 * Builds a scanner over {@code script} using the default separator and
 * comment delimiters from {@link ScriptUtils}.
 * NOTE(review): the first constructor argument ("dummy") is presumably a
 * resource name/path for the script - confirm against the constructor.
 */
private static ScriptScanner scanner(String script) {
45+
return new ScriptScanner(
46+
"dummy",
47+
script,
48+
ScriptUtils.DEFAULT_STATEMENT_SEPARATOR,
49+
ScriptUtils.DEFAULT_COMMENT_PREFIX,
50+
ScriptUtils.DEFAULT_BLOCK_COMMENT_START_DELIMITER,
51+
ScriptUtils.DEFAULT_BLOCK_COMMENT_END_DELIMITER
52+
);
53+
}
54+
}

0 commit comments

Comments
 (0)