From 1088349b31dcf32a94f5c8db705d27cf5851a41a Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Mon, 17 Apr 2017 12:48:55 -0400 Subject: [PATCH 1/9] Add tests --- tests/test_sed.sh | 35 +++++++++++++++++++++++++++++++++++ tests/test_vim.sh | 28 ++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100755 tests/test_sed.sh create mode 100755 tests/test_vim.sh diff --git a/tests/test_sed.sh b/tests/test_sed.sh new file mode 100755 index 0000000..cc1a507 --- /dev/null +++ b/tests/test_sed.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +check_(){ + RESULT="$(echo "$3" | sed "$1" "$4")" + if [ "$RESULT" != "$5" ]; then + echo "Test failed: $2" >&2 + echo "----- expected ----- +$5 +----- returned -----" + echo "$RESULT" + exit 1 + fi +} + +# Check BRE +checkb(){ + check_ "-e" "$@" +} +# Check ERE +checke(){ + check_ "-E" "$@" +} +# Check both +check(){ + checkb "$@" + checke "$@" +} + +# Basics + +check 'Custom character class' \ +'adn +aqv' 's/[b-eq]/x/g' \ +'axn +axv' diff --git a/tests/test_vim.sh b/tests/test_vim.sh new file mode 100755 index 0000000..b9648b5 --- /dev/null +++ b/tests/test_vim.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +check(){ + FILE="$(mktemp)" + echo "$2" >"$FILE" + exec 3<<<"$3 +:w! $FILE +:q!" + vi - <"$FILE" 2<&3- + if [ "$(cat "$FILE")" != "$4" ]; then + echo "Test failed: $1" >&2 + echo "----- expected ----- +$4 +----- returned -----" + cat "$FILE" + rm "$FILE" + exit 1 + fi + rm "$FILE" +} + +# Basics + +check 'Custom character class' \ +'adn +aqv' ':%s/[b-eq]/x/g' \ +'axn +axv' From c47e61d2d4f9a76dd3b68004684258cb5f8dc6a1 Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Tue, 20 Jul 2021 09:40:28 -0400 Subject: [PATCH 2/9] Add tests to GitHub Actions configuration --- .github/workflows/test.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 37564d3..003b13a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,3 +16,17 @@ jobs: chmod +x /usr/local/bin/tidy - name: Validate HTML run: tidy -errors -quiet --drop-empty-elements no regex.html + + test_sed: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Run sed tests + run: tests/test_sed.sh + + set_vim: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Run Vim tests + run: tests/test_vim.sh From 225b91b108a3c1c2f9afdecdf94ebe30b01edec2 Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Mon, 17 Apr 2017 16:27:20 -0400 Subject: [PATCH 3/9] Write sed tests up to captures and groups --- tests/test_sed.sh | 168 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 162 insertions(+), 6 deletions(-) diff --git a/tests/test_sed.sh b/tests/test_sed.sh index cc1a507..1dce066 100755 --- a/tests/test_sed.sh +++ b/tests/test_sed.sh @@ -1,11 +1,13 @@ #!/bin/bash +export LC_ALL=en_US.UTF-8 + check_(){ - RESULT="$(echo "$3" | sed "$1" "$4")" - if [ "$RESULT" != "$5" ]; then - echo "Test failed: $2" >&2 + RESULT="$(echo "$4" | sed "$1" "$5")" + if [ "$RESULT" != "$6" ]; then + echo "Test failed: $3 ($2)" >&2 echo "----- expected ----- -$5 +$6 ----- returned -----" echo "$RESULT" exit 1 @@ -14,11 +16,11 @@ $5 # Check BRE checkb(){ - check_ "-e" "$@" + check_ "-e" "BRE" "$@" } # Check ERE checke(){ - check_ "-E" "$@" + check_ "-E" "ERE" "$@" } # Check both check(){ @@ -26,6 +28,27 @@ check(){ checke "$@" } +fail_(){ + if echo | sed "$1" "$4" &>/dev/null; then + echo "Test didn't fail as expected: $3 ($2)" >&2 + exit 1 + fi +} + +# Fail check BRE +failb(){ + fail_ "-e" "BRE" "$@" +} +# Fail check ERE +faile(){ + fail_ "-E" "ERE" "$@" +} +# Fail check both +fail(){ + failb "$@" + faile "$@" +} + # Basics check 'Custom character class' \ @@ -33,3 +56,136 @@ check 'Custom character class' \ aqv' 's/[b-eq]/x/g' \ 'axn axv' + +check 'Negated custom character class' \ +'abcdefgh' \ +'s/[^b-dg-l]/x/g' \ +'xbcdxxgh' + +check 'Backslash not special in class' \ +"a]\\b +a]\\b" \ +"1s/[\\m-p]/x/g +2s/[]]/x/g" \ +"a]xb +ax\\b" + +check 'Ranges' \ +'a-e-i +a-e-i' \ +'1s/[d-f]/x/g +2s/[d-f-]/x/g' \ +'a-x-i +axxxi' + +checkb 'Alternation' \ +'acd' \ +'s/b\|c/x/g' \ +'axd' +checke 'Alternation' \ +'acd' \ +'s/b|c/x/g' \ +'axd' + +check 'Escaped character' \ +'abc' \ +"s/\\061\\x62\\x(99)/x/g" \ +'abc' + +# Charater classes + +check 'Any character' \ +'a ;d +efg' \ +'s/./x/g' \ +'xxxx +xxx' + +check 'Word character' \ +'hello w0r|_d!' \ +'s/\w/x/g' \ +'xxxxx xxx|xx!' + +fail 'Word class' \ +'s/[[:word:]]/x/g' + +check 'Upper case' \ +'Hell0 W0r|_D' \ +'s/[[:upper:]]/X/g' \ +'Xell0 X0r|_X' + +check 'Lower case' \ +'Hell0 W0r|_D' \ +'s/[[:lower:]]/x/g' \ +'Hxxx0 W0x|_D' + +check 'Whitespace' \ +'Hello world !' \ +'s/\s/_/g' \ +'Hello_world_!' + +check 'Whitespace' \ +'Hello world !' \ +'s/[[:space:]]/_/g' \ +'Hello_world_!' + +check 'Non-whitespace' \ +'Hello world !' \ +'s/[^[:space:]]/x/g' \ +'xxxxx xxxxx x' + +check 'Digit' \ +'H3ll0 W0r|_D' \ +'s/[[:digit:]]/+/g' \ +'H+ll+ W+r|_D' + +check 'Hexadecimal digit' \ +'H3ll0 W0r|_D' \ +'s/[[:xdigit:]]/+/g' \ +'H+ll+ W+r|_+' + +fail 'Octal digit' \ +'s/[[:odigit:]]/x/g' + +check 'Punctuation' \ +'+- 01: hello, world! ;) -+' \ +'s/[[:punct:]]/_/g' \ +'__ 01_ hello_ world_ __ __' + +check 'Alphabetical characters' \ +'+- 01: hello, world! ;) -+' \ +'s/[[:alpha:]]/x/g' \ +'+- 01: xxxxx, xxxxx! ;) -+' + +check 'Alphanumerical characters' \ +'+- 01: hello, world! ;) -+' \ +'s/[[:alnum:]]/x/g' \ +'+- xx: xxxxx, xxxxx! ;) -+' + +fail 'ASCII class' \ +'s/[[:ascii:]]/x/g' + +check 'Character equivalents' \ +'Rémi est prêt' \ +'s/[[=e=]]/_/g' \ +'R_mi _st pr_t' + +check 'Word boundary' \ +'Hello, world' \ +'s/o\b/x/g' \ +'Hellx, world' + +check 'Not word boundary' \ +'Hello, world' \ +'s/o\B/x/g' \ +'Hello, wxrld' + +check 'Begining of line' \ +'testing tests' \ +'s/^t/r/g' \ +'resting tests' + +check 'End of line' \ +'testing tests' \ +'s/s$/x/g' \ +'testing testx' From ab6c874b15defc5140479837b0b62b2b2e3d677d Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Mon, 17 Apr 2017 22:06:36 -0400 Subject: [PATCH 4/9] Finish sed tests --- tests/test_sed.sh | 105 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/tests/test_sed.sh b/tests/test_sed.sh index 1dce066..62111c0 100755 --- a/tests/test_sed.sh +++ b/tests/test_sed.sh @@ -189,3 +189,108 @@ check 'End of line' \ 'testing tests' \ 's/s$/x/g' \ 'testing testx' + +# Captures and groups + +checkb 'Capturing group' \ +'Name is Remi!' \ +'s/^.*is \(.*\)!$/\1/' \ +'Remi' +checke 'Capturing group' \ +'Name is Remi!' \ +'s/^.*is (.*)!$/\1/' \ +'Remi' + +checkb 'Non-capturing parentheses' \ +'Some (dumb)text' \ +'s/(.*)//g' \ +'Some text' +checke 'Non-capturing parentheses' \ +'Some (dumb)text' \ +'s/\(.*\)//g' \ +'Some text' + +checkb 'Backreference' \ +'ab be cd cc df' \ +'s/\([a-z]\)\1/xx/g' \ +'ab be cd xx df' +checke 'Backreference' \ +'ab be cd cc df' \ +'s/([a-z])\1/xx/g' \ +'ab be cd xx df' + +# Look-around not supported in POSIX + +# Multiplicity + +checkb '0 or 1' \ +'bb bab baab baa?b baaab' \ +'s/baa\?b/x/g' \ +'bb x x baa?b baaab' +checke '0 or 1' \ +'bb bab baab baa?b baaab' \ +'s/baa?b/x/g' \ +'bb x x baa?b baaab' + +checkb '0 or 1 (negative)' \ +'bb bab baab baa?b baaab' \ +'s/baa?b/x/g' \ +'bb bab baab x baaab' +checke '0 or 1 (negative)' \ +'bb bab baab baa?b baaab' \ +'s/baa\?b/x/g' \ +'bb bab baab x baaab' + +checkb '1 or more' \ +'bb bab baab ba+b baaab' \ +'s/ba\+b/x/g' \ +'bb x x ba+b x' +checke '1 or more' \ +'bb bab baab ba+b baaab' \ +'s/ba+b/x/g' \ +'bb x x ba+b x' + +checkb '1 or more (negative)' \ +'bb bab baab ba+b baaab' \ +'s/ba+b/x/g' \ +'bb bab baab x baaab' +checke '1 or more (negative)' \ +'bb bab baab ba+b baaab' \ +'s/ba\+b/x/g' \ +'bb bab baab x baaab' + +checkb 'Specific number (1)' \ +'bb bab baab baaab baaaab' \ +'s/ba\{2\}b/x/g' \ +'bb bab x baaab baaaab' +checke 'Specific number (1)' \ +'bb bab baab baaab baaaab' \ +'s/ba{2}b/x/g' \ +'bb bab x baaab baaaab' + +checkb 'Specific number (closed)' \ +'bb bab baab baaab baaaab' \ +'s/ba\{1,3\}b/x/g' \ +'bb x x x baaaab' +checke 'Specific number (closed)' \ +'bb bab baab baaab baaaab' \ +'s/ba{1,3}b/x/g' \ +'bb x x x baaaab' + +checkb 'Specific number (open left)' \ +'bb bab baaab baaaab' \ +'s/ba\{,3\}b/x/g' \ +'x x x baaaab' +checke 'Specific number (open left)' \ +'bb bab baaab baaaab' \ +'s/ba{,3}b/x/g' \ +'x x x baaaab' + +checkb 'Specific number (open right)' \ +'bb bab baaab baaaab' \ +'s/ba\{2,\}b/x/g' \ +'bb bab x x' +checke 'Specific number (open right)' \ +'bb bab baaab baaaab' \ +'s/ba{2,}b/x/g' \ +'bb bab x x' From 16945f2707fe010bbaaa41423f22b0b66cdac1a8 Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Mon, 17 Apr 2017 22:54:25 -0400 Subject: [PATCH 5/9] Mention Vim is done with magic --- regex.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex.html b/regex.html index 5b4e476..d3c2fcd 100644 --- a/regex.html +++ b/regex.html @@ -82,7 +82,7 @@

Syntax

- + From 6c163813c6b9a36898231cbaae10bff0d126966e Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Mon, 17 Apr 2017 22:54:39 -0400 Subject: [PATCH 6/9] Python has no `(?-i)`, make case more explicit --- regex.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/regex.html b/regex.html index d3c2fcd..94d1a5d 100644 --- a/regex.html +++ b/regex.html @@ -153,7 +153,8 @@

Syntax

- + +
WhatPerl/PCREPython's rePOSIX (BRE)POSIX extended (ERE)Vim
WhatPerl/PCREPython's rePOSIX (BRE)POSIX extended (ERE)Vim (with :set magic)
Basics
Other
Independent non-backtracking pattern(?>...)\(...\)\@>
Make case-sensitive/insensitive(?i) / (?-i)(?i) / (?-i)\c / \C
Make case-insensitive(?i)(?i)\c
Make case-sensitive(?-i)\C
From a95683a55ce7744c60ccf79e441f0fb91a1b2e82 Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Mon, 17 Apr 2017 23:05:08 -0400 Subject: [PATCH 7/9] POSIX has `\W` --- regex.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex.html b/regex.html index 94d1a5d..ef3d87c 100644 --- a/regex.html +++ b/regex.html @@ -99,7 +99,7 @@

Syntax

Any character (including newline)\_. Match a "word" character (alphanumeric plus _)\w [[:word:]]\w\w\w\w Case[[:upper:]] / [[:lower:]][[:upper:]] / [[:lower:]][[:upper:]] / [[:lower:]]\u [[:upper:]] / \l [[:lower:]] - Match a non-"word" character\W\W\W + Match a non-"word" character\W\W\W\W\W Match a whitespace character (except newline)\s [[:space:]]\s [[:space:]]\s [[:space:]] Whitespace including newline\s [[:space:]]\s\_s Match a non-whitespace character\S\S[^[:space:]][^[:space:]]\S [^[:space:]] From fafa39e0666053c48af7721e94549107044ca3ff Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Mon, 17 Apr 2017 23:05:29 -0400 Subject: [PATCH 8/9] Convert sed tests to Vim --- tests/test_vim.sh | 180 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 179 insertions(+), 1 deletion(-) diff --git a/tests/test_vim.sh b/tests/test_vim.sh index b9648b5..1a810c5 100755 --- a/tests/test_vim.sh +++ b/tests/test_vim.sh @@ -1,12 +1,14 @@ #!/bin/bash +export LC_ALL=en_US.UTF-8 + check(){ FILE="$(mktemp)" echo "$2" >"$FILE" exec 3<<<"$3 :w! $FILE :q!" - vi - <"$FILE" 2<&3- + vi -u NONE - <"$FILE" 2<&3- if [ "$(cat "$FILE")" != "$4" ]; then echo "Test failed: $1" >&2 echo "----- expected ----- @@ -26,3 +28,179 @@ check 'Custom character class' \ aqv' ':%s/[b-eq]/x/g' \ 'axn axv' + +check 'Negated custom character class' \ +'abcdefgh' \ +':%s/[^b-dg-l]/x/g' \ +'xbcdxxgh' + +check 'Backslash not special in class' \ +"a]\\b +a]\\b" \ +":1s/[\\m-p]/x/g +:2s/[]]/x/g" \ +"a]xb +ax\\b" + +check 'Ranges' \ +'a-e-i +a-e-i' \ +':1s/[d-f]/x/g +:2s/[d-f-]/x/g' \ +'a-x-i +axxxi' + +check 'Alternation' \ +'acd' \ +':%s/b\|c/x/g' \ +'axd' + +check 'Escaped character' \ +'abc' \ +":%s/\\061\\x62\\x(99)/x/g" \ +'abc' + +# Charater classes + +check 'Any character' \ +'a ;d +efg' \ +':%s/./x/g' \ +'xxxx +xxx' + +check 'Word character' \ +'hello w0r|_d!' \ +':%s/\w/x/g' \ +'xxxxx xxx|xx!' + +check 'Upper case' \ +'Hell0 W0r|_D' \ +':%s/[[:upper:]]/X/g' \ +'Xell0 X0r|_X' + +check 'Lower case' \ +'Hell0 W0r|_D' \ +':%s/[[:lower:]]/x/g' \ +'Hxxx0 W0x|_D' + +check 'Whitespace' \ +'Hello world !' \ +':%s/\s/_/g' \ +'Hello_world_!' + +check 'Whitespace' \ +'Hello world !' \ +':%s/[[:space:]]/_/g' \ +'Hello_world_!' + +check 'Non-whitespace' \ +'Hello world !' \ +':%s/[^[:space:]]/x/g' \ +'xxxxx xxxxx x' + +check 'Digit' \ +'H3ll0 W0r|_D' \ +':%s/[[:digit:]]/+/g' \ +'H+ll+ W+r|_D' + +check 'Hexadecimal digit' \ +'H3ll0 W0r|_D' \ +':%s/[[:xdigit:]]/+/g' \ +'H+ll+ W+r|_+' + +check 'Punctuation' \ +'+- 01: hello, world! ;) -+' \ +':%s/[[:punct:]]/_/g' \ +'__ 01_ hello_ world_ __ __' + +check 'Alphabetical characters' \ +'+- 01: hello, world! ;) -+' \ +':%s/[[:alpha:]]/x/g' \ +'+- 01: xxxxx, xxxxx! ;) -+' + +check 'Alphanumerical characters' \ +'+- 01: hello, world! ;) -+' \ +':%s/[[:alnum:]]/x/g' \ +'+- xx: xxxxx, xxxxx! ;) -+' + +check 'Character equivalents' \ +'Rémi est prêt' \ +':%s/[[=e=]]/_/g' \ +'R_mi _st pr_t' + +check 'Word boundary' \ +'Hello, world' \ +':%s/o\>/x/g' \ +'Hellx, world' + +check 'Begining of line' \ +'testing tests' \ +':%s/^t/r/g' \ +'resting tests' + +check 'End of line' \ +'testing tests' \ +':%s/s$/x/g' \ +'testing testx' + +# Captures and groups + +check 'Capturing group' \ +'Name is Remi!' \ +':%s/^.*is \(.*\)!$/\1/' \ +'Remi' + +check 'Non-capturing parentheses' \ +'Some (dumb)text' \ +':%s/(.*)//g' \ +'Some text' + +check 'Backreference' \ +'ab be cd cc df' \ +':%s/\([a-z]\)\1/xx/g' \ +'ab be cd xx df' + +# Look-around not supported in POSIX + +# Multiplicity + +check '0 or 1' \ +'bb bab baab baa?b baaab' \ +':%s/baa\?b/x/g' \ +'bb x x baa?b baaab' + +check '0 or 1 (negative)' \ +'bb bab baab baa?b baaab' \ +':%s/baa?b/x/g' \ +'bb bab baab x baaab' + +check '1 or more' \ +'bb bab baab ba+b baaab' \ +':%s/ba\+b/x/g' \ +'bb x x ba+b x' + +check '1 or more (negative)' \ +'bb bab baab ba+b baaab' \ +':%s/ba+b/x/g' \ +'bb bab baab x baaab' + +check 'Specific number (1)' \ +'bb bab baab baaab baaaab' \ +':%s/ba\{2\}b/x/g' \ +'bb bab x baaab baaaab' + +check 'Specific number (closed)' \ +'bb bab baab baaab baaaab' \ +':%s/ba\{1,3\}b/x/g' \ +'bb x x x baaaab' + +check 'Specific number (open left)' \ +'bb bab baaab baaaab' \ +':%s/ba\{,3\}b/x/g' \ +'x x x baaaab' + +check 'Specific number (open right)' \ +'bb bab baaab baaaab' \ +':%s/ba\{2,\}b/x/g' \ +'bb bab x x' From 91cd205bec8f9b849eaa2943032ac3cf90292026 Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Tue, 20 Jul 2021 09:46:50 -0400 Subject: [PATCH 9/9] Run sed tests on MacOS too --- .github/workflows/test.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 003b13a..0987bf6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,7 +18,11 @@ jobs: run: tidy -errors -quiet --drop-empty-elements no regex.html test_sed: - runs-on: ubuntu-latest + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + fail-fast: false + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v2 - name: Run sed tests