diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 37564d3..0987bf6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,3 +16,21 @@ jobs: chmod +x /usr/local/bin/tidy - name: Validate HTML run: tidy -errors -quiet --drop-empty-elements no regex.html + + test_sed: + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + fail-fast: false + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - name: Run sed tests + run: tests/test_sed.sh + + set_vim: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Run Vim tests + run: tests/test_vim.sh diff --git a/regex.html b/regex.html index 5b4e476..ef3d87c 100644 --- a/regex.html +++ b/regex.html @@ -82,7 +82,7 @@

Syntax

- + @@ -99,7 +99,7 @@

Syntax

- + @@ -153,7 +153,8 @@

Syntax

- + +
WhatPerl/PCREPython's rePOSIX (BRE)POSIX extended (ERE)Vim
WhatPerl/PCREPython's rePOSIX (BRE)POSIX extended (ERE)Vim (with :set magic)
Basics
Any character (including newline)\_.
Match a "word" character (alphanumeric plus _)\w [[:word:]]\w\w\w\w
Case[[:upper:]] / [[:lower:]][[:upper:]] / [[:lower:]][[:upper:]] / [[:lower:]]\u [[:upper:]] / \l [[:lower:]]
Match a non-"word" character\W\W\W
Match a non-"word" character\W\W\W\W\W
Match a whitespace character (except newline)\s [[:space:]]\s [[:space:]]\s [[:space:]]
Whitespace including newline\s [[:space:]]\s\_s
Match a non-whitespace character\S\S[^[:space:]][^[:space:]]\S [^[:space:]]
Other
Independent non-backtracking pattern(?>...)\(...\)\@>
Make case-sensitive/insensitive(?i) / (?-i)(?i) / (?-i)\c / \C
Make case-insensitive(?i)(?i)\c
Make case-sensitive(?-i)\C
diff --git a/tests/test_sed.sh b/tests/test_sed.sh new file mode 100755 index 0000000..62111c0 --- /dev/null +++ b/tests/test_sed.sh @@ -0,0 +1,296 @@ +#!/bin/bash + +export LC_ALL=en_US.UTF-8 + +check_(){ + RESULT="$(echo "$4" | sed "$1" "$5")" + if [ "$RESULT" != "$6" ]; then + echo "Test failed: $3 ($2)" >&2 + echo "----- expected ----- +$6 +----- returned -----" + echo "$RESULT" + exit 1 + fi +} + +# Check BRE +checkb(){ + check_ "-e" "BRE" "$@" +} +# Check ERE +checke(){ + check_ "-E" "ERE" "$@" +} +# Check both +check(){ + checkb "$@" + checke "$@" +} + +fail_(){ + if echo | sed "$1" "$4" &>/dev/null; then + echo "Test didn't fail as expected: $3 ($2)" >&2 + exit 1 + fi +} + +# Fail check BRE +failb(){ + fail_ "-e" "BRE" "$@" +} +# Fail check ERE +faile(){ + fail_ "-E" "ERE" "$@" +} +# Fail check both +fail(){ + failb "$@" + faile "$@" +} + +# Basics + +check 'Custom character class' \ +'adn +aqv' 's/[b-eq]/x/g' \ +'axn +axv' + +check 'Negated custom character class' \ +'abcdefgh' \ +'s/[^b-dg-l]/x/g' \ +'xbcdxxgh' + +check 'Backslash not special in class' \ +"a]\\b +a]\\b" \ +"1s/[\\m-p]/x/g +2s/[]]/x/g" \ +"a]xb +ax\\b" + +check 'Ranges' \ +'a-e-i +a-e-i' \ +'1s/[d-f]/x/g +2s/[d-f-]/x/g' \ +'a-x-i +axxxi' + +checkb 'Alternation' \ +'acd' \ +'s/b\|c/x/g' \ +'axd' +checke 'Alternation' \ +'acd' \ +'s/b|c/x/g' \ +'axd' + +check 'Escaped character' \ +'abc' \ +"s/\\061\\x62\\x(99)/x/g" \ +'abc' + +# Charater classes + +check 'Any character' \ +'a ;d +efg' \ +'s/./x/g' \ +'xxxx +xxx' + +check 'Word character' \ +'hello w0r|_d!' \ +'s/\w/x/g' \ +'xxxxx xxx|xx!' + +fail 'Word class' \ +'s/[[:word:]]/x/g' + +check 'Upper case' \ +'Hell0 W0r|_D' \ +'s/[[:upper:]]/X/g' \ +'Xell0 X0r|_X' + +check 'Lower case' \ +'Hell0 W0r|_D' \ +'s/[[:lower:]]/x/g' \ +'Hxxx0 W0x|_D' + +check 'Whitespace' \ +'Hello world !' \ +'s/\s/_/g' \ +'Hello_world_!' + +check 'Whitespace' \ +'Hello world !' \ +'s/[[:space:]]/_/g' \ +'Hello_world_!' + +check 'Non-whitespace' \ +'Hello world !' \ +'s/[^[:space:]]/x/g' \ +'xxxxx xxxxx x' + +check 'Digit' \ +'H3ll0 W0r|_D' \ +'s/[[:digit:]]/+/g' \ +'H+ll+ W+r|_D' + +check 'Hexadecimal digit' \ +'H3ll0 W0r|_D' \ +'s/[[:xdigit:]]/+/g' \ +'H+ll+ W+r|_+' + +fail 'Octal digit' \ +'s/[[:odigit:]]/x/g' + +check 'Punctuation' \ +'+- 01: hello, world! ;) -+' \ +'s/[[:punct:]]/_/g' \ +'__ 01_ hello_ world_ __ __' + +check 'Alphabetical characters' \ +'+- 01: hello, world! ;) -+' \ +'s/[[:alpha:]]/x/g' \ +'+- 01: xxxxx, xxxxx! ;) -+' + +check 'Alphanumerical characters' \ +'+- 01: hello, world! ;) -+' \ +'s/[[:alnum:]]/x/g' \ +'+- xx: xxxxx, xxxxx! ;) -+' + +fail 'ASCII class' \ +'s/[[:ascii:]]/x/g' + +check 'Character equivalents' \ +'Rémi est prêt' \ +'s/[[=e=]]/_/g' \ +'R_mi _st pr_t' + +check 'Word boundary' \ +'Hello, world' \ +'s/o\b/x/g' \ +'Hellx, world' + +check 'Not word boundary' \ +'Hello, world' \ +'s/o\B/x/g' \ +'Hello, wxrld' + +check 'Begining of line' \ +'testing tests' \ +'s/^t/r/g' \ +'resting tests' + +check 'End of line' \ +'testing tests' \ +'s/s$/x/g' \ +'testing testx' + +# Captures and groups + +checkb 'Capturing group' \ +'Name is Remi!' \ +'s/^.*is \(.*\)!$/\1/' \ +'Remi' +checke 'Capturing group' \ +'Name is Remi!' \ +'s/^.*is (.*)!$/\1/' \ +'Remi' + +checkb 'Non-capturing parentheses' \ +'Some (dumb)text' \ +'s/(.*)//g' \ +'Some text' +checke 'Non-capturing parentheses' \ +'Some (dumb)text' \ +'s/\(.*\)//g' \ +'Some text' + +checkb 'Backreference' \ +'ab be cd cc df' \ +'s/\([a-z]\)\1/xx/g' \ +'ab be cd xx df' +checke 'Backreference' \ +'ab be cd cc df' \ +'s/([a-z])\1/xx/g' \ +'ab be cd xx df' + +# Look-around not supported in POSIX + +# Multiplicity + +checkb '0 or 1' \ +'bb bab baab baa?b baaab' \ +'s/baa\?b/x/g' \ +'bb x x baa?b baaab' +checke '0 or 1' \ +'bb bab baab baa?b baaab' \ +'s/baa?b/x/g' \ +'bb x x baa?b baaab' + +checkb '0 or 1 (negative)' \ +'bb bab baab baa?b baaab' \ +'s/baa?b/x/g' \ +'bb bab baab x baaab' +checke '0 or 1 (negative)' \ +'bb bab baab baa?b baaab' \ +'s/baa\?b/x/g' \ +'bb bab baab x baaab' + +checkb '1 or more' \ +'bb bab baab ba+b baaab' \ +'s/ba\+b/x/g' \ +'bb x x ba+b x' +checke '1 or more' \ +'bb bab baab ba+b baaab' \ +'s/ba+b/x/g' \ +'bb x x ba+b x' + +checkb '1 or more (negative)' \ +'bb bab baab ba+b baaab' \ +'s/ba+b/x/g' \ +'bb bab baab x baaab' +checke '1 or more (negative)' \ +'bb bab baab ba+b baaab' \ +'s/ba\+b/x/g' \ +'bb bab baab x baaab' + +checkb 'Specific number (1)' \ +'bb bab baab baaab baaaab' \ +'s/ba\{2\}b/x/g' \ +'bb bab x baaab baaaab' +checke 'Specific number (1)' \ +'bb bab baab baaab baaaab' \ +'s/ba{2}b/x/g' \ +'bb bab x baaab baaaab' + +checkb 'Specific number (closed)' \ +'bb bab baab baaab baaaab' \ +'s/ba\{1,3\}b/x/g' \ +'bb x x x baaaab' +checke 'Specific number (closed)' \ +'bb bab baab baaab baaaab' \ +'s/ba{1,3}b/x/g' \ +'bb x x x baaaab' + +checkb 'Specific number (open left)' \ +'bb bab baaab baaaab' \ +'s/ba\{,3\}b/x/g' \ +'x x x baaaab' +checke 'Specific number (open left)' \ +'bb bab baaab baaaab' \ +'s/ba{,3}b/x/g' \ +'x x x baaaab' + +checkb 'Specific number (open right)' \ +'bb bab baaab baaaab' \ +'s/ba\{2,\}b/x/g' \ +'bb bab x x' +checke 'Specific number (open right)' \ +'bb bab baaab baaaab' \ +'s/ba{2,}b/x/g' \ +'bb bab x x' diff --git a/tests/test_vim.sh b/tests/test_vim.sh new file mode 100755 index 0000000..1a810c5 --- /dev/null +++ b/tests/test_vim.sh @@ -0,0 +1,206 @@ +#!/bin/bash + +export LC_ALL=en_US.UTF-8 + +check(){ + FILE="$(mktemp)" + echo "$2" >"$FILE" + exec 3<<<"$3 +:w! $FILE +:q!" + vi -u NONE - <"$FILE" 2<&3- + if [ "$(cat "$FILE")" != "$4" ]; then + echo "Test failed: $1" >&2 + echo "----- expected ----- +$4 +----- returned -----" + cat "$FILE" + rm "$FILE" + exit 1 + fi + rm "$FILE" +} + +# Basics + +check 'Custom character class' \ +'adn +aqv' ':%s/[b-eq]/x/g' \ +'axn +axv' + +check 'Negated custom character class' \ +'abcdefgh' \ +':%s/[^b-dg-l]/x/g' \ +'xbcdxxgh' + +check 'Backslash not special in class' \ +"a]\\b +a]\\b" \ +":1s/[\\m-p]/x/g +:2s/[]]/x/g" \ +"a]xb +ax\\b" + +check 'Ranges' \ +'a-e-i +a-e-i' \ +':1s/[d-f]/x/g +:2s/[d-f-]/x/g' \ +'a-x-i +axxxi' + +check 'Alternation' \ +'acd' \ +':%s/b\|c/x/g' \ +'axd' + +check 'Escaped character' \ +'abc' \ +":%s/\\061\\x62\\x(99)/x/g" \ +'abc' + +# Charater classes + +check 'Any character' \ +'a ;d +efg' \ +':%s/./x/g' \ +'xxxx +xxx' + +check 'Word character' \ +'hello w0r|_d!' \ +':%s/\w/x/g' \ +'xxxxx xxx|xx!' + +check 'Upper case' \ +'Hell0 W0r|_D' \ +':%s/[[:upper:]]/X/g' \ +'Xell0 X0r|_X' + +check 'Lower case' \ +'Hell0 W0r|_D' \ +':%s/[[:lower:]]/x/g' \ +'Hxxx0 W0x|_D' + +check 'Whitespace' \ +'Hello world !' \ +':%s/\s/_/g' \ +'Hello_world_!' + +check 'Whitespace' \ +'Hello world !' \ +':%s/[[:space:]]/_/g' \ +'Hello_world_!' + +check 'Non-whitespace' \ +'Hello world !' \ +':%s/[^[:space:]]/x/g' \ +'xxxxx xxxxx x' + +check 'Digit' \ +'H3ll0 W0r|_D' \ +':%s/[[:digit:]]/+/g' \ +'H+ll+ W+r|_D' + +check 'Hexadecimal digit' \ +'H3ll0 W0r|_D' \ +':%s/[[:xdigit:]]/+/g' \ +'H+ll+ W+r|_+' + +check 'Punctuation' \ +'+- 01: hello, world! ;) -+' \ +':%s/[[:punct:]]/_/g' \ +'__ 01_ hello_ world_ __ __' + +check 'Alphabetical characters' \ +'+- 01: hello, world! ;) -+' \ +':%s/[[:alpha:]]/x/g' \ +'+- 01: xxxxx, xxxxx! ;) -+' + +check 'Alphanumerical characters' \ +'+- 01: hello, world! ;) -+' \ +':%s/[[:alnum:]]/x/g' \ +'+- xx: xxxxx, xxxxx! ;) -+' + +check 'Character equivalents' \ +'Rémi est prêt' \ +':%s/[[=e=]]/_/g' \ +'R_mi _st pr_t' + +check 'Word boundary' \ +'Hello, world' \ +':%s/o\>/x/g' \ +'Hellx, world' + +check 'Begining of line' \ +'testing tests' \ +':%s/^t/r/g' \ +'resting tests' + +check 'End of line' \ +'testing tests' \ +':%s/s$/x/g' \ +'testing testx' + +# Captures and groups + +check 'Capturing group' \ +'Name is Remi!' \ +':%s/^.*is \(.*\)!$/\1/' \ +'Remi' + +check 'Non-capturing parentheses' \ +'Some (dumb)text' \ +':%s/(.*)//g' \ +'Some text' + +check 'Backreference' \ +'ab be cd cc df' \ +':%s/\([a-z]\)\1/xx/g' \ +'ab be cd xx df' + +# Look-around not supported in POSIX + +# Multiplicity + +check '0 or 1' \ +'bb bab baab baa?b baaab' \ +':%s/baa\?b/x/g' \ +'bb x x baa?b baaab' + +check '0 or 1 (negative)' \ +'bb bab baab baa?b baaab' \ +':%s/baa?b/x/g' \ +'bb bab baab x baaab' + +check '1 or more' \ +'bb bab baab ba+b baaab' \ +':%s/ba\+b/x/g' \ +'bb x x ba+b x' + +check '1 or more (negative)' \ +'bb bab baab ba+b baaab' \ +':%s/ba+b/x/g' \ +'bb bab baab x baaab' + +check 'Specific number (1)' \ +'bb bab baab baaab baaaab' \ +':%s/ba\{2\}b/x/g' \ +'bb bab x baaab baaaab' + +check 'Specific number (closed)' \ +'bb bab baab baaab baaaab' \ +':%s/ba\{1,3\}b/x/g' \ +'bb x x x baaaab' + +check 'Specific number (open left)' \ +'bb bab baaab baaaab' \ +':%s/ba\{,3\}b/x/g' \ +'x x x baaaab' + +check 'Specific number (open right)' \ +'bb bab baaab baaaab' \ +':%s/ba\{2,\}b/x/g' \ +'bb bab x x'