From a8debc610838a11671a1e5244259b645a6c4ee80 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Fri, 17 May 2019 15:47:13 -0700 Subject: [PATCH 1/6] Adding split method to string class --- py/string.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/py/string.go b/py/string.go index 9050f114..5b5f4d8e 100644 --- a/py/string.go +++ b/py/string.go @@ -34,6 +34,22 @@ or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.`, StrNew, nil) + +func init() { + StringType.Dict["split"] = MustNewMethod("split", func(self Object, value Object) (Object, error) { + selfStr := self.(String) + if valStr, ok := value.(String); ok { + ss := strings.Split(string(selfStr), string(valStr)) + o := List{} + for _, j := range ss { + o.Items = append(o.Items, String(j)) + } + return &o, nil + } + return nil, fmt.Errorf("Not split by string") + }, 0, "split(sub) -> split string with sub.") +} + // Type of this object func (s String) Type() *Type { return StringType From cd235f28f70c68330c7e1b859bdea8e673f54394 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Fri, 17 May 2019 16:05:01 -0700 Subject: [PATCH 2/6] Adding test for str split method --- py/tests/string.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/py/tests/string.py b/py/tests/string.py index c837a441..fa836ab9 100644 --- a/py/tests/string.py +++ b/py/tests/string.py @@ -99,6 +99,9 @@ class C(): asc="hello" uni="£100世界𠜎" # 1,2,3,4 byte unicode characters +doc="split" +assert 4 == len(list("0,1,2,4".split(","))) + doc="ascii len" assert len(asc) == 5 From d3c579da5713eda8ce28f074278dd455474058a6 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Fri, 24 May 2019 15:30:02 -0700 Subject: [PATCH 3/6] Fixing .split exception and added more python tests --- py/string.go | 2 +- py/tests/string.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/py/string.go b/py/string.go index 5b5f4d8e..c0cdce77 100644 --- a/py/string.go +++ b/py/string.go @@ -46,7 +46,7 @@ func init() { } return &o, nil } - return nil, fmt.Errorf("Not split by string") + return nil, ExceptionNewf(TypeError, "descriptor 'split' requires a 'str' object but received a '%s'", value.Type()) }, 0, "split(sub) -> split string with sub.") } diff --git a/py/tests/string.py b/py/tests/string.py index fa836ab9..9f0ad82f 100644 --- a/py/tests/string.py +++ b/py/tests/string.py @@ -100,7 +100,9 @@ class C(): uni="£100世界𠜎" # 1,2,3,4 byte unicode characters doc="split" -assert 4 == len(list("0,1,2,4".split(","))) +assert ["0","1","2","4"] == list("0,1,2,4".split(",")) +assert [""] == list("".split(",")) +assertRaisesText(TypeError, "descriptor 'split' requires a 'str' object but received a 'int'", lambda: "0,1,2,4".split(1)) doc="ascii len" assert len(asc) == 5 From f7ab626121d7fce3d0909a26422c467aaaf005a5 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Fri, 24 May 2019 16:42:43 -0700 Subject: [PATCH 4/6] Closer matching to python string split behavior --- py/string.go | 64 +++++++++++++++++++++++++++++++++++++++++----- py/tests/string.py | 3 +++ 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/py/string.go b/py/string.go index c0cdce77..084577de 100644 --- a/py/string.go +++ b/py/string.go @@ -17,6 +17,7 @@ import ( "fmt" "strconv" "strings" + "unicode" "unicode/utf8" ) @@ -34,19 +35,70 @@ or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.`, StrNew, nil) +// standard golang strings.Fields doesn't have a 'first N' argument +func fieldsN(s string, n int) []string { + out := []string{} + cur := []rune{} + r := []rune(s) + for _, c := range r { + //until we have covered the first N elements, multiple white-spaces are 'merged' + if n < 0 || len(out) < n { + if unicode.IsSpace(c) { + if len(cur) > 0 { + out = append(out, string(cur)) + cur = []rune{} + } + } else { + cur = append(cur, c) + } + //until we see the next letter, after collecting the first N fields, continue to merge whitespaces + } else if len(out) == n && len(cur) == 0 { + if !unicode.IsSpace(c) { + cur = append(cur, c) + } + //now that enough words have been collected, just copy into the last element + } else { + cur = append(cur, c) + } + } + if len(cur) > 0 { + out = append(out, string(cur)) + } + return out +} func init() { - StringType.Dict["split"] = MustNewMethod("split", func(self Object, value Object) (Object, error) { + StringType.Dict["split"] = MustNewMethod("split", func(self Object, args Tuple) (Object, error) { selfStr := self.(String) + var value Object = None + zeroRemove := true + if len(args) > 0 { + if _, ok := args[0].(NoneType); !ok { + value = args[0] + zeroRemove = false + } + } + var maxSplit int = -2 + if len(args) > 1 { + if m, ok := args[1].(Int); ok { + maxSplit = int(m) + } + } + valArray := []string{} if valStr, ok := value.(String); ok { - ss := strings.Split(string(selfStr), string(valStr)) - o := List{} - for _, j := range ss { + valArray = strings.SplitN(string(selfStr), string(valStr), maxSplit+1) + } else if _, ok := value.(NoneType); ok { + valArray = fieldsN(string(selfStr), maxSplit) + } else { + return nil, ExceptionNewf(TypeError, "descriptor 'split' requires a 'str' object but received a '%s'", value.Type()) + } + o := List{} + for _, j := range valArray { + if len(j) > 0 || !zeroRemove { o.Items = append(o.Items, String(j)) } - return &o, nil } - return nil, ExceptionNewf(TypeError, "descriptor 'split' requires a 'str' object but received a '%s'", value.Type()) + return &o, nil }, 0, "split(sub) -> split string with sub.") } diff --git a/py/tests/string.py b/py/tests/string.py index 9f0ad82f..f8391327 100644 --- a/py/tests/string.py +++ b/py/tests/string.py @@ -102,6 +102,9 @@ class C(): doc="split" assert ["0","1","2","4"] == list("0,1,2,4".split(",")) assert [""] == list("".split(",")) +assert ['a', 'd,c'] == list("a,d,c".split(",",1)) +assert ['a', 'd', 'b'] == list(" a d b ".split()) +assert ['a', 'd b '] == list(" a d b ".split(None, 1)) assertRaisesText(TypeError, "descriptor 'split' requires a 'str' object but received a 'int'", lambda: "0,1,2,4".split(1)) doc="ascii len" From 2f4feef9522af3a34fc2b5fa300dd744e5a1c148 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Mon, 27 May 2019 20:48:54 -0700 Subject: [PATCH 5/6] Fixing error message so it matches one from python3.4 --- py/string.go | 2 +- py/tests/string.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/py/string.go b/py/string.go index 084577de..4b7d723e 100644 --- a/py/string.go +++ b/py/string.go @@ -90,7 +90,7 @@ func init() { } else if _, ok := value.(NoneType); ok { valArray = fieldsN(string(selfStr), maxSplit) } else { - return nil, ExceptionNewf(TypeError, "descriptor 'split' requires a 'str' object but received a '%s'", value.Type()) + return nil, ExceptionNewf(TypeError, "must be str or None, not %s", value.Type()) } o := List{} for _, j := range valArray { diff --git a/py/tests/string.py b/py/tests/string.py index f8391327..2f3938d7 100644 --- a/py/tests/string.py +++ b/py/tests/string.py @@ -105,7 +105,7 @@ class C(): assert ['a', 'd,c'] == list("a,d,c".split(",",1)) assert ['a', 'd', 'b'] == list(" a d b ".split()) assert ['a', 'd b '] == list(" a d b ".split(None, 1)) -assertRaisesText(TypeError, "descriptor 'split' requires a 'str' object but received a 'int'", lambda: "0,1,2,4".split(1)) +assertRaisesText(TypeError, "must be str or None, not int", lambda: "0,1,2,4".split(1)) doc="ascii len" assert len(asc) == 5 From e316b2b5410c5979d839034b95dd29c7e5196b67 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Mon, 27 May 2019 21:47:07 -0700 Subject: [PATCH 6/6] Fixing error message to match python3.4 (previous version was python3.7) --- py/string.go | 2 +- py/tests/string.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/py/string.go b/py/string.go index 4b7d723e..69600149 100644 --- a/py/string.go +++ b/py/string.go @@ -90,7 +90,7 @@ func init() { } else if _, ok := value.(NoneType); ok { valArray = fieldsN(string(selfStr), maxSplit) } else { - return nil, ExceptionNewf(TypeError, "must be str or None, not %s", value.Type()) + return nil, ExceptionNewf(TypeError, "Can't convert '%s' object to str implicitly", value.Type()) } o := List{} for _, j := range valArray { diff --git a/py/tests/string.py b/py/tests/string.py index 2f3938d7..c8bc1118 100644 --- a/py/tests/string.py +++ b/py/tests/string.py @@ -105,7 +105,7 @@ class C(): assert ['a', 'd,c'] == list("a,d,c".split(",",1)) assert ['a', 'd', 'b'] == list(" a d b ".split()) assert ['a', 'd b '] == list(" a d b ".split(None, 1)) -assertRaisesText(TypeError, "must be str or None, not int", lambda: "0,1,2,4".split(1)) +assertRaisesText(TypeError, "Can't convert 'int' object to str implicitly", lambda: "0,1,2,4".split(1)) doc="ascii len" assert len(asc) == 5