Skip to content

Adding split method to string class #60

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions py/string.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)

Expand All @@ -34,6 +35,73 @@ or repr(object).
encoding defaults to sys.getdefaultencoding().
errors defaults to 'strict'.`, StrNew, nil)

// fieldsN splits s around runs of white space (as reported by
// unicode.IsSpace), like strings.Fields, but performs at most n splits.
//
// A negative n means "no limit". Once n fields have been produced, any
// white space that follows is skipped and everything from the next
// non-space rune to the end of s (inner and trailing white space
// included) is returned as one final field. Empty fields are never
// produced, and the result is always a non-nil slice.
func fieldsN(s string, n int) []string {
	runes := []rune(s)
	fields := []string{}
	// start is the index in runes where the field being collected began,
	// or -1 while no field is in progress.
	start := -1
	for i, ch := range runes {
		switch {
		case n >= 0 && len(fields) >= n:
			// The split budget is used up: the first non-space rune opens
			// the final field, which then runs to the end of the input.
			if start < 0 && !unicode.IsSpace(ch) {
				start = i
			}
		case unicode.IsSpace(ch):
			// White space closes the current field, if there is one;
			// consecutive white space collapses into a single separator.
			if start >= 0 {
				fields = append(fields, string(runes[start:i]))
				start = -1
			}
		default:
			if start < 0 {
				start = i
			}
		}
	}
	// Flush the field that was still open when the input ended.
	if start >= 0 {
		fields = append(fields, string(runes[start:]))
	}
	return fields
}

func init() {
StringType.Dict["split"] = MustNewMethod("split", func(self Object, args Tuple) (Object, error) {
selfStr := self.(String)
var value Object = None
zeroRemove := true
if len(args) > 0 {
if _, ok := args[0].(NoneType); !ok {
value = args[0]
zeroRemove = false
}
}
var maxSplit int = -2
if len(args) > 1 {
if m, ok := args[1].(Int); ok {
maxSplit = int(m)
}
}
valArray := []string{}
if valStr, ok := value.(String); ok {
valArray = strings.SplitN(string(selfStr), string(valStr), maxSplit+1)
} else if _, ok := value.(NoneType); ok {
valArray = fieldsN(string(selfStr), maxSplit)
} else {
return nil, ExceptionNewf(TypeError, "Can't convert '%s' object to str implicitly", value.Type())
}
o := List{}
for _, j := range valArray {
if len(j) > 0 || !zeroRemove {
o.Items = append(o.Items, String(j))
}
}
return &o, nil
}, 0, "split(sub) -> split string with sub.")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Python help is

    S.split([sep [,maxsplit]]) -> list of strings
    
    Return a list of the words in the string S, using sep as the
    delimiter string.  If maxsplit is given, at most maxsplit
    splits are done. If sep is not specified or is None, any
    whitespace string is a separator and empty strings are removed
    from the result.

Which makes me see that we are missing two things.

  • If sep isn't passed (or is None) in then we should be using strings.Fields in go terms (This doesn't have a maxfields parameter though.)
  • There is another optional parameter to specify the number of splits.

Here are some cases to consider

>>> "a,d,c".split(",")
['a', 'd', 'c']
>>> "a,d,c".split(",",1)
['a', 'd,c']
>>> " a   d   b   ".split()
['a', 'd', 'b']
>>> " a   d   b   ".split(None, 1)
['a', 'd   b   ']

}

// Type of this object
func (s String) Type() *Type {
return StringType
Expand Down
8 changes: 8 additions & 0 deletions py/tests/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,14 @@ class C():
asc="hello"
uni="£100世界𠜎" # 1,2,3,4 byte unicode characters

# str.split: explicit separator, maxsplit, whitespace mode and a bad separator type.
# NOTE(review): `doc` presumably labels the section for the test harness - confirm.
doc="split"
# Explicit separator, no limit: every occurrence splits.
assert ["0","1","2","4"] == list("0,1,2,4".split(","))
# Splitting an empty string on a separator yields one empty string.
assert [""] == list("".split(","))
# maxsplit=1: only the first separator splits, the rest stays intact.
assert ['a', 'd,c'] == list("a,d,c".split(",",1))
# No separator: split on whitespace, dropping leading/trailing whitespace.
assert ['a', 'd', 'b'] == list(" a d b ".split())
# sep=None with maxsplit=1: the remainder keeps its inner and trailing whitespace.
assert ['a', 'd b '] == list(" a d b ".split(None, 1))
# A non-str, non-None separator must raise TypeError with this exact text.
assertRaisesText(TypeError, "Can't convert 'int' object to str implicitly", lambda: "0,1,2,4".split(1))

doc="ascii len"
assert len(asc) == 5

Expand Down