From 08af76b50e8dd18f02235302001db511aa186491 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Tue, 23 Feb 2021 12:25:11 +0100 Subject: [PATCH 1/6] Change lib search command to use fuzzy search --- commands/lib/search.go | 57 +++++++++++++++++++------------------ commands/lib/search_test.go | 11 +++++-- go.mod | 2 +- go.sum | 8 ++---- test/test_lib.py | 39 +++++++++++++++++++++---- 5 files changed, 75 insertions(+), 42 deletions(-) diff --git a/commands/lib/search.go b/commands/lib/search.go index d46ddef4b59..f13b2e5eb68 100644 --- a/commands/lib/search.go +++ b/commands/lib/search.go @@ -24,7 +24,7 @@ import ( "github.com/arduino/arduino-cli/arduino/libraries/librariesmanager" "github.com/arduino/arduino-cli/commands" rpc "github.com/arduino/arduino-cli/rpc/commands" - "github.com/imjasonmiller/godice" + "github.com/lithammer/fuzzysearch/fuzzy" semver "go.bug.st/relaxed-semver" ) @@ -44,33 +44,21 @@ func searchLibrary(req *rpc.LibrarySearchReq, lm *librariesmanager.LibrariesMana res := []*rpc.SearchedLibrary{} status := rpc.LibrarySearchStatus_success - for _, lib := range lm.Index.Libraries { - qry := strings.ToLower(req.GetQuery()) - if strings.Contains(strings.ToLower(lib.Name), qry) || - strings.Contains(strings.ToLower(lib.Latest.Paragraph), qry) || - strings.Contains(strings.ToLower(lib.Latest.Sentence), qry) { - releases := map[string]*rpc.LibraryRelease{} - for str, rel := range lib.Releases { - releases[str] = GetLibraryParameters(rel) - } - latest := GetLibraryParameters(lib.Latest) - - searchedLib := &rpc.SearchedLibrary{ - Name: lib.Name, - Releases: releases, - Latest: latest, - } - res = append(res, searchedLib) + // If the query is empty all libraries are returned + if strings.Trim(req.GetQuery(), " ") == "" { + for _, lib := range lm.Index.Libraries { + res = append(res, indexLibraryToRPCSearchLibrary(lib)) } + return &rpc.LibrarySearchResp{Libraries: res, Status: status}, nil } - if len(res) == 0 { - status = rpc.LibrarySearchStatus_failed - for _, lib := range lm.Index.Libraries { - if godice.CompareString(req.GetQuery(), lib.Name) > similarityThreshold { - res = append(res, &rpc.SearchedLibrary{ - Name: lib.Name, - }) + for _, lib := range lm.Index.Libraries { + words := strings.Split(req.GetQuery(), " ") + toTest := []string{lib.Name, lib.Latest.Paragraph, lib.Latest.Sentence} + + for _, word := range words { + if len(fuzzy.FindNormalizedFold(word, toTest)) > 0 { + res = append(res, indexLibraryToRPCSearchLibrary(lib)) } } } @@ -78,8 +66,23 @@ func searchLibrary(req *rpc.LibrarySearchReq, lm *librariesmanager.LibrariesMana return &rpc.LibrarySearchResp{Libraries: res, Status: status}, nil } -// GetLibraryParameters FIXMEDOC -func GetLibraryParameters(rel *librariesindex.Release) *rpc.LibraryRelease { +// indexLibraryToRPCSearchLibrary converts a librariindex.Library to rpc.SearchLibrary +func indexLibraryToRPCSearchLibrary(lib *librariesindex.Library) *rpc.SearchedLibrary { + releases := map[string]*rpc.LibraryRelease{} + for str, rel := range lib.Releases { + releases[str] = getLibraryParameters(rel) + } + latest := getLibraryParameters(lib.Latest) + + return &rpc.SearchedLibrary{ + Name: lib.Name, + Releases: releases, + Latest: latest, + } +} + +// getLibraryParameters FIXMEDOC +func getLibraryParameters(rel *librariesindex.Release) *rpc.LibraryRelease { return &rpc.LibraryRelease{ Author: rel.Author, Version: rel.Version.String(), diff --git a/commands/lib/search_test.go b/commands/lib/search_test.go index 65ca2ca801f..2fd8088d867 100644 --- a/commands/lib/search_test.go +++ b/commands/lib/search_test.go @@ -48,7 +48,12 @@ func TestSearchLibrarySimilar(t *testing.T) { } assert := assert.New(t) - assert.Equal(resp.GetStatus(), rpc.LibrarySearchStatus_failed) - assert.Equal(len(resp.GetLibraries()), 1) - assert.Equal(resp.GetLibraries()[0].Name, "Arduino") + assert.Equal(resp.GetStatus(), rpc.LibrarySearchStatus_success) + assert.Equal(len(resp.GetLibraries()), 2) + libs := map[string]*rpc.SearchedLibrary{} + for _, l := range resp.GetLibraries() { + libs[l.Name] = l + } + assert.Contains(libs, "ArduinoTestPackage") + assert.Contains(libs, "Arduino") } diff --git a/go.mod b/go.mod index 2b22111f2a9..9f6537bc54a 100644 --- a/go.mod +++ b/go.mod @@ -19,10 +19,10 @@ require ( github.com/gofrs/uuid v3.2.0+incompatible github.com/golang/protobuf v1.4.2 github.com/h2non/filetype v1.0.8 // indirect - github.com/imjasonmiller/godice v0.1.2 github.com/juju/loggo v0.0.0-20190526231331-6e530bcce5d8 // indirect github.com/kr/text v0.2.0 // indirect github.com/leonelquinteros/gotext v1.4.0 + github.com/lithammer/fuzzysearch v1.1.1 // indirect github.com/mattn/go-colorable v0.1.2 github.com/mattn/go-isatty v0.0.8 github.com/mattn/go-runewidth v0.0.9 // indirect diff --git a/go.sum b/go.sum index d343786ab25..e4474ab6ec3 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,6 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/GeertJohan/go.incremental v1.0.0 h1:7AH+pY1XUgQE4Y1HcXYaMqAI0m9yrFqo/jt0CW30vsg= github.com/GeertJohan/go.incremental v1.0.0/go.mod h1:6fAjUhbVuX1KcMD3c8TEgVUqmo4seqhv0i0kdATSkM0= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/akavel/rsrc v0.8.0/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxkKq+c= @@ -117,8 +116,6 @@ github.com/h2non/filetype v1.0.8 h1:le8gpf+FQA0/DlDABbtisA1KiTS0Xi+YSC/E8yY3Y14= github.com/h2non/filetype v1.0.8/go.mod h1:isekKqOuhMj+s/7r3rIeTErIRy4Rub5uBWHfvMusLMU= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= -github.com/imjasonmiller/godice v0.1.2 h1:T1/sW/HoDzFeuwzOOuQjmeMELz9CzZ53I2CnD+08zD4= -github.com/imjasonmiller/godice v0.1.2/go.mod h1:8cTkdnVI+NglU2d6sv+ilYcNaJ5VSTBwvMbFULJd/QQ= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= @@ -155,6 +152,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leonelquinteros/gotext v1.4.0 h1:2NHPCto5IoMXbrT0bldPrxj0qM5asOCwtb1aUQZ1tys= github.com/leonelquinteros/gotext v1.4.0/go.mod h1:yZGXREmoGTtBvZHNcc+Yfug49G/2spuF/i/Qlsvz1Us= +github.com/lithammer/fuzzysearch v1.1.1 h1:8F9OAV2xPuYblToVohjanztdnPjbtA0MLgMvDKQ0Z08= +github.com/lithammer/fuzzysearch v1.1.1/go.mod h1:H2bng+w5gsR7NlfIJM8ElGZI0sX6C/9uzGqicVXGU6c= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= @@ -215,7 +214,6 @@ github.com/segmentio/objconv v1.0.1 h1:QjfLzwriJj40JibCV3MGSEiAoXixbp4ybhwfTB8RX github.com/segmentio/objconv v1.0.1/go.mod h1:auayaH5k3137Cl4SoXTgrzQcuQDmvuVtZgS0fb1Ahys= github.com/segmentio/stats/v4 v4.5.3 h1:Y/DSUWZ4c8ICgqJ9rQohzKvGqGWbLPWad5zmxVoKN+Y= github.com/segmentio/stats/v4 v4.5.3/go.mod h1:LsaahUJR7iiSs8mnkvQvdQ/RLHAS5adGLxuntg0ydGo= -github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= @@ -281,7 +279,6 @@ golang.org/x/crypto v0.0.0-20180214000028-650f4a345ab4/go.mod h1:6SG95UA2DQfeDnf golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc= golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200406173513-056763e48d71 h1:DOmugCavvUtnUD114C1Wh+UgTgQZ4pMLzXxi1pSt+/Y= golang.org/x/crypto v0.0.0-20200406173513-056763e48d71/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -334,7 +331,6 @@ golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190729092621-ff9f1409240a h1:mEQZbbaBjWyLNy0tmZmgEuQAR8XOQ3hL8GYi3J/NG64= golang.org/x/tools v0.0.0-20190729092621-ff9f1409240a/go.mod h1:jcCCGcm9btYwXyDqrUWc6MKQKKGJCWEQ3AfLSRIbEuI= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/test/test_lib.py b/test/test_lib.py index e5d8fb35092..04b0daeb837 100644 --- a/test/test_lib.py +++ b/test/test_lib.py @@ -383,11 +383,11 @@ def test_search(run_command): libs_json = json.loads(result.stdout) assert len(libs) == len(libs_json.get("libraries")) - result = run_command("lib search") + result = run_command("lib search --names") assert result.ok # Search for a specific target - result = run_command("lib search ArduinoJson --format json") + result = run_command("lib search --names ArduinoJson --format json") assert result.ok libs_json = json.loads(result.stdout) assert len(libs_json.get("libraries")) >= 1 @@ -399,10 +399,39 @@ def test_search_paragraph(run_command): within the index file. """ assert run_command("lib update-index") - result = run_command('lib search "A simple and efficient JSON library" --format json') + result = run_command('lib search "A simple and efficient JSON library" --names --format json') assert result.ok - libs_json = json.loads(result.stdout) - assert 1 == len(libs_json.get("libraries")) + data = json.loads(result.stdout) + libraries = [l["name"] for l in data["libraries"]] + assert "ArduinoJson" in libraries + + +def test_lib_search_fuzzy(run_command): + run_command("update") + + def run_search(search_args, expected_libraries): + res = run_command(f"lib search --names --format json {search_args}") + assert res.ok + data = json.loads(res.stdout) + libraries = [l["name"] for l in data["libraries"]] + for l in expected_libraries: + assert l in libraries + + run_search("Arduino_MKRIoTCarrier", ["Arduino_MKRIoTCarrier"]) + run_search("Arduino mkr iot carrier", ["Arduino_MKRIoTCarrier"]) + run_search("Arduinomkriotcarrier", ["Arduino_MKRIoTCarrier"]) + + run_search( + "dht", + ["DHT sensor library", "DHT sensor library for ESPx", "DHT12", "SimpleDHT", "TinyDHT sensor library", "SDHT"], + ) + run_search( + "dht11", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "TinyDHT sensor library", "SDHT"] + ) + run_search("dht12", ["AM232X", "DHT12", "DHT12 sensor library", "SDHT"]) + run_search("dht22", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "SDHT"]) + run_search("dht sensor", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "SDHT"]) + run_search("sensor dht", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "SDHT"]) def test_lib_list_with_updatable_flag(run_command): From 424fcc20135f125f8db2278bfc91ac04958ab727 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Tue, 23 Feb 2021 16:35:10 +0100 Subject: [PATCH 2/6] Change core search command to use fuzzy search --- commands/core/search.go | 49 +++++++++--------- commands/core/search_test.go | 54 +++++++++++++++++--- test/test_core.py | 99 +++++++++++++++++------------------- 3 files changed, 119 insertions(+), 83 deletions(-) diff --git a/commands/core/search.go b/commands/core/search.go index 4d3034eacb3..c57f9c02a57 100644 --- a/commands/core/search.go +++ b/commands/core/search.go @@ -23,19 +23,12 @@ import ( "github.com/arduino/arduino-cli/arduino/cores" "github.com/arduino/arduino-cli/commands" rpc "github.com/arduino/arduino-cli/rpc/commands" + "github.com/lithammer/fuzzysearch/fuzzy" ) -func match(line, searchArgs string) bool { - return strings.Contains(strings.ToLower(line), strings.ToLower(searchArgs)) -} - -func exactMatch(line, searchArgs string) bool { - return strings.Compare(strings.ToLower(line), strings.ToLower(searchArgs)) == 0 -} - // PlatformSearch FIXMEDOC func PlatformSearch(req *rpc.PlatformSearchReq) (*rpc.PlatformSearchResp, error) { - searchArgs := req.SearchArgs + searchArgs := strings.Trim(req.SearchArgs, " ") allVersions := req.AllVersions pm := commands.GetPackageManager(req.Instance.Id) if pm == nil { @@ -63,27 +56,35 @@ func PlatformSearch(req *rpc.PlatformSearchReq) (*rpc.PlatformSearchResp, error) continue } - // platform has a valid release, check if it matches the search arguments - if match(platform.Name, searchArgs) || match(platform.Architecture, searchArgs) || - exactMatch(platform.String(), searchArgs) || match(targetPackage.Name, searchArgs) || - match(targetPackage.Maintainer, searchArgs) || match(targetPackage.WebsiteURL, searchArgs) { + if searchArgs == "" { if allVersions { res = append(res, platform.GetAllReleases()...) } else { res = append(res, platformRelease) } - } else { - // if we didn't find a match in the platform data, search for - // a match in the boards manifest - for _, board := range platformRelease.BoardsManifest { - if match(board.Name, searchArgs) { - if allVersions { - res = append(res, platform.GetAllReleases()...) - } else { - res = append(res, platformRelease) - } - break + continue + } + + words := strings.Split(searchArgs, " ") + toTest := []string{ + platform.String(), + platform.Name, + platform.Architecture, + targetPackage.Name, + targetPackage.Maintainer, + targetPackage.WebsiteURL, + } + for _, board := range platformRelease.BoardsManifest { + toTest = append(toTest, board.Name) + } + for _, word := range words { + if len(fuzzy.FindNormalizedFold(word, toTest)) > 0 { + if allVersions { + res = append(res, platform.GetAllReleases()...) + } else { + res = append(res, platformRelease) } + break } } } diff --git a/commands/core/search_test.go b/commands/core/search_test.go index 0750394c125..e8f025e4e9e 100644 --- a/commands/core/search_test.go +++ b/commands/core/search_test.go @@ -24,17 +24,9 @@ import ( "github.com/arduino/arduino-cli/rpc/commands" rpc "github.com/arduino/arduino-cli/rpc/commands" "github.com/arduino/go-paths-helper" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -func TestMatch(t *testing.T) { - assert.True(t, match("this is platform Foo", "foo")) - assert.True(t, match("this is platform Foo", "FOO")) - assert.True(t, match("this is platform Foo", "")) - assert.False(t, match("this is platform Foo", "Bar")) -} - func TestPlatformSearch(t *testing.T) { dataDir := paths.TempDir().Join("test", "data_dir") @@ -238,4 +230,50 @@ func TestPlatformSearch(t *testing.T) { {Name: "Linino One"}, }, }) + + res, err = PlatformSearch(&rpc.PlatformSearchReq{ + Instance: inst, + SearchArgs: "yun", + AllVersions: true, + }) + require.Nil(t, err) + require.NotNil(t, res) + require.Len(t, res.SearchOutput, 1) + require.Contains(t, res.SearchOutput, &commands.Platform{ + ID: "arduino:avr", + Installed: "", + Latest: "1.8.3", + Name: "Arduino AVR Boards", + Maintainer: "Arduino", + Website: "https://www.arduino.cc/", + Email: "packages@arduino.cc", + Boards: []*commands.Board{ + {Name: "Arduino Yún"}, + {Name: "Arduino Uno"}, + {Name: "Arduino Uno WiFi"}, + {Name: "Arduino Diecimila"}, + {Name: "Arduino Nano"}, + {Name: "Arduino Mega"}, + {Name: "Arduino MegaADK"}, + {Name: "Arduino Leonardo"}, + {Name: "Arduino Leonardo Ethernet"}, + {Name: "Arduino Micro"}, + {Name: "Arduino Esplora"}, + {Name: "Arduino Mini"}, + {Name: "Arduino Ethernet"}, + {Name: "Arduino Fio"}, + {Name: "Arduino BT"}, + {Name: "Arduino LilyPadUSB"}, + {Name: "Arduino Lilypad"}, + {Name: "Arduino Pro"}, + {Name: "Arduino ATMegaNG"}, + {Name: "Arduino Robot Control"}, + {Name: "Arduino Robot Motor"}, + {Name: "Arduino Gemma"}, + {Name: "Adafruit Circuit Playground"}, + {Name: "Arduino Yún Mini"}, + {Name: "Arduino Industrial 101"}, + {Name: "Linino One"}, + }, + }) } diff --git a/test/test_core.py b/test/test_core.py index 1f2c0c9326c..c52d2aacfb2 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -48,68 +48,39 @@ def test_core_search(run_command, httpserver): data = json.loads(result.stdout) assert 2 == len(data) + def get_platforms(stdout): + data = json.loads(stdout) + platforms = {p["ID"]: [] for p in data} + for p in data: + platforms[p["ID"]].append(p["Latest"]) + return platforms + # Search all Retrokit platforms - result = run_command(f"core search retrokit --all --additional-urls={url}") + result = run_command(f"core search retrokit --all --additional-urls={url} --format json") assert result.ok - lines = [l.strip().split() for l in result.stdout.strip().splitlines()] - assert ["Updating", "index:", "package_index.json", "downloaded"] in lines - assert ["Updating", "index:", "package_index.json.sig", "downloaded"] in lines - assert ["Retrokits-RK002:arm", "1.0.5", "RK002"] in lines - assert ["Retrokits-RK002:arm", "1.0.6", "RK002"] in lines - header_index = lines.index(["ID", "Version", "Name"]) - # We use black to format and flake8 to lint .py files but they disagree on certain - # things like this one, thus we ignore this specific flake8 rule and stand by black - # opinion. - # We ignore this specific case because ignoring it globally would probably cause more - # issue. For more info about the rule see: https://www.flake8rules.com/rules/E203.html - assert 2 == len(lines[header_index + 1 :]) # noqa: E203 + platforms = get_platforms(result.stdout) + assert "1.0.5" in platforms["Retrokits-RK002:arm"] + assert "1.0.6" in platforms["Retrokits-RK002:arm"] # Search using Retrokit Package Maintainer - result = run_command(f"core search Retrokits-RK002 --all --additional-urls={url}") + result = run_command(f"core search Retrokits-RK002 --all --additional-urls={url} --format json") assert result.ok - lines = [l.strip().split() for l in result.stdout.strip().splitlines()] - assert ["Updating", "index:", "package_index.json", "downloaded"] in lines - assert ["Updating", "index:", "package_index.json.sig", "downloaded"] in lines - assert ["Retrokits-RK002:arm", "1.0.5", "RK002"] in lines - assert ["Retrokits-RK002:arm", "1.0.6", "RK002"] in lines - header_index = lines.index(["ID", "Version", "Name"]) - # We use black to format and flake8 to lint .py files but they disagree on certain - # things like this one, thus we ignore this specific flake8 rule and stand by black - # opinion. - # We ignore this specific case because ignoring it globally would probably cause more - # issue. For more info about the rule see: https://www.flake8rules.com/rules/E203.html - assert 2 == len(lines[header_index + 1 :]) # noqa: E203 + platforms = get_platforms(result.stdout) + assert "1.0.5" in platforms["Retrokits-RK002:arm"] + assert "1.0.6" in platforms["Retrokits-RK002:arm"] # Search using the Retrokit Platform name - result = run_command(f"core search rk002 --all --additional-urls={url}") + result = run_command(f"core search rk002 --all --additional-urls={url} --format json") assert result.ok - lines = [l.strip().split() for l in result.stdout.strip().splitlines()] - assert ["Updating", "index:", "package_index.json", "downloaded"] in lines - assert ["Updating", "index:", "package_index.json.sig", "downloaded"] in lines - assert ["Retrokits-RK002:arm", "1.0.5", "RK002"] in lines - assert ["Retrokits-RK002:arm", "1.0.6", "RK002"] in lines - header_index = lines.index(["ID", "Version", "Name"]) - # We use black to format and flake8 to lint .py files but they disagree on certain - # things like this one, thus we ignore this specific flake8 rule and stand by black - # opinion. - # We ignore this specific case because ignoring it globally would probably cause more - # issue. For more info about the rule see: https://www.flake8rules.com/rules/E203.html - assert 2 == len(lines[header_index + 1 :]) # noqa: E203 + platforms = get_platforms(result.stdout) + assert "1.0.5" in platforms["Retrokits-RK002:arm"] + assert "1.0.6" in platforms["Retrokits-RK002:arm"] # Search using a board name - result = run_command(f"core search myboard --all --additional-urls={url}") + result = run_command(f"core search myboard --all --additional-urls={url} --format json") assert result.ok - lines = [l.strip().split() for l in result.stdout.strip().splitlines()] - assert ["Updating", "index:", "package_index.json", "downloaded"] in lines - assert ["Updating", "index:", "package_index.json.sig", "downloaded"] in lines - assert ["Package:x86", "1.2.3", "Platform"] in lines - header_index = lines.index(["ID", "Version", "Name"]) - # We use black to format and flake8 to lint .py files but they disagree on certain - # things like this one, thus we ignore this specific flake8 rule and stand by black - # opinion. - # We ignore this specific case because ignoring it globally would probably cause more - # issue. For more info about the rule see: https://www.flake8rules.com/rules/E203.html - assert 1 == len(lines[header_index + 1 :]) # noqa: E203 + platforms = get_platforms(result.stdout) + assert "1.2.3" in platforms["Package:x86"] def test_core_search_no_args(run_command, httpserver): @@ -175,6 +146,32 @@ def test_core_search_no_args(run_command, httpserver): assert len(platforms) == num_platforms +def test_core_search_fuzzy(run_command): + assert run_command("update") + + def run_fuzzy_search(search_args, expected_ids): + res = run_command(f"core search --format json {search_args}") + assert res.ok + data = json.loads(res.stdout) + platform_ids = [p["ID"] for p in data] + for platform_id in expected_ids: + assert platform_id in platform_ids + + run_fuzzy_search("mkr1000", ["arduino:samd"]) + run_fuzzy_search("mkr 100", ["arduino:samd"]) + + run_fuzzy_search("yún", ["arduino:avr"]) + run_fuzzy_search("yùn", ["arduino:avr"]) + run_fuzzy_search("yun", ["arduino:avr"]) + + run_fuzzy_search("nano", ["arduino:avr", "arduino:megaavr", "arduino:samd", "arduino:mbed"]) + run_fuzzy_search("nano33", ["arduino:samd", "arduino:mbed"]) + run_fuzzy_search("nano 33", ["arduino:avr", "arduino:megaavr", "arduino:samd", "arduino:mbed"]) + run_fuzzy_search("nano ble", ["arduino:avr", "arduino:megaavr", "arduino:samd", "arduino:mbed"]) + run_fuzzy_search("ble", ["arduino:mbed"]) + run_fuzzy_search("ble nano", ["arduino:avr", "arduino:megaavr", "arduino:samd", "arduino:mbed"]) + + def test_core_updateindex_url_not_found(run_command, httpserver): assert run_command("core update-index") From c36d57f12b034130094088cc306f87b8cd8cc021 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Wed, 24 Feb 2021 10:30:25 +0100 Subject: [PATCH 3/6] Avoid splitting search arguments when doing fuzzy search --- commands/core/search.go | 17 ++++++++--------- commands/lib/search.go | 8 ++------ test/test_core.py | 6 +++--- test/test_lib.py | 6 ++++-- 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/commands/core/search.go b/commands/core/search.go index c57f9c02a57..0615e9e5840 100644 --- a/commands/core/search.go +++ b/commands/core/search.go @@ -65,7 +65,7 @@ func PlatformSearch(req *rpc.PlatformSearchReq) (*rpc.PlatformSearchResp, error) continue } - words := strings.Split(searchArgs, " ") + // Gather all strings that can be used for searching toTest := []string{ platform.String(), platform.Name, @@ -77,14 +77,13 @@ func PlatformSearch(req *rpc.PlatformSearchReq) (*rpc.PlatformSearchResp, error) for _, board := range platformRelease.BoardsManifest { toTest = append(toTest, board.Name) } - for _, word := range words { - if len(fuzzy.FindNormalizedFold(word, toTest)) > 0 { - if allVersions { - res = append(res, platform.GetAllReleases()...) - } else { - res = append(res, platformRelease) - } - break + + // Fuzzy search + if len(fuzzy.FindNormalizedFold(searchArgs, toTest)) > 0 { + if allVersions { + res = append(res, platform.GetAllReleases()...) + } else { + res = append(res, platformRelease) } } } diff --git a/commands/lib/search.go b/commands/lib/search.go index f13b2e5eb68..785cefc3315 100644 --- a/commands/lib/search.go +++ b/commands/lib/search.go @@ -53,13 +53,9 @@ func searchLibrary(req *rpc.LibrarySearchReq, lm *librariesmanager.LibrariesMana } for _, lib := range lm.Index.Libraries { - words := strings.Split(req.GetQuery(), " ") toTest := []string{lib.Name, lib.Latest.Paragraph, lib.Latest.Sentence} - - for _, word := range words { - if len(fuzzy.FindNormalizedFold(word, toTest)) > 0 { - res = append(res, indexLibraryToRPCSearchLibrary(lib)) - } + if len(fuzzy.FindNormalizedFold(req.GetQuery(), toTest)) > 0 { + res = append(res, indexLibraryToRPCSearchLibrary(lib)) } } diff --git a/test/test_core.py b/test/test_core.py index c52d2aacfb2..e69d0ed0288 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -166,10 +166,10 @@ def run_fuzzy_search(search_args, expected_ids): run_fuzzy_search("nano", ["arduino:avr", "arduino:megaavr", "arduino:samd", "arduino:mbed"]) run_fuzzy_search("nano33", ["arduino:samd", "arduino:mbed"]) - run_fuzzy_search("nano 33", ["arduino:avr", "arduino:megaavr", "arduino:samd", "arduino:mbed"]) - run_fuzzy_search("nano ble", ["arduino:avr", "arduino:megaavr", "arduino:samd", "arduino:mbed"]) + run_fuzzy_search("nano 33", ["arduino:samd", "arduino:mbed"]) + run_fuzzy_search("nano ble", ["arduino:mbed"]) run_fuzzy_search("ble", ["arduino:mbed"]) - run_fuzzy_search("ble nano", ["arduino:avr", "arduino:megaavr", "arduino:samd", "arduino:mbed"]) + run_fuzzy_search("ble nano", []) def test_core_updateindex_url_not_found(run_command, httpserver): diff --git a/test/test_lib.py b/test/test_lib.py index 04b0daeb837..894739ea269 100644 --- a/test/test_lib.py +++ b/test/test_lib.py @@ -418,7 +418,9 @@ def run_search(search_args, expected_libraries): assert l in libraries run_search("Arduino_MKRIoTCarrier", ["Arduino_MKRIoTCarrier"]) - run_search("Arduino mkr iot carrier", ["Arduino_MKRIoTCarrier"]) + run_search("Arduino mkr iot carrier", []) + run_search("mkr iot carrier", []) + run_search("mkriotcarrier", ["Arduino_MKRIoTCarrier"]) run_search("Arduinomkriotcarrier", ["Arduino_MKRIoTCarrier"]) run_search( @@ -431,7 +433,7 @@ def run_search(search_args, expected_libraries): run_search("dht12", ["AM232X", "DHT12", "DHT12 sensor library", "SDHT"]) run_search("dht22", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "SDHT"]) run_search("dht sensor", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "SDHT"]) - run_search("sensor dht", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "SDHT"]) + run_search("sensor dht", []) def test_lib_list_with_updatable_flag(run_command): From 5b09b47d1e35689e6fe2b5ac1c1d632c14f18a60 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Wed, 24 Feb 2021 15:45:43 +0100 Subject: [PATCH 4/6] Check ranking when running fuzzy search --- commands/core/search.go | 18 +++++++++++++----- commands/lib/search.go | 20 +++++++++++++++----- test/test_lib.py | 6 ++---- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/commands/core/search.go b/commands/core/search.go index 0615e9e5840..f027a3e8f6c 100644 --- a/commands/core/search.go +++ b/commands/core/search.go @@ -26,6 +26,10 @@ import ( "github.com/lithammer/fuzzysearch/fuzzy" ) +// maximumSearchDistance is the maximum Levenshtein distance accepted when using fuzzy search. +// This value is completely arbitrary and picked randomly. +const maximumSearchDistance = 20 + // PlatformSearch FIXMEDOC func PlatformSearch(req *rpc.PlatformSearchReq) (*rpc.PlatformSearchResp, error) { searchArgs := strings.Trim(req.SearchArgs, " ") @@ -79,11 +83,15 @@ func PlatformSearch(req *rpc.PlatformSearchReq) (*rpc.PlatformSearchResp, error) } // Fuzzy search - if len(fuzzy.FindNormalizedFold(searchArgs, toTest)) > 0 { - if allVersions { - res = append(res, platform.GetAllReleases()...) - } else { - res = append(res, platformRelease) + for _, rank := range fuzzy.RankFindNormalizedFold(searchArgs, toTest) { + // Accepts only results that close to the searched terms + if rank.Distance < maximumSearchDistance { + if allVersions { + res = append(res, platform.GetAllReleases()...) + } else { + res = append(res, platformRelease) + } + break } } } diff --git a/commands/lib/search.go b/commands/lib/search.go index 785cefc3315..e654c720766 100644 --- a/commands/lib/search.go +++ b/commands/lib/search.go @@ -28,8 +28,6 @@ import ( semver "go.bug.st/relaxed-semver" ) -var similarityThreshold = 0.7 - // LibrarySearch FIXMEDOC func LibrarySearch(ctx context.Context, req *rpc.LibrarySearchReq) (*rpc.LibrarySearchResp, error) { lm := commands.GetLibraryManager(req.GetInstance().GetId()) @@ -41,21 +39,33 @@ func LibrarySearch(ctx context.Context, req *rpc.LibrarySearchReq) (*rpc.Library } func searchLibrary(req *rpc.LibrarySearchReq, lm *librariesmanager.LibrariesManager) (*rpc.LibrarySearchResp, error) { + query := req.GetQuery() res := []*rpc.SearchedLibrary{} status := rpc.LibrarySearchStatus_success // If the query is empty all libraries are returned - if strings.Trim(req.GetQuery(), " ") == "" { + if strings.Trim(query, " ") == "" { for _, lib := range lm.Index.Libraries { res = append(res, indexLibraryToRPCSearchLibrary(lib)) } return &rpc.LibrarySearchResp{Libraries: res, Status: status}, nil } + // maximumSearchDistance is the maximum Levenshtein distance accepted when using fuzzy search. + // This value is completely arbitrary and picked randomly. + maximumSearchDistance := 65 + // Use a lower distance for shorter query or the user might be flooded with unrelated results + if len(query) <= 4 { + maximumSearchDistance = 40 + } + for _, lib := range lm.Index.Libraries { toTest := []string{lib.Name, lib.Latest.Paragraph, lib.Latest.Sentence} - if len(fuzzy.FindNormalizedFold(req.GetQuery(), toTest)) > 0 { - res = append(res, indexLibraryToRPCSearchLibrary(lib)) + for _, rank := range fuzzy.RankFindNormalizedFold(req.GetQuery(), toTest) { + if rank.Distance < maximumSearchDistance { + res = append(res, indexLibraryToRPCSearchLibrary(lib)) + break + } } } diff --git a/test/test_lib.py b/test/test_lib.py index 894739ea269..8e7f23c3ac0 100644 --- a/test/test_lib.py +++ b/test/test_lib.py @@ -427,10 +427,8 @@ def run_search(search_args, expected_libraries): "dht", ["DHT sensor library", "DHT sensor library for ESPx", "DHT12", "SimpleDHT", "TinyDHT sensor library", "SDHT"], ) - run_search( - "dht11", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "TinyDHT sensor library", "SDHT"] - ) - run_search("dht12", ["AM232X", "DHT12", "DHT12 sensor library", "SDHT"]) + run_search("dht11", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "SDHT"]) + run_search("dht12", ["DHT12", "DHT12 sensor library", "SDHT"]) run_search("dht22", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "SDHT"]) run_search("dht sensor", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "SDHT"]) run_search("sensor dht", []) From 8e8c250beca69a0b82034e08fbb99356255693a6 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Wed, 24 Feb 2021 16:38:54 +0100 Subject: [PATCH 5/6] Some other enhancements to fuzzy search --- commands/core/search.go | 30 ++++++++++++++++++++++-------- commands/lib/search.go | 28 +++++++++++++++++++++------- go.mod | 2 +- test/test_core.py | 2 +- test/test_lib.py | 8 ++++++-- 5 files changed, 51 insertions(+), 19 deletions(-) diff --git a/commands/core/search.go b/commands/core/search.go index f027a3e8f6c..46b55e548ca 100644 --- a/commands/core/search.go +++ b/commands/core/search.go @@ -82,18 +82,32 @@ func PlatformSearch(req *rpc.PlatformSearchReq) (*rpc.PlatformSearchResp, error) toTest = append(toTest, board.Name) } + // Removes some chars from query strings to enhance results + cleanSearchArgs := strings.Map(func(r rune) rune { + switch r { + case '_': + case '-': + case ' ': + return -1 + } + return r + }, searchArgs) + // Fuzzy search - for _, rank := range fuzzy.RankFindNormalizedFold(searchArgs, toTest) { - // Accepts only results that close to the searched terms - if rank.Distance < maximumSearchDistance { - if allVersions { - res = append(res, platform.GetAllReleases()...) - } else { - res = append(res, platformRelease) + for _, arg := range []string{searchArgs, cleanSearchArgs} { + for _, rank := range fuzzy.RankFindNormalizedFold(arg, toTest) { + // Accepts only results that close to the searched terms + if rank.Distance < maximumSearchDistance { + if allVersions { + res = append(res, platform.GetAllReleases()...) + } else { + res = append(res, platformRelease) + } + goto nextPlatform } - break } } + nextPlatform: } } } diff --git a/commands/lib/search.go b/commands/lib/search.go index e654c720766..37e3a423e6e 100644 --- a/commands/lib/search.go +++ b/commands/lib/search.go @@ -53,18 +53,32 @@ func searchLibrary(req *rpc.LibrarySearchReq, lm *librariesmanager.LibrariesMana // maximumSearchDistance is the maximum Levenshtein distance accepted when using fuzzy search. // This value is completely arbitrary and picked randomly. - maximumSearchDistance := 65 + maximumSearchDistance := 150 // Use a lower distance for shorter query or the user might be flooded with unrelated results if len(query) <= 4 { maximumSearchDistance = 40 } - for _, lib := range lm.Index.Libraries { - toTest := []string{lib.Name, lib.Latest.Paragraph, lib.Latest.Sentence} - for _, rank := range fuzzy.RankFindNormalizedFold(req.GetQuery(), toTest) { - if rank.Distance < maximumSearchDistance { - res = append(res, indexLibraryToRPCSearchLibrary(lib)) - break + // Removes some chars from query strings to enhance results + cleanQuery := strings.Map(func(r rune) rune { + switch r { + case '_': + case '-': + case ' ': + return -1 + } + return r + }, query) + + // Use both uncleaned and cleaned query + for _, q := range []string{query, cleanQuery} { + for _, lib := range lm.Index.Libraries { + toTest := []string{lib.Name, lib.Latest.Paragraph, lib.Latest.Sentence} + for _, rank := range fuzzy.RankFindNormalizedFold(q, toTest) { + if rank.Distance < maximumSearchDistance { + res = append(res, indexLibraryToRPCSearchLibrary(lib)) + break + } } } } diff --git a/go.mod b/go.mod index 9f6537bc54a..f8835077737 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( github.com/juju/loggo v0.0.0-20190526231331-6e530bcce5d8 // indirect github.com/kr/text v0.2.0 // indirect github.com/leonelquinteros/gotext v1.4.0 - github.com/lithammer/fuzzysearch v1.1.1 // indirect + github.com/lithammer/fuzzysearch v1.1.1 github.com/mattn/go-colorable v0.1.2 github.com/mattn/go-isatty v0.0.8 github.com/mattn/go-runewidth v0.0.9 // indirect diff --git a/test/test_core.py b/test/test_core.py index e69d0ed0288..dbc4663aa56 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -158,7 +158,7 @@ def run_fuzzy_search(search_args, expected_ids): assert platform_id in platform_ids run_fuzzy_search("mkr1000", ["arduino:samd"]) - run_fuzzy_search("mkr 100", ["arduino:samd"]) + run_fuzzy_search("mkr 1000", ["arduino:samd"]) run_fuzzy_search("yún", ["arduino:avr"]) run_fuzzy_search("yùn", ["arduino:avr"]) diff --git a/test/test_lib.py b/test/test_lib.py index 8e7f23c3ac0..2bab6e4e055 100644 --- a/test/test_lib.py +++ b/test/test_lib.py @@ -418,8 +418,8 @@ def run_search(search_args, expected_libraries): assert l in libraries run_search("Arduino_MKRIoTCarrier", ["Arduino_MKRIoTCarrier"]) - run_search("Arduino mkr iot carrier", []) - run_search("mkr iot carrier", []) + run_search("Arduino mkr iot carrier", ["Arduino_MKRIoTCarrier"]) + run_search("mkr iot carrier", ["Arduino_MKRIoTCarrier"]) run_search("mkriotcarrier", ["Arduino_MKRIoTCarrier"]) run_search("Arduinomkriotcarrier", ["Arduino_MKRIoTCarrier"]) @@ -433,6 +433,10 @@ def run_search(search_args, expected_libraries): run_search("dht sensor", ["DHT sensor library", "DHT sensor library for ESPx", "SimpleDHT", "SDHT"]) run_search("sensor dht", []) + run_search("arduino json", ["ArduinoJson", "Arduino_JSON"]) + run_search("arduinojson", ["ArduinoJson", "Arduino_JSON"]) + run_search("json", ["ArduinoJson", "Arduino_JSON"]) + def test_lib_list_with_updatable_flag(run_command): # Init the environment explicitly From aa59e42a791e848374ff30eb2b325e1b26477b47 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Wed, 24 Feb 2021 17:55:27 +0100 Subject: [PATCH 6/6] Fix duplicated results in lib search command --- commands/lib/search.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/commands/lib/search.go b/commands/lib/search.go index 37e3a423e6e..5bb55063f10 100644 --- a/commands/lib/search.go +++ b/commands/lib/search.go @@ -69,18 +69,18 @@ func searchLibrary(req *rpc.LibrarySearchReq, lm *librariesmanager.LibrariesMana } return r }, query) - - // Use both uncleaned and cleaned query - for _, q := range []string{query, cleanQuery} { - for _, lib := range lm.Index.Libraries { + for _, lib := range lm.Index.Libraries { + // Use both uncleaned and cleaned query + for _, q := range []string{query, cleanQuery} { toTest := []string{lib.Name, lib.Latest.Paragraph, lib.Latest.Sentence} for _, rank := range fuzzy.RankFindNormalizedFold(q, toTest) { if rank.Distance < maximumSearchDistance { res = append(res, indexLibraryToRPCSearchLibrary(lib)) - break + goto nextLib } } } + nextLib: } return &rpc.LibrarySearchResp{Libraries: res, Status: status}, nil