Skip to content

Commit 738a833

Browse files
committed
Implement matching priority for licenses
Based on usage statistics extracted from Flora.pm database dump on 22.06.23
1 parent 095d271 commit 738a833

File tree

2 files changed

+112
-5
lines changed

2 files changed

+112
-5
lines changed

ghcide/src/Text/Fuzzy/Parallel.hs

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
-- | Parallel versions of 'filter' and 'simpleFilter'
22

33
module Text.Fuzzy.Parallel
4-
( filter,
5-
simpleFilter,
4+
( filter, filter',
5+
simpleFilter, simpleFilter',
66
match,
77
Scored(..)
88
) where
@@ -102,6 +102,40 @@ simpleFilter :: Int -- ^ Chunk size. 1000 works well.
102102
simpleFilter chunk maxRes pattern xs =
103103
filter chunk maxRes pattern xs id
104104

105+
106+
-- | The function to filter a list of values by fuzzy search on the text extracted from them.
107+
filter' :: Int -- ^ Chunk size. 1000 works well.
108+
-> Int -- ^ Max. number of results wanted
109+
-> T.Text -- ^ Pattern.
110+
-> [t] -- ^ The list of values containing the text to search in.
111+
-> (t -> T.Text) -- ^ The function to extract the text from the container.
112+
-> (T.Text -> T.Text -> Maybe Int) -- ^ Function to use for matching
113+
-> [Scored t] -- ^ The list of results, sorted, highest score first.
114+
filter' chunkSize maxRes pattern ts extract match' = partialSortByAscScore maxRes perfectScore (concat vss)
115+
where
116+
-- Preserve case for the first character, make all others lowercase
117+
pattern' = case T.uncons pattern of
118+
Just (c, rest) -> T.cons c (T.toLower rest)
119+
_ -> pattern
120+
vss = map (mapMaybe (\t -> flip Scored t <$> match' pattern' (extract t))) (chunkList chunkSize ts)
121+
`using` parList (evalList rseq)
122+
perfectScore = fromMaybe (error $ T.unpack pattern) $ match' pattern' pattern'
123+
124+
-- | Return all elements of the list that have a fuzzy
125+
-- match against the pattern, using a custom match function. Runs with default settings where
126+
-- nothing is added around the matches, as case insensitive.
127+
--
128+
-- >>> simpleFilter 1000 10 "vm" ["vim", "emacs", "virtual machine"]
129+
-- [Scored {score = 4, original = "vim"},Scored {score = 4, original = "virtual machine"}]
130+
{-# INLINABLE simpleFilter' #-}
131+
simpleFilter' :: Int -- ^ Chunk size. 1000 works well.
132+
-> Int -- ^ Max. number of results wanted
133+
-> T.Text -- ^ Pattern to look for.
134+
-> [T.Text] -- ^ List of texts to check.
135+
-> (T.Text -> T.Text -> Maybe Int) -- ^ Function to use for matching
136+
-> [Scored T.Text] -- ^ The ones that match.
137+
simpleFilter' chunk maxRes pattern xs match' =
138+
filter' chunk maxRes pattern xs id match'
105139
--------------------------------------------------------------------------------
106140

107141
chunkList :: Int -> [a] -> [[a]]

plugins/hls-cabal-plugin/src/Ide/Plugin/Cabal/Completions.hs

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import Data.Maybe (fromMaybe)
1313
import qualified Data.Text as T
1414
import Data.Text.Utf16.Rope (Rope)
1515
import qualified Data.Text.Utf16.Rope as Rope
16+
import Debug.Trace (traceShowM)
1617
import Development.IDE as D
1718
import Distribution.CabalSpecVersion (CabalSpecVersion (CabalSpecV2_2),
1819
showCabalSpecVersion)
@@ -25,6 +26,7 @@ import qualified Language.LSP.Protocol.Types as Compls (CompletionItem
2526
import qualified Language.LSP.Protocol.Types as LSP
2627
import qualified Language.LSP.VFS as VFS
2728
import qualified Text.Fuzzy.Parallel as Fuzzy
29+
import Data.Ord (Down(..))
2830

2931
-- ----------------------------------------------------------------
3032
-- Public API for Completions
@@ -265,8 +267,29 @@ constantCompleter completions _ ctxInfo = do
265267
let range = completionRange ctxInfo
266268
pure $ map (makeSimpleCabalCompletionItem range . Fuzzy.original) scored
267269

270+
weightedConstantCompleter :: [T.Text] -> Map T.Text Double -> Completer
271+
weightedConstantCompleter completions weights _ ctxInfo = do
272+
let scored = if perfectScore > 0
273+
then fmap Fuzzy.original $ Fuzzy.simpleFilter' 1000 10 prefix completions customMatch
274+
else topTenByWeight
275+
let range = completionRange ctxInfo
276+
pure $ map (makeSimpleCabalCompletionItem range) scored
277+
where
278+
prefix = completionPrefix ctxInfo
279+
perfectScore = fromMaybe (error "match is broken") $ Fuzzy.match prefix prefix
280+
customMatch :: (T.Text -> T.Text -> Maybe Int)
281+
customMatch toSearch searchSpace = do
282+
matched <- Fuzzy.match toSearch searchSpace
283+
let weight = fromMaybe 0 $ Map.lookup searchSpace weights
284+
let score = min
285+
perfectScore
286+
(round (fromIntegral matched * (1 + weight)))
287+
pure score
288+
topTenByWeight :: [T.Text]
289+
topTenByWeight = take 10 $ map fst $ List.sortOn (Down . snd) $ Map.assocs weights
290+
268291
{- | Completer to be used when a file path can be
269-
completed for a field, takes the file path of the directory to start from.
292+
completed for a field, takes the file path of the directory to start from.
270293
Completes file paths as well as directories.
271294
-}
272295
filePathCompleter :: Completer
@@ -327,7 +350,7 @@ cabalKeywords =
327350
[ ("name:", noopCompleter) -- TODO: should complete to filename, needs meta info
328351
, ("version:", noopCompleter)
329352
, ("build-type:", constantCompleter ["Simple", "Custom", "Configure", "Make"])
330-
, ("license:", constantCompleter licenseNames)
353+
, ("license:", weightedConstantCompleter licenseNames weightedLicenseNames)
331354
, ("license-file:", filePathCompleter)
332355
, ("license-files:", filePathCompleter) -- list of filenames
333356
, ("copyright:", noopCompleter)
@@ -411,7 +434,7 @@ stanzaKeywordMap =
411434
)
412435
,
413436
( "source-repository"
414-
, Map.fromList $
437+
, Map.fromList
415438
[
416439
( "type:"
417440
, constantCompleter
@@ -476,6 +499,56 @@ stanzaKeywordMap =
476499
, ("mixins:", noopCompleter)
477500
]
478501

502+
weightedLicenseNames :: Map T.Text Double
503+
weightedLicenseNames = fmap statisticsToWeight $ Map.fromList
504+
[("BSD-3-Clause",9955)
505+
, ("MIT",3336)
506+
, ("GPL-3.0-only",679)
507+
, ("LicenseRef-OtherLicense",521)
508+
, ("Apache-2.0",514)
509+
, ("LicenseRef-GPL",443)
510+
, ("LicenseRef-PublicDomain",318)
511+
, ("MPL-2.0",288)
512+
, ("BSD-2-Clause",174)
513+
, ("GPL-2.0-only",160)
514+
, ("LicenseRef-LGPL",146)
515+
, ("LGPL-2.1-only",112)
516+
, ("LGPL-3.0-only",100)
517+
, ("AGPL-3.0-only",96)
518+
, ("ISC",89)
519+
, ("LicenseRef-Apache",45)
520+
, ("GPL-3.0-or-later",43)
521+
, ("BSD-2-Clause-Patent",33)
522+
, ("GPL-2.0-or-later",21)
523+
, ("CC0-1.0",16)
524+
, ("AGPL-3.0-or-later",15)
525+
, ("LGPL-2.1-or-later",12)
526+
, ("(BSD-2-Clause OR Apache-2.0)",10)
527+
, ("(Apache-2.0 OR MPL-2.0)",8)
528+
, ("LicenseRef-AGPL",6)
529+
, ("(BSD-3-Clause OR Apache-2.0)",4)
530+
, ("0BSD",3)
531+
, ("BSD-4-Clause",3)
532+
, ("LGPL-3.0-or-later",3)
533+
, ("LicenseRef-LGPL-2",2)
534+
, ("GPL-2.0-or-later AND BSD-3-Clause",2)
535+
, ("NONE",2)
536+
, ("Zlib",2)
537+
, ("(Apache-2.0 OR BSD-3-Clause)",2)
538+
, ("BSD-3-Clause AND GPL-2.0-or-later",2)
539+
, ("BSD-3-Clause AND GPL-3.0-or-later",2)
540+
]
541+
where
542+
statisticsToWeight :: Int -> Double
543+
statisticsToWeight stat
544+
| stat < 10 = 0.1
545+
| stat < 20 = 0.3
546+
| stat < 50 = 0.4
547+
| stat < 100 = 0.5
548+
| stat < 500 = 0.6
549+
| stat < 650 = 0.7
550+
| otherwise = 0.9
551+
479552
-- cabalFlagKeywords :: [(T.Text, T.Text)]
480553
-- cabalFlagKeywords =
481554
-- [

0 commit comments

Comments
 (0)