From d22565145da796bb1fa2fd6411fdc4f2d117704a Mon Sep 17 00:00:00 2001 From: kokobd Date: Sat, 3 Sep 2022 13:06:33 +0800 Subject: [PATCH 1/5] unescape printable characters --- .../Development/IDE/GHC/Compat/Outputable.hs | 6 ++-- ghcide/src/Development/IDE/GHC/Util.hs | 3 +- hls-plugin-api/hls-plugin-api.cabal | 1 + hls-plugin-api/src/Ide/PluginUtils.hs | 33 ++++++++++++++++++- 4 files changed, 38 insertions(+), 5 deletions(-) diff --git a/ghcide/src/Development/IDE/GHC/Compat/Outputable.hs b/ghcide/src/Development/IDE/GHC/Compat/Outputable.hs index 084a48a04b..23b9a3b832 100644 --- a/ghcide/src/Development/IDE/GHC/Compat/Outputable.hs +++ b/ghcide/src/Development/IDE/GHC/Compat/Outputable.hs @@ -78,9 +78,9 @@ import qualified Outputable as Out import SrcLoc #endif #if MIN_VERSION_ghc(9,3,0) -import GHC.Utils.Logger -import GHC.Driver.Config.Diagnostic -import Data.Maybe +import Data.Maybe +import GHC.Driver.Config.Diagnostic +import GHC.Utils.Logger #endif -- | A compatible function to print `Outputable` instances diff --git a/ghcide/src/Development/IDE/GHC/Util.hs b/ghcide/src/Development/IDE/GHC/Util.hs index 8dd99b8bde..6799955def 100644 --- a/ghcide/src/Development/IDE/GHC/Util.hs +++ b/ghcide/src/Development/IDE/GHC/Util.hs @@ -81,6 +81,7 @@ import GHC.IO.Exception import GHC.IO.Handle.Internals import GHC.IO.Handle.Types import GHC.Stack +import Ide.PluginUtils (unescape) import System.Environment.Blank (getEnvDefault) import System.FilePath import System.IO.Unsafe @@ -292,5 +293,5 @@ instance Outputable SDoc where -- -- It internal using `showSDocUnsafe` with `unsafeGlobalDynFlags`. printOutputable :: Outputable a => a -> T.Text -printOutputable = T.pack . printWithoutUniques +printOutputable = unescape . T.pack . printWithoutUniques {-# INLINE printOutputable #-} diff --git a/hls-plugin-api/hls-plugin-api.cabal b/hls-plugin-api/hls-plugin-api.cabal index 67e57b578b..3fb3f5854f 100644 --- a/hls-plugin-api/hls-plugin-api.cabal +++ b/hls-plugin-api/hls-plugin-api.cabal @@ -57,6 +57,7 @@ library , text , transformers , unordered-containers + , megaparsec > 9 if os(windows) build-depends: Win32 diff --git a/hls-plugin-api/src/Ide/PluginUtils.hs b/hls-plugin-api/src/Ide/PluginUtils.hs index 617b898fcb..a67665bfa3 100644 --- a/hls-plugin-api/src/Ide/PluginUtils.hs +++ b/hls-plugin-api/src/Ide/PluginUtils.hs @@ -32,6 +32,7 @@ module Ide.PluginUtils handleMaybe, handleMaybeM, throwPluginError, + unescape, ) where @@ -43,10 +44,11 @@ import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE) import Data.Algorithm.Diff import Data.Algorithm.DiffOutput import Data.Bifunctor (Bifunctor (first)) +import Data.Char (isPrint, showLitChar) import qualified Data.HashMap.Strict as H -import Data.List (find) import Data.String (IsString (fromString)) import qualified Data.Text as T +import Data.Void (Void) import Ide.Plugin.Config import Ide.Plugin.Properties import Ide.Types @@ -57,6 +59,9 @@ import Language.LSP.Types hiding SemanticTokensEdit (_start)) import qualified Language.LSP.Types as J import Language.LSP.Types.Capabilities +import qualified Text.Megaparsec as P +import qualified Text.Megaparsec.Char as P +import qualified Text.Megaparsec.Char.Lexer as P -- --------------------------------------------------------------------- @@ -258,3 +263,29 @@ pluginResponse :: Monad m => ExceptT String m a -> m (Either ResponseError a) pluginResponse = fmap (first (\msg -> ResponseError InternalError (fromString msg) Nothing)) . runExceptT + +-- --------------------------------------------------------------------- + +type TextParser = P.Parsec Void T.Text + +unescape :: T.Text -> T.Text +unescape input = + case P.runParser escapedTextParser "inline" input of + Left _ -> input + Right strs -> T.pack strs + +escapedTextParser :: TextParser String +escapedTextParser = do + xs <- P.many (P.try stringLiteral) + x <- P.manyTill P.anySingle P.eof + pure $ concat xs ++ x + where + stringLiteral :: TextParser String + stringLiteral = do + before <- P.manyTill P.anySingle (P.char '"') + inside <- P.manyTill P.charLiteral (P.char '"') + let f '"' = "\\\"" + f ch = if isPrint ch then [ch] else showLitChar ch "" + inside' = concatMap f inside + + pure $ before <> "\"" <> inside' <> "\"" From 3e45c23a8b51ee4821931c7727f49e16be9fbb02 Mon Sep 17 00:00:00 2001 From: kokobd Date: Thu, 8 Sep 2022 22:05:19 +0800 Subject: [PATCH 2/5] add comments --- ghcide/src/Development/IDE/GHC/Util.hs | 10 +++++++--- hls-plugin-api/src/Ide/PluginUtils.hs | 12 +++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ghcide/src/Development/IDE/GHC/Util.hs b/ghcide/src/Development/IDE/GHC/Util.hs index 6799955def..1a5fd5c6a6 100644 --- a/ghcide/src/Development/IDE/GHC/Util.hs +++ b/ghcide/src/Development/IDE/GHC/Util.hs @@ -289,9 +289,13 @@ instance Outputable SDoc where -- | Print a GHC value in `defaultUserStyle` without unique symbols. -- --- This is the most common print utility, will print with a user-friendly style like: `a_a4ME` as `a`. +-- This is the most common print utility, and it will print with a user-friendly style like: `a_a4ME` as `a`. -- --- It internal using `showSDocUnsafe` with `unsafeGlobalDynFlags`. +-- It uses `showSDocUnsafe` with `unsafeGlobalDynFlags` internally. printOutputable :: Outputable a => a -> T.Text -printOutputable = unescape . T.pack . printWithoutUniques +printOutputable = + -- IfaceTyLit from GHC.Iface.Type implements Outputable with 'show'. + -- Showing a String escapes non-ascii printable characters. We unescape it here. + -- More discussion at https://github.com/haskell/haskell-language-server/issues/3115. + unescape . T.pack . printWithoutUniques {-# INLINE printOutputable #-} diff --git a/hls-plugin-api/src/Ide/PluginUtils.hs b/hls-plugin-api/src/Ide/PluginUtils.hs index a67665bfa3..f2d7edeb29 100644 --- a/hls-plugin-api/src/Ide/PluginUtils.hs +++ b/hls-plugin-api/src/Ide/PluginUtils.hs @@ -268,23 +268,29 @@ pluginResponse = type TextParser = P.Parsec Void T.Text +-- | Unescape printable escape sequences within double quotes. +-- This is useful if you have to call 'show' indirectly, and it escapes some characters which you would prefer to +-- display as is. unescape :: T.Text -> T.Text unescape input = case P.runParser escapedTextParser "inline" input of Left _ -> input Right strs -> T.pack strs +-- | Parser for a string that contains double quotes. Returns unescaped string. escapedTextParser :: TextParser String escapedTextParser = do xs <- P.many (P.try stringLiteral) - x <- P.manyTill P.anySingle P.eof + x <- P.manyTill P.anySingle P.eof -- consume characters after the final double quote pure $ concat xs ++ x where stringLiteral :: TextParser String stringLiteral = do - before <- P.manyTill P.anySingle (P.char '"') + before <- P.manyTill P.anySingle (P.char '"') -- include any character before the first double quote inside <- P.manyTill P.charLiteral (P.char '"') - let f '"' = "\\\"" + let f '"' = "\\\"" -- double quote should still be escaped + -- Despite the docs, 'showLitChar' and 'showLitString' from 'Data.Char' DOES ESCAPE unicode printable + -- characters. So we need to call 'isPrint' from 'Data.Char' manually. f ch = if isPrint ch then [ch] else showLitChar ch "" inside' = concatMap f inside From ae498b6723077d74facef2df25f140b62530b604 Mon Sep 17 00:00:00 2001 From: kokobd Date: Fri, 9 Sep 2022 14:51:23 +0800 Subject: [PATCH 3/5] add tests --- hls-plugin-api/hls-plugin-api.cabal | 1 + hls-plugin-api/test/Ide/PluginUtilsTest.hs | 38 +++++++++++++--------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/hls-plugin-api/hls-plugin-api.cabal b/hls-plugin-api/hls-plugin-api.cabal index 3fb3f5854f..77afa4309c 100644 --- a/hls-plugin-api/hls-plugin-api.cabal +++ b/hls-plugin-api/hls-plugin-api.cabal @@ -91,4 +91,5 @@ test-suite tests , tasty , tasty-hunit , tasty-rerun + , text , lsp-types diff --git a/hls-plugin-api/test/Ide/PluginUtilsTest.hs b/hls-plugin-api/test/Ide/PluginUtilsTest.hs index c6bedfdf28..360fc69451 100644 --- a/hls-plugin-api/test/Ide/PluginUtilsTest.hs +++ b/hls-plugin-api/test/Ide/PluginUtilsTest.hs @@ -1,29 +1,35 @@ +{-# LANGUAGE OverloadedStrings #-} + module Ide.PluginUtilsTest ( tests ) where -import Ide.PluginUtils (positionInRange) +import Data.Char (isPrint) +import qualified Data.Text as T +import Ide.PluginUtils (positionInRange, unescape) import Language.LSP.Types (Position (Position), Range (Range)) import Test.Tasty import Test.Tasty.HUnit tests :: TestTree tests = testGroup "PluginUtils" - [ positionInRangeTest + [ unescapeTest ] -positionInRangeTest :: TestTree -positionInRangeTest = testGroup "positionInRange" - [ testCase "single line, after the end" $ - positionInRange (Position 1 10) (Range (Position 1 1) (Position 1 3)) @?= False - , testCase "single line, before the begining" $ - positionInRange (Position 1 0) (Range (Position 1 1) (Position 1 6)) @?= False - , testCase "single line, in range" $ - positionInRange (Position 1 5) (Range (Position 1 1) (Position 1 6)) @?= True - , testCase "single line, at the end" $ - positionInRange (Position 1 5) (Range (Position 1 1) (Position 1 5)) @?= False - , testCase "multiline, in range" $ - positionInRange (Position 3 5) (Range (Position 1 1) (Position 5 6)) @?= True - , testCase "multiline, out of range" $ - positionInRange (Position 3 5) (Range (Position 3 6) (Position 4 10)) @?= False +unescapeTest :: TestTree +unescapeTest = testGroup "unescape" + [ testCase "no double quote" $ + unescape "hello世界" @?= "hello世界" + , testCase "whole string quoted" $ + unescape "\"hello\\19990\\30028\"" @?= "\"hello世界\"" + , testCase "text before quotes should not be unescaped" $ + unescape "\\19990a\"hello\\30028\"" @?= "\\19990a\"hello界\"" + , testCase "some text after quotes" $ + unescape "\"hello\\19990\\30028\"abc" @?= "\"hello世界\"abc" + , testCase "many pairs of quote" $ + unescape "oo\"hello\\19990\\30028\"abc\"\1087\1088\1080\1074\1077\1090\"hh" @?= "oo\"hello世界\"abc\"привет\"hh" + , testCase "double quote itself should not be unescaped" $ + unescape "\"\\\"o\"" @?= "\"\\\"o\"" + , testCase "control characters should not be escaped" $ + unescape "\"\\n\\t\"" @?= "\"\\n\\t\"" ] From db0a19b0249e416f648ca25c8ecd33b8c0cae94d Mon Sep 17 00:00:00 2001 From: kokobd Date: Thu, 15 Sep 2022 19:54:32 +0800 Subject: [PATCH 4/5] improve the parser --- hls-plugin-api/src/Ide/PluginUtils.hs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/hls-plugin-api/src/Ide/PluginUtils.hs b/hls-plugin-api/src/Ide/PluginUtils.hs index 169d0117ca..0cff0daefb 100644 --- a/hls-plugin-api/src/Ide/PluginUtils.hs +++ b/hls-plugin-api/src/Ide/PluginUtils.hs @@ -45,6 +45,7 @@ import Data.Algorithm.Diff import Data.Algorithm.DiffOutput import Data.Bifunctor (Bifunctor (first)) import Data.Char (isPrint, showLitChar) +import Data.Functor (void) import qualified Data.HashMap.Strict as H import Data.String (IsString (fromString)) import qualified Data.Text as T @@ -277,18 +278,19 @@ unescape input = -- | Parser for a string that contains double quotes. Returns unescaped string. escapedTextParser :: TextParser String escapedTextParser = do - xs <- P.many (P.try stringLiteral) - x <- P.manyTill P.anySingle P.eof -- consume characters after the final double quote - pure $ concat xs ++ x + xs <- P.many (outsideStringLiteral P.<|> stringLiteral) + pure $ concat xs where + outsideStringLiteral :: TextParser String + outsideStringLiteral = P.someTill (P.anySingleBut '"') (P.lookAhead (void (P.char '"') P.<|> P.eof)) + stringLiteral :: TextParser String stringLiteral = do - before <- P.manyTill P.anySingle (P.char '"') -- include any character before the first double quote - inside <- P.manyTill P.charLiteral (P.char '"') + inside <- P.char '"' >> P.manyTill P.charLiteral (P.char '"') let f '"' = "\\\"" -- double quote should still be escaped -- Despite the docs, 'showLitChar' and 'showLitString' from 'Data.Char' DOES ESCAPE unicode printable -- characters. So we need to call 'isPrint' from 'Data.Char' manually. f ch = if isPrint ch then [ch] else showLitChar ch "" inside' = concatMap f inside - pure $ before <> "\"" <> inside' <> "\"" + pure $ "\"" <> inside' <> "\"" From ac5ea88cec0c540c06cf66d15112e17318e9271c Mon Sep 17 00:00:00 2001 From: kokobd Date: Thu, 15 Sep 2022 21:06:53 +0800 Subject: [PATCH 5/5] simplify code & add more docs --- ghcide/src/Development/IDE/GHC/Util.hs | 7 +++++-- hls-plugin-api/src/Ide/PluginUtils.hs | 4 +--- hls-plugin-api/test/Ide/PluginUtilsTest.hs | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ghcide/src/Development/IDE/GHC/Util.hs b/ghcide/src/Development/IDE/GHC/Util.hs index 1a5fd5c6a6..69cc2adf77 100644 --- a/ghcide/src/Development/IDE/GHC/Util.hs +++ b/ghcide/src/Development/IDE/GHC/Util.hs @@ -288,10 +288,13 @@ instance Outputable SDoc where #endif -- | Print a GHC value in `defaultUserStyle` without unique symbols. +-- It uses `showSDocUnsafe` with `unsafeGlobalDynFlags` internally. -- --- This is the most common print utility, and it will print with a user-friendly style like: `a_a4ME` as `a`. +-- This is the most common print utility. +-- It will do something additionally compared to what the 'Outputable' instance does. -- --- It uses `showSDocUnsafe` with `unsafeGlobalDynFlags` internally. +-- 1. print with a user-friendly style: `a_a4ME` as `a`. +-- 2. unescape escape sequences of printable unicode characters within a pair of double quotes printOutputable :: Outputable a => a -> T.Text printOutputable = -- IfaceTyLit from GHC.Iface.Type implements Outputable with 'show'. diff --git a/hls-plugin-api/src/Ide/PluginUtils.hs b/hls-plugin-api/src/Ide/PluginUtils.hs index 0cff0daefb..5c93407974 100644 --- a/hls-plugin-api/src/Ide/PluginUtils.hs +++ b/hls-plugin-api/src/Ide/PluginUtils.hs @@ -277,9 +277,7 @@ unescape input = -- | Parser for a string that contains double quotes. Returns unescaped string. escapedTextParser :: TextParser String -escapedTextParser = do - xs <- P.many (outsideStringLiteral P.<|> stringLiteral) - pure $ concat xs +escapedTextParser = concat <$> P.many (outsideStringLiteral P.<|> stringLiteral) where outsideStringLiteral :: TextParser String outsideStringLiteral = P.someTill (P.anySingleBut '"') (P.lookAhead (void (P.char '"') P.<|> P.eof)) diff --git a/hls-plugin-api/test/Ide/PluginUtilsTest.hs b/hls-plugin-api/test/Ide/PluginUtilsTest.hs index 360fc69451..bad3c1dfbc 100644 --- a/hls-plugin-api/test/Ide/PluginUtilsTest.hs +++ b/hls-plugin-api/test/Ide/PluginUtilsTest.hs @@ -29,7 +29,7 @@ unescapeTest = testGroup "unescape" , testCase "many pairs of quote" $ unescape "oo\"hello\\19990\\30028\"abc\"\1087\1088\1080\1074\1077\1090\"hh" @?= "oo\"hello世界\"abc\"привет\"hh" , testCase "double quote itself should not be unescaped" $ - unescape "\"\\\"o\"" @?= "\"\\\"o\"" + unescape "\"\\\"\\19990o\"" @?= "\"\\\"世o\"" , testCase "control characters should not be escaped" $ unescape "\"\\n\\t\"" @?= "\"\\n\\t\"" ]