diff --git a/ghcide/.hlint.yaml b/ghcide/.hlint.yaml index 5044ebff09..4f271f5e7d 100644 --- a/ghcide/.hlint.yaml +++ b/ghcide/.hlint.yaml @@ -112,6 +112,7 @@ - Development.IDE.Spans.Documentation - Development.IDE.Spans.Common - Development.IDE.Spans.AtPoint + - Development.IDE.Spans.Pragmas - Development.IDE.Plugin.CodeAction - Development.IDE.Plugin.Completions - Development.IDE.Plugin.Completions.Logic diff --git a/ghcide/ghcide.cabal b/ghcide/ghcide.cabal index 60552e80be..2471bf0e38 100644 --- a/ghcide/ghcide.cabal +++ b/ghcide/ghcide.cabal @@ -186,6 +186,7 @@ library Development.IDE.Spans.Documentation Development.IDE.Spans.AtPoint Development.IDE.Spans.LocalBindings + Development.IDE.Spans.Pragmas Development.IDE.Types.Diagnostics Development.IDE.Types.Exports Development.IDE.Types.HscEnvEq diff --git a/ghcide/src/Development/IDE/Spans/Pragmas.hs b/ghcide/src/Development/IDE/Spans/Pragmas.hs new file mode 100644 index 0000000000..81de0a00c2 --- /dev/null +++ b/ghcide/src/Development/IDE/Spans/Pragmas.hs @@ -0,0 +1,400 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE MultiWayIf #-} + +module Development.IDE.Spans.Pragmas + ( NextPragmaInfo(..) + , LineSplitTextEdits(..) + , getNextPragmaInfo ) where + +import Data.Bits (Bits (setBit)) +import Data.Function ((&)) +import qualified Data.List as List +import qualified Data.Maybe as Maybe +import Data.Text (Text) +import qualified Data.Text as Text +import Development.IDE (srcSpanToRange) +import Development.IDE.GHC.Compat +import Development.IDE.GHC.Compat.Util +import qualified Language.LSP.Types as LSP + +getNextPragmaInfo :: DynFlags -> Maybe Text -> NextPragmaInfo +getNextPragmaInfo dynFlags sourceText = + if | Just sourceText <- sourceText + , let sourceStringBuffer = stringToStringBuffer (Text.unpack sourceText) + , POk _ parserState <- parsePreDecl dynFlags sourceStringBuffer + -> case parserState of + ParserStateNotDone{ nextPragma } -> nextPragma + ParserStateDone{ nextPragma } -> nextPragma + | otherwise + -> NextPragmaInfo 0 Nothing + +-- Pre-declaration comments parser ----------------------------------------------------- + +-- | Each mode represents the "strongest" thing we've seen so far. +-- From strongest to weakest: +-- ModePragma, ModeHaddock, ModeComment, ModeInitial +data Mode = ModePragma | ModeHaddock | ModeComment | ModeInitial deriving Show + +data LineSplitTextEdits = LineSplitTextEdits { + lineSplitInsertTextEdit :: !LSP.TextEdit, + lineSplitDeleteTextEdit :: !LSP.TextEdit +} deriving Show + +data NextPragmaInfo = NextPragmaInfo { + nextPragmaLine :: !Int, + lineSplitTextEdits :: !(Maybe LineSplitTextEdits) +} deriving Show + +data ParserState + = ParserStateNotDone + { nextPragma :: !NextPragmaInfo + , mode :: !Mode + , lastBlockCommentLine :: !Int + , lastPragmaLine :: !Int + , isLastTokenHash :: !Bool + } + | ParserStateDone { nextPragma :: NextPragmaInfo } + deriving Show + +isPragma :: String -> Bool +isPragma = List.isPrefixOf "{-#" + +isDownwardBlockHaddock :: String -> Bool +isDownwardBlockHaddock = List.isPrefixOf "{-|" + +isDownwardLineHaddock :: String -> Bool +isDownwardLineHaddock = List.isPrefixOf "-- |" + +-- need to merge tokens that are deleted/inserted into one TextEdit each +-- to work around some weird TextEdits applied in reversed order issue +updateLineSplitTextEdits :: LSP.Range -> String -> Maybe LineSplitTextEdits -> LineSplitTextEdits +updateLineSplitTextEdits tokenRange tokenString prevLineSplitTextEdits + | Just prevLineSplitTextEdits <- prevLineSplitTextEdits + , let LineSplitTextEdits + { lineSplitInsertTextEdit = prevInsertTextEdit + , lineSplitDeleteTextEdit = prevDeleteTextEdit } = prevLineSplitTextEdits + , let LSP.TextEdit prevInsertRange prevInsertText = prevInsertTextEdit + , let LSP.TextEdit prevDeleteRange _prevDeleteText = prevDeleteTextEdit + , let LSP.Range prevInsertStartPos prevInsertEndPos = prevInsertRange + , let LSP.Position _prevInsertStartLine _prevInsertStartCol = prevInsertStartPos + , let LSP.Position _prevInsertEndLine _prevInsertEndCol = prevInsertEndPos + , let LSP.Range prevDeleteStartPos prevDeleteEndPos = prevDeleteRange + , let LSP.Position _prevDeleteStartLine _prevDeleteStartCol = prevDeleteStartPos + , let LSP.Position _prevDeleteEndLine prevDeleteEndCol = prevDeleteEndPos + , let currInsertRange = prevInsertRange + , let currInsertText = + Text.init prevInsertText + <> Text.replicate (startCol - prevDeleteEndCol) " " + <> Text.pack (List.take newLineCol tokenString) + <> "\n" + , let currInsertTextEdit = LSP.TextEdit currInsertRange currInsertText + , let currDeleteStartPos = prevDeleteStartPos + , let currDeleteEndPos = LSP.Position endLine endCol + , let currDeleteRange = LSP.Range currDeleteStartPos currDeleteEndPos + , let currDeleteTextEdit = LSP.TextEdit currDeleteRange "" + = LineSplitTextEdits currInsertTextEdit currDeleteTextEdit + | otherwise + , let LSP.Range startPos _ = tokenRange + , let deleteTextEdit = LSP.TextEdit (LSP.Range startPos startPos{ LSP._character = startCol + newLineCol }) "" + , let insertPosition = LSP.Position (startLine + 1) 0 + , let insertRange = LSP.Range insertPosition insertPosition + , let insertText = Text.pack (List.take newLineCol tokenString) <> "\n" + , let insertTextEdit = LSP.TextEdit insertRange insertText + = LineSplitTextEdits insertTextEdit deleteTextEdit + where + LSP.Range (LSP.Position startLine startCol) (LSP.Position endLine endCol) = tokenRange + + newLineCol = Maybe.fromMaybe (length tokenString) (List.elemIndex '\n' tokenString) + +-- ITvarsym "#" after a block comment is a parse error so we don't need to worry about it +updateParserState :: Token -> LSP.Range -> ParserState -> ParserState +updateParserState token range prevParserState + | ParserStateNotDone + { nextPragma = prevNextPragma@NextPragmaInfo{ lineSplitTextEdits = prevLineSplitTextEdits } + , mode = prevMode + , lastBlockCommentLine + , lastPragmaLine + } <- prevParserState + , let defaultParserState = prevParserState { isLastTokenHash = False } + , let LSP.Range (LSP.Position startLine _) (LSP.Position endLine _) = range + = case prevMode of + ModeInitial -> + case token of + ITvarsym "#" -> defaultParserState{ isLastTokenHash = True } + ITlineComment s + | isDownwardLineHaddock s -> defaultParserState{ mode = ModeHaddock } + | otherwise -> + defaultParserState + { nextPragma = NextPragmaInfo (endLine + 1) Nothing + , mode = ModeComment } + ITblockComment s + | isPragma s -> + defaultParserState + { nextPragma = NextPragmaInfo (endLine + 1) Nothing + , mode = ModePragma + , lastPragmaLine = endLine } + | isDownwardBlockHaddock s -> defaultParserState{ mode = ModeHaddock } + | otherwise -> + defaultParserState + { nextPragma = NextPragmaInfo (endLine + 1) Nothing + , mode = ModeComment + , lastBlockCommentLine = endLine } + _ -> ParserStateDone prevNextPragma + ModeComment -> + case token of + ITvarsym "#" -> defaultParserState{ isLastTokenHash = True } + ITlineComment s + | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits + , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> + defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } + | isDownwardLineHaddock s + , lastBlockCommentLine == startLine + , let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing -> + defaultParserState + { nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } + , mode = ModeHaddock } + | otherwise -> + defaultParserState { nextPragma = NextPragmaInfo (endLine + 1) Nothing } + ITblockComment s + | isPragma s -> + defaultParserState + { nextPragma = NextPragmaInfo (endLine + 1) Nothing + , mode = ModePragma + , lastPragmaLine = endLine } + | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits + , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> + defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } + | isDownwardBlockHaddock s + , lastBlockCommentLine == startLine + , let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing -> + defaultParserState{ + nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits }, + mode = ModeHaddock } + | otherwise -> + defaultParserState{ + nextPragma = NextPragmaInfo (endLine + 1) Nothing, + lastBlockCommentLine = endLine } + _ -> ParserStateDone prevNextPragma + ModeHaddock -> + case token of + ITvarsym "#" -> + defaultParserState{ isLastTokenHash = True } + ITlineComment s + | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits + , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> + defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } + | otherwise -> + defaultParserState + ITblockComment s + | isPragma s -> + defaultParserState{ + nextPragma = NextPragmaInfo (endLine + 1) Nothing, + mode = ModePragma, + lastPragmaLine = endLine } + | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits + , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> + defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } + | otherwise -> defaultParserState{ lastBlockCommentLine = endLine } + _ -> ParserStateDone prevNextPragma + ModePragma -> + case token of + ITvarsym "#" -> defaultParserState{ isLastTokenHash = True } + ITlineComment s + | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits + , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> + defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } + | isDownwardLineHaddock s + , lastPragmaLine == startLine + , let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing -> + defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } + | otherwise -> + defaultParserState + ITblockComment s + | isPragma s -> + defaultParserState{ nextPragma = NextPragmaInfo (endLine + 1) Nothing, lastPragmaLine = endLine } + | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits + , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> + defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } + | isDownwardBlockHaddock s + , lastPragmaLine == startLine + , let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing -> + defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } + | lastPragmaLine == startLine && startLine < endLine + , let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing -> + defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } + | otherwise -> + defaultParserState{ lastBlockCommentLine = endLine } + _ -> ParserStateDone prevNextPragma + | otherwise = prevParserState + where + hasDeleteStartedOnSameLine :: Int -> Maybe LineSplitTextEdits -> Bool + hasDeleteStartedOnSameLine line lineSplitTextEdits + | Just lineSplitTextEdits <- lineSplitTextEdits + , let LineSplitTextEdits{ lineSplitDeleteTextEdit } = lineSplitTextEdits + , let LSP.TextEdit deleteRange _ = lineSplitDeleteTextEdit + , let LSP.Range _ deleteEndPosition = deleteRange + , let LSP.Position deleteEndLine _ = deleteEndPosition + = deleteEndLine == line + | otherwise = False + +lexUntilNextLineIncl :: P (Located Token) +lexUntilNextLineIncl = do + PState{ last_loc } <- getPState +#if MIN_VERSION_ghc(9,0,0) + let PsSpan{ psRealSpan = lastRealSrcSpan } = last_loc +#else + let lastRealSrcSpan = last_loc +#endif + let prevEndLine = lastRealSrcSpan & realSrcSpanEnd & srcLocLine + locatedToken@(L srcSpan _token) <- lexer False pure + if | RealSrcLoc currEndRealSrcLoc _ <- srcSpan & srcSpanEnd + , let currEndLine = currEndRealSrcLoc & srcLocLine + -> if prevEndLine < currEndLine then + pure locatedToken + else lexUntilNextLineIncl + | otherwise -> pure locatedToken + +dropWhileStringBuffer :: (Char -> Bool) -> StringBuffer -> StringBuffer +dropWhileStringBuffer predicate buffer + | atEnd buffer = buffer + | let (c, remainingBuffer) = nextChar buffer + = if predicate c then + dropWhileStringBuffer predicate remainingBuffer + else + buffer + +isHorizontalSpace :: Char -> Bool +isHorizontalSpace c = c == ' ' || c == '\t' + +data ShebangParserState = ShebangParserState { + nextPragmaLine :: !Int, + newlineCount :: !Int, + prevCharIsHash :: !Bool, + buffer :: !StringBuffer +} + +-- lexer seems to ignore shebangs completely hence this function +parseShebangs :: ShebangParserState -> ShebangParserState +parseShebangs prev@ShebangParserState{ newlineCount = prevNewlineCount, prevCharIsHash, buffer = prevBuffer } + | atEnd prevBuffer + = prev + | let (c, currBuffer) = nextChar (dropWhileStringBuffer isHorizontalSpace prevBuffer) + = if c == '#' then + parseShebangs prev{ prevCharIsHash = True, buffer = currBuffer } + else if c == '!' && prevCharIsHash then + parseShebangs prev{ nextPragmaLine = prevNewlineCount + 1, buffer = dropWhileStringBuffer (/= '\n') currBuffer } + else if c == '\n' then + parseShebangs prev{ newlineCount = prevNewlineCount + 1, buffer = currBuffer } + else + prev + + +-- | Parses blank lines, comments, haddock comments ("-- |"), lines that start +-- with "#!", lines that start with "#", pragma lines using the GHC API lexer. +-- When it doesn't find one of these things then it's assumed that we've found +-- a declaration, end-of-file, or a ghc parse error, and the parser stops. +-- Shebangs are parsed separately than the rest becaues the lexer ignores them. +-- +-- The reason for custom parsing instead of using annotations, or turning on/off +-- extensions in the dynflags is because there are a number of extensions that +-- while removing parse errors, can also introduce them. Hence, there are +-- cases where the file cannot be parsed without error when we want to insert +-- extension (and other) pragmas. The compiler (8.10.7) doesn't include +-- annotations in its failure state. So if the compiler someday returns +-- annotation or equivalent information when it fails then we can replace this +-- with that. +-- +-- The reason for using the compiler lexer is to reduce duplicated +-- implementation, particularly nested comments, but in retrospect this comes +-- with the disadvantage of the logic feeling more complex, and not being able +-- to handle whitespace directly. +-- +-- The parser keeps track of state in order to place the next pragma line +-- according to some rules: +-- +-- - Ignore lines starting with '#' except for shebangs. +-- - If pragmas exist place after last pragma +-- - else if haddock comments exist: +-- - If comments exist place after last comment +-- - else if shebangs exist place after last shebang +-- - else place at first line +-- - else if comments exist place after last comment +-- - else if shebangs exist place after last shebang +-- - else place at first line +-- +-- Additionally the parser keeps track of information to be able to insert +-- pragmas inbetween lines. +-- +-- For example the parser keeps track of information so that +-- +-- > {- block comment -} -- | haddock +-- +-- can become +-- +-- > {- block comment -} +-- > {-# pragma #-} +-- > -- | haddock +-- +-- This information does not respect the type of whitespace, because the lexer +-- strips whitespace and gives locations. +-- +-- In this example the tabs are converted to spaces in the TextEdits: +-- +-- > {- block comment -}-- | haddock +-- +parsePreDecl :: DynFlags -> StringBuffer -> ParseResult ParserState +parsePreDecl dynFlags buffer = unP (go initialParserState) pState + where + initialShebangParserState = ShebangParserState{ + nextPragmaLine = 0, + newlineCount = 0, + prevCharIsHash = False, + buffer = buffer } + ShebangParserState{ nextPragmaLine } = parseShebangs initialShebangParserState + pState = mkLexerPState dynFlags buffer + initialParserState = ParserStateNotDone (NextPragmaInfo nextPragmaLine Nothing) ModeInitial (-1) (-1) False + + go :: ParserState -> P ParserState + go prevParserState = + case prevParserState of + ParserStateDone _ -> pure prevParserState + ParserStateNotDone{..} -> do + L srcSpan token <- + if isLastTokenHash then + lexUntilNextLineIncl + else + lexer False pure + case srcSpanToRange srcSpan of + Just range -> go (updateParserState token range prevParserState) + Nothing -> pure prevParserState + +mkLexerPState :: DynFlags -> StringBuffer -> PState +mkLexerPState dynFlags stringBuffer = + let + startRealSrcLoc = mkRealSrcLoc "asdf" 1 1 + updateDynFlags = flip gopt_unset Opt_Haddock . flip gopt_set Opt_KeepRawTokenStream + finalDynFlags = updateDynFlags dynFlags +#if !MIN_VERSION_ghc(8,8,1) + pState = mkPState finalDynFlags stringBuffer startRealSrcLoc + finalPState = pState{ use_pos_prags = False } +#elif !MIN_VERSION_ghc(8,10,1) + mkLexerParserFlags = + mkParserFlags' + <$> warningFlags + <*> extensionFlags + <*> homeUnitId_ + <*> safeImportsOn + <*> gopt Opt_Haddock + <*> gopt Opt_KeepRawTokenStream + <*> const False + finalPState = mkPStatePure (mkLexerParserFlags dynFlags) stringBuffer startRealSrcLoc +#else + pState = mkPState finalDynFlags stringBuffer startRealSrcLoc + PState{ options = pStateOptions } = pState + finalExtBitsMap = setBit (pExtsBitmap pStateOptions) (fromEnum UsePosPragsBit) + finalPStateOptions = pStateOptions{ pExtsBitmap = finalExtBitsMap } + finalPState = pState{ options = finalPStateOptions } +#endif + in + finalPState diff --git a/plugins/hls-pragmas-plugin/src/Ide/Plugin/Pragmas.hs b/plugins/hls-pragmas-plugin/src/Ide/Plugin/Pragmas.hs index 7a6a62796c..05c3e4b9cf 100644 --- a/plugins/hls-pragmas-plugin/src/Ide/Plugin/Pragmas.hs +++ b/plugins/hls-pragmas-plugin/src/Ide/Plugin/Pragmas.hs @@ -4,7 +4,6 @@ {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE ViewPatterns #-} @@ -52,6 +51,7 @@ import Development.IDE.GHC.Compat import Development.IDE.GHC.Compat.Util (StringBuffer, atEnd, nextChar, stringToStringBuffer) +import qualified Development.IDE.Spans.Pragmas as Pragmas import Development.IDE.Types.HscEnvEq (HscEnvEq, hscEnv) import Ide.Types import qualified Language.LSP.Server as LSP @@ -87,16 +87,7 @@ codeActionProvider state _plId (J.CodeActionParams _ _ docId _ (J.CodeActionCont case ghcSession of Just (hscEnv -> hsc_dflags -> sessionDynFlags, _) -> - let nextPragmaInfo = - if | Just sourceText <- fileContents - , let sourceStringBuffer = stringToStringBuffer (T.unpack sourceText) - , POk _ parserState <- parsePreDecl sessionDynFlags sourceStringBuffer - , let nextPragma = case parserState of - ParserStateNotDone { nextPragma } -> nextPragma - ParserStateDone { nextPragma } -> nextPragma - -> nextPragma - | otherwise - -> NextPragma 0 Nothing + let nextPragmaInfo = Pragmas.getNextPragmaInfo sessionDynFlags fileContents pedits = nubOrdOn snd . concat $ suggest parsedModuleDynFlags <$> diags in pure $ Right $ List $ pragmaEditToAction uri nextPragmaInfo <$> pedits @@ -107,8 +98,8 @@ codeActionProvider state _plId (J.CodeActionParams _ _ docId _ (J.CodeActionCont -- | Add a Pragma to the given URI at the top of the file. -- It is assumed that the pragma name is a valid pragma, -- thus, not validated. -pragmaEditToAction :: Uri -> NextPragma -> PragmaEdit -> (J.Command J.|? J.CodeAction) -pragmaEditToAction uri NextPragma{ nextPragmaLine, lineSplitTextEdits } (title, p) = +pragmaEditToAction :: Uri -> Pragmas.NextPragmaInfo -> PragmaEdit -> (J.Command J.|? J.CodeAction) +pragmaEditToAction uri Pragmas.NextPragmaInfo{ nextPragmaLine, lineSplitTextEdits } (title, p) = J.InR $ J.CodeAction title (Just J.CodeActionQuickFix) (Just (J.List [])) Nothing Nothing (Just edit) Nothing Nothing where render (OptGHC x) = "{-# OPTIONS_GHC -Wno-" <> x <> " #-}\n" @@ -118,7 +109,7 @@ pragmaEditToAction uri NextPragma{ nextPragmaLine, lineSplitTextEdits } (title, -- workaround the fact that for some reason lsp-test applies text -- edits in reverse order than lsp (tried in both coc.nvim and vscode) textEdits = - if | Just (LineSplitTextEdits insertTextEdit deleteTextEdit) <- lineSplitTextEdits + if | Just (Pragmas.LineSplitTextEdits insertTextEdit deleteTextEdit) <- lineSplitTextEdits , let J.TextEdit{ _range, _newText } = insertTextEdit -> [J.TextEdit _range (render p <> _newText), deleteTextEdit] | otherwise -> [J.TextEdit pragmaInsertRange (render p)] @@ -296,393 +287,5 @@ mkExtCompl label = Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing --- Parser stuff ----------------------------------------------------- --- | Each mode represents the "strongest" thing we've seen so far. --- From strongest to weakest: --- ModePragma, ModeHaddock, ModeComment, ModeInitial -data Mode = ModePragma | ModeHaddock | ModeComment | ModeInitial deriving Show - -data LineSplitTextEdits = LineSplitTextEdits { - lineSplitInsertTextEdit :: J.TextEdit, - lineSplitDeleteTextEdit :: J.TextEdit -} deriving Show - -data NextPragma = NextPragma { - nextPragmaLine :: Int, - lineSplitTextEdits :: Maybe LineSplitTextEdits -} deriving Show - -data ParserState - = ParserStateNotDone - { nextPragma :: !NextPragma - , mode :: !Mode - , lastBlockCommentLine :: !Int - , lastPragmaLine :: !Int - , isLastTokenHash :: !Bool - } - | ParserStateDone { nextPragma :: NextPragma } - deriving Show - -isPragma :: String -> Bool -isPragma = List.isPrefixOf "{-#" - -isDownwardBlockHaddock :: String -> Bool -isDownwardBlockHaddock = List.isPrefixOf "{-|" - -isDownwardLineHaddock :: String -> Bool -isDownwardLineHaddock = List.isPrefixOf "-- |" - -isLineComment :: String -> Bool -isLineComment = List.isPrefixOf "--" - --- LSP spec describes the horizontal part of a Range as (paraphrasing) --- "0-based positions between characters" -srcSpanToRange :: SrcSpan -> Maybe J.Range -srcSpanToRange srcSpan - | RealSrcLoc startRealSrcLoc _ <- srcSpanStart srcSpan - , RealSrcLoc endRealSrcLoc _ <- srcSpanEnd srcSpan - , let startLine = srcLocLine startRealSrcLoc - , let startCol = srcLocCol startRealSrcLoc - , let endLine = srcLocLine endRealSrcLoc - , let endCol = srcLocCol endRealSrcLoc - , let startPosition = J.Position (startLine - 1) (startCol - 1) - , let endPosition = J.Position (endLine - 1) endCol - , let range = J.Range startPosition endPosition - = Just range - | otherwise - = Nothing - --- need to merge tokens that are deleted/inserted into one TextEdit each --- to work around some weird TextEdits applied in reversed order issue -updateLineSplitTextEdits :: J.Range -> String -> Maybe LineSplitTextEdits -> LineSplitTextEdits -updateLineSplitTextEdits tokenRange tokenString prevLineSplitTextEdits - | Just prevLineSplitTextEdits <- prevLineSplitTextEdits - , let LineSplitTextEdits - { lineSplitInsertTextEdit = prevInsertTextEdit - , lineSplitDeleteTextEdit = prevDeleteTextEdit } = prevLineSplitTextEdits - , let J.TextEdit prevInsertRange prevInsertText = prevInsertTextEdit - , let J.TextEdit prevDeleteRange prevDeleteText = prevDeleteTextEdit - , let J.Range prevInsertStartPos prevInsertEndPos = prevInsertRange - , let J.Position prevInsertStartLine prevInsertStartCol = prevInsertStartPos - , let J.Position prevInsertEndLine prevInsertEndCol = prevInsertEndPos - , let J.Range prevDeleteStartPos prevDeleteEndPos = prevDeleteRange - , let J.Position prevDeleteStartLine prevDeleteStartCol = prevDeleteStartPos - , let J.Position prevDeleteEndLine prevDeleteEndCol = prevDeleteEndPos - , let currInsertRange = prevInsertRange - , let currInsertText = - T.init prevInsertText - <> T.replicate (startCol - prevDeleteEndCol) " " - <> T.pack (List.take newLineCol tokenString) - <> "\n" - , let currInsertTextEdit = J.TextEdit currInsertRange currInsertText - , let currDeleteStartPos = prevDeleteStartPos - , let currDeleteEndPos = J.Position endLine endCol - , let currDeleteRange = J.Range currDeleteStartPos currDeleteEndPos - , let currDeleteTextEdit = J.TextEdit currDeleteRange "" - = LineSplitTextEdits currInsertTextEdit currDeleteTextEdit - | otherwise - , let J.Range startPos _ = tokenRange - , let deleteTextEdit = J.TextEdit (J.Range startPos startPos{ J._character = startCol + newLineCol }) "" - , let insertPosition = J.Position (startLine + 1) 0 - , let insertRange = J.Range insertPosition insertPosition - , let insertText = T.pack (List.take newLineCol tokenString) <> "\n" - , let insertTextEdit = J.TextEdit insertRange insertText - = LineSplitTextEdits insertTextEdit deleteTextEdit - where - J.Range (J.Position startLine startCol) (J.Position endLine endCol) = tokenRange - - newLineCol = Maybe.fromMaybe (length tokenString) (List.elemIndex '\n' tokenString) - --- ITvarsym "#" after a block comment is a parse error so we don't need to worry about it -updateParserState :: Token -> J.Range -> ParserState -> ParserState -updateParserState token range prevParserState - | ParserStateNotDone - { nextPragma = prevNextPragma@NextPragma{ lineSplitTextEdits = prevLineSplitTextEdits } - , mode = prevMode - , lastBlockCommentLine - , lastPragmaLine - } <- prevParserState - , let defaultParserState = prevParserState { isLastTokenHash = False } - , let J.Range (J.Position startLine _) (J.Position endLine _) = range - = case prevMode of - ModeInitial -> - case token of - ITvarsym "#" -> defaultParserState{ isLastTokenHash = True } - ITlineComment s - | isDownwardLineHaddock s -> defaultParserState{ mode = ModeHaddock } - | otherwise -> - defaultParserState - { nextPragma = NextPragma (endLine + 1) Nothing - , mode = ModeComment } - ITblockComment s - | isPragma s -> - defaultParserState - { nextPragma = NextPragma (endLine + 1) Nothing - , mode = ModePragma - , lastPragmaLine = endLine } - | isDownwardBlockHaddock s -> defaultParserState{ mode = ModeHaddock } - | otherwise -> - defaultParserState - { nextPragma = NextPragma (endLine + 1) Nothing - , mode = ModeComment - , lastBlockCommentLine = endLine } - _ -> ParserStateDone prevNextPragma - ModeComment -> - case token of - ITvarsym "#" -> defaultParserState{ isLastTokenHash = True } - ITlineComment s - | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits - , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> - defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } - | isDownwardLineHaddock s - , lastBlockCommentLine == startLine - , let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing -> - defaultParserState - { nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } - , mode = ModeHaddock } - | otherwise -> - defaultParserState { nextPragma = NextPragma (endLine + 1) Nothing } - ITblockComment s - | isPragma s -> - defaultParserState - { nextPragma = NextPragma (endLine + 1) Nothing - , mode = ModePragma - , lastPragmaLine = endLine } - | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits - , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> - defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } - | isDownwardBlockHaddock s - , lastBlockCommentLine == startLine - , let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing -> - defaultParserState{ - nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits }, - mode = ModeHaddock } - | otherwise -> - defaultParserState{ - nextPragma = NextPragma (endLine + 1) Nothing, - lastBlockCommentLine = endLine } - _ -> ParserStateDone prevNextPragma - ModeHaddock -> - case token of - ITvarsym "#" -> - defaultParserState{ isLastTokenHash = True } - ITlineComment s - | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits - , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> - defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } - | otherwise -> - defaultParserState - ITblockComment s - | isPragma s -> - defaultParserState{ - nextPragma = NextPragma (endLine + 1) Nothing, - mode = ModePragma, - lastPragmaLine = endLine } - | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits - , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> - defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } - | otherwise -> defaultParserState{ lastBlockCommentLine = endLine } - _ -> ParserStateDone prevNextPragma - ModePragma -> - case token of - ITvarsym "#" -> defaultParserState{ isLastTokenHash = True } - ITlineComment s - | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits - , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> - defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } - | isDownwardLineHaddock s - , lastPragmaLine == startLine - , let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing -> - defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } - | otherwise -> - defaultParserState - ITblockComment s - | isPragma s -> - defaultParserState{ nextPragma = NextPragma (endLine + 1) Nothing, lastPragmaLine = endLine } - | hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits - , let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits -> - defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } - | isDownwardBlockHaddock s - , lastPragmaLine == startLine - , let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing -> - defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } - | lastPragmaLine == startLine && startLine < endLine - , let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing -> - defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } } - | otherwise -> - defaultParserState{ lastBlockCommentLine = endLine } - _ -> ParserStateDone prevNextPragma - | otherwise = prevParserState - where - hasDeleteStartedOnSameLine :: Int -> Maybe LineSplitTextEdits -> Bool - hasDeleteStartedOnSameLine line lineSplitTextEdits - | Just lineSplitTextEdits <- lineSplitTextEdits - , let LineSplitTextEdits{ lineSplitDeleteTextEdit } = lineSplitTextEdits - , let J.TextEdit deleteRange _ = lineSplitDeleteTextEdit - , let J.Range _ deleteEndPosition = deleteRange - , let J.Position deleteEndLine _ = deleteEndPosition - = deleteEndLine == line - | otherwise = False - -lexUntilNextLineIncl :: P (Located Token) -lexUntilNextLineIncl = do - PState{ last_loc } <- getPState -#if MIN_VERSION_ghc(9,0,0) - let PsSpan{ psRealSpan = lastRealSrcSpan } = last_loc -#else - let lastRealSrcSpan = last_loc -#endif - let prevEndLine = lastRealSrcSpan & realSrcSpanEnd & srcLocLine - locatedToken@(L srcSpan token) <- lexer False pure - if | RealSrcLoc currEndRealSrcLoc _ <- srcSpan & srcSpanEnd - , let currEndLine = currEndRealSrcLoc & srcLocLine - -> if prevEndLine < currEndLine then - pure locatedToken - else lexUntilNextLineIncl - | otherwise -> pure locatedToken - -dropWhileStringBuffer :: (Char -> Bool) -> StringBuffer -> StringBuffer -dropWhileStringBuffer predicate buffer - | atEnd buffer = buffer - | let (c, remainingBuffer) = nextChar buffer - = if predicate c then - dropWhileStringBuffer predicate remainingBuffer - else - buffer - -isHorizontalSpace :: Char -> Bool -isHorizontalSpace c = c == ' ' || c == '\t' - -data ShebangParserState = ShebangParserState { - nextPragmaLine :: !Int, - newlineCount :: !Int, - prevCharIsHash :: !Bool, - buffer :: !StringBuffer -} - --- lexer seems to ignore shebangs completely hence this function -parseShebangs :: ShebangParserState -> ShebangParserState -parseShebangs prev@ShebangParserState{ nextPragmaLine, newlineCount = prevNewlineCount, prevCharIsHash, buffer = prevBuffer } - | atEnd prevBuffer - = prev - | let (c, currBuffer) = nextChar (dropWhileStringBuffer isHorizontalSpace prevBuffer) - = if c == '#' then - parseShebangs prev{ prevCharIsHash = True, buffer = currBuffer } - else if c == '!' && prevCharIsHash then - parseShebangs prev{ nextPragmaLine = prevNewlineCount + 1, buffer = dropWhileStringBuffer (/= '\n') currBuffer } - else if c == '\n' then - parseShebangs prev{ newlineCount = prevNewlineCount + 1, buffer = currBuffer } - else - prev - - --- | Parses blank lines, comments, haddock comments ("-- |"), lines that start --- with "#!", lines that start with "#", pragma lines using the GHC API lexer. --- When it doesn't find one of these things then it's assumed that we've found --- a declaration, end-of-file, or a ghc parse error, and the parser stops. --- Shebangs are parsed separately than the rest becaues the lexer ignores them. --- --- The reason for custom parsing instead of using annotations, or turning on/off --- extensions in the dynflags is because there are a number of extensions that --- while removing parse errors, can also introduce them. Hence, there are --- cases where the file cannot be parsed without error when we want to insert --- extension (and other) pragmas. The compiler (8.10.7) doesn't include --- annotations in its failure state. So if the compiler someday returns --- annotation or equivalent information when it fails then we can replace this --- with that. --- --- The reason for using the compiler lexer is to reduce duplicated --- implementation, particularly nested comments, but in retrospect this comes --- with the disadvantage of the logic feeling more complex, and not being able --- to handle whitespace directly. --- --- The parser keeps track of state in order to place the next pragma line --- according to some rules: --- --- - Ignore lines starting with '#' except for shebangs. --- - If pragmas exist place after last pragma --- - else if haddock comments exist: --- - If comments exist place after last comment --- - else if shebangs exist place after last shebang --- - else place at first line --- - else if comments exist place after last comment --- - else if shebangs exist place after last shebang --- - else place at first line --- --- Additionally the parser keeps track of information to be able to insert --- pragmas inbetween lines. --- --- For example the parser keeps track of information so that --- --- > {- block comment -} -- | haddock --- --- can become --- --- > {- block comment -} --- > {-# pragma #-} --- > -- | haddock --- --- This information does not respect the type of whitespace, because the lexer --- strips whitespace and gives locations. --- --- In this example the tabs are converted to spaces in the TextEdits: --- --- > {- block comment -}-- | haddock --- -parsePreDecl :: DynFlags -> StringBuffer -> ParseResult ParserState -parsePreDecl dynFlags buffer = unP (go initialParserState) pState - where - initialShebangParserState = ShebangParserState{ - nextPragmaLine = 0, - newlineCount = 0, - prevCharIsHash = False, - buffer = buffer } - ShebangParserState{ nextPragmaLine } = parseShebangs initialShebangParserState - pState = mkLexerPState dynFlags buffer - initialParserState = ParserStateNotDone (NextPragma nextPragmaLine Nothing) ModeInitial (-1) (-1) False - - go :: ParserState -> P ParserState - go prevParserState = - case prevParserState of - ParserStateDone _ -> pure prevParserState - ParserStateNotDone{..} -> do - L srcSpan token <- - if isLastTokenHash then - lexUntilNextLineIncl - else - lexer False pure - case D.srcSpanToRange srcSpan of - Just range -> go (updateParserState token range prevParserState) - Nothing -> pure prevParserState - -mkLexerPState :: DynFlags -> StringBuffer -> PState -mkLexerPState dynFlags stringBuffer = - let - startRealSrcLoc = mkRealSrcLoc "asdf" 1 1 - updateDynFlags = flip gopt_unset Opt_Haddock . flip gopt_set Opt_KeepRawTokenStream - finalDynFlags = updateDynFlags dynFlags -#if !MIN_VERSION_ghc(8,8,1) - pState = mkPState finalDynFlags stringBuffer startRealSrcLoc - finalPState = pState{ use_pos_prags = False } -#elif !MIN_VERSION_ghc(8,10,1) - mkLexerParserFlags = - mkParserFlags' - <$> warningFlags - <*> extensionFlags - <*> homeUnitId_ - <*> safeImportsOn - <*> gopt Opt_Haddock - <*> gopt Opt_KeepRawTokenStream - <*> const False - finalPState = mkPStatePure (mkLexerParserFlags dynFlags) stringBuffer startRealSrcLoc -#else - pState = mkPState finalDynFlags stringBuffer startRealSrcLoc - PState{ options = pStateOptions } = pState - finalExtBitsMap = setBit (pExtsBitmap pStateOptions) (fromEnum UsePosPragsBit) - finalPStateOptions = pStateOptions{ pExtsBitmap = finalExtBitsMap } - finalPState = pState{ options = finalPStateOptions } -#endif - in - finalPState