Skip to content

Commit

Permalink
[#139] Ignore build-related files
Browse files Browse the repository at this point in the history
Problem: At the moment, we're using the ignored option for mainly 2
purposes: 1) to ignore all files in the `.git` folder (`.git/**/*`) to
ignore all build-related temporary files (the default config ignores
`.stack-work/**/*`). A more robust alternative might be to ignore all
files implicitly ignored by git.

Solution: Use `git ls-files` to ignore all files implicitly ignored by git.
  • Loading branch information
Sereja313 committed Oct 21, 2022
1 parent c2aad89 commit bfbe20a
Show file tree
Hide file tree
Showing 22 changed files with 281 additions and 173 deletions.
2 changes: 1 addition & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ steps:
artifact_paths:
- "result/bin/*"

- command: nix run -f ci.nix xrefcheck-static -c xrefcheck --ignored 'tests/**/*'
- command: nix run -f ci.nix xrefcheck-static -c xrefcheck --ignored 'tests/markdowns/**/*' --ignored 'tests/golden/**/*'
label: Xrefcheck itself

- label: lint
Expand Down
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ Unreleased
as broken (with message `Link targets a local file outside repository`).
Same for links that are using directories outside repository (e.g. `/../repo/a.md`),
since such things are not supported by GitHub markdown renderer.
* [#174](https://github.com/serokell/xrefcheck/pull/174)
+ Make xrefcheck only scan files that are tracked by git.
+ Fixed bug where links to ignored files were valid.
+ Fixed bug where links with trailing slashes were invalid.

0.2.1
==========
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ Both relative and absolute local links are supported out of the box.

At the moment of writing, the listed solutions don't support ftp/ftps links.

## Dependencies [](#xrefcheck)

Xrefcheck requires you to have `git` version 2.18.0 or later in your PATH.

## Usage [](#xrefcheck)

We provide the following ways for you to use xrefcheck:
Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ library:
- containers
- cmark-gfm >= 0.2.5
- data-default
- directory-tree
- directory
- dlist
- exceptions
Expand All @@ -99,6 +98,7 @@ library:
- mtl
- o-clock
- optparse-applicative
- process
- regex-tdfa
- req
- tagsoup
Expand Down
7 changes: 1 addition & 6 deletions src/Xrefcheck/Config/Default.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,7 @@ defConfigUnfilled =
traversal:
# Glob patterns describing files which we pretend do not exist
# (so they are neither analyzed nor can be referenced).
ignored:
# Git files
- .git/**/*

# Stack files
- .stack-work/**/*
ignored: []

# Verification parameters.
verification:
Expand Down
12 changes: 9 additions & 3 deletions src/Xrefcheck/Core.hs
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,13 @@ makeLenses ''FileInfo
instance Default FileInfo where
def = diffToFileInfo mempty

newtype RepoInfo = RepoInfo (Map FilePath FileInfo)
deriving stock (Show)
-- | All tracked files and directories.
data RepoInfo = RepoInfo
{ riFiles :: Map FilePath (Maybe FileInfo)
-- ^ Files from the repo with `FileInfo` attached to files that we can scan.
, riDirectories :: Set FilePath
-- ^ Tracked directories.
} deriving stock (Show)

-----------------------------------------------------------
-- Instances
Expand Down Expand Up @@ -171,7 +176,8 @@ instance Given ColorMode => Buildable FileInfo where
]

instance Given ColorMode => Buildable RepoInfo where
build (RepoInfo m) = blockListF' "" buildFileReport (M.toList m)
build (RepoInfo m _) =
blockListF' "" buildFileReport (mapMaybe sequence $ M.toList m)
where
buildFileReport (name, info) = mconcat
[ colorIfNeeded Cyan $ fromString name <> ":\n"
Expand Down
83 changes: 53 additions & 30 deletions src/Xrefcheck/Scan.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@ module Xrefcheck.Scan
import Universum

import Data.Aeson (FromJSON (..), genericParseJSON)
import Data.Foldable qualified as F
import Data.List qualified as L
import Data.Map qualified as M
import Data.Reflection (Given)
import Fmt (Buildable (..), nameF, (+|), (|+))
import System.Directory (doesDirectoryExist)
import System.Directory.Tree qualified as Tree
import System.FilePath (dropTrailingPathSeparator, equalFilePath, takeDirectory, takeExtension)
import System.FilePath
(dropTrailingPathSeparator, equalFilePath, splitDirectories, takeDirectory, takeExtension, (</>))
import System.Process (cwd, readCreateProcess, shell)

import Xrefcheck.Core
import Xrefcheck.Progress
Expand Down Expand Up @@ -107,6 +108,31 @@ specificFormatsSupport formats = \ext -> M.lookup ext formatsMap
, extension <- extensions
]

-- | Process files that are tracked by git and not ignored by the config.
readDirectoryWith
:: forall a. TraversalConfig
-> (FilePath -> IO a)
-> FilePath
-> IO [(FilePath, a)]
readDirectoryWith config scanner root =
traverse scanFile
. filter (not . isIgnored)
. fmap (location </>)
. L.lines =<< readCreateProcess (shell "git ls-files"){cwd = Just root} ""
where
scanFile :: FilePath -> IO (FilePath, a)
scanFile = sequence . (normaliseWithNoTrailing &&& scanner)

isIgnored :: FilePath -> Bool
isIgnored = matchesGlobPatterns root $ tcIgnored config

-- Strip leading "." and trailing "/"
location :: FilePath
location =
if root `equalFilePath` "."
then ""
else dropTrailingPathSeparator root

scanRepo
:: MonadIO m
=> Rewrite -> FormatsSupport -> TraversalConfig -> FilePath -> m ScanResult
Expand All @@ -116,36 +142,33 @@ scanRepo rw formatsSupport config root = do
when (not $ isDirectory root) $
die $ "Repository's root does not seem to be a directory: " <> root

_ Tree.:/ repoTree <- liftIO $ Tree.readDirectoryWithL processFile root
let (errs, fileInfos) = gatherScanErrs &&& gatherFileInfos
$ dropSndMaybes . F.toList
$ Tree.zipPaths $ location Tree.:/ repoTree
return . ScanResult errs $ RepoInfo (M.fromList fileInfos)
(errs, fileInfos) <- liftIO
$ (gatherScanErrs &&& gatherFileInfos)
<$> readDirectoryWith config processFile root

let dirs = fromList $ foldMap (getDirs . fst) fileInfos

return . ScanResult errs $ RepoInfo (M.fromList fileInfos) dirs
where
isDirectory :: FilePath -> Bool
isDirectory = readingSystem . doesDirectoryExist
gatherScanErrs = foldMap (snd . snd)
gatherFileInfos = map (bimap normaliseWithNoTrailing fst)

-- Get all directories from filepath.
getDirs :: FilePath -> [FilePath]
getDirs = scanl (</>) "" . splitDirectories . takeDirectory

gatherScanErrs
:: [(FilePath, Maybe (FileInfo, [ScanError]))]
-> [ScanError]
gatherScanErrs = fold . mapMaybe (fmap snd . snd)

gatherFileInfos
:: [(FilePath, Maybe (FileInfo, [ScanError]))]
-> [(FilePath, Maybe FileInfo)]
gatherFileInfos = map (second (fmap fst))

processFile :: FilePath -> IO $ Maybe (FileInfo, [ScanError])
processFile file = do
let ext = takeExtension file
let mscanner = formatsSupport ext
if isIgnored file
then pure Nothing
else forM mscanner ($ file)
dropSndMaybes l = [(a, b) | (a, Just b) <- l]

isIgnored = matchesGlobPatterns root $ tcIgnored config

-- The context location of the root.
-- This is done by removing the last component from the path.
-- > root = "./folder/file.md" ==> location = "./folder"
-- > root = "./folder/subfolder" ==> location = "./folder"
-- > root = "./folder/subfolder/" ==> location = "./folder"
-- > root = "./folder/subfolder/./" ==> location = "./folder/subfolder"
-- > root = "." ==> location = ""
-- > root = "/absolute/path" ==> location = "/absolute"
-- > root = "/" ==> location = "/"
location =
if root `equalFilePath` "."
then ""
else takeDirectory $ dropTrailingPathSeparator root
forM mscanner ($ file)
50 changes: 39 additions & 11 deletions src/Xrefcheck/Verify.hs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ import Network.HTTP.Req
HttpMethod, NoReqBody (..), defaultHttpConfig, ignoreResponse, req, runReq, useURI)
import Network.HTTP.Types.Header (hRetryAfter)
import Network.HTTP.Types.Status (Status, statusCode, statusMessage)
import System.Directory (doesDirectoryExist, doesFileExist)
import System.FilePath (makeRelative, normalise, splitDirectories, takeDirectory, (</>))
import System.FilePath
(equalFilePath, joinPath, makeRelative, normalise, splitDirectories, takeDirectory, (</>))
import Text.ParserCombinators.ReadPrec qualified as ReadPrec (lift)
import Text.Regex.TDFA.Text (Regex, regexec)
import Text.URI (Authority (..), ParseExceptionBs, URI (..), mkURIBs)
Expand Down Expand Up @@ -260,13 +260,16 @@ verifyRepo
config@VerifyConfig{..}
mode
root
repoInfo'@(RepoInfo repoInfo)
repoInfo'@(RepoInfo files _)
= do
let toScan = do
(file, fileInfo) <- M.toList repoInfo
(file, fileInfo) <- M.toList files
guard . not $ matchesGlobPatterns root vcNotScanned file
ref <- _fiReferences fileInfo
return (file, ref)
case fileInfo of
Just fi -> do
ref <- _fiReferences fi
return (file, ref)
Nothing -> empty -- no support for such file, can do nothing

progressRef <- newIORef $ initVerifyProgress (map snd toScan)

Expand Down Expand Up @@ -312,7 +315,7 @@ verifyReference
config@VerifyConfig{..}
mode
progressRef
(RepoInfo repoInfo)
(RepoInfo files dirs)
root
fileWithReference
ref@Reference{..}
Expand Down Expand Up @@ -417,11 +420,30 @@ verifyReference
unless (isVirtual referredFile) do
checkReferredFileIsInsideRepo referredFile
checkReferredFileExists referredFile
case M.lookup referredFile repoInfo of
case lookupFilePath referredFile $ M.toList files of
Nothing -> pass -- no support for such file, can do nothing
Just referredFileInfo -> whenJust mAnchor $
checkAnchor referredFile (_fiAnchors referredFileInfo)

lookupFilePath :: FilePath -> [(FilePath, Maybe FileInfo)] -> Maybe FileInfo
lookupFilePath fp = snd <=< find (equalFilePath (expandIndirections fp) . fst)

-- expands ".." and "."
-- expandIndirections "a/b/../c" = "a/c"
-- expandIndirections "a/b/c/../../d" = "a/d"
-- expandIndirections "../../a" = "../../a"
-- expandIndirections "a/./b" = "a/b"
-- expandIndirections "a/b/./../c" = "a/c"
expandIndirections :: FilePath -> FilePath
expandIndirections = joinPath . reverse . expand 0 . reverse . splitDirectories
where
expand :: Int -> [FilePath] -> [FilePath]
expand acc ("..":xs) = expand (acc+1) xs
expand acc (".":xs) = expand acc xs
expand 0 (x:xs) = x : expand 0 xs
expand acc (_:xs) = expand (acc-1) xs
expand acc [] = replicate acc ".."

checkReferredFileIsInsideRepo file = unless
(noNegativeNesting $ makeRelative root file) $
throwError (LocalFileOutsideRepo file)
Expand All @@ -439,11 +461,17 @@ verifyReference
nestingChange _ = 1

checkReferredFileExists file = do
let fileExists = readingSystem $ doesFileExist file
let dirExists = readingSystem $ doesDirectoryExist file

unless (fileExists || dirExists) $
throwError (LocalFileDoesNotExist file)
where
matchesFilePath :: FilePath -> Bool
matchesFilePath = equalFilePath $ expandIndirections file

fileExists :: Bool
fileExists = any matchesFilePath $ M.keys files

dirExists :: Bool
dirExists = any matchesFilePath dirs

checkAnchor file fileAnchors anchor = do
checkAnchorReferenceAmbiguity file fileAnchors anchor
Expand Down
2 changes: 1 addition & 1 deletion tests/Test/Xrefcheck/TooManyRequestsSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ test_tooManyRequests = testGroup "429 response tests"
verifyReferenceWithProgress reference progRef = do
fmap wrlItem <$> verifyReference
((cVerification $ defConfig GitHub) { vcIgnoreRefs = [] }) FullMode
progRef (RepoInfo M.empty) "." "" reference
progRef (RepoInfo M.empty mempty) "." "" reference

-- | When called for the first time, returns with a 429 and `Retry-After: @retryAfter@`.
-- Subsequent calls will respond with @status@.
Expand Down
2 changes: 1 addition & 1 deletion tests/Test/Xrefcheck/TrailingSlashSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ test_slash = testGroup "Trailing forward slash detection" $
testCase ("All the files within the root \"" <>
root <>
"\" should exist") $ do
(ScanResult _ (RepoInfo repoInfo)) <- allowRewrite False $ \rw ->
(ScanResult _ (RepoInfo repoInfo _)) <- allowRewrite False $ \rw ->
scanRepo rw format TraversalConfig{ tcIgnored = [] } root
nonExistentFiles <- lefts <$> forM (keys repoInfo) (\filePath -> do
predicate <- doesFileExist filePath
Expand Down
7 changes: 1 addition & 6 deletions tests/configs/github-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,7 @@
traversal:
# Glob patterns describing files which we pretend do not exist
# (so they are neither analyzed nor can be referenced).
ignored:
# Git files
- .git/**/*

# Stack files
- .stack-work/**/*
ignored: []

# Verification parameters.
verification:
Expand Down
58 changes: 58 additions & 0 deletions tests/golden/check-git/check-git.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env bats

# SPDX-FileCopyrightText: 2022 Serokell <https://serokell.io>
#
# SPDX-License-Identifier: MPL-2.0

load '../helpers/bats-support/load'
load '../helpers/bats-assert/load'
load '../helpers/bats-file/load'
load '../helpers'

@test "Git: not a repo" {
cd $TEST_TEMP_DIR

run xrefcheck

assert_output --partial "fatal: not a git repository"
}

@test "Git: file not tracked" {
cd $TEST_TEMP_DIR

git init

echo "[a](/a.md)" >> "git.md"

run xrefcheck

assert_output --partial "All repository links are valid."
}

@test "Git: file tracked, check failure" {
cd $TEST_TEMP_DIR

git init

echo "[a](./a.md)" >> "git.md"

git add git.md

to_temp xrefcheck

assert_diff - <<EOF
=== Invalid references found ===
➥ In file git.md
bad reference (relative) at src:1:1-11:
- text: "a"
- link: ./a.md
- anchor: -
⛀ File does not exist:
a.md
Invalid references dumped, 1 in total.
EOF
}
Loading

0 comments on commit bfbe20a

Please sign in to comment.