diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java index 603202a5b43465..751ce5ef24cb3e 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/ModuleExtensionContext.java @@ -90,7 +90,7 @@ protected ModuleExtensionContext( this.rootModuleHasNonDevDependency = rootModuleHasNonDevDependency; // Record inputs to the extension that are known prior to evaluation. RepoRecordedInput.EnvVar.wrap(staticEnvVars) - .forEach((input, value) -> recordInput(input, value.orElse(null))); + .forEach((input, value) -> recordInputWithValue(input, value.orElse(null))); repoMappingRecorder.record(staticRepoMappingEntries); } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java index aea1b754a10a97..07e50ed2e6b69e 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/SingleExtensionEvalFunction.java @@ -383,12 +383,18 @@ private static Optional didRecordedInputsChange( BlazeDirectories directories, List recordedInputs) throws InterruptedException, NeedsSkyframeRestartException { - Optional outdated = - RepoRecordedInput.isAnyValueOutdated(env, directories, recordedInputs); - if (env.valuesMissing()) { - throw new NeedsSkyframeRestartException(); + // Check inputs in batches to prevent Skyframe cycles caused by outdated dependencies. + for (ImmutableList batch : + RepoRecordedInput.WithValue.splitIntoBatches(recordedInputs)) { + Optional outdated = RepoRecordedInput.isAnyValueOutdated(env, directories, batch); + if (env.valuesMissing()) { + throw new NeedsSkyframeRestartException(); + } + if (outdated.isPresent()) { + return outdated; + } } - return outdated; + return Optional.empty(); } private SingleExtensionValue createSingleExtensionValue( diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD index 86567785c2f13c..cf56970f452f66 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD @@ -42,10 +42,10 @@ java_library( java_library( name = "repository_fetch_function", srcs = [ - "DigestWriter.java", "RepositoryFetchFunction.java", ], deps = [ + ":digest_writer", ":exception", ":repo_definition", ":repo_definition_value", @@ -83,6 +83,32 @@ java_library( ], ) +java_library( + name = "digest_writer", + srcs = [ + "DigestWriter.java", + ], + deps = [ + ":exception", + ":repo_definition", + ":utils", + "//src/main/java/com/google/devtools/build/lib/analysis:blaze_directories", + "//src/main/java/com/google/devtools/build/lib/bazel/bzlmod:serialization", + "//src/main/java/com/google/devtools/build/lib/cmdline", + "//src/main/java/com/google/devtools/build/lib/packages/semantics", + "//src/main/java/com/google/devtools/build/lib/repository:repository_events", + "//src/main/java/com/google/devtools/build/lib/rules:repository/repo_recorded_input", + "//src/main/java/com/google/devtools/build/lib/skyframe:repo_environment_function", + "//src/main/java/com/google/devtools/build/lib/util", + "//src/main/java/com/google/devtools/build/lib/vfs", + 
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", + "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", + "//src/main/java/net/starlark/java/eval", + "//third_party:guava", + "//third_party:jsr305", + ], +) + java_library( name = "repo_definition", srcs = [ @@ -144,6 +170,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", "//src/main/java/net/starlark/java/eval", + "//third_party:guava", "//third_party:jsr305", ], ) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/DigestWriter.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/DigestWriter.java index b9473c77b2c62d..9678d7784a1cec 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/DigestWriter.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/DigestWriter.java @@ -18,6 +18,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Splitter; +import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.bazel.bzlmod.GsonTypeAdapterUtil; @@ -38,7 +39,7 @@ import net.starlark.java.eval.StarlarkSemantics; /** Handles writing and reading of repo marker files. */ -class DigestWriter { +public class DigestWriter { // The marker file version is inject in the rule key digest so the rule key is always different // when we decide to update the format. @@ -91,53 +92,53 @@ void writeMarkerFile(List recordedInputValues) } } - sealed interface RepoDirectoryState { - record UpToDate() implements RepoDirectoryState {} - - record OutOfDate(String reason) implements RepoDirectoryState {} - } - - RepoDirectoryState areRepositoryAndMarkerFileConsistent(Environment env) + Optional areRepositoryAndMarkerFileConsistent(Environment env) throws InterruptedException, RepositoryFunctionException { return areRepositoryAndMarkerFileConsistent(env, markerPath); } /** - * Checks if the state of the repository in the file system is consistent with the rule in the - * WORKSPACE file. - * - *

Returns null if a Skyframe status is needed. + * Checks if the state of the repo in the filesystem is consistent with its current definition. + * Returns {@link Optional#empty()} if they are consistent; otherwise, returns a description of + * why they are not. * - *

We check the repository root for existence here, but we can't depend on the FileValue, - * because it's possible that we eventually create that directory in which case the FileValue and - * the state of the file system would be inconsistent. + *

This method treats a missing Skyframe dependency as if the repo is not up to date. The + * caller is responsible for checking {@code env.valuesMissing()}. */ - @Nullable - RepoDirectoryState areRepositoryAndMarkerFileConsistent(Environment env, Path markerPath) + Optional areRepositoryAndMarkerFileConsistent(Environment env, Path markerPath) throws RepositoryFunctionException, InterruptedException { if (!markerPath.exists()) { - return new RepoDirectoryState.OutOfDate("repo hasn't been fetched yet"); + return Optional.of("repo hasn't been fetched yet"); } try { String content = FileSystemUtils.readContent(markerPath, ISO_8859_1); - var recordedInputValues = + Optional> recordedInputValues = readMarkerFile(content, Preconditions.checkNotNull(predeclaredInputHash)); - Optional outdatedReason = - RepoRecordedInput.isAnyValueOutdated(env, directories, recordedInputValues); - if (env.valuesMissing()) { - return null; + if (recordedInputValues.isEmpty()) { + return Optional.of("Bazel version, flags, repo rule definition or attributes changed"); } - if (outdatedReason.isPresent()) { - return new RepoDirectoryState.OutOfDate(outdatedReason.get()); + // Check inputs in batches to prevent Skyframe cycles caused by outdated dependencies. + for (ImmutableList batch : + RepoRecordedInput.WithValue.splitIntoBatches(recordedInputValues.get())) { + Optional outdatedReason = + RepoRecordedInput.isAnyValueOutdated(env, directories, batch); + if (outdatedReason.isPresent()) { + return outdatedReason; + } } - return new RepoDirectoryState.UpToDate(); + return Optional.empty(); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } } - private static ImmutableList readMarkerFile( + /** + * Returns a list of recorded inputs with their values parsed from the given marker file if the + * predeclared input hash matches, or {@code Optional.empty()} if the hash doesn't match or any + * error occurs during parsing. + */ + public static Optional> readMarkerFile( String content, String predeclaredInputHash) { Iterable lines = Splitter.on('\n').split(content); @@ -151,26 +152,22 @@ private static ImmutableList readMarkerFile( if (!line.equals(predeclaredInputHash)) { // Break early, need to reload anyway. This also detects marker file version changes // so that unknown formats are not parsed. - return ImmutableList.of( - new RepoRecordedInput.WithValue( - new NeverUpToDateRepoRecordedInput( - "Bazel version, flags, repo rule definition or attributes changed"), - "")); + return Optional.empty(); } firstLineVerified = true; } else { var inputAndValue = RepoRecordedInput.WithValue.parse(line); if (inputAndValue.isEmpty()) { // On parse failure, just forget everything else and mark the whole input out of date. 
- return PARSE_FAILURE; + return Optional.empty(); } recordedInputValues.add(inputAndValue.get()); } } if (!firstLineVerified) { - return PARSE_FAILURE; + return Optional.empty(); } - return recordedInputValues.build(); + return Optional.of(recordedInputValues.build()); } @Nullable diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryFetchFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryFetchFunction.java index fb40b771c0d401..b6e0b22e570405 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryFetchFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryFetchFunction.java @@ -49,6 +49,7 @@ import com.google.devtools.build.lib.rules.repository.RepoRecordedInput; import com.google.devtools.build.lib.rules.repository.RepositoryDirectoryValue; import com.google.devtools.build.lib.rules.repository.RepositoryDirectoryValue.Failure; +import com.google.devtools.build.lib.rules.repository.RepositoryDirectoryValue.Success; import com.google.devtools.build.lib.runtime.ProcessWrapper; import com.google.devtools.build.lib.runtime.RemoteRepoContentsCache; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; @@ -134,21 +135,6 @@ public void setRemoteRepoContentsCache(RemoteRepoContentsCache remoteRepoContent this.remoteRepoContentsCache = remoteRepoContentsCache; } - /** - * The result of the {@link #fetch} method. - * - * @param recordedInputValues Any recorded inputs (and their values) encountered during the fetch - * of the repo. Changes to these inputs will result in the repo being refetched in the future. - * @param reproducible Whether the fetched repo contents are reproducible, hence cacheable. - */ - private record FetchResult( - ImmutableList recordedInputValues, - Reproducibility reproducible) {} - - private static class State extends WorkerSkyKeyComputeState { - @Nullable FetchResult result; - } - @Nullable @Override public SkyValue compute(SkyKey skyKey, Environment env) @@ -168,206 +154,232 @@ public SkyValue compute(SkyKey skyKey, Environment env) try (SilentCloseable c = Profiler.instance().profile(ProfilerTask.REPOSITORY_FETCH, repositoryName.toString())) { - Path repoRoot = - RepositoryUtils.getExternalRepositoryDirectory(directories) - .getRelative(repositoryName.getName()); - - RepoDefinition repoDefinition; - switch ((RepoDefinitionValue) env.getValue(RepoDefinitionValue.key(repositoryName))) { - case null -> { - return null; - } - case RepoDefinitionValue.NotFound() -> { - return new Failure(String.format("Repository '%s' is not defined", repositoryName)); - } - case RepoDefinitionValue.RepoOverride(PathFragment repoPath) -> { - return setupOverride(repoPath, env, repoRoot, repositoryName); - } - case RepoDefinitionValue.Found(RepoDefinition rd) -> { - repoDefinition = rd; + // See below (the `catch CancellationException` clause) for why there's a `while` loop here. 
+ while (true) { + var state = env.getState(WorkerSkyKeyComputeState::new); + try { + return state.startOrContinueWork( + env, + "starlark-repository-" + repositoryName.getName(), + (workerEnv) -> { + return computeInternal(workerEnv, repositoryName, starlarkSemantics); + }); + } catch (ExecutionException e) { + Throwables.throwIfInstanceOf(e.getCause(), RepositoryFunctionException.class); + Throwables.throwIfInstanceOf(e.getCause(), InterruptedException.class); + Throwables.throwIfUnchecked(e.getCause()); + throw new IllegalStateException( + "unexpected exception type: " + e.getCause().getClass(), e.getCause()); + } catch (CancellationException e) { + // This can only happen if the state object was invalidated due to memory pressure, in + // which case we can simply reattempt the fetch. Show a message and continue into the next + // `while` iteration. + env.getListener() + .post( + RepositoryFetchProgress.ongoing( + repositoryName, "fetch interrupted due to memory pressure; restarting.")); } } + } + } - var digestWriter = - DigestWriter.create(env, directories, repositoryName, repoDefinition, starlarkSemantics); - if (digestWriter == null) { + /** + * The actual SkyFunction logic, run in a worker thread. Note that, although the worker thread + * never sees Skyframe restarts, {@code env.valuesMissing()} can still be true due to deps in + * error. So this function still needs to return {@code null} when appropriate. See Javadoc of + * {@link WorkerSkyFunctionEnvironment} for more information. + */ + @Nullable + private RepositoryDirectoryValue computeInternal( + Environment env, RepositoryName repositoryName, StarlarkSemantics starlarkSemantics) + throws InterruptedException, RepositoryFunctionException { + Path repoRoot = + RepositoryUtils.getExternalRepositoryDirectory(directories) + .getRelative(repositoryName.getName()); + + RepoDefinition repoDefinition; + switch ((RepoDefinitionValue) env.getValue(RepoDefinitionValue.key(repositoryName))) { + case null -> { return null; } + case RepoDefinitionValue.NotFound() -> { + return new Failure(String.format("Repository '%s' is not defined", repositoryName)); + } + case RepoDefinitionValue.RepoOverride(PathFragment repoPath) -> { + return setupOverride(repoPath, env, repoRoot, repositoryName); + } + case RepoDefinitionValue.Found(RepoDefinition rd) -> { + repoDefinition = rd; + } + } - boolean excludeRepoFromVendoring = true; - if (RepositoryDirectoryValue.VENDOR_DIRECTORY.get(env).isPresent()) { // If vendor mode is on - VendorFileValue vendorFile = (VendorFileValue) env.getValue(VendorFileValue.KEY); + var digestWriter = + DigestWriter.create(env, directories, repositoryName, repoDefinition, starlarkSemantics); + if (digestWriter == null) { + return null; + } + + boolean excludeRepoFromVendoring = true; + if (RepositoryDirectoryValue.VENDOR_DIRECTORY.get(env).isPresent()) { // If vendor mode is on + VendorFileValue vendorFile = (VendorFileValue) env.getValue(VendorFileValue.KEY); + if (env.valuesMissing()) { + return null; + } + boolean excludeRepoByDefault = isRepoExcludedFromVendoringByDefault(repoDefinition); + if (!excludeRepoByDefault && !vendorFile.ignoredRepos().contains(repositoryName)) { + RepositoryDirectoryValue repositoryDirectoryValue = + tryGettingValueUsingVendoredRepo( + env, repoRoot, repositoryName, digestWriter, vendorFile); if (env.valuesMissing()) { return null; } - boolean excludeRepoByDefault = isRepoExcludedFromVendoringByDefault(repoDefinition); - if (!excludeRepoByDefault && 
!vendorFile.ignoredRepos().contains(repositoryName)) { - RepositoryDirectoryValue repositoryDirectoryValue = - tryGettingValueUsingVendoredRepo( - env, repoRoot, repositoryName, digestWriter, vendorFile); - if (env.valuesMissing()) { - return null; - } - if (repositoryDirectoryValue != null) { - return repositoryDirectoryValue; - } + if (repositoryDirectoryValue != null) { + return repositoryDirectoryValue; } - excludeRepoFromVendoring = - excludeRepoByDefault - || vendorFile.ignoredRepos().contains(repositoryName) - || vendorFile.pinnedRepos().contains(repositoryName); } + excludeRepoFromVendoring = + excludeRepoByDefault + || vendorFile.ignoredRepos().contains(repositoryName) + || vendorFile.pinnedRepos().contains(repositoryName); + } - if (shouldUseCachedRepoContents(env, repoDefinition)) { - // Make sure marker file is up-to-date; correctly describes the current repository state - var repoState = digestWriter.areRepositoryAndMarkerFileConsistent(env); - if (repoState == null) { - return null; - } - if (repoState instanceof DigestWriter.RepoDirectoryState.UpToDate) { - return new RepositoryDirectoryValue.Success( - Root.fromPath(repoRoot), excludeRepoFromVendoring); - } + if (shouldUseCachedRepoContents(env, repoDefinition)) { + // Make sure marker file is up-to-date; correctly describes the current repository state + if (digestWriter.areRepositoryAndMarkerFileConsistent(env).isEmpty()) { + return new Success(Root.fromPath(repoRoot), excludeRepoFromVendoring); + } + if (env.valuesMissing()) { + return null; + } - // Then check if the global repo contents cache has this. - if (repoContentsCache.isEnabled()) { - for (CandidateRepo candidate : - repoContentsCache.getCandidateRepos(digestWriter.predeclaredInputHash)) { - repoState = - digestWriter.areRepositoryAndMarkerFileConsistent( - env, candidate.recordedInputsFile()); - if (repoState == null) { + // Then check if the local repo contents cache has this. 
+ if (repoContentsCache.isEnabled()) { + ImmutableList candidateRepos = + repoContentsCache.getCandidateRepos(digestWriter.predeclaredInputHash); + for (CandidateRepo candidate : candidateRepos) { + if (digestWriter + .areRepositoryAndMarkerFileConsistent(env, candidate.recordedInputsFile()) + .isEmpty()) { + if (setupOverride(candidate.contentsDir().asFragment(), env, repoRoot, repositoryName) + == null) { return null; } - if (repoState instanceof DigestWriter.RepoDirectoryState.UpToDate) { - if (setupOverride(candidate.contentsDir().asFragment(), env, repoRoot, repositoryName) - == null) { - return null; - } - candidate.touch(); - return new RepositoryDirectoryValue.Success( - Root.fromPath(repoRoot), excludeRepoFromVendoring); - } + candidate.touch(); + return new Success(Root.fromPath(repoRoot), excludeRepoFromVendoring); + } + if (env.valuesMissing()) { + return null; } } + } - if (remoteRepoContentsCache != null) { - try { - if (remoteRepoContentsCache.lookupCache( - repositoryName, repoRoot, digestWriter.predeclaredInputHash, env.getListener())) { - return new RepositoryDirectoryValue.Success( - Root.fromPath(repoRoot), excludeRepoFromVendoring); - } - } catch (IOException e) { - env.getListener() - .handle( - Event.warn( - "Remote repo contents cache lookup failed for %s: %s" - .formatted(repositoryName, e.getMessage()))); + if (remoteRepoContentsCache != null) { + try { + if (remoteRepoContentsCache.lookupCache( + repositoryName, repoRoot, digestWriter.predeclaredInputHash, env.getListener())) { + return new Success(Root.fromPath(repoRoot), excludeRepoFromVendoring); } + } catch (IOException e) { + env.getListener() + .handle( + Event.warn( + "Remote repo contents cache lookup failed for %s: %s" + .formatted(repositoryName, e.getMessage()))); } } + } - /* At this point: This is a force fetch, a local repository, OR The repository cache is old or - didn't exist. In any of those cases, we initiate the fetching process UNLESS this is offline - mode (fetching is disabled) */ - if (!RepositoryDirectoryValue.FETCH_DISABLED.get(env)) { - // Fetching a repository is a long-running operation that can easily be interrupted. If it - // is and the marker file exists on disk, a new call of this method may treat this - // repository as valid even though it is in an inconsistent state. Clear the marker file and - // only recreate it after fetching is done to prevent this scenario. - DigestWriter.clearMarkerFile(directories, repositoryName); - FetchResult result = fetchAndHandleEvents(repoDefinition, repoRoot, env, skyKey); - if (result == null) { - return null; - } - digestWriter.writeMarkerFile(result.recordedInputValues()); - if (result.reproducible() == Reproducibility.YES && !repoDefinition.repoRule().local()) { - if (repoContentsCache.isEnabled()) { - // This repo is eligible for the repo contents cache. - Path cachedRepoDir; - try { - cachedRepoDir = - repoContentsCache.moveToCache( - repoRoot, digestWriter.markerPath, digestWriter.predeclaredInputHash); - } catch (IOException e) { - throw new RepositoryFunctionException( - new IOException( - "error moving repo %s into the repo contents cache: %s" - .formatted(repositoryName, e.getMessage()), - e), - Transience.TRANSIENT); - } - // Don't forget to register a FileStateValue on the cache repo dir, so that we know to - // refetch if the cache entry gets GC'd from under us or the entire cache is deleted. 
- // - // Note that registering a FileValue dependency instead would lead to subtly incorrect - // behavior when the repo contents cache directory is deleted between builds: - // 1. We register a FileValue dependency on the cache entry. - // 2. Before the next build, the repo contents cache directory is deleted. - // 3. On the next build, FileSystemValueChecker invalidates the underlying - // FileStateValue, which in turn results in the FileValue and the current - // RepositoryDirectoryValue being marked as dirty. - // 4. Skyframe visits the dirty nodes bottom up to check for actual changes. In - // particular, it reevaluates FileFunction before RepositoryFetchFunction and thus - // the FileValue of the repo contents cache directory is locked in as non-existent - // before RepositoryFetchFunction can recreate it. - // 5. Any other SkyFunction that depends on the FileValue of a file in the repo (e.g. - // PackageFunction) will report that file as missing since the resolved path has a - // parent that is non-existent. - // By using FileStateValue directly, which benefits from special logic built into - // DirtinessCheckerUtils that recognizes the repo contents cache directories with - // non-UUID names and prevents locking in their value during dirtiness checking, we - // avoid 4. and thus the incorrect missing file errors in 5. - if (env.getValue( - FileStateValue.key( - RootedPath.toRootedPath( - Root.absoluteRoot(cachedRepoDir.getFileSystem()), cachedRepoDir))) - == null) { - return null; - } - // This is never reached: the repo dir in the repo contents cache is created under a new - // UUID-named directory and thus the FileStateValue above will always be missing from - // Skyframe. After the restart, the repo will either encounter the just created cache - // entry as a candidate or will create a new one if it got GC'd in the meantime. - throw new IllegalStateException( - "FileStateValue unexpectedly present for " + cachedRepoDir); + /* At this point: This is a force fetch, a local repository, OR The repository cache is old or + didn't exist. In any of those cases, we initiate the fetching process UNLESS this is offline + mode (fetching is disabled) */ + if (!RepositoryDirectoryValue.FETCH_DISABLED.get(env)) { + // Fetching a repository is a long-running operation that can easily be interrupted. If it + // is and the marker file exists on disk, a new call of this method may treat this + // repository as valid even though it is in an inconsistent state. Clear the marker file and + // only recreate it after fetching is done to prevent this scenario. + DigestWriter.clearMarkerFile(directories, repositoryName); + FetchResult result = fetchAndHandleEvents(repoDefinition, repoRoot, env, repositoryName); + if (result == null) { + return null; + } + digestWriter.writeMarkerFile(result.recordedInputValues()); + if (result.reproducible() == Reproducibility.YES && !repoDefinition.repoRule().local()) { + // This repo is eligible for the local and remote repo contents cache. 
+ if (repoContentsCache.isEnabled()) { + CandidateRepo newCacheEntry; + try { + newCacheEntry = + repoContentsCache.moveToCache( + repoRoot, digestWriter.markerPath, digestWriter.predeclaredInputHash); + } catch (IOException e) { + throw new RepositoryFunctionException( + new IOException( + "error moving repo %s into the repo contents cache: %s" + .formatted(repositoryName, e.getMessage()), + e), + Transience.TRANSIENT); } - if (remoteRepoContentsCache != null) { - remoteRepoContentsCache.addToCache( - repositoryName, - repoRoot, - digestWriter.markerPath, - digestWriter.predeclaredInputHash, - env.getListener()); + Path cachedRepoDir = newCacheEntry.contentsDir(); + RootedPath cachedRepoDirRootedPath = + RootedPath.toRootedPath( + Root.absoluteRoot(cachedRepoDir.getFileSystem()), cachedRepoDir); + // Don't forget to register a FileStateValue on the cache repo dir, so that we know to + // refetch if the cache entry gets GC'd from under us or the entire cache is deleted. + // + // Note that registering a FileValue dependency instead would lead to subtly incorrect + // behavior when the repo contents cache directory is deleted between builds: + // 1. We register a FileValue dependency on the cache entry. + // 2. Before the next build, the repo contents cache directory is deleted. + // 3. On the next build, FileSystemValueChecker invalidates the underlying + // FileStateValue, which in turn results in the FileValue and the current + // RepositoryDirectoryValue being marked as dirty. + // 4. Skyframe visits the dirty nodes bottom up to check for actual changes. In + // particular, it reevaluates FileFunction before RepositoryFetchFunction and thus + // the FileValue of the repo contents cache directory is locked in as non-existent + // before RepositoryFetchFunction can recreate it. + // 5. Any other SkyFunction that depends on the FileValue of a file in the repo (e.g. + // PackageFunction) will report that file as missing since the resolved path has a + // parent that is non-existent. + // By using FileStateValue directly, which benefits from special logic built into + // DirtinessCheckerUtils that recognizes the repo contents cache directories with + // non-UUID names and prevents locking in their value during dirtiness checking, we + // avoid 4. and thus the incorrect missing file errors in 5. + if (env.getValue(FileStateValue.key(cachedRepoDirRootedPath)) == null) { + return null; } } - return new RepositoryDirectoryValue.Success( - Root.fromPath(repoRoot), excludeRepoFromVendoring); - } - - if (!repoRoot.exists()) { - // The repository isn't on the file system, there is nothing we can do. - throw new RepositoryFunctionException( - new IOException( - "to fix, run\n\tbazel fetch //...\nExternal repository " - + repositoryName - + " not found and fetching repositories is disabled."), - Transience.TRANSIENT); + if (remoteRepoContentsCache != null) { + remoteRepoContentsCache.addToCache( + repositoryName, + repoRoot, + digestWriter.markerPath, + digestWriter.predeclaredInputHash, + env.getListener()); + } } + return new Success(Root.fromPath(repoRoot), excludeRepoFromVendoring); + } - // Try to build with whatever is on the file system and emit a warning. - env.getListener() - .handle( - Event.warn( - String.format( - "External repository '%s' is not up-to-date and fetching is disabled. 
To" - + " update, run the build without the '--nofetch' command line option.", - repositoryName))); - - return new RepositoryDirectoryValue.Success( - Root.fromPath(repoRoot), excludeRepoFromVendoring); + if (!repoRoot.exists()) { + // The repository isn't on the file system, there is nothing we can do. + throw new RepositoryFunctionException( + new IOException( + "to fix, run\n\tbazel fetch //...\nExternal repository " + + repositoryName + + " not found and fetching repositories is disabled."), + Transience.TRANSIENT); } + + // Try to build with whatever is on the file system and emit a warning. + env.getListener() + .handle( + Event.warn( + String.format( + "External repository '%s' is not up-to-date and fetching is disabled. To" + + " update, run the build without the '--nofetch' command line option.", + repositoryName))); + + return new Success(Root.fromPath(repoRoot), excludeRepoFromVendoring); } @Nullable @@ -394,17 +406,17 @@ private RepositoryDirectoryValue tryGettingValueUsingVendoredRepo( return setupOverride(vendorRepoPath.asFragment(), env, repoRoot, repositoryName); } - DigestWriter.RepoDirectoryState vendoredRepoState = + Optional vendoredRepoOutOfDateReason = digestWriter.areRepositoryAndMarkerFileConsistent(env, vendorMarker); - if (vendoredRepoState == null) { + if (env.valuesMissing()) { return null; } // If our repo is up-to-date, or this is an offline build (--nofetch), then the vendored repo // is used. - if (vendoredRepoState instanceof DigestWriter.RepoDirectoryState.UpToDate + if (vendoredRepoOutOfDateReason.isEmpty() || (!RepositoryDirectoryValue.IS_VENDOR_COMMAND.get(env) && RepositoryDirectoryValue.FETCH_DISABLED.get(env))) { - if (vendoredRepoState instanceof DigestWriter.RepoDirectoryState.OutOfDate(String reason)) { + if (vendoredRepoOutOfDateReason.isPresent()) { env.getListener() .handle( Event.warn( @@ -412,7 +424,7 @@ private RepositoryDirectoryValue tryGettingValueUsingVendoredRepo( "Vendored repository '%s' is out-of-date (%s) and fetching is disabled." + " Run build without the '--nofetch' option or run" + " the bazel vendor command to update it", - repositoryName.getName(), reason))); + repositoryName.getName(), vendoredRepoOutOfDateReason.get()))); } return setupOverride(vendorRepoPath.asFragment(), env, repoRoot, repositoryName); } else if (!RepositoryDirectoryValue.IS_VENDOR_COMMAND @@ -426,8 +438,7 @@ private RepositoryDirectoryValue tryGettingValueUsingVendoredRepo( "Vendored repository '%s' is out-of-date (%s). The up-to-date version will" + " be fetched into the external cache and used. To update the repo" + " in the vendor directory, run the bazel vendor command", - repositoryName.getName(), - ((DigestWriter.RepoDirectoryState.OutOfDate) vendoredRepoState).reason()))); + repositoryName.getName(), vendoredRepoOutOfDateReason.get()))); } } else if (vendorFile.pinnedRepos().contains(repositoryName)) { throw new RepositoryFunctionException( @@ -455,14 +466,6 @@ private RepositoryDirectoryValue tryGettingValueUsingVendoredRepo( */ private boolean shouldUseCachedRepoContents(Environment env, RepoDefinition repoDefinition) throws InterruptedException { - if (env.getState(State::new).result != null) { - // If this SkyFunction has finished fetching once, then we should always use the cached - // result. This means that we _very_ recently (as in, in the same command invocation) fetched - // this repo (possibly with --force or --configure), and are only here again due to a Skyframe - // restart very late into RepositoryDelegatorFunction. 
- return true; - } - /* If fetching is enabled & this is a local repo: do NOT use cache! * Local repository are generally fast and do not rely on non-local data, making caching them * across server instances impractical. */ @@ -491,14 +494,13 @@ private boolean isRepoExcludedFromVendoringByDefault(RepoDefinition repoDefiniti @Nullable private FetchResult fetchAndHandleEvents( - RepoDefinition repoDefinition, Path repoRoot, Environment env, SkyKey skyKey) + RepoDefinition repoDefinition, Path repoRoot, Environment env, RepositoryName repoName) throws InterruptedException, RepositoryFunctionException { - RepositoryName repoName = (RepositoryName) skyKey.argument(); env.getListener().post(RepositoryFetchProgress.ongoing(repoName, "starting")); FetchResult result; try { - result = fetch(repoDefinition, repoRoot, env, skyKey); + result = fetch(repoDefinition, repoRoot, env, repoName); } catch (RepositoryFunctionException e) { // Upon an exceptional exit, the fetching of that repository is over as well. env.getListener().post(RepositoryFetchProgress.finished(repoName)); @@ -533,51 +535,22 @@ private static void setupRepoRoot(Path repoRoot) throws RepositoryFunctionExcept } } - @Nullable - private FetchResult fetch( - RepoDefinition repoDefinition, Path outputDirectory, Environment env, SkyKey key) - throws RepositoryFunctionException, InterruptedException { - // See below (the `catch CancellationException` clause) for why there's a `while` loop here. - while (true) { - var state = env.getState(State::new); - if (state.result != null) { - // Escape early if we've already finished fetching once. This can happen if - // a Skyframe restart is triggered _after_ fetch() is finished. - return state.result; - } - try { - state.result = - state.startOrContinueWork( - env, - "starlark-repository-" + repoDefinition.name(), - (workerEnv) -> { - setupRepoRoot(outputDirectory); - return fetchInternal(repoDefinition, outputDirectory, workerEnv, key); - }); - return state.result; - } catch (ExecutionException e) { - Throwables.throwIfInstanceOf(e.getCause(), RepositoryFunctionException.class); - Throwables.throwIfInstanceOf(e.getCause(), InterruptedException.class); - Throwables.throwIfUnchecked(e.getCause()); - throw new IllegalStateException( - "unexpected exception type: " + e.getCause().getClass(), e.getCause()); - } catch (CancellationException e) { - // This can only happen if the state object was invalidated due to memory pressure, in - // which case we can simply reattempt the fetch. Show a message and continue into the next - // `while` iteration. - env.getListener() - .post( - RepositoryFetchProgress.ongoing( - RepositoryName.createUnvalidated(repoDefinition.name()), - "fetch interrupted due to memory pressure; restarting.")); - } - } - } + /** + * The result of the {@link #fetch} method. + * + * @param recordedInputValues Any recorded inputs (and their values) encountered during the fetch + * of the repo. Changes to these inputs will result in the repo being refetched in the future. + * @param reproducible Whether the fetched repo contents are reproducible, hence cacheable. 
+ */ + private record FetchResult( + ImmutableList recordedInputValues, + Reproducibility reproducible) {} @Nullable - private FetchResult fetchInternal( - RepoDefinition repoDefinition, Path outputDirectory, Environment env, SkyKey key) + private FetchResult fetch( + RepoDefinition repoDefinition, Path outputDirectory, Environment env, RepositoryName repoName) throws RepositoryFunctionException, InterruptedException { + setupRepoRoot(outputDirectory); String defInfo = RepositoryResolvedEvent.getRuleDefinitionInformation(repoDefinition); env.getListener() @@ -641,8 +614,8 @@ private FetchResult fetchInternal( StarlarkThread.create( mu, starlarkSemantics, - "repository " + ((RepositoryName) key.argument()).getDisplayForm(mainRepoMapping), - SymbolGenerator.create(key)); + "repository " + repoName.getDisplayForm(mainRepoMapping), + SymbolGenerator.create("fetching " + repoName)); thread.setPrintHandler(Event.makeDebugPrintHandler(env.getListener())); starlarkRepositoryContext.storeRepoMappingRecorderInThread(thread); @@ -823,7 +796,6 @@ public static RepositoryDirectoryValue symlinkRepoRoot( new IOException("No MODULE.bazel, REPO.bazel, or WORKSPACE file found in " + destination), Transience.TRANSIENT); } - return new RepositoryDirectoryValue.Success( - Root.fromPath(source), /* excludeFromVendoring= */ true); + return new Success(Root.fromPath(source), /* excludeFromVendoring= */ true); } } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java index 2d825374561797..dd0e71ca6a8bf5 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/cache/LocalRepoContentsCache.java @@ -152,9 +152,9 @@ private Path ensureTrashDir() throws IOException { /** * Moves a freshly fetched repo into the contents cache. * - * @return the repo dir in the contents cache. + * @return the new cache entry */ - public Path moveToCache( + public CandidateRepo moveToCache( Path fetchedRepoDir, Path fetchedRepoMarkerFile, String predeclaredInputHash) throws IOException { Preconditions.checkState(path != null); @@ -181,7 +181,7 @@ public Path moveToCache( // Set up a symlink at the original fetched repo dir path. 
fetchedRepoDir.deleteTree(); FileSystemUtils.ensureSymbolicLink(fetchedRepoDir, cacheRepoDir); - return cacheRepoDir; + return new CandidateRepo(cacheRecordedInputsFile, cacheRepoDir); } public void acquireSharedLock() throws IOException, InterruptedException { diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java index 83c9a95d59bb25..bf09399cf87a81 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java @@ -23,13 +23,10 @@ import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSortedMap; -import com.google.common.collect.Iterables; import com.google.common.collect.Maps; import com.google.common.flogger.GoogleLogger; import com.google.common.util.concurrent.Futures; -import com.google.devtools.build.lib.actions.FileValue; import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.bazel.debug.WorkspaceRuleEvent; import com.google.devtools.build.lib.bazel.repository.RepositoryFunctionException; @@ -56,11 +53,11 @@ import com.google.devtools.build.lib.profiler.SilentCloseable; import com.google.devtools.build.lib.remote.RemoteExternalOverlayFileSystem; import com.google.devtools.build.lib.rules.repository.RepoRecordedInput; +import com.google.devtools.build.lib.rules.repository.RepoRecordedInput.MaybeValue; import com.google.devtools.build.lib.rules.repository.RepoRecordedInput.RepoCacheFriendlyPath; import com.google.devtools.build.lib.runtime.ProcessWrapper; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor.ExecutionResult; -import com.google.devtools.build.lib.skyframe.RepoEnvironmentFunction; import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.util.OsUtils; import com.google.devtools.build.lib.util.io.OutErr; @@ -71,6 +68,7 @@ import com.google.devtools.build.lib.vfs.Symlinks; import com.google.devtools.build.skyframe.SkyFunction.Environment; import com.google.devtools.build.skyframe.SkyFunctionException.Transience; +import com.google.errorprone.annotations.CanIgnoreReturnValue; import com.google.errorprone.annotations.ForOverride; import java.io.File; import java.io.IOException; @@ -213,7 +211,7 @@ protected StarlarkBaseExternalContext( // apparent repo names to canonical repo names. See #20721 for why this is necessary. this.repoMappingRecorder = (fromRepo, apparentRepoName, canonicalRepoName) -> - recordInput( + recordInputWithValue( new RepoRecordedInput.RecordedRepoMapping(fromRepo, apparentRepoName), canonicalRepoName.isVisible() ? 
canonicalRepoName.getName() : null); } @@ -252,7 +250,7 @@ public void storeRepoMappingRecorderInThread(StarlarkThread thread) { repoMappingRecorder.storeInThread(thread); } - protected void recordInput(RepoRecordedInput input, @Nullable String value) { + protected void recordInputWithValue(RepoRecordedInput input, @Nullable String value) { if (recordedInputs.containsKey(input) && !Objects.equals(recordedInputs.get(input), value)) { throw new IllegalStateException( "Conflicting values recorded for input %s: '%s' vs. '%s'" @@ -261,6 +259,23 @@ protected void recordInput(RepoRecordedInput input, @Nullable String value) { recordedInputs.put(input, value); } + @CanIgnoreReturnValue + @Nullable + protected String getValueAndRecordInput(RepoRecordedInput input) + throws InterruptedException, NeedsSkyframeRestartException, IOException { + var maybeValue = input.getValue(env, directories); + if (env.valuesMissing()) { + throw new NeedsSkyframeRestartException(); + } + return switch (maybeValue) { + case MaybeValue.Invalid(String reason) -> throw new IOException(reason); + case MaybeValue.Valid(String value) -> { + recordInputWithValue(input, value); + yield value; + } + }; + } + private boolean cancelPendingAsyncTasks() { boolean hadPendingItems = false; for (AsyncTask task : asyncTasks) { @@ -1476,13 +1491,12 @@ private static T nullIfNone(Object object, Class type) { @Nullable public String getEnvironmentValue(String name, Object defaultValue) throws InterruptedException, NeedsSkyframeRestartException { - var nameAndValue = RepoEnvironmentFunction.getEnvironmentView(env, ImmutableSet.of(name)); - if (nameAndValue == null) { - throw new NeedsSkyframeRestartException(); + try { + String value = getValueAndRecordInput(new RepoRecordedInput.EnvVar(name)); + return value != null ? 
value : nullIfNone(defaultValue, String.class); + } catch (IOException e) { + throw new IllegalStateException("getting EnvVar never throws IOException", e); } - var entry = Iterables.getOnlyElement(RepoRecordedInput.EnvVar.wrap(nameAndValue).entrySet()); - recordInput(entry.getKey(), entry.getValue().orElse(null)); - return entry.getValue().orElseGet(() -> nullIfNone(defaultValue, String.class)); } @StarlarkMethod( @@ -1646,17 +1660,8 @@ protected void maybeWatch(StarlarkPath starlarkPath, ShouldWatch shouldWatch) if (repoCacheFriendlyPath == null) { return; } - var recordedInput = new RepoRecordedInput.File(repoCacheFriendlyPath); - var skyKey = recordedInput.getSkyKey(directories); try { - FileValue fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class); - if (fileValue == null) { - throw new NeedsSkyframeRestartException(); - } - - recordInput( - recordedInput, - RepoRecordedInput.File.fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue)); + getValueAndRecordInput(new RepoRecordedInput.File(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } @@ -1668,12 +1673,8 @@ protected void maybeWatchDirents(Path path, ShouldWatch shouldWatch) if (repoCacheFriendlyPath == null) { return; } - var recordedInput = new RepoRecordedInput.Dirents(repoCacheFriendlyPath); - if (env.getValue(recordedInput.getSkyKey(directories)) == null) { - throw new NeedsSkyframeRestartException(); - } try { - recordInput(recordedInput, RepoRecordedInput.Dirents.getDirentsMarkerValue(path)); + getValueAndRecordInput(new RepoRecordedInput.Dirents(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java index 9798363771a8c2..1c671ec3d244c6 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java @@ -35,7 +35,6 @@ import com.google.devtools.build.lib.rules.repository.RepoRecordedInput.RepoCacheFriendlyPath; import com.google.devtools.build.lib.runtime.ProcessWrapper; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; -import com.google.devtools.build.lib.skyframe.DirectoryTreeDigestValue; import com.google.devtools.build.lib.util.StringUtilities; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; @@ -566,15 +565,7 @@ public void watchTree(Object path) return; } try { - var recordedInput = new RepoRecordedInput.DirTree(repoCacheFriendlyPath); - DirectoryTreeDigestValue digestValue = - (DirectoryTreeDigestValue) - env.getValueOrThrow(recordedInput.getSkyKey(directories), IOException.class); - if (digestValue == null) { - throw new NeedsSkyframeRestartException(); - } - - recordInput(recordedInput, digestValue.hexDigest()); + getValueAndRecordInput(new RepoRecordedInput.DirTree(repoCacheFriendlyPath)); } catch (IOException e) { throw new RepositoryFunctionException(e, Transience.TRANSIENT); } diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java index 71ac30b344d2ae..91da25785ac068 100644 --- 
a/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInput.java @@ -23,6 +23,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; +import com.google.common.collect.Collections2; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.io.BaseEncoding; import com.google.devtools.build.lib.actions.FileValue; @@ -31,7 +33,6 @@ import com.google.devtools.build.lib.cmdline.LabelSyntaxException; import com.google.devtools.build.lib.cmdline.LabelValidator; import com.google.devtools.build.lib.cmdline.RepositoryName; -import com.google.devtools.build.lib.skyframe.DirectoryListingKey; import com.google.devtools.build.lib.skyframe.DirectoryListingValue; import com.google.devtools.build.lib.skyframe.DirectoryTreeDigestValue; import com.google.devtools.build.lib.skyframe.EnvironmentVariableValue; @@ -40,13 +41,15 @@ import com.google.devtools.build.lib.skyframe.RepositoryMappingValue; import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec; import com.google.devtools.build.lib.util.Fingerprint; -import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.lib.vfs.Dirent; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.vfs.Root; import com.google.devtools.build.lib.vfs.RootedPath; import com.google.devtools.build.skyframe.SkyFunction.Environment; import com.google.devtools.build.skyframe.SkyKey; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; @@ -124,60 +127,45 @@ public static Optional parse(String s) { return Optional.empty(); } + /** + * Splits the given list of recorded input values into batches such that within each batch, all + * recorded inputs's {@link SkyKey}s can be requested together. + */ + public static ImmutableList> splitIntoBatches( + List recordedInputValues) { + var batches = ImmutableList.>builder(); + var currentBatch = new ArrayList(); + for (var recordedInputValue : recordedInputValues) { + if (!recordedInputValue.input().canBeRequestedUnconditionally() + && !currentBatch.isEmpty()) { + batches.add(ImmutableList.copyOf(currentBatch)); + currentBatch.clear(); + } + currentBatch.add(recordedInputValue); + } + if (!currentBatch.isEmpty()) { + batches.add(ImmutableList.copyOf(currentBatch)); + } + return batches.build(); + } + /** Converts this {@link WithValue} to a string in a format compatible with {@link #parse}. */ @Override public String toString() { - return escape(input.toString()) + " " + escape(value); - } - - @VisibleForTesting - static String escape(String str) { - return str == null - ? 
"\\0" - : str.replace("\\", "\\\\").replace("\n", "\\n").replace(" ", "\\s"); - } - - @VisibleForTesting - @Nullable - static String unescape(String str) { - if (str.equals("\\0")) { - return null; // \0 == null string - } - StringBuilder result = new StringBuilder(); - boolean escaped = false; - for (int i = 0; i < str.length(); i++) { - char c = str.charAt(i); - if (escaped) { - if (c == 'n') { // n means new line - result.append("\n"); - } else if (c == 's') { // s means space - result.append(" "); - } else { // Any other escaped characters are just un-escaped - result.append(c); - } - escaped = false; - } else if (c == '\\') { - escaped = true; - } else { - result.append(c); - } - } - return result.toString(); + return input.toString() + " " + escape(value); } } /** - * Returns whether all values are still up-to-date for each recorded input. If Skyframe values are - * missing, the return value should be ignored; callers are responsible for checking {@code - * env.valuesMissing()} and triggering a Skyframe restart if needed. + * Returns whether all values are still up-to-date for each recorded input or a human-readable + * reason for why that's not the case. If Skyframe values are missing, the return value should be + * ignored; callers are responsible for checking {@code env.valuesMissing()} and triggering a + * Skyframe restart if needed. */ public static Optional isAnyValueOutdated( Environment env, BlazeDirectories directories, List recordedInputValues) throws InterruptedException { - env.getValuesAndExceptions( - recordedInputValues.stream() - .map(riv -> riv.input().getSkyKey(directories)) - .collect(toImmutableSet())); + prefetch(env, directories, Collections2.transform(recordedInputValues, WithValue::input)); if (env.valuesMissing()) { return UNDECIDED; } @@ -191,40 +179,142 @@ public static Optional isAnyValueOutdated( return Optional.empty(); } + /** + * Requests the information from Skyframe that is required by future calls to {@link + * #isAnyValueOutdated} for the given set of inputs. + */ + public static void prefetch( + Environment env, BlazeDirectories directories, Collection recordedInputs) + throws InterruptedException { + var keys = + recordedInputs.stream().map(rri -> rri.getSkyKey(directories)).collect(toImmutableSet()); + if (env.valuesMissing()) { + return; + } + var results = env.getValuesAndExceptions(keys); + // Per the contract of Environment.getValuesAndExceptions, we need to access the results to + // actually find all missing values. + keys.forEach(results::get); + } + + /** + * Returns a human-readable reason for why the given {@code oldValue} is no longer up-to-date for + * this recorded input, or an empty Optional if it is still up-to-date. + * + *

The method can request Skyframe evaluations, and if any values are missing, this method can + * return any value (doesn't matter what, although {@link #UNDECIDED} is recommended for clarity) + * and will be reinvoked after a Skyframe restart. + */ + private Optional isOutdated( + Environment env, BlazeDirectories directories, @Nullable String oldValue) + throws InterruptedException { + MaybeValue wrappedNewValue = getValue(env, directories); + if (env.valuesMissing()) { + return UNDECIDED; + } + return switch (wrappedNewValue) { + case MaybeValue.Invalid(String reason) -> Optional.of(reason); + case MaybeValue.Valid(String newValue) -> + Objects.equals(oldValue, newValue) + ? Optional.empty() + : Optional.of(describeChange(oldValue, newValue)); + }; + } + @Override public abstract boolean equals(Object obj); @Override public abstract int hashCode(); + @VisibleForTesting + static String escape(String str) { + return str == null ? "\\0" : str.replace("\\", "\\\\").replace("\n", "\\n").replace(" ", "\\s"); + } + + @VisibleForTesting + @Nullable + static String unescape(String str) { + if (str.equals("\\0")) { + return null; // \0 == null string + } + StringBuilder result = new StringBuilder(); + boolean escaped = false; + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (escaped) { + if (c == 'n') { // n means new line + result.append("\n"); + } else if (c == 's') { // s means space + result.append(" "); + } else { // Any other escaped characters are just un-escaped + result.append(c); + } + escaped = false; + } else if (c == '\\') { + escaped = true; + } else { + result.append(c); + } + } + return result.toString(); + } + + /** + * Returns the string representation of this recorded input, in the format suitable for parsing + * back via {@link #parse}. + * + *

The returned string never contains spaces or newlines; those characters are escaped. + */ @Override public final String toString() { - return getParser().getPrefix() + ":" + toStringInternal(); + return getParser().getPrefix() + ":" + escape(toStringInternal()); } + /** + * Represents the possible values returned by {@link #getValue}: either a valid value (which may + * be null), or an invalid value with a reason (e.g. due to I/O failure). + */ + public sealed interface MaybeValue { + MaybeValue VALUES_MISSING = new MaybeValue.Invalid("values missing"); + + record Valid(@Nullable String value) implements MaybeValue {} + + record Invalid(String reason) implements MaybeValue {} + } + + /** + * Returns the current value of this input, which may be null, wrapped in a {@link + * MaybeValue.Valid}, or a {@link MaybeValue.Invalid} if the value is known to be invalid. + * + *

The method can request Skyframe evaluations, and if any values are missing, this method can
+   * return any value and will be reinvoked after a Skyframe restart.
+   */
+  public abstract MaybeValue getValue(Environment env, BlazeDirectories directories)
+      throws InterruptedException;
+
+  /**
+   * Returns a human-readable description of the change from {@code oldValue} to {@code newValue}.
+   */
+  protected abstract String describeChange(String oldValue, String newValue);
+
   /**
    * Returns the post-colon substring that identifies the specific input: for example, the {@code
    * MY_ENV_VAR} part of {@code ENV:MY_ENV_VAR}.
    */
-  public abstract String toStringInternal();
+  protected abstract String toStringInternal();

   /** Returns the parser object for this type of recorded inputs. */
-  public abstract Parser getParser();
+  protected abstract Parser getParser();

   /** Returns the {@link SkyKey} that is necessary to determine {@link #isOutdated}. */
-  public abstract SkyKey getSkyKey(BlazeDirectories directories);
+  protected abstract SkyKey getSkyKey(BlazeDirectories directories);

   /**
-   * Returns a human-readable reason for why the given {@code oldValue} is no longer up-to-date for
-   * this recorded input, or an empty Optional if it is still up-to-date. This method can assume
-   * that {@link #getSkyKey(BlazeDirectories)} is already evaluated; it can request further Skyframe
-   * evaluations, and if any values are missing, this method can return any value (doesn't matter
-   * what, although {@link #UNDECIDED} is recommended for clarity) and will be reinvoked after a
-   * Skyframe restart.
+   * Returns true if the {@link #getValue} can be requested even if previous recorded inputs have
+   * not been verified to be up to date.
    */
-  public abstract Optional isOutdated(
-      Environment env, BlazeDirectories directories, @Nullable String oldValue)
-      throws InterruptedException;
+  protected abstract boolean canBeRequestedUnconditionally();

   private static final Optional UNDECIDED = Optional.of("values missing");

@@ -303,6 +393,11 @@ public final RootedPath getRootedPath(BlazeDirectories directories) {
       }
       return RootedPath.toRootedPath(root, path());
     }
+
+    /** Returns true if the path points into an external repository. */
+    public boolean inExternalRepo() {
+      return repoName().isPresent() && !repoName().get().isMain();
+    }
   }

   /**
@@ -367,7 +462,8 @@ public String toStringInternal() {
      * for placing in a repository marker file. The file need not exist, and can be a file or a
      * directory.
      */
-    public static String fileValueToMarkerValue(RootedPath rootedPath, FileValue fileValue)
+    @VisibleForTesting
+    static String fileValueToMarkerValue(RootedPath rootedPath, FileValue fileValue)
         throws IOException {
       if (fileValue.isDirectory()) {
         return "DIR";
@@ -390,23 +486,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) {
     }

     @Override
-    public Optional isOutdated(
-        Environment env, BlazeDirectories directories, @Nullable String oldValue)
+    protected boolean canBeRequestedUnconditionally() {
+      // Requesting files in external repositories can result in cycles if the external repo now
+      // transitively depends on the requesting repo.
+      return !path.inExternalRepo();
+    }
+
+    @Override
+    public MaybeValue getValue(Environment env, BlazeDirectories directories)
         throws InterruptedException {
       var skyKey = getSkyKey(directories);
       try {
-        FileValue fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class);
+        var fileValue = (FileValue) env.getValueOrThrow(skyKey, IOException.class);
         if (fileValue == null) {
-          return UNDECIDED;
+          return MaybeValue.VALUES_MISSING;
         }
-        if (!oldValue.equals(fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue))) {
-          return Optional.of("file info or contents of %s changed".formatted(path));
-        }
-        return Optional.empty();
+        return new MaybeValue.Valid(
+            fileValueToMarkerValue((RootedPath) skyKey.argument(), fileValue));
       } catch (IOException e) {
-        return Optional.of("failed to stat %s: %s".formatted(path, e.getMessage()));
+        return new MaybeValue.Invalid("failed to stat %s: %s".formatted(path, e.getMessage()));
       }
     }
+
+    @Override
+    public String describeChange(String oldValue, String newValue) {
+      return "file info or contents of %s changed".formatted(path);
+    }
   }

   /** Represents the list of entries under a directory accessed during the fetch. */
@@ -461,38 +566,42 @@ public Parser getParser() {
       return PARSER;
     }

+    private String directoryListingValueToMarkerValue(DirectoryListingValue directoryListingValue) {
+      var fp = new Fingerprint();
+      fp.addStrings(
+          directoryListingValue.getDirents().stream()
+              .map(Dirent::getName)
+              .sorted()
+              .collect(toImmutableList()));
+      return fp.hexDigestAndReset();
+    }
+
     @Override
     public SkyKey getSkyKey(BlazeDirectories directories) {
       return DirectoryListingValue.key(path.getRootedPath(directories));
     }

     @Override
-    public Optional isOutdated(
-        Environment env, BlazeDirectories directories, @Nullable String oldValue)
+    protected boolean canBeRequestedUnconditionally() {
+      // Requesting directories in external repositories can result in cycles if the external repo
+      // transitively depends on the requesting repo.
+      return !path.inExternalRepo();
+    }
+
+    @Override
+    public MaybeValue getValue(Environment env, BlazeDirectories directories)
         throws InterruptedException {
-      SkyKey skyKey = getSkyKey(directories);
-      if (env.getValue(skyKey) == null) {
-        return UNDECIDED;
-      }
-      try {
-        if (!oldValue.equals(
-            getDirentsMarkerValue(((DirectoryListingKey) skyKey).argument().asPath()))) {
-          return Optional.of("directory entries of %s changed".formatted(path));
-        }
-        return Optional.empty();
-      } catch (IOException e) {
-        return Optional.of("failed to readdir %s: %s".formatted(path, e.getMessage()));
+      var skyKey = getSkyKey(directories);
+      var directoryListingValue = (DirectoryListingValue) env.getValue(skyKey);
+      if (directoryListingValue == null) {
+        return MaybeValue.VALUES_MISSING;
       }
+      return new MaybeValue.Valid(directoryListingValueToMarkerValue(directoryListingValue));
     }

-    public static String getDirentsMarkerValue(Path path) throws IOException {
-      Fingerprint fp = new Fingerprint();
-      fp.addStrings(
-          path.getDirectoryEntries().stream()
-              .map(Path::getBaseName)
-              .sorted()
-              .collect(toImmutableList()));
-      return fp.hexDigestAndReset();
+    @Override
+    public String describeChange(String oldValue, String newValue) {
+      return "directory entries of %s changed".formatted(path);
     }
   }

@@ -558,18 +667,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) {
     }

     @Override
-    public Optional isOutdated(
-        Environment env, BlazeDirectories directories, @Nullable String oldValue)
+    protected boolean canBeRequestedUnconditionally() {
+      // Requesting directory trees in external repositories can result in cycles if the external
+      // repo now transitively depends on the requesting repo.
+      return !path.inExternalRepo();
+    }
+
+    @Override
+    public MaybeValue getValue(Environment env, BlazeDirectories directories)
         throws InterruptedException {
-      DirectoryTreeDigestValue value =
-          (DirectoryTreeDigestValue) env.getValue(getSkyKey(directories));
-      if (value == null) {
-        return UNDECIDED;
-      }
-      if (!oldValue.equals(value.hexDigest())) {
-        return Optional.of("directory tree at %s changed".formatted(path));
+      var skyKey = getSkyKey(directories);
+      try {
+        var directoryTreeDigestValue =
+            (DirectoryTreeDigestValue) env.getValueOrThrow(skyKey, IOException.class);
+        if (directoryTreeDigestValue == null) {
+          return MaybeValue.VALUES_MISSING;
+        }
+        return new MaybeValue.Valid(directoryTreeDigestValue.hexDigest());
+      } catch (IOException e) {
+        return new MaybeValue.Invalid(
+            "failed to digest directory tree at %s: %s".formatted(path, e.getMessage()));
       }
-      return Optional.empty();
+    }
+
+    @Override
+    public String describeChange(String oldValue, String newValue) {
+      return "directory tree at %s changed".formatted(path);
     }
   }

@@ -597,7 +720,7 @@ public static ImmutableMap> wrap(
           .collect(toImmutableMap(e -> new EnvVar(e.getKey()), Map.Entry::getValue));
     }

-    private EnvVar(String name) {
+    public EnvVar(String name) {
       this.name = name;
     }

@@ -633,20 +756,29 @@ public SkyKey getSkyKey(BlazeDirectories directories) {
     }

     @Override
-    public Optional isOutdated(
-        Environment env, BlazeDirectories directories, @Nullable String oldValue)
+    protected boolean canBeRequestedUnconditionally() {
+      // Environment variables are static data injected into Skyframe, so there is no risk of
+      // cycles.
+      return true;
+    }
+
+    @Override
+    public MaybeValue getValue(Environment env, BlazeDirectories directories)
        throws InterruptedException {
-      String v = ((EnvironmentVariableValue) env.getValue(getSkyKey(directories))).value();
-      // Note that `oldValue` can be null if the env var was not set.
-      if (!Objects.equals(oldValue, v)) {
-        return Optional.of(
-            "environment variable %s changed: %s -> %s"
-                .formatted(
-                    name,
-                    oldValue == null ? "" : "'%s'".formatted(oldValue),
-                    v == null ? "" : "'%s'".formatted(v)));
+      var value = (EnvironmentVariableValue) env.getValue(getSkyKey(directories));
+      if (value == null) {
+        return MaybeValue.VALUES_MISSING;
       }
-      return Optional.empty();
+      return new MaybeValue.Valid(value.value());
+    }
+
+    @Override
+    public String describeChange(String oldValue, String newValue) {
+      return "environment variable %s changed: %s -> %s"
+          .formatted(
+              name,
+              oldValue == null ? "" : "'%s'".formatted(oldValue),
+              newValue == null ? "" : "'%s'".formatted(newValue));
     }
   }

@@ -716,32 +848,32 @@ public SkyKey getSkyKey(BlazeDirectories directories) {
     }

     @Override
-    public Optional isOutdated(
-        Environment env, BlazeDirectories directories, @Nullable String oldValue)
+    protected boolean canBeRequestedUnconditionally() {
+      // Starlark can only request the mapping of the repo it is currently executing from, which
+      // means that the repo has already been fetched (either to execute the code or to verify the
+      // transitive .bzl hash). Further cycles aren't possible.
+      return true;
+    }
+
+    @Override
+    public MaybeValue getValue(Environment env, BlazeDirectories directories)
        throws InterruptedException {
-      RepositoryMappingValue repoMappingValue =
-          (RepositoryMappingValue) env.getValue(getSkyKey(directories));
+      var repoMappingValue = (RepositoryMappingValue) env.getValue(getSkyKey(directories));
       if (Objects.equals(repoMappingValue, RepositoryMappingValue.NOT_FOUND_VALUE)) {
-        return Optional.of("source repo %s doesn't exist anymore".formatted(sourceRepo));
-      }
-      RepositoryName oldCanonicalName;
-      try {
-        oldCanonicalName = RepositoryName.create(oldValue);
-      } catch (LabelSyntaxException e) {
-        // malformed old value causes refetch
-        return Optional.of("invalid recorded repo name: %s".formatted(e.getMessage()));
+        return new MaybeValue.Invalid("source repo %s doesn't exist anymore".formatted(sourceRepo));
       }
-      RepositoryName newCanonicalName = repoMappingValue.repositoryMapping().get(apparentName);
-      if (!oldCanonicalName.equals(newCanonicalName)) {
-        return Optional.of(
-            "canonical name for @%s in %s changed: %s -> %s"
-                .formatted(
-                    apparentName,
-                    sourceRepo,
-                    oldCanonicalName,
-                    newCanonicalName == null ? "" : newCanonicalName));
-      }
-      return Optional.empty();
+      RepositoryName canonicalName = repoMappingValue.repositoryMapping().get(apparentName);
+      return new MaybeValue.Valid(canonicalName != null ? canonicalName.getName() : null);
+    }
+
+    @Override
+    public String describeChange(String oldValue, String newValue) {
+      return "canonical name for @%s in %s changed: %s -> %s"
+          .formatted(
+              apparentName,
+              sourceRepo,
+              oldValue == null ? "" : oldValue,
+              newValue == null ? "" : newValue);
     }
   }

@@ -784,9 +916,20 @@ public SkyKey getSkyKey(BlazeDirectories directories) {
     }

     @Override
-    public Optional isOutdated(
-        Environment env, BlazeDirectories directories, @Nullable String oldValue) {
-      return Optional.of(reason);
+    protected boolean canBeRequestedUnconditionally() {
+      return true;
+    }
+
+    @Override
+    public MaybeValue getValue(Environment env, BlazeDirectories directories)
+        throws InterruptedException {
+      return new MaybeValue.Invalid(reason);
+    }
+
+    @Override
+    public String describeChange(String oldValue, String newValue) {
+      throw new UnsupportedOperationException(
+          "the value for this sentinel input is always invalid");
     }
   }
 }
diff --git a/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java b/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java
index dae6be132a65a1..ea928c53d6115f 100644
--- a/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java
+++ b/src/test/java/com/google/devtools/build/lib/rules/repository/RepoRecordedInputTest.java
@@ -15,8 +15,11 @@
 package com.google.devtools.build.lib.rules.repository;

 import static com.google.common.truth.Truth.assertThat;
+import static com.google.devtools.build.lib.rules.repository.RepoRecordedInput.WithValue.parse;
+import static com.google.devtools.build.lib.rules.repository.RepoRecordedInput.WithValue.splitIntoBatches;
 import static org.mockito.Mockito.when;

+import com.google.common.collect.ImmutableList;
 import com.google.common.io.BaseEncoding;
 import com.google.devtools.build.lib.actions.FileContentsProxy;
 import com.google.devtools.build.lib.actions.FileStateValue.RegularFileStateValueWithContentsProxy;
@@ -35,8 +38,8 @@
 @RunWith(JUnit4.class)
 public class RepoRecordedInputTest extends BuildViewTestCase {
   private static void assertMarkerFileEscaping(String testCase) {
-    String escaped = RepoRecordedInput.WithValue.escape(testCase);
-    assertThat(RepoRecordedInput.WithValue.unescape(escaped)).isEqualTo(testCase);
+    String escaped = RepoRecordedInput.escape(testCase);
+    assertThat(RepoRecordedInput.unescape(escaped)).isEqualTo(testCase);
   }

   @Test
@@ -70,4 +73,22 @@ public void testFileValueToMarkerValue() throws Exception {
     String expectedDigest = BaseEncoding.base16().lowerCase().encode(path.asPath().getDigest());
     assertThat(RepoRecordedInput.File.fileValueToMarkerValue(path, fv)).isEqualTo(expectedDigest);
   }
+
+  @Test
+  public void testSplitIntoBatches() {
+    assertThat(splitIntoBatches(ImmutableList.of())).isEmpty();
+    assertThat(
+            splitIntoBatches(
+                ImmutableList.of(
+                    parse("FILE:@@//foo:bar abc").orElseThrow(),
+                    parse("FILE:@@//:baz cba").orElseThrow(),
+                    parse("FILE:@@foo//:baz bac").orElseThrow(),
+                    parse("ENV:KEY value").orElseThrow())))
+        .containsExactly(
+            ImmutableList.of(
+                parse("FILE:@@//foo:bar abc").orElseThrow(),
+                parse("FILE:@@//:baz cba").orElseThrow()),
+            ImmutableList.of(
+                parse("FILE:@@foo//:baz bac").orElseThrow(), parse("ENV:KEY value").orElseThrow()));
+  }
 }
diff --git a/src/test/py/bazel/bzlmod/bazel_module_test.py b/src/test/py/bazel/bzlmod/bazel_module_test.py
index bba57ca5a332dc..9ddea4f956c543 100644
--- a/src/test/py/bazel/bzlmod/bazel_module_test.py
+++ b/src/test/py/bazel/bzlmod/bazel_module_test.py
@@ -1389,6 +1389,66 @@ def testUnknownRegistryInModuleMirrors(self):
     )
     self.assertIn('https://unknown-registry.example.com', stderr)

+  def testReverseDependencyDirection(self):
+    # Set up two module extensions that retain their predeclared input hashes
+    # across two builds but still reverse their dependency direction. Depending
+    # on how repo cache candidates are checked, this could lead to a Skyframe
+    # cycle.
+    self.ScratchFile(
+        'MODULE.bazel',
+        [
+            'foo_ext = use_extension("//:repo.bzl", "foo_ext")',
+            'use_repo(foo_ext, "foo")',
+            'bar_ext = use_extension("//:repo.bzl", "bar_ext")',
+            'use_repo(bar_ext, "bar")',
+        ],
+    )
+    self.ScratchFile('BUILD.bazel')
+    self.ScratchFile(
+        'repo.bzl',
+        [
+            'def _repo_impl(rctx):',
+            '    rctx.file("output.txt", rctx.attr.content)',
+            '    rctx.file("BUILD", "exports_files([\'output.txt\'])")',
+            '    print("JUST FETCHED: %s" % rctx.original_name)',
+            '    return rctx.repo_metadata(reproducible=True)',
+            'repo = repository_rule(',
+            '    implementation = _repo_impl,',
+            '    attrs = {"content": attr.string()},',
+            ')',
+            'def _foo_ext_impl(mctx):',
+            '    deps = mctx.read(Label("@//:foo_deps.txt")).splitlines()',
+            '    content = "foo"',
+            '    for dep in deps:',
+            '        if dep:',
+            '            content += " + " + mctx.read(Label(dep))',
+            '    repo(name = "foo", content = content)',
+            'foo_ext = module_extension(implementation = _foo_ext_impl)',
+            'def _bar_ext_impl(mctx):',
+            '    deps = mctx.read(Label("@//:bar_deps.txt")).splitlines()',
+            '    content = "bar"',
+            '    for dep in deps:',
+            '        if dep:',
+            '            content += " + " + mctx.read(Label(dep))',
+            '    repo(name = "bar", content = content)',
+            'bar_ext = module_extension(implementation = _bar_ext_impl)',
+        ],
+    )
+
+    self.ScratchFile('foo_deps.txt', ['@bar//:output.txt'])
+    self.ScratchFile('bar_deps.txt')
+
+    # First fetch: not cached
+    _, _, stderr = self.RunBazel(['build', '@foo//:output.txt'])
+    self.assertIn('JUST FETCHED: bar', '\n'.join(stderr))
+    self.assertIn('JUST FETCHED: foo', '\n'.join(stderr))
+
+    # After expunging and reversing: cached
+    self.RunBazel(['clean', '--expunge'])
+    self.ScratchFile('foo_deps.txt')
+    self.ScratchFile('bar_deps.txt', ['@foo//:output.txt'])
+    self.RunBazel(['build', '@foo//:output.txt'])
+

 if __name__ == '__main__':
   absltest.main()
diff --git a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py
index efaa4e182e6be7..7afd2cbc5cc89c 100644
--- a/src/test/py/bazel/bzlmod/repo_contents_cache_test.py
+++ b/src/test/py/bazel/bzlmod/repo_contents_cache_test.py
@@ -480,14 +480,7 @@ def testReverseDependencyDirection(self):
     self.RunBazel(['clean', '--expunge'])
     self.ScratchFile('foo_deps.txt', [''])
     self.ScratchFile('bar_deps.txt', ['@foo//:output.txt'])
-    exit_code, stdout, stderr = self.RunBazel(
-        ['build', '@foo//:output.txt'], allow_failure=True
-    )
-    # TODO: b/xxxxxxx - This is NOT the intended behavior.
-    self.AssertNotExitCode(exit_code, 0, stderr, stdout)
-    self.assertIn('.-> @@+repo+foo', stderr)
-    self.assertIn('| @@+repo+bar', stderr)
-    self.assertIn('`-- @@+repo+foo', stderr)
+    self.RunBazel(['build', '@foo//:output.txt'])

   def doTestRepoContentsCacheDeleted(self, check_external_repository_files):
     repo_contents_cache = self.ScratchDir('repo_contents_cache')
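
Note (illustrative sketch, not part of the patch): judging from the testSplitIntoBatches expectations above, RepoRecordedInput.WithValue.splitIntoBatches appears to start a new batch before every recorded input that cannot be requested unconditionally (for example a file in an external repo), so that all earlier inputs are verified before the potentially cycle-inducing request is issued. Below is a minimal, self-contained Java sketch of that grouping rule under that assumption; the Input interface and class name are hypothetical stand-ins, not Bazel APIs.

import java.util.ArrayList;
import java.util.List;

final class SplitIntoBatchesSketch {
  /** Hypothetical stand-in for a recorded input; models only the property relevant to batching. */
  interface Input {
    // True if this input's Skyframe value can be requested even before earlier recorded inputs
    // have been verified (e.g. an env var or a main-repo file); false for inputs that could
    // introduce a cycle, such as files in external repos.
    boolean canBeRequestedUnconditionally();
  }

  /** Starts a new batch before every input that must not be requested unconditionally. */
  static List<List<Input>> splitIntoBatches(List<Input> inputs) {
    List<List<Input>> batches = new ArrayList<>();
    List<Input> current = new ArrayList<>();
    for (Input input : inputs) {
      if (!input.canBeRequestedUnconditionally() && !current.isEmpty()) {
        batches.add(current);
        current = new ArrayList<>();
      }
      current.add(input);
    }
    if (!current.isEmpty()) {
      batches.add(current);
    }
    return batches;
  }

  // Mirrors the test above: two main-repo files form the first batch; the external-repo file
  // opens a second batch that the following env var joins.
  public static void main(String[] args) {
    Input mainRepoFile = () -> true;
    Input externalRepoFile = () -> false;
    Input envVar = () -> true;
    System.out.println(
        splitIntoBatches(List.of(mainRepoFile, mainRepoFile, externalRepoFile, envVar)).size());
    // Prints 2.
  }
}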