conveyal · abyrd · Jan 3, 2024 · Jan 11, 2024 · Jan 11, 2024 · Jan 11, 2024
diff --git a/src/main/java/com/conveyal/gtfs/GTFSCache.java b/src/main/java/com/conveyal/gtfs/GTFSCache.java
@@ -48,10 +48,10 @@ public class GTFSCache implements Component {
     // The following two caches hold spatial indexes of GTFS geometries for generating Mapbox vector tiles, one spatial
     // index per feed keyed on BundleScopedFeedId. They could potentially be combined such that cache values are a
     // compound type holding two indexes, or cache values are a single index containing a mix of different geometry
-    // types that are filtered on iteration. They could also be integreated into the GTFSFeed values of the main
-    // GTFSCache#cache. However GTFSFeed is already a very long class, and we may want to tune eviction parameters
+    // types that are filtered on iteration. They could also be integrated into the GTFSFeed values of the main
+    // GTFSCache#cache. However, GTFSFeed is already a very long class, and we may want to tune eviction parameters
     // separately for GTFSFeed and these indexes. While GTFSFeeds are expected to incur constant memory use, the
-    // spatial indexes are potentially unlimited in size and we may want to evict them faster or limit their quantity.
+    // spatial indexes are potentially unlimited in size, so we may want to evict them faster or limit their quantity.
     // We have decided to keep them as separate caches until we're certain of the chosen eviction tuning parameters.
 
     /** A cache of spatial indexes of TripPattern shapes, keyed on the BundleScopedFeedId. */
@@ -127,6 +127,8 @@ public FileStorageKey getFileKey (String id, String extension) {
         // The feedId of the GTFSFeed objects may not be unique - we can have multiple versions of the same feed
         // covering different time periods, uploaded by different users. Therefore we record another ID here that is
         // known to be unique across the whole application - the ID used to fetch the feed.
+        // NOTE as of 2023, this is no longer true. All uploaded feeds have assigned unique UUIDs so as far as I know
+        // they can't collide, we don't need this uniqueId field, and we may not even need bundle-scoped feed IDs.
         feed.uniqueId = id;
         return feed;
     }

diff --git a/src/main/java/com/conveyal/gtfs/GTFSFeed.java b/src/main/java/com/conveyal/gtfs/GTFSFeed.java
@@ -85,16 +85,18 @@ public class GTFSFeed implements Cloneable, Closeable {
     /** The MapDB database handling persistence of Maps to a pair of disk files behind the scenes. */
     private DB db;
 
-    /** An ID (sometimes declared by the feed itself) which may remain the same across successive feed versions. */
+    /**
+     *  An ID (sometimes declared by the feed itself) which may remain the same across successive feed versions.
+     *  In R5 as of 2023 this is always overwritten with a unique UUID to avoid problems with successive feed versions
+     *  or edited/modified versions of the same feeds.
+     */
     public String feedId;
 
     /**
-     * This field was merged in from the wrapper FeedSource. It is a unique identifier for this particular GTFS file.
-     * Successive versions of the data for the same operators, or even different copies of the same operator's data
-     * uploaded by different people, should have different uniqueIds.
-     * In practice this is mostly copied into WrappedGTFSEntity instances used in the Analysis GraphQL API.
+     * In R5 as of 2023, this field will contain the bundle-scoped feed ID used to fetch the feed object from the
+     * GTFSCache (but is not present on disk or before saving - only after it's been reloaded from a file by the cache).
      */
-    public transient String uniqueId; // set this to feedId until it is overwritten, to match FeedSource behavior
+    public transient String uniqueId;
 
     // All tables below should be MapDB maps so the entire GTFSFeed is persistent and uses constant memory.
 

diff --git a/src/main/java/com/conveyal/gtfs/GeometryCache.java b/src/main/java/com/conveyal/gtfs/GeometryCache.java
@@ -19,7 +19,7 @@
  * LoadingCache so should be thread safe and provide granular per-key locking, which is convenient when serving up
  * lots of simultaneous vector tile requests.
  *
- * This is currently used only for looking up geomertries when producing Mapbox vector map tiles, hence the single
+ * This is currently used only for looking up geometries when producing Mapbox vector map tiles, hence the single
  * set of hard-wired cache eviction parameters. For more general use we'd want another constructor to change them.
  */
 public class GeometryCache<T extends Geometry> {

diff --git a/src/main/java/com/conveyal/r5/analyst/Grid.java b/src/main/java/com/conveyal/r5/analyst/Grid.java
@@ -53,6 +53,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -170,7 +171,8 @@ public List<PixelWeight> getPixelWeights (Geometry geometry, boolean relativeToP
 
         double area = geometry.getArea();
         if (area < 1e-12) {
-            throw new IllegalArgumentException("Feature geometry is too small");
+            LOG.warn("Discarding feature. Its area is too small to serve as a denominator ({} square degrees).", area);
+            return Collections.EMPTY_LIST;
         }
 
         if (area > MAX_FEATURE_AREA_SQ_DEG) {

diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java
@@ -9,9 +9,15 @@
 import com.google.common.collect.Multimap;
 import gnu.trove.list.TIntList;
 import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.set.TIntSet;
+import gnu.trove.set.hash.TIntHashSet;
 import org.apache.commons.lang3.ArrayUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
+import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.List;
@@ -32,6 +38,8 @@
 
 public class PathResult {
 
+    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
     /**
      * The maximum number of destinations for which we'll generate detailed path information in a single request.
      * Detailed path information was added on to the original design, which returned a simple grid of travel times.
@@ -41,12 +49,21 @@ public class PathResult {
     public static final int MAX_PATH_DESTINATIONS = 5_000;
 
     private final int nDestinations;
+
     /**
      * Array with one entry per destination. Each entry is a map from a "path template" to the associated iteration
      * details. For now, the path template is a route-based path ignoring per-iteration details such as wait time.
      * With additional changes, patterns could be collapsed further to route combinations or modes.
      */
     public final Multimap<RouteSequence, Iteration>[] iterationsForPathTemplates;
+
+    /**
+     * The total number of iterations that reached each destination can be derived from iterationsForPathTemplates
+     * as long as every path is being retained. When filtering down to a subset of paths, such as only those passing
+     * through a selected link, we need this additional array to retain the information.
+     */
+    private final int[] nUnfilteredIterationsReachingDestination;
+
     private final TransitLayer transitLayer;
 
     public static final String[] DATA_COLUMNS = new String[]{
@@ -74,7 +91,9 @@ public PathResult(AnalysisWorkerTask task, TransitLayer transitLayer) {
                 throw new UnsupportedOperationException("Number of detailed path destinations exceeds limit of " + MAX_PATH_DESTINATIONS);
             }
         }
+        // FIXME should we be allocating these large arrays when not recording paths?
         iterationsForPathTemplates = new Multimap[nDestinations];
+        nUnfilteredIterationsReachingDestination = new int[nDestinations];
         this.transitLayer = transitLayer;
     }
 
@@ -83,6 +102,18 @@ public PathResult(AnalysisWorkerTask task, TransitLayer transitLayer) {
      * pattern-based keys
      */
     public void setTarget(int targetIndex, Multimap<PatternSequence, Iteration> patterns) {
+        // The size of a multimap is the number of mappings (number of values), not number of unique keys.
+        // This size method appears to be O(1), see: com.google.common.collect.AbstractMapBasedMultimap.size
+        nUnfilteredIterationsReachingDestination[targetIndex] = patterns.size();
+
+        // When selected link analysis is enabled, filter down the PatternSequence-Iteration Multimap to retain only
+        // those keys passing through the selected links.
+        // Maybe selectedLink instance should be on TransitLayer not TransportNetwork.
+        if (transitLayer.parentNetwork.selectedLink != null) {
+            patterns = transitLayer.parentNetwork.selectedLink.filterPatterns(patterns);
+        }
+
+        // The rest of this runs independent of whether a SelectedLink filtered down the patterns-iterations map.
         Multimap<RouteSequence, Iteration> routes = HashMultimap.create();
         patterns.forEach(((patternSeq, iteration) -> routes.put(new RouteSequence(patternSeq, transitLayer), iteration)));
         iterationsForPathTemplates[targetIndex] = routes;
@@ -103,6 +134,43 @@ public ArrayList<String[]>[] summarizeIterations(Stat stat) {
             summary[d] = new ArrayList<>();
             Multimap<RouteSequence, Iteration> iterationMap = iterationsForPathTemplates[d];
             if (iterationMap != null) {
+                // SelectedLink case: collapse all RouteSequences and Iterations for this OD pair into one to simplify.
+                // iterationMap is empty (not null) for destinations that were reached without using the selected link.
+                // This could also be done by merging all Iterations under a single RouteSequence with all route IDs.
+                if (transitLayer.parentNetwork.selectedLink != null) {
+                    int nIterations = 0;
+                    TIntSet allRouteIds = new TIntHashSet();
+                    double summedTotalTime = 0;
+                    for (RouteSequence routeSequence: iterationMap.keySet()) {
+                        Collection<Iteration> iterations = iterationMap.get(routeSequence);
+                        nIterations += iterations.size();
+                        allRouteIds.addAll(routeSequence.routes);
+                        summedTotalTime += iterations.stream().mapToInt(i -> i.totalTime).sum();
+                    }
+                    // Many destinations will have no iterations at all passing through the SelectedLink area.
+                    // Skip those to keep the CSV output short (and to avoid division by zero below).
+                    if (nIterations == 0) {
+                        continue;
+                    }
+                    String[] row = new String[DATA_COLUMNS.length];
+                    Arrays.fill(row, "ALL");
+                    transitLayer.routeString(1, true);
+                    String allRouteIdsPipeSeparated = Arrays.stream(allRouteIds.toArray())
+                            // If includeName is set to false we record only the ID without the name.
+                            // Name works better than ID for routes added by modifications, which have random IDs.
+                            .mapToObj(ri -> transitLayer.routeString(ri, true))
+                            .collect(Collectors.joining("|"));
+                    String iterationProportion = "%.3f".formatted(
+                            nIterations / (double)(nUnfilteredIterationsReachingDestination[d]));
+                    row[0] = allRouteIdsPipeSeparated;
+                    row[row.length - 1] = iterationProportion;
+                    // Report average of total time over all retained iterations, different than mean/min approach below.
+                    row[row.length - 2] = String.format("%.1f", summedTotalTime / nIterations / 60d);
+                    summary[d].add(row);
+                    // Fall through to the standard case below, so the summary row is followed by its component parts.
+                    // We could optionally continue to the next loop iteration here, to return only the summary row.
+                }
+                // Standard (non SelectedLink) case.
                 for (RouteSequence routeSequence: iterationMap.keySet()) {
                     Collection<Iteration> iterations = iterationMap.get(routeSequence);
                     int nIterations = iterations.size();
@@ -135,7 +203,11 @@ public ArrayList<String[]>[] summarizeIterations(Stat stat) {
                             score = thisScore;
                         }
                     }
-                    String[] row = ArrayUtils.addAll(path, transfer, waits, totalTime, String.valueOf(nIterations));
+                    // Check above guarantees that nIterations is nonzero, so total iterations must be nonzero,
+                    // avoiding divide by zero.
+                    String iterationProportion = "%.3f".formatted(
+                            nIterations / (double)(nUnfilteredIterationsReachingDestination[d]));
+                    String[] row = ArrayUtils.addAll(path, transfer, waits, totalTime, iterationProportion);
                     checkState(row.length == DATA_COLUMNS.length);
                     summary[d].add(row);
                 }