diff --git a/.github/workflows/cypress-integration.yml b/.github/workflows/cypress-integration.yml index 31072191a..0fd16c257 100644 --- a/.github/workflows/cypress-integration.yml +++ b/.github/workflows/cypress-integration.yml @@ -16,7 +16,7 @@ jobs: - uses: actions/checkout@v2 with: repository: conveyal/analysis-ui - ref: a869cd11919343a163e110e812b5d27f3a4ad4c8 + ref: 1701bd9aea859a4714bc3f35f5bcf767a3256a64 path: ui - uses: actions/checkout@v2 with: diff --git a/README.md b/README.md index 6cb8b12d1..9db4bd806 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,12 @@ # Conveyal R5 Routing Engine ## R5: Rapid Realistic Routing on Real-world and Reimagined networks +R5 is the routing engine for [Conveyal](https://www.conveyal.com/learn), a web-based system that allows users to create transportation scenarios and evaluate them in terms of cumulative opportunities accessibility indicators. See the [Conveyal user manual](https://docs.conveyal.com/) for more information. -R5 is Conveyal's routing engine for multimodal (transit/bike/walk/car) networks, with a particular focus on public transit. It is intended primarily for analysis applications (one-to-many trees, travel time matrices, and cumulative opportunities accessibility indicators). - -We refer to the routing method as "realistic" because it works by planning many trips at different departure times in a time window, which better reflects how people use transportation system than planning a single trip at an exact departure time. R5 handles both scheduled public transit and headway-based lines, using novel methods to characterize variation and uncertainty in travel times. +We refer to the routing method as "realistic" because it works by planning door-to-door trips at many different departure times in a time window, which better reflects how people use transportation systems than planning a single trip at an exact departure time. R5 handles both scheduled public transit and headway-based lines, using novel methods to characterize variation and uncertainty in travel times. It is designed for one-to-many and many-to-many travel-time calculations used in access indicators, offering substantially better performance than repeated calls to older tools that provide one-to-one routing results. For a comparison with OpenTripPlanner, see [this background](http://docs.opentripplanner.org/en/latest/Version-Comparison/#commentary-on-otp1-features-removed-from-otp2). We say "Real-world and Reimagined" networks because R5's networks are built from widely available open OSM and GTFS data describing baseline transportation systems, but R5 includes a system for applying light-weight patches to those networks for immediate, interactive scenario comparison. -R5 is a core component of [Conveyal Analysis](https://www.conveyal.com/learn), which allows users to create transportation scenarios and evaluate them in terms of cumulative opportunities accessibility indicators. See the [methodology section](https://docs.conveyal.com/analysis/methodology) of the [Conveyal user manual](https://docs.conveyal.com/) for more information. - **Please note** that the Conveyal team does not provide technical support for third-party deployments of its analysis platform. We provide paid subscriptions to a cloud-based deployment of this system, which performs these complex calculations hundreds of times faster using a compute cluster. 
This project is open source primarily to ensure transparency and reproducibility in public planning and decision making processes, and in hopes that it may help researchers, students, and potential collaborators to understand and build upon our methodology. ## Methodology diff --git a/analysis.properties.template b/analysis.properties.template index f5b5c2b5f..2f9ac6367 100644 --- a/analysis.properties.template +++ b/analysis.properties.template @@ -9,11 +9,6 @@ database-uri=mongodb://localhost # The name of the database in the Mongo instance. database-name=analysis -# The URL where the frontend is hosted. -# In production this should point to a cached CDN for speed. e.g. https://d1uqjuy3laovxb.cloudfront.net -# In staging this should be the underlying S3 URL so files are not cached and you see the most recent deployment. -frontend-url=https://localhost - # The S3 bucket where we can find tiles of the entire US census, built with Conveyal seamless-census. seamless-census-bucket=lodes-data-2014 seamless-census-region=us-east-1 @@ -28,6 +23,11 @@ aws-region=eu-west-1 # The port on which the server will listen for connections from clients and workers. server-port=7070 +# The origin where the frontend is hosted. When running locally, this will generally be http://localhost:3000. +# It should be relatively safe to set this to * (allowing requests from anywhere) when authentication is enabled. +# This increases attack surface though, so it is preferable to set this to the specific origin where the UI is hosted. +access-control-allow-origin=http://localhost:3000 + # A temporary location to store scratch files. The path can be absolute or relative. # This allows you to locate temporary storage on an extra drive in case your main drive does not have enough space. # local-cache=/home/ec2-user/cache diff --git a/build.gradle b/build.gradle index 50390bd7d..ee0c41c9b 100644 --- a/build.gradle +++ b/build.gradle @@ -125,6 +125,11 @@ repositories { maven { url 'https://nexus.axiomalaska.com/nexus/content/repositories/public-releases' } } +// Exclude all JUnit 4 transitive dependencies - IntelliJ bug causes it to think we're using Junit 4 instead of 5. +configurations.all { + exclude group: "junit", module: "junit" +} + dependencies { // Provides our logging API implementation 'org.slf4j:slf4j-api:1.7.30' @@ -154,13 +159,14 @@ dependencies { implementation 'com.beust:jcommander:1.30' // GeoTools provides GIS functionality on top of JTS topology suite. - def geotoolsVersion = '21.2' + def geotoolsVersion = '25.2' implementation group: 'org.geotools', version: geotoolsVersion, name: 'gt-main' implementation group: 'org.geotools', version: geotoolsVersion, name: 'gt-opengis' implementation group: 'org.geotools', version: geotoolsVersion, name: 'gt-referencing' implementation group: 'org.geotools', version: geotoolsVersion, name: 'gt-shapefile' implementation group: 'org.geotools', version: geotoolsVersion, name: 'gt-coverage' - implementation group: 'org.geotools', version: geotoolsVersion, name: 'gt-geojson' + implementation group: 'org.geotools', version: geotoolsVersion, name: 'gt-geojsondatastore' + implementation group: 'org.geotools', version: geotoolsVersion, name: 'gt-geopkg' implementation group: 'org.geotools', version: geotoolsVersion, name: 'gt-geotiff' // Provides the EPSG coordinate reference system catalog as an HSQL database. 
implementation group: 'org.geotools', version: geotoolsVersion, name: 'gt-epsg-hsql' @@ -201,10 +207,6 @@ dependencies { // Now used only for Seamless Census TODO eliminate this final AWS dependency implementation 'com.amazonaws:aws-java-sdk-s3:1.11.341' - // Old version of GraphQL-Java used by legacy gtfs-api embedded in analysis-backend. - // TODO eliminate GraphQL in future API revisions - implementation 'com.graphql-java:graphql-java:2.1.0' - // Commons Math gives us FastMath, MersenneTwister, and low-discrepancy vector generators. implementation 'org.apache.commons:commons-math3:3.0' diff --git a/src/main/java/com/conveyal/analysis/AnalysisServerException.java b/src/main/java/com/conveyal/analysis/AnalysisServerException.java index de160c667..7f2bce11a 100644 --- a/src/main/java/com/conveyal/analysis/AnalysisServerException.java +++ b/src/main/java/com/conveyal/analysis/AnalysisServerException.java @@ -1,12 +1,9 @@ package com.conveyal.analysis; import com.conveyal.r5.util.ExceptionUtils; -import graphql.GraphQLError; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.List; - public class AnalysisServerException extends RuntimeException { private static final Logger LOG = LoggerFactory.getLogger(AnalysisServerException.class); @@ -40,17 +37,6 @@ public static AnalysisServerException forbidden(String message) { return new AnalysisServerException(Type.FORBIDDEN, message, 403); } - public static AnalysisServerException graphQL(List errors) { - return new AnalysisServerException( - Type.GRAPHQL, - errors - .stream() - .map(e -> e.getMessage()) - .reduce("", (a, b) -> a + " " + b), - 400 - ); - } - public static AnalysisServerException nonce() { return new AnalysisServerException(Type.NONCE, "The data you attempted to change is out of date and could not be " + "updated. This project may be open by another user or in another browser tab.", 400); @@ -60,6 +46,8 @@ public static AnalysisServerException notFound(String message) { return new AnalysisServerException(Type.NOT_FOUND, message, 404); } + // Note that there is a naming mistake in the HTTP codes. 401 "unauthorized" actually means "unauthenticated". + // 403 "forbidden" is what is usually referred to as "unauthorized" in other contexts. 
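+    // In practice: use unauthorized() (401) when the caller's identity cannot be established at all, and
+    // forbidden() (403) when an authenticated user is known but lacks permission for the requested action.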
public static AnalysisServerException unauthorized(String message) { return new AnalysisServerException(Type.UNAUTHORIZED, message, 401); } diff --git a/src/main/java/com/conveyal/analysis/BackendConfig.java b/src/main/java/com/conveyal/analysis/BackendConfig.java index 65eeb6da8..68a747560 100644 --- a/src/main/java/com/conveyal/analysis/BackendConfig.java +++ b/src/main/java/com/conveyal/analysis/BackendConfig.java @@ -4,7 +4,6 @@ import com.conveyal.analysis.components.LocalWorkerLauncher; import com.conveyal.analysis.components.TaskScheduler; import com.conveyal.analysis.components.broker.Broker; -import com.conveyal.analysis.controllers.OpportunityDatasetController; import com.conveyal.analysis.grids.SeamlessCensusGridExtractor; import com.conveyal.analysis.persistence.AnalysisDB; import com.conveyal.file.LocalFileStorage; @@ -36,6 +35,7 @@ public class BackendConfig extends ConfigBase implements private final String databaseUri; private final String localCacheDirectory; private final int serverPort; + private final String allowOrigin; private final String seamlessCensusBucket; private final String seamlessCensusRegion; private final int lightThreads; @@ -63,6 +63,7 @@ protected BackendConfig (Properties properties) { localCacheDirectory = strProp("local-cache"); serverPort = intProp("server-port"); offline = boolProp("offline"); + allowOrigin = strProp("access-control-allow-origin"); seamlessCensusBucket = strProp("seamless-census-bucket"); seamlessCensusRegion = strProp("seamless-census-region"); lightThreads = intProp("light-threads"); @@ -81,6 +82,7 @@ protected BackendConfig (Properties properties) { @Override public String databaseName() { return databaseName; } @Override public String localCacheDirectory() { return localCacheDirectory;} @Override public boolean testTaskRedelivery() { return testTaskRedelivery; } + @Override public String allowOrigin() { return allowOrigin; } @Override public String seamlessCensusRegion() { return seamlessCensusRegion; } @Override public String seamlessCensusBucket() { return seamlessCensusBucket; } @Override public int serverPort() { return serverPort; } diff --git a/src/main/java/com/conveyal/analysis/BackendMain.java b/src/main/java/com/conveyal/analysis/BackendMain.java index b3729682d..f543808ae 100644 --- a/src/main/java/com/conveyal/analysis/BackendMain.java +++ b/src/main/java/com/conveyal/analysis/BackendMain.java @@ -3,7 +3,6 @@ import com.conveyal.analysis.components.BackendComponents; import com.conveyal.analysis.components.LocalBackendComponents; import com.conveyal.analysis.persistence.Persistence; -import com.conveyal.gtfs.api.ApiMain; import com.conveyal.r5.SoftwareVersion; import com.conveyal.r5.analyst.PointSetCache; import com.conveyal.r5.analyst.WorkerCategory; @@ -46,20 +45,20 @@ private static void startServerInternal (BackendComponents components, TaskActio // TODO remove the static ApiMain abstraction layer. We do not use it anywhere but in handling GraphQL queries. // TODO we could move this to something like BackendComponents.initialize() Persistence.initializeStatically(components.config); - ApiMain.initialize(components.gtfsCache); PointSetCache.initializeStatically(components.fileStorage); // TODO handle this via components without explicit "if (offline)" if (components.config.offline()) { LOG.info("Running in OFFLINE mode."); LOG.info("Pre-starting local cluster of Analysis workers..."); + // WorkerCategory(null, null) means a worker is not on any network, and is waiting to be assigned one. 
components.workerLauncher.launch(new WorkerCategory(null, null), null, 1, 0); } LOG.info("Conveyal Analysis server is ready."); for (TaskAction taskAction : postStartupTasks) { components.taskScheduler.enqueue( - Task.create(Runnable.class.getSimpleName()).setHeavy(true).forUser("SYSTEM").withAction(taskAction) + Task.create(taskAction.getClass().getSimpleName()).setHeavy(true).forUser("SYSTEM").withAction(taskAction) ); } diff --git a/src/main/java/com/conveyal/analysis/SelectingGridReducer.java b/src/main/java/com/conveyal/analysis/SelectingGridReducer.java index 38cd73e36..bbff784d1 100644 --- a/src/main/java/com/conveyal/analysis/SelectingGridReducer.java +++ b/src/main/java/com/conveyal/analysis/SelectingGridReducer.java @@ -20,7 +20,6 @@ * When storing bootstrap replications of travel time, we also store the point estimate (using all Monte Carlo draws * equally weighted) as the first value, so a SelectingGridReducer(0) can be used to retrieve the point estimate. * - * This class is not referenced within R5, but is used by the Analysis front end. */ public class SelectingGridReducer { @@ -64,7 +63,7 @@ public Grid compute (InputStream rawInput) throws IOException { // median travel time. int nSamples = input.readInt(); - Grid outputGrid = new Grid(zoom, width, height, north, west); + Grid outputGrid = new Grid(west, north, width, height, zoom); int[] valuesThisOrigin = new int[nSamples]; diff --git a/src/main/java/com/conveyal/analysis/UserPermissions.java b/src/main/java/com/conveyal/analysis/UserPermissions.java index b755495e8..695d1639b 100644 --- a/src/main/java/com/conveyal/analysis/UserPermissions.java +++ b/src/main/java/com/conveyal/analysis/UserPermissions.java @@ -1,5 +1,9 @@ package com.conveyal.analysis; +import spark.Request; + +import static com.conveyal.analysis.components.HttpApi.USER_PERMISSIONS_ATTRIBUTE; + /** * Groups together all information about what a user is allowed to do. * Currently all such information is known from the group ID. @@ -19,6 +23,16 @@ public UserPermissions (String email, boolean admin, String accessGroup) { this.accessGroup = accessGroup; } + /** + * From an HTTP request object, extract a strongly typed UserPermissions object containing the user's email and + * access group. This should be used almost everywhere instead of String email and accessGroup variables. Use this + * method to encapsulate all calls to req.attribute(String) because those calls are not typesafe (they cast an Object + * to whatever type seems appropriate in the context, or is supplied by the "req.attribute(String)" syntax). 
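+     * Typical handler usage: UserPermissions user = UserPermissions.from(req); then read user.email and user.accessGroup.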
+ */ + public static UserPermissions from (Request req) { + return req.attribute(USER_PERMISSIONS_ATTRIBUTE); + } + @Override public String toString () { return "UserPermissions{" + diff --git a/src/main/java/com/conveyal/analysis/components/BackendComponents.java b/src/main/java/com/conveyal/analysis/components/BackendComponents.java index 197f46b22..db0e3ce1a 100644 --- a/src/main/java/com/conveyal/analysis/components/BackendComponents.java +++ b/src/main/java/com/conveyal/analysis/components/BackendComponents.java @@ -7,14 +7,12 @@ import com.conveyal.analysis.controllers.BrokerController; import com.conveyal.analysis.controllers.BundleController; import com.conveyal.analysis.controllers.FileStorageController; -import com.conveyal.analysis.controllers.GTFSGraphQLController; +import com.conveyal.analysis.controllers.GtfsController; import com.conveyal.analysis.controllers.GtfsTileController; import com.conveyal.analysis.controllers.HttpController; -import com.conveyal.analysis.controllers.ModificationController; import com.conveyal.analysis.controllers.OpportunityDatasetController; -import com.conveyal.analysis.controllers.ProjectController; import com.conveyal.analysis.controllers.RegionalAnalysisController; -import com.conveyal.analysis.controllers.TimetableController; +import com.conveyal.analysis.controllers.DataSourceController; import com.conveyal.analysis.controllers.UserActivityController; import com.conveyal.analysis.grids.SeamlessCensusGridExtractor; import com.conveyal.analysis.persistence.AnalysisDB; @@ -86,15 +84,12 @@ public List standardHttpControllers () { return Lists.newArrayList( // These handlers are at paths beginning with /api // and therefore subject to authentication and authorization. - new ModificationController(), - new ProjectController(), - new GTFSGraphQLController(gtfsCache), + new GtfsController(gtfsCache), new BundleController(this), - new OpportunityDatasetController(fileStorage, taskScheduler, censusExtractor), + new OpportunityDatasetController(fileStorage, taskScheduler, censusExtractor, database), new RegionalAnalysisController(broker, fileStorage), - new AggregationAreaController(fileStorage), - new TimetableController(), new FileStorageController(fileStorage, database), + new AggregationAreaController(fileStorage, database, taskScheduler), // This broker controller registers at least one handler at URL paths beginning with /internal, which // is exempted from authentication and authorization, but should be hidden from the world // outside the cluster by the reverse proxy. Perhaps we should serve /internal on a separate @@ -102,7 +97,8 @@ public List standardHttpControllers () { // InternalHttpApi component with its own spark service, renaming this ExternalHttpApi. 
new BrokerController(broker, eventBus), new UserActivityController(taskScheduler), - new GtfsTileController(gtfsCache) + new GtfsTileController(gtfsCache), + new DataSourceController(fileStorage, database, taskScheduler, censusExtractor) ); } diff --git a/src/main/java/com/conveyal/analysis/components/HttpApi.java b/src/main/java/com/conveyal/analysis/components/HttpApi.java index a5315c728..6b188c290 100644 --- a/src/main/java/com/conveyal/analysis/components/HttpApi.java +++ b/src/main/java/com/conveyal/analysis/components/HttpApi.java @@ -1,7 +1,6 @@ package com.conveyal.analysis.components; import com.conveyal.analysis.AnalysisServerException; -import com.conveyal.r5.SoftwareVersion; import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.components.eventbus.ErrorEvent; import com.conveyal.analysis.components.eventbus.EventBus; @@ -9,8 +8,9 @@ import com.conveyal.analysis.controllers.HttpController; import com.conveyal.analysis.util.JsonUtil; import com.conveyal.file.FileStorage; +import com.conveyal.r5.SoftwareVersion; +import com.fasterxml.jackson.databind.node.ObjectNode; import org.apache.commons.fileupload.FileUploadException; -import org.json.simple.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import spark.Request; @@ -22,7 +22,9 @@ import java.util.List; import static com.conveyal.analysis.AnalysisServerException.Type.BAD_REQUEST; +import static com.conveyal.analysis.AnalysisServerException.Type.FORBIDDEN; import static com.conveyal.analysis.AnalysisServerException.Type.RUNTIME; +import static com.conveyal.analysis.AnalysisServerException.Type.UNAUTHORIZED; import static com.conveyal.analysis.AnalysisServerException.Type.UNKNOWN; /** @@ -36,12 +38,11 @@ public class HttpApi implements Component { // These "attributes" are attached to an incoming HTTP request with String keys, making them available in handlers private static final String REQUEST_START_TIME_ATTRIBUTE = "requestStartTime"; public static final String USER_PERMISSIONS_ATTRIBUTE = "permissions"; - public static final String USER_EMAIL_ATTRIBUTE = "email"; - public static final String USER_GROUP_ATTRIBUTE = "accessGroup"; public interface Config { boolean offline (); // TODO remove this parameter, use different Components types instead int serverPort (); + String allowOrigin (); } private final FileStorage fileStorage; @@ -82,29 +83,29 @@ private spark.Service configureSparkService () { // Record when the request started, so we can measure elapsed response time. req.attribute(REQUEST_START_TIME_ATTRIBUTE, Instant.now()); - // Don't require authentication to view the main page, or for internal API endpoints contacted by workers. - // FIXME those internal endpoints should be hidden from the outside world by the reverse proxy. - // Or now with non-static Spark we can run two HTTP servers on different ports. - - // Set CORS headers, to allow requests to this API server from any page. - res.header("Access-Control-Allow-Origin", "*"); + // Set CORS headers to allow requests to this API server from a frontend hosted on a different domain. + // This used to be hardwired to Access-Control-Allow-Origin: * but that leaves the server open to XSRF + // attacks when authentication is disabled (e.g. when running locally). + res.header("Access-Control-Allow-Origin", config.allowOrigin()); + // For caching, signal to the browser that responses may be different based on origin. + // TODO clarify why this is important, considering that normally all requests come from the same origin. 
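+            // Vary: Origin tells any browser or intermediate cache that the response depends on the request's Origin header,
+            // so a cached response carrying one origin's Access-Control-Allow-Origin value is not reused for another origin.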
+ res.header("Vary", "Origin"); // The default MIME type is JSON. This will be overridden by the few controllers that do not return JSON. res.type("application/json"); // Do not require authentication for internal API endpoints contacted by workers or for OPTIONS requests. + // FIXME those internal endpoints should be hidden from the outside world by the reverse proxy. + // Or now with non-static Spark we can run two HTTP servers on different ports. String method = req.requestMethod(); String pathInfo = req.pathInfo(); boolean authorize = pathInfo.startsWith("/api") && !"OPTIONS".equalsIgnoreCase(method); if (authorize) { // Determine which user is sending the request, and which permissions that user has. // This method throws an exception if the user cannot be authenticated. - // Store the resulting permissions object in the request so it can be examined by any handler. UserPermissions userPermissions = authentication.authenticate(req); + // Store the resulting permissions object in the request so it can be examined by any handler. req.attribute(USER_PERMISSIONS_ATTRIBUTE, userPermissions); - // TODO stop using these two separate attributes, and use the permissions object directly - req.attribute(USER_EMAIL_ATTRIBUTE, userPermissions.email); - req.attribute(USER_GROUP_ATTRIBUTE, userPermissions.accessGroup); } }); @@ -114,12 +115,16 @@ private spark.Service configureSparkService () { Instant requestStartTime = req.attribute(REQUEST_START_TIME_ATTRIBUTE); Duration elapsed = Duration.between(requestStartTime, Instant.now()); eventBus.send(new HttpApiEvent(req.requestMethod(), res.status(), req.pathInfo(), elapsed.toMillis()) - .forUser(req.attribute(USER_PERMISSIONS_ATTRIBUTE))); + .forUser(UserPermissions.from(req))); }); // Handle CORS preflight requests (which are OPTIONS requests). + // See comment above about Access-Control-Allow-Origin sparkService.options("/*", (req, res) -> { + // Cache the preflight response for up to one day (the maximum allowed by browsers) + res.header("Access-Control-Max-Age", "86400"); res.header("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS"); + // Allowing credentials is necessary to send an Authorization header res.header("Access-Control-Allow-Credentials", "true"); res.header("Access-Control-Allow-Headers", "Accept,Authorization,Content-Type,Origin," + "X-Requested-With,Content-Length,X-Conveyal-Access-Group" @@ -137,20 +142,7 @@ private spark.Service configureSparkService () { // Can we consolidate all these exception handlers and get rid of the hard-wired "BAD_REQUEST" parameters? sparkService.exception(AnalysisServerException.class, (e, request, response) -> { - // Include a stack trace, except when the error is known to be about unauthenticated or unauthorized access, - // in which case we don't want to leak information about the server to people scanning it for weaknesses. 
- if (e.type == AnalysisServerException.Type.UNAUTHORIZED || - e.type == AnalysisServerException.Type.FORBIDDEN - ){ - JSONObject body = new JSONObject(); - body.put("type", e.type.toString()); - body.put("message", e.message); - response.status(e.httpCode); - response.type("application/json"); - response.body(body.toJSONString()); - } else { - respondToException(e, request, response, e.type, e.message, e.httpCode); - } + respondToException(e, request, response, e.type, e.message, e.httpCode); }); sparkService.exception(IOException.class, (e, request, response) -> { @@ -176,17 +168,21 @@ private void respondToException(Exception e, Request request, Response response, AnalysisServerException.Type type, String message, int code) { // Stacktrace in ErrorEvent reused below to avoid repeatedly generating String of stacktrace. - ErrorEvent errorEvent = new ErrorEvent(e); + ErrorEvent errorEvent = new ErrorEvent(e, request.pathInfo()); eventBus.send(errorEvent.forUser(request.attribute(USER_PERMISSIONS_ATTRIBUTE))); - JSONObject body = new JSONObject(); - body.put("type", type.toString()); - body.put("message", message); - body.put("stackTrace", errorEvent.stackTrace); + ObjectNode body = JsonUtil.objectNode() + .put("type", type.toString()) + .put("message", message); + // Include a stack trace except when the error is known to be about unauthenticated or unauthorized access, + // in which case we don't want to leak information about the server to people scanning it for weaknesses. + if (type != UNAUTHORIZED && type != FORBIDDEN) { + body.put("stackTrace", errorEvent.stackTrace); + } response.status(code); response.type("application/json"); - response.body(body.toJSONString()); + response.body(JsonUtil.toJsonString(body)); } // Maybe this should be done or called with a JVM shutdown hook diff --git a/src/main/java/com/conveyal/analysis/components/broker/Broker.java b/src/main/java/com/conveyal/analysis/components/broker/Broker.java index e9c0221da..b701f1a47 100644 --- a/src/main/java/com/conveyal/analysis/components/broker/Broker.java +++ b/src/main/java/com/conveyal/analysis/components/broker/Broker.java @@ -338,7 +338,12 @@ public synchronized void markTaskCompleted (Job job, int taskId) { } } - /** This method ensures synchronization of writes to Jobs from the unsynchronized worker poll HTTP handler. */ + /** + * When job.errors is non-empty, job.isErrored() becomes true and job.isActive() becomes false. + * The Job will stop delivering tasks, allowing workers to shut down, but will continue to exist allowing the user + * to see the error message. User will then need to manually delete it, which will remove the result assembler. + * This method ensures synchronization of writes to Jobs from the unsynchronized worker poll HTTP handler. + */ private synchronized void recordJobError (Job job, String error) { job.errors.add(error); } @@ -446,14 +451,13 @@ public void handleRegionalWorkResult(RegionalWorkResult workResult) { job = findJob(workResult.jobId); assembler = resultAssemblers.get(workResult.jobId); } - if (job == null || assembler == null) { - // This will happen naturally for all delivered tasks when a job is deleted by the user. - LOG.debug("Received result for unrecognized job ID {}, discarding.", workResult.jobId); + if (job == null || assembler == null || !job.isActive()) { + // This will happen naturally for all delivered tasks when a job is deleted by the user or after it errors. 
+ LOG.debug("Received result for unrecognized, deleted, or inactive job ID {}, discarding.", workResult.jobId); return; } if (workResult.error != null) { - // Just record the error reported by the worker and don't pass the result on to regional result assembly. - // The Job will stop delivering tasks, allowing workers to shut down. User will need to manually delete it. + // Record any error reported by the worker and don't pass the (bad) result on to regional result assembly. recordJobError(job, workResult.error); return; } @@ -488,15 +492,6 @@ private void requestExtraWorkersIfAppropriate(Job job) { } } - public File getPartialRegionalAnalysisResults (String jobId) { - MultiOriginAssembler resultAssembler = resultAssemblers.get(jobId); - if (resultAssembler == null) { - return null; - } else { - return null; // Was: resultAssembler.getGridBufferFile(); TODO implement fetching partially completed? - } - } - public synchronized boolean anyJobsActive () { for (Job job : jobs.values()) { if (job.isActive()) return true; diff --git a/src/main/java/com/conveyal/analysis/components/broker/WorkerCatalog.java b/src/main/java/com/conveyal/analysis/components/broker/WorkerCatalog.java index 0a21ac086..c91ca7ae8 100644 --- a/src/main/java/com/conveyal/analysis/components/broker/WorkerCatalog.java +++ b/src/main/java/com/conveyal/analysis/components/broker/WorkerCatalog.java @@ -12,6 +12,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; /** * A catalog of all the workers this broker has been contacted by recently. @@ -164,7 +165,7 @@ public synchronized boolean noWorkersAvailable(WorkerCategory category, boolean purgeDeadWorkers(); if (ignoreWorkerVersion) { // Look for workers on the right network ID, independent of their worker software version. - return observationsByWorkerId.values().stream().noneMatch(obs -> obs.category.graphId.equals(category.graphId)); + return observationsByWorkerId.values().stream().noneMatch(obs -> Objects.equals(category.graphId, obs.category.graphId)); } return workerIdsByCategory.get(category).isEmpty(); } diff --git a/src/main/java/com/conveyal/analysis/components/broker/WorkerTags.java b/src/main/java/com/conveyal/analysis/components/broker/WorkerTags.java index ceb564694..8afb92ae6 100644 --- a/src/main/java/com/conveyal/analysis/components/broker/WorkerTags.java +++ b/src/main/java/com/conveyal/analysis/components/broker/WorkerTags.java @@ -1,11 +1,12 @@ package com.conveyal.analysis.components.broker; +import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.models.RegionalAnalysis; /** * An immutable group of tags to be added to the worker instance to assist in usage analysis and cost breakdowns. - * These Strings are purely for categorization of workers and should not be used for other purposes, only passed through - * to the AWS SDK. + * These Strings are purely for categorization of workers and should not be used for other purposes, + * only passed through to the AWS SDK. */ public class WorkerTags { @@ -15,16 +16,16 @@ public class WorkerTags { /** A unique ID for the user (the user's email address). */ public final String user; - /** The UUID for the project. */ - public final String projectId; - - /** The UUID for the project. */ + /** The UUID for the region. 
*/ public final String regionId; - public WorkerTags (String group, String user, String projectId, String regionId) { + public WorkerTags (UserPermissions userPermissions, String regionId) { + this(userPermissions.accessGroup, userPermissions.email, regionId); + } + + public WorkerTags (String group, String user, String regionId) { this.group = group; this.user = user; - this.projectId = projectId; this.regionId = regionId; } @@ -32,7 +33,6 @@ public static WorkerTags fromRegionalAnalysis (RegionalAnalysis regionalAnalysis return new WorkerTags( regionalAnalysis.accessGroup, regionalAnalysis.createdBy, - regionalAnalysis.projectId, regionalAnalysis.regionId ); } diff --git a/src/main/java/com/conveyal/analysis/components/eventbus/ErrorEvent.java b/src/main/java/com/conveyal/analysis/components/eventbus/ErrorEvent.java index bc86bebc0..24dc542f1 100644 --- a/src/main/java/com/conveyal/analysis/components/eventbus/ErrorEvent.java +++ b/src/main/java/com/conveyal/analysis/components/eventbus/ErrorEvent.java @@ -17,11 +17,62 @@ public class ErrorEvent extends Event { public final String summary; + /** + * The path portion of the HTTP URL, if the error has occurred while responding to an HTTP request from a user. + * May be null if this information is unavailable or unknown (in components where user information is not retained). + */ + public final String httpPath; + public final String stackTrace; - public ErrorEvent (Throwable throwable) { + public ErrorEvent (Throwable throwable, String httpPath) { this.summary = ExceptionUtils.shortCauseString(throwable); this.stackTrace = ExceptionUtils.stackTraceString(throwable); + this.httpPath = httpPath; + } + + public ErrorEvent (Throwable throwable) { + this(throwable, null); + } + + /** Return a string intended for logging on Slack or the console. 
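+     * With verbose=true the full stack trace is included; with verbose=false it is filtered down to the error summary
+     * line, the first stack frame, and the first com.conveyal stack frame (see filterStackTrace below), e.g.
+     * "User <email> of group <group> accessing <http path>: " followed by those lines.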
*/ + public String traceWithContext (boolean verbose) { + StringBuilder builder = new StringBuilder(); + if (user == null && accessGroup == null) { + builder.append("Unknown/unauthenticated user"); + } else { + builder.append("User "); + builder.append(user); + builder.append(" of group "); + builder.append(accessGroup); + } + if (httpPath != null) { + builder.append(" accessing "); + builder.append(httpPath); + } + builder.append(": "); + if (verbose) { + builder.append(stackTrace); + } else { + builder.append(filterStackTrace(stackTrace)); + } + return builder.toString(); + } + + private static String filterStackTrace (String stackTrace) { + if (stackTrace == null) return null; + final String unknownFrame = "Unknown stack frame, probably optimized out by JVM."; + String error = stackTrace.lines().findFirst().get(); + String frame = stackTrace.lines() + .map(String::strip) + .filter(s -> s.startsWith("at ")) + .findFirst().orElse(unknownFrame); + String conveyalFrame = stackTrace.lines() + .map(String::strip) + .filter(s -> s.startsWith("at com.conveyal.")) + .filter(s -> !frame.equals(s)) + .findFirst().orElse(""); + return String.join("\n", error, frame, conveyalFrame); } } diff --git a/src/main/java/com/conveyal/analysis/components/eventbus/ErrorLogger.java b/src/main/java/com/conveyal/analysis/components/eventbus/ErrorLogger.java index 539753aec..e1be9b004 100644 --- a/src/main/java/com/conveyal/analysis/components/eventbus/ErrorLogger.java +++ b/src/main/java/com/conveyal/analysis/components/eventbus/ErrorLogger.java @@ -15,7 +15,8 @@ public class ErrorLogger implements EventHandler { public void handleEvent (Event event) { if (event instanceof ErrorEvent) { ErrorEvent errorEvent = (ErrorEvent) event; - LOG.error("User {} of {}: {}", errorEvent.user, errorEvent.accessGroup, errorEvent.stackTrace); + // Verbose message (full stack traces) for console logs. + LOG.error(errorEvent.traceWithContext(true)); } } diff --git a/src/main/java/com/conveyal/analysis/components/eventbus/SinglePointEvent.java b/src/main/java/com/conveyal/analysis/components/eventbus/SinglePointEvent.java index a223a5a3b..bbce44c18 100644 --- a/src/main/java/com/conveyal/analysis/components/eventbus/SinglePointEvent.java +++ b/src/main/java/com/conveyal/analysis/components/eventbus/SinglePointEvent.java @@ -9,16 +9,16 @@ public class SinglePointEvent extends Event { // but also has a CRC since the scenario with a given index can change over time. 
public final String scenarioId; - public final String projectId; + public final String bundleId; - public final int variant; + public final String regionId; public final int durationMsec; - public SinglePointEvent (String scenarioId, String projectId, int variant, int durationMsec) { + public SinglePointEvent (String scenarioId, String bundleId, String regionId, int durationMsec) { this.scenarioId = scenarioId; - this.projectId = projectId; - this.variant = variant; + this.bundleId = bundleId; + this.regionId = regionId; this.durationMsec = durationMsec; } @@ -26,8 +26,8 @@ public SinglePointEvent (String scenarioId, String projectId, int variant, int d public String toString () { return "SinglePointEvent{" + "scenarioId='" + scenarioId + '\'' + - ", projectId='" + projectId + '\'' + - ", variant=" + variant + + ", regionId='" + regionId + '\'' + + ", bundleId=" + bundleId + ", durationMsec=" + durationMsec + ", user='" + user + '\'' + ", accessGroup=" + accessGroup + diff --git a/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java b/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java index faca62d73..286fa5593 100644 --- a/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java +++ b/src/main/java/com/conveyal/analysis/controllers/AggregationAreaController.java @@ -1,223 +1,114 @@ package com.conveyal.analysis.controllers; -import com.conveyal.analysis.AnalysisServerException; +import com.conveyal.analysis.UserPermissions; +import com.conveyal.analysis.components.TaskScheduler; +import com.conveyal.analysis.datasource.derivation.AggregationAreaDerivation; +import com.conveyal.analysis.datasource.derivation.DataDerivation; import com.conveyal.analysis.models.AggregationArea; -import com.conveyal.analysis.persistence.Persistence; -import com.conveyal.analysis.util.HttpUtils; +import com.conveyal.analysis.models.DataSource; +import com.conveyal.analysis.persistence.AnalysisCollection; +import com.conveyal.analysis.persistence.AnalysisDB; +import com.conveyal.analysis.util.JsonUtil; import com.conveyal.file.FileStorage; -import com.conveyal.file.FileStorageKey; -import com.conveyal.file.FileUtils; -import com.conveyal.r5.analyst.Grid; -import com.conveyal.r5.util.ShapefileReader; -import com.google.common.io.Files; -import com.mongodb.QueryBuilder; -import org.apache.commons.fileupload.FileItem; -import org.json.simple.JSONObject; -import org.locationtech.jts.geom.Envelope; -import org.locationtech.jts.geom.Geometry; -import org.locationtech.jts.operation.union.UnaryUnionOp; -import org.opengis.feature.simple.SimpleFeature; +import com.conveyal.r5.analyst.progress.Task; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.bson.conversions.Bson; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import spark.Request; import spark.Response; -import java.io.File; -import java.io.IOException; -import java.io.OutputStream; -import java.text.MessageFormat; +import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import java.util.zip.GZIPOutputStream; import static com.conveyal.analysis.util.JsonUtil.toJson; -import static com.conveyal.file.FileCategory.GRIDS; -import static com.conveyal.r5.analyst.WebMercatorGridPointSet.parseZoom; +import static com.google.common.base.Preconditions.checkNotNull; +import static com.mongodb.client.model.Filters.and; 
+import static com.mongodb.client.model.Filters.eq; /** * Stores vector aggregationAreas (used to define the region of a weighted average accessibility metric). */ public class AggregationAreaController implements HttpController { - private static final Logger LOG = LoggerFactory.getLogger(AggregationAreaController.class); - - /** - * Arbitrary limit to prevent UI clutter from many aggregation areas (e.g. if someone uploads thousands of blocks). - * Someone might reasonably request an aggregation area for each of Chicago's 50 wards, so that's a good approximate - * limit for now. - */ - private static int MAX_FEATURES = 100; + private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final FileStorage fileStorage; + private final AnalysisDB analysisDb; + private final TaskScheduler taskScheduler; - public AggregationAreaController (FileStorage fileStorage) { + private final AnalysisCollection dataSourceCollection; + private final AnalysisCollection aggregationAreaCollection; + + public AggregationAreaController ( + FileStorage fileStorage, + AnalysisDB database, + TaskScheduler taskScheduler + ) { this.fileStorage = fileStorage; + this.analysisDb = database; + this.taskScheduler = taskScheduler; + dataSourceCollection = database.getAnalysisCollection("dataSources", DataSource.class); + aggregationAreaCollection = database.getAnalysisCollection("aggregationAreas", AggregationArea.class); } - private FileStorageKey getStoragePath (AggregationArea area) { - return new FileStorageKey(GRIDS, area.getS3Key()); + /** + * Create binary .grid files for aggregation (aka mask) areas, save them to FileStorage, and persist their metadata + * to Mongo. The supplied request (req) must include query parameters specifying the dataSourceId of a + * SpatialDataSource containing the polygonal aggregation area geometries. If the mergePolygons query parameter is + * supplied and is true, all polygons will be merged into one large (multi)polygon aggregation area. + * If the mergePolygons query parameter is not supplied or is false, the nameProperty query parameter must be + * the name of a text attribute in that SpatialDataSource. One aggregation area will be created for each polygon, + * drawing the names from that attribute. + * @return the ID of the Task representing the enqueued background action that will create the aggregation areas. + */ + private String createAggregationAreas (Request req, Response res) throws Exception { + // Create and enqueue an asynchronous background action to derive aggregation areas from a spatial data source. + // The constructor will extract query parameters and range check them (not ideal separation, but it works). + DataDerivation derivation = AggregationAreaDerivation.fromRequest(req, fileStorage, analysisDb); + Task backgroundTask = Task.create("Aggregation area creation: " + derivation.dataSource().name) + .forUser(UserPermissions.from(req)) + .setHeavy(true) + .withAction(derivation); + + taskScheduler.enqueue(backgroundTask); + return backgroundTask.id.toString(); } /** - * Create binary .grid files for aggregation (aka mask) areas, save them to S3, and persist their details. - * @param req Must include a shapefile on which the aggregation area(s) will be based. - * If HTTP query parameter union is "true", features will be merged to a single aggregation area, named - using the value of the "name" query parameter.
If union is false or if the parameter is missing, each - * feature will be a separate aggregation area, named using the value for the shapefile property - * specified by the HTTP query parameter "nameAttribute." + * Get all aggregation area documents meeting the supplied criteria. + * The request must contain a query parameter for the regionId or the dataGroupId or both. */ - private List createAggregationAreas (Request req, Response res) throws Exception { - ArrayList aggregationAreas = new ArrayList<>(); - Map> query = HttpUtils.getRequestFiles(req.raw()); - - // 1. Extract relevant files: .shp, .prj, .dbf, and .shx. ====================================================== - Map filesByName = query.get("files").stream() - .collect(Collectors.toMap(FileItem::getName, f -> f)); - - String fileName = filesByName.keySet().stream().filter(f -> f.endsWith(".shp")).findAny().orElse(null); - if (fileName == null) { - throw AnalysisServerException.fileUpload("Shapefile upload must contain .shp, .prj, and .dbf"); - } - String baseName = fileName.substring(0, fileName.length() - 4); - - if (!filesByName.containsKey(baseName + ".shp") || - !filesByName.containsKey(baseName + ".prj") || - !filesByName.containsKey(baseName + ".dbf")) { - throw AnalysisServerException.fileUpload("Shapefile upload must contain .shp, .prj, and .dbf"); - } - - String regionId = req.params("regionId"); - - File tempDir = Files.createTempDir(); - - File shpFile = new File(tempDir, "grid.shp"); - filesByName.get(baseName + ".shp").write(shpFile); - - File prjFile = new File(tempDir, "grid.prj"); - filesByName.get(baseName + ".prj").write(prjFile); - - File dbfFile = new File(tempDir, "grid.dbf"); - filesByName.get(baseName + ".dbf").write(dbfFile); - - // shx is optional, not needed for dense shapefiles - if (filesByName.containsKey(baseName + ".shx")) { - File shxFile = new File(tempDir, "grid.shx"); - filesByName.get(baseName + ".shx").write(shxFile); - } - - // 2. Read features ============================================================================================ - ShapefileReader reader = null; - List features; - try { - reader = new ShapefileReader(shpFile); - features = reader.wgs84Stream().collect(Collectors.toList()); - } finally { - if (reader != null) reader.close(); - } - - - Map areas = new HashMap<>(); - - boolean unionRequested = Boolean.parseBoolean(query.get("union").get(0).getString()); - String zoomString = query.get("zoom") == null ? null : query.get("zoom").get(0).getString(); - final int zoom = parseZoom(zoomString); - - if (!unionRequested && features.size() > MAX_FEATURES) { - throw AnalysisServerException.fileUpload(MessageFormat.format("The uploaded shapefile has {0} features, " + - "which exceeds the limit of {1}", features.size(), MAX_FEATURES)); + private Collection getAggregationAreas (Request req, Response res) { + List filters = new ArrayList<>(); + String regionId = req.queryParams("regionId"); + if (regionId != null) { + filters.add(eq("regionId", regionId)); } - - if (unionRequested) { - // Union (single combined aggregation area) requested - List geometries = features.stream().map(f -> (Geometry) f.getDefaultGeometry()).collect(Collectors.toList()); - UnaryUnionOp union = new UnaryUnionOp(geometries); - // Name the area using the name in the request directly - String maskName = query.get("name").get(0).getString("UTF-8"); - areas.put(maskName, union.union()); - } else { - // Don't union. Name each area by looking up its value for the name property in the request. 
- String nameProperty = query.get("nameProperty").get(0).getString("UTF-8"); - features.forEach(f -> areas.put(readProperty(f, nameProperty), (Geometry) f.getDefaultGeometry())); + String dataGroupId = req.queryParams("dataGroupId"); + if (dataGroupId != null) { + filters.add(eq("dataGroupId", dataGroupId)); } - // 3. Convert to raster grids, then store them. ================================================================ - areas.forEach((String name, Geometry geometry) -> { - Envelope env = geometry.getEnvelopeInternal(); - Grid maskGrid = new Grid(zoom, env); - - // Store the percentage each cell overlaps the mask, scaled as 0 to 100,000 - List weights = maskGrid.getPixelWeights(geometry, true); - weights.forEach(pixel -> maskGrid.grid[pixel.x][pixel.y] = pixel.weight * 100_000); - - AggregationArea aggregationArea = new AggregationArea(); - aggregationArea.name = name; - aggregationArea.regionId = regionId; - - // Set `createdBy` and `accessGroup` - aggregationArea.accessGroup = req.attribute("accessGroup"); - aggregationArea.createdBy = req.attribute("email"); - - try { - File gridFile = FileUtils.createScratchFile("grid"); - OutputStream os = new GZIPOutputStream(FileUtils.getOutputStream(gridFile)); - maskGrid.write(os); - os.close(); - - // Create the aggregation area before generating the S3 key so that the `_id` is generated - Persistence.aggregationAreas.create(aggregationArea); - aggregationAreas.add(aggregationArea); - - fileStorage.moveIntoStorage(getStoragePath(aggregationArea), gridFile); - } catch (IOException e) { - throw new AnalysisServerException("Error processing/uploading aggregation area"); - } - - tempDir.delete(); - }); - - return aggregationAreas; - } - - private String readProperty (SimpleFeature feature, String propertyName) { - try { - return feature.getProperty(propertyName).getValue().toString(); - } catch (NullPointerException e) { - String message = String.format("The specified property '%s' was not present on the uploaded features. " + - "Please verify that '%s' corresponds to a shapefile column.", propertyName, propertyName); - throw new AnalysisServerException(message); + if (filters.isEmpty()) { + throw new IllegalArgumentException("You must supply either a regionId or a dataGroupId or both."); } + return aggregationAreaCollection.findPermitted(and(filters), UserPermissions.from(req)); } - private Collection getAggregationAreas (Request req, Response res) { - return Persistence.aggregationAreas.findPermitted( - QueryBuilder.start("regionId").is(req.params("regionId")).get(), - req.attribute("accessGroup") - ); - } - - private Object getAggregationArea (Request req, Response res) { - final String accessGroup = req.attribute("accessGroup"); - final String maskId = req.params("maskId"); - - AggregationArea aggregationArea = Persistence.aggregationAreas.findByIdIfPermitted(maskId, accessGroup); - - String url = fileStorage.getURL(getStoragePath(aggregationArea)); - JSONObject wrappedUrl = new JSONObject(); - wrappedUrl.put("url", url); - - return wrappedUrl; + /** Returns a JSON-wrapped URL for the mask grid of the aggregation area whose id matches the path parameter. 
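+     * The response body has the shape {"url": "<download URL for the stored grid>"}.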
*/ + private ObjectNode getAggregationAreaGridUrl (Request req, Response res) { + AggregationArea aggregationArea = aggregationAreaCollection.findPermittedByRequestParamId(req); + String url = fileStorage.getURL(aggregationArea.getStorageKey()); + return JsonUtil.objectNode().put("url", url); } @Override public void registerEndpoints (spark.Service sparkService) { - sparkService.path("/api/region/", () -> { - sparkService.get("/:regionId/aggregationArea", this::getAggregationAreas, toJson); - sparkService.get("/:regionId/aggregationArea/:maskId", this::getAggregationArea, toJson); - sparkService.post("/:regionId/aggregationArea", this::createAggregationAreas, toJson); - }); + sparkService.get("/api/aggregationArea", this::getAggregationAreas, toJson); + sparkService.get("/api/aggregationArea/:_id", this::getAggregationAreaGridUrl, toJson); + sparkService.post("/api/aggregationArea", this::createAggregationAreas, toJson); } } diff --git a/src/main/java/com/conveyal/analysis/controllers/BrokerController.java b/src/main/java/com/conveyal/analysis/controllers/BrokerController.java index 39f9cd18f..211a5ddb6 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BrokerController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BrokerController.java @@ -11,7 +11,6 @@ import com.conveyal.analysis.models.AnalysisRequest; import com.conveyal.analysis.models.Bundle; import com.conveyal.analysis.models.OpportunityDataset; -import com.conveyal.analysis.models.Project; import com.conveyal.analysis.persistence.Persistence; import com.conveyal.analysis.util.HttpStatus; import com.conveyal.analysis.util.JsonUtil; @@ -27,12 +26,12 @@ import com.google.common.collect.ImmutableMap; import com.google.common.io.ByteStreams; import com.mongodb.QueryBuilder; -import com.sun.net.httpserver.Headers; import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpPost; +import org.apache.http.conn.HttpHostConnectException; import org.apache.http.entity.ByteArrayEntity; import org.apache.http.util.EntityUtils; import org.mongojack.DBCursor; @@ -126,16 +125,23 @@ private Object singlePoint(Request request, Response response) { // Deserialize the task in the request body so we can see what kind of worker it wants. // Perhaps we should allow both travel time surface and accessibility calculation tasks to be done as single points. // AnalysisRequest (backend) vs. AnalysisTask (R5) - // The accessgroup stuff is copypasta from the old single point controller. // We already know the user is authenticated, and we need not check if they have access to the graphs etc, // as they're all coded with UUIDs which contain significantly more entropy than any human's account password. - final String accessGroup = request.attribute("accessGroup"); - final String userEmail = request.attribute("email"); + UserPermissions userPermissions = UserPermissions.from(request); final long startTimeMsec = System.currentTimeMillis(); + AnalysisRequest analysisRequest = objectFromRequestBody(request, AnalysisRequest.class); - Project project = Persistence.projects.findByIdIfPermitted(analysisRequest.projectId, accessGroup); + // Some parameters like regionId weren't sent by older frontends. Fail fast on missing parameters. 
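+        // Guava's checkNotNull throws an immediate NullPointerException for any missing field, rather than letting the
+        // request fail later in a less obvious place.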
+ checkNotNull(analysisRequest.regionId); + checkNotNull(analysisRequest.projectId); + checkNotNull(analysisRequest.bundleId); + checkNotNull(analysisRequest.modificationIds); + checkNotNull(analysisRequest.workerVersion); + // Transform the analysis UI/backend task format into a slightly different type for R5 workers. - TravelTimeSurfaceTask task = (TravelTimeSurfaceTask) analysisRequest.populateTask(new TravelTimeSurfaceTask(), project); + TravelTimeSurfaceTask task = new TravelTimeSurfaceTask(); + analysisRequest.populateTask(task, userPermissions); + // If destination opportunities are supplied, prepare to calculate accessibility worker-side if (notNullOrEmpty(analysisRequest.destinationPointSetIds)){ // Look up all destination opportunity data sets from the database and derive their storage keys. @@ -146,7 +152,7 @@ private Object singlePoint(Request request, Response response) { for (String destinationPointSetId : analysisRequest.destinationPointSetIds) { OpportunityDataset opportunityDataset = Persistence.opportunityDatasets.findByIdIfPermitted( destinationPointSetId, - accessGroup + userPermissions ); checkNotNull(opportunityDataset, "Opportunity dataset could not be found in database."); opportunityDatasets.add(opportunityDataset); @@ -170,7 +176,7 @@ private Object singlePoint(Request request, Response response) { String address = broker.getWorkerAddress(workerCategory); if (address == null) { // There are no workers that can handle this request. Request some. - WorkerTags workerTags = new WorkerTags(accessGroup, userEmail, project._id, project.regionId); + WorkerTags workerTags = new WorkerTags(userPermissions, analysisRequest.regionId); broker.createOnDemandWorkerInCategory(workerCategory, workerTags); // No workers exist. Kick one off and return "service unavailable". response.header("Retry-After", "30"); @@ -180,7 +186,8 @@ private Object singlePoint(Request request, Response response) { // FIXME the tracking of which workers are starting up should really be encapsulated using a "start up if needed" method. broker.recentlyRequestedWorkers.remove(workerCategory); } - String workerUrl = "http://" + address + ":7080/single"; // TODO remove hard-coded port number. + // Port number is hard-coded until we have a good reason to make it configurable. + String workerUrl = "http://" + address + ":7080/single"; LOG.debug("Re-issuing HTTP request from UI to worker at {}", workerUrl); HttpPost httpPost = new HttpPost(workerUrl); // httpPost.setHeader("Accept", "application/x-analysis-time-grid"); @@ -207,11 +214,11 @@ private Object singlePoint(Request request, Response response) { if (response.status() == 200) { int durationMsec = (int) (System.currentTimeMillis() - startTimeMsec); eventBus.send(new SinglePointEvent( - task.scenarioId, - analysisRequest.projectId, - analysisRequest.variantIndex, + analysisRequest.scenarioId, + analysisRequest.bundleId, + analysisRequest.regionId, durationMsec - ).forUser(userEmail, accessGroup) + ).forUser(userPermissions) ); } // If you return a stream to the Spark Framework, its SerializerChain will copy that stream out to the @@ -228,7 +235,15 @@ private Object singlePoint(Request request, Response response) { "complexity of this scenario, your request may have too many simulated schedules. 
If you are " + "using Routing Engine version < 4.5.1, your scenario may still be in preparation and you should " + "try again in a few minutes."); - } catch (NoRouteToHostException nrthe){ + } catch (NoRouteToHostException | HttpHostConnectException e) { + // NoRouteToHostException occurs when a single-point worker shuts down (normally due to inactivity) but is + // not yet removed from the worker catalog. + // HttpHostConnectException has also been observed, presumably after a worker shuts down and a new one + // starts up but claims the same IP address as the defunct single point worker. + // Yet another even rarer case is possible, where a single point worker starts for a different network and + // is assigned the same IP as the defunct worker. + // All these cases could be avoided by more rapidly removing workers from the catalog via frequent regular + // polling with backpressure, potentially including an "I'm shutting down" flag. LOG.warn("Worker in category {} was previously cataloged but is not reachable now. This is expected if a " + "user made a single-point request within WORKER_RECORD_DURATION_MSEC after shutdown.", workerCategory); httpPost.abort(); @@ -366,7 +381,7 @@ private static T objectFromRequestBody (Request request, Class classe) { } private static void enforceAdmin (Request request) { - if (!request.attribute("permissions").admin) { + if (!UserPermissions.from(request).admin) { throw AnalysisServerException.forbidden("You do not have access."); } } diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index 68d4d28a7..2cb8afd63 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -8,15 +8,16 @@ import com.conveyal.analysis.persistence.Persistence; import com.conveyal.analysis.util.HttpUtils; import com.conveyal.analysis.util.JsonUtil; -import com.conveyal.r5.analyst.progress.ProgressInputStream; import com.conveyal.file.FileStorage; import com.conveyal.file.FileStorageKey; import com.conveyal.file.FileUtils; import com.conveyal.gtfs.GTFSCache; import com.conveyal.gtfs.GTFSFeed; import com.conveyal.gtfs.model.Stop; +import com.conveyal.osmlib.Node; import com.conveyal.osmlib.OSM; import com.conveyal.r5.analyst.cluster.BundleManifest; +import com.conveyal.r5.analyst.progress.ProgressInputStream; import com.conveyal.r5.analyst.progress.Task; import com.conveyal.r5.streets.OSMCache; import com.conveyal.r5.util.ExceptionUtils; @@ -43,10 +44,10 @@ import java.util.stream.Collectors; import java.util.zip.ZipFile; -import static com.conveyal.analysis.components.HttpApi.USER_PERMISSIONS_ATTRIBUTE; -import static com.conveyal.r5.analyst.progress.WorkProductType.BUNDLE; import static com.conveyal.analysis.util.JsonUtil.toJson; import static com.conveyal.file.FileCategory.BUNDLES; +import static com.conveyal.r5.analyst.progress.WorkProductType.BUNDLE; +import static com.conveyal.r5.common.GeometryUtils.checkWgsEnvelopeSize; /** * This Controller provides HTTP REST endpoints for manipulating Bundles. 
Bundles are sets of GTFS feeds and OSM @@ -134,8 +135,9 @@ private Bundle create (Request req, Response res) { bundle.feedsComplete = bundleWithFeed.feedsComplete; bundle.totalFeeds = bundleWithFeed.totalFeeds; } - bundle.accessGroup = req.attribute("accessGroup"); - bundle.createdBy = req.attribute("email"); + UserPermissions userPermissions = UserPermissions.from(req); + bundle.accessGroup = userPermissions.accessGroup; + bundle.createdBy = userPermissions.email; } catch (Exception e) { throw AnalysisServerException.badRequest(ExceptionUtils.stackTraceString(e)); } @@ -146,7 +148,7 @@ private Bundle create (Request req, Response res) { // Submit all slower work for asynchronous processing on the backend, then immediately return the partially // constructed bundle from the HTTP handler. Process OSM first, then each GTFS feed sequentially. - UserPermissions userPermissions = req.attribute(USER_PERMISSIONS_ATTRIBUTE); + final UserPermissions userPermissions = UserPermissions.from(req); taskScheduler.enqueue(Task.create("Processing bundle " + bundle.name) .forUser(userPermissions) .setHeavy(true) @@ -164,6 +166,13 @@ private Bundle create (Request req, Response res) { // Wrapping in buffered input stream should reduce number of progress updates. osm.readPbf(ProgressInputStream.forFileItem(fi, progressListener)); // osm.readPbf(new BufferedInputStream(fi.getInputStream())); + Envelope osmBounds = new Envelope(); + for (Node n : osm.nodes.values()) { + osmBounds.expandToInclude(n.getLon(), n.getLat()); + } + osm.close(); + checkWgsEnvelopeSize(osmBounds, "OSM data"); + // Store the source OSM file. Note that we're not storing the derived MapDB file here. fileStorage.moveIntoStorage(osmCache.getKey(bundle.osmId), fi.getStoreLocation()); } @@ -196,6 +205,7 @@ private Bundle create (Request req, Response res) { for (Stop s : feed.stops.values()) { bundleBounds.expandToInclude(s.stop_lon, s.stop_lat); } + checkWgsEnvelopeSize(bundleBounds, "GTFS data"); if (bundle.serviceStart.isAfter(feedSummary.serviceStart)) { bundle.serviceStart = feedSummary.serviceStart; @@ -211,7 +221,7 @@ private Bundle create (Request req, Response res) { } catch (IOException e) { throw new RuntimeException(e); } - // Save some space in the MapDB after we've summarized the errors to Mongo and a JSON file. + // Release some memory after we've summarized the errors to Mongo and a JSON file. feed.errors.clear(); // Flush db files to disk @@ -226,7 +236,6 @@ private Bundle create (Request req, Response res) { // Set legacy progress field to indicate that all feeds have been loaded. 
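
Both the OSM and GTFS branches above now accumulate a WGS84 envelope and run it through checkWgsEnvelopeSize before continuing, so oversized uploads fail fast instead of producing an unusable network. The snippet below only illustrates that kind of guard, assuming the org.locationtech JTS Envelope; the real limit and exception type live in com.conveyal.r5.common.GeometryUtils and may differ.

    // Illustrative sketch only; numbers and messages are assumptions, not Conveyal's implementation.
    import org.locationtech.jts.geom.Envelope;

    class EnvelopeSizeGuardSketch {
        /** Reject data whose WGS84 bounding box is implausibly large (the limit below is an assumption). */
        static void checkWgsEnvelopeSize (Envelope envelope, String description) {
            // Rough degrees-to-kilometers conversion that ignores latitude distortion; enough for a sanity check.
            double areaSqKm = (envelope.getWidth() * 111) * (envelope.getHeight() * 111);
            if (areaSqKm > 975_000) {
                throw new IllegalArgumentException(
                        description + " covers roughly " + (long) areaSqKm + " square kilometers, exceeding the assumed limit.");
            }
        }
    }
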
bundle.feedsComplete = bundle.totalFeeds; - // TODO Handle crossing the antimeridian bundle.north = bundleBounds.getMaxY(); bundle.south = bundleBounds.getMinY(); bundle.east = bundleBounds.getMaxX(); @@ -265,7 +274,7 @@ private void writeManifestToCache (Bundle bundle) throws IOException { } private Bundle deleteBundle (Request req, Response res) throws IOException { - Bundle bundle = Persistence.bundles.removeIfPermitted(req.params("_id"), req.attribute("accessGroup")); + Bundle bundle = Persistence.bundles.removeIfPermitted(req.params("_id"), UserPermissions.from(req)); FileStorageKey key = new FileStorageKey(BUNDLES, bundle._id + ".zip"); fileStorage.delete(key); diff --git a/src/main/java/com/conveyal/analysis/controllers/DataSourceController.java b/src/main/java/com/conveyal/analysis/controllers/DataSourceController.java new file mode 100644 index 000000000..b0bf7c626 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/controllers/DataSourceController.java @@ -0,0 +1,147 @@ +package com.conveyal.analysis.controllers; + +import com.conveyal.analysis.UserPermissions; +import com.conveyal.analysis.components.TaskScheduler; +import com.conveyal.analysis.datasource.DataSourceUploadAction; +import com.conveyal.analysis.grids.SeamlessCensusGridExtractor; +import com.conveyal.analysis.models.DataSource; +import com.conveyal.analysis.models.GtfsDataSource; +import com.conveyal.analysis.models.OsmDataSource; +import com.conveyal.analysis.models.SpatialDataSource; +import com.conveyal.analysis.persistence.AnalysisCollection; +import com.conveyal.analysis.persistence.AnalysisDB; +import com.conveyal.analysis.util.HttpUtils; +import com.conveyal.file.FileStorage; +import com.conveyal.file.FileStorageKey; +import com.conveyal.r5.analyst.progress.Task; +import com.mongodb.client.result.DeleteResult; +import org.apache.commons.fileupload.FileItem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; + +import java.lang.invoke.MethodHandles; +import java.util.List; +import java.util.Map; + +import static com.conveyal.analysis.util.JsonUtil.toJson; +import static com.conveyal.file.FileCategory.DATASOURCES; +import static com.conveyal.file.FileStorageFormat.SHP; +import static com.conveyal.r5.analyst.WebMercatorGridPointSet.parseZoom; +import static com.mongodb.client.model.Filters.eq; + +/** + * Controller that handles CRUD of DataSources, which are Mongo metadata about user-uploaded files. + * Unlike some Mongo documents, these are mostly created and updated by backend validation and processing methods. + * Currently this handles only one subtype: SpatialDataSource, which represents GIS-like vector geospatial data. + */ +public class DataSourceController implements HttpController { + + private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + // Component Dependencies + private final FileStorage fileStorage; + private final TaskScheduler taskScheduler; + private final SeamlessCensusGridExtractor extractor; + + // Collection in the database holding all our DataSources, which can be of several subtypes. + private final AnalysisCollection dataSourceCollection; + + public DataSourceController ( + FileStorage fileStorage, + AnalysisDB database, + TaskScheduler taskScheduler, + SeamlessCensusGridExtractor extractor + ) { + this.fileStorage = fileStorage; + this.taskScheduler = taskScheduler; + this.extractor = extractor; + // We don't hold on to the AnalysisDB Component, just get one collection from it. 
+ // Register all the subclasses so the Mongo driver will recognize their discriminators. + // TODO should this be done once in AnalysisDB and the collection reused everywhere? Is that threadsafe? + this.dataSourceCollection = database.getAnalysisCollection( + "dataSources", DataSource.class, SpatialDataSource.class, OsmDataSource.class, GtfsDataSource.class + ); + } + + /** HTTP GET: Retrieve all DataSource records, filtered by the (required) regionId query parameter. */ + private List getAllDataSourcesForRegion (Request req, Response res) { + return dataSourceCollection.findPermitted( + eq("regionId", req.queryParams("regionId")), UserPermissions.from(req) + ); + } + + /** HTTP GET: Retrieve a single DataSource record by the ID supplied in the URL path parameter. */ + private DataSource getOneDataSourceById (Request req, Response res) { + return dataSourceCollection.findPermittedByRequestParamId(req); + } + + /** HTTP DELETE: Delete a single DataSource record and associated files in FileStorage by supplied ID parameter. */ + private String deleteOneDataSourceById (Request request, Response response) { + DataSource dataSource = dataSourceCollection.findPermittedByRequestParamId(request); + DeleteResult deleteResult = dataSourceCollection.delete(dataSource); + long nDeleted = deleteResult.getDeletedCount(); + // This will not delete the file if its extension when uploaded did not match the canonical one. + // Ideally we should normalize file extensions when uploaded, but it's a little tricky to handle SHP sidecars. + fileStorage.delete(dataSource.fileStorageKey()); + // This is so ad-hoc but it's not necessarily worth generalizing since SHP is the only format with sidecars. + if (dataSource.fileFormat == SHP) { + fileStorage.delete(new FileStorageKey(DATASOURCES, dataSource._id.toString(), "shx")); + fileStorage.delete(new FileStorageKey(DATASOURCES, dataSource._id.toString(), "dbf")); + fileStorage.delete(new FileStorageKey(DATASOURCES, dataSource._id.toString(), "prj")); + } + return "Deleted " + nDeleted; + } + + private SpatialDataSource downloadLODES(Request req, Response res) { + final String regionId = req.params("regionId"); + final int zoom = parseZoom(req.queryParams("zoom")); + UserPermissions userPermissions = UserPermissions.from(req); + SpatialDataSource source = new SpatialDataSource(userPermissions, extractor.sourceName); + source.regionId = regionId; + + taskScheduler.enqueue(Task.create("Extracting LODES data") + .forUser(userPermissions) + .setHeavy(true) + .withWorkProduct(source) + .withAction((progressListener) -> { + // TODO implement + throw new UnsupportedOperationException(); + })); + + return source; + } + + /** + * A file is posted to this endpoint to create a new DataSource. It is validated and metadata are extracted. + * The request should be a multipart/form-data POST request, containing uploaded files and associated parameters. + * In standard REST API style, a POST would return the ID of the newly created DataSource. Here we're starting an + * async background process, so we return the ID of the enqueued Task (rather than its work product, the DataSource). 
+ */ + private String handleUpload (Request req, Response res) { + final UserPermissions userPermissions = UserPermissions.from(req); + final Map> formFields = HttpUtils.getRequestFiles(req.raw()); + DataSourceUploadAction uploadAction = DataSourceUploadAction.forFormFields( + fileStorage, dataSourceCollection, formFields, userPermissions + ); + Task backgroundTask = Task.create("Processing uploaded files: " + uploadAction.getDataSourceName()) + .forUser(userPermissions) + .withAction(uploadAction); + + taskScheduler.enqueue(backgroundTask); + return backgroundTask.id.toString(); + } + + @Override + public void registerEndpoints (spark.Service sparkService) { + sparkService.path("/api/dataSource", () -> { + sparkService.get("/", this::getAllDataSourcesForRegion, toJson); + sparkService.get("/:_id", this::getOneDataSourceById, toJson); + sparkService.delete("/:_id", this::deleteOneDataSourceById, toJson); + sparkService.post("", this::handleUpload, toJson); + // regionId will be in query parameter + sparkService.post("/addLodesDataSource", this::downloadLODES, toJson); + }); + } +} diff --git a/src/main/java/com/conveyal/analysis/controllers/FileStorageController.java b/src/main/java/com/conveyal/analysis/controllers/FileStorageController.java index b41f7eeef..220f8be18 100644 --- a/src/main/java/com/conveyal/analysis/controllers/FileStorageController.java +++ b/src/main/java/com/conveyal/analysis/controllers/FileStorageController.java @@ -1,5 +1,6 @@ package com.conveyal.analysis.controllers; +import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.models.FileInfo; import com.conveyal.analysis.persistence.AnalysisCollection; import com.conveyal.analysis.persistence.AnalysisDB; @@ -21,6 +22,8 @@ /** * HTTP request handler methods allowing users to upload/download files from FileStorage implementations and CRUDing * metadata about those files in the database. + * NOTE: THIS CLASS IS UNUSED AND IS RETAINED FOR DOCUMENTATION PURPOSES - TO DEMONSTRATE HOW FILESTORAGE IS USED. + * In practice we don't need direct HTTP API access to FileStorage - it's always used in some more complex process. */ public class FileStorageController implements HttpController { @@ -59,12 +62,14 @@ public void registerEndpoints (Service sparkService) { * Find all associated FileInfo records for a region. */ private List findAllForRegion(Request req, Response res) { - return fileCollection.findPermitted(and(eq("regionId", req.queryParams("regionId"))), req.attribute("accessGroup")); + return fileCollection.findPermitted( + eq("regionId", req.queryParams("regionId")), UserPermissions.from(req) + ); } /** * Create the metadata object used to represent a file in FileStorage. Note: this does not handle the process of - * storing the file itself. See `addFile` for that. + * storing the file itself. See `uploadFile` for that. */ private FileInfo createFileInfo(Request req, Response res) throws IOException { FileInfo fileInfo = fileCollection.create(req, res); @@ -77,7 +82,7 @@ private FileInfo createFileInfo(Request req, Response res) throws IOException { * Remove the FileInfo record from the database and the file from the FileStorage. */ private boolean deleteFile(Request req, Response res) { - FileInfo file = fileCollection.findPermittedByRequestParamId(req, res); + FileInfo file = fileCollection.findPermittedByRequestParamId(req); fileStorage.delete(file.getKey()); return fileCollection.delete(file).wasAcknowledged(); } @@ -87,7 +92,7 @@ private boolean deleteFile(Request req, Response res) { * file. 
*/ private String generateDownloadURL(Request req, Response res) { - FileInfo file = fileCollection.findPermittedByRequestParamId(req, res); + FileInfo file = fileCollection.findPermittedByRequestParamId(req); res.type("text/plain"); return fileStorage.getURL(file.getKey()); } @@ -96,7 +101,7 @@ private String generateDownloadURL(Request req, Response res) { * Find FileInfo by passing in and _id and download the corresponding file by returning an InputStream. */ private InputStream downloadFile(Request req, Response res) throws IOException { - FileInfo fileInfo = fileCollection.findPermittedByRequestParamId(req, res); + FileInfo fileInfo = fileCollection.findPermittedByRequestParamId(req); File file = fileStorage.getFile(fileInfo.getKey()); res.type(fileInfo.format.mimeType); if (FileUtils.isGzip(file)) { @@ -110,13 +115,13 @@ private InputStream downloadFile(Request req, Response res) throws IOException { * file. */ private FileInfo uploadFile(Request req, Response res) throws Exception { - FileInfo fileInfo = fileCollection.findPermittedByRequestParamId(req, res); + FileInfo fileInfo = fileCollection.findPermittedByRequestParamId(req); File file = FileUtils.createScratchFile(req.raw().getInputStream()); fileStorage.moveIntoStorage(fileInfo.getKey(), file); // Set status to ready fileInfo.isReady = true; - fileInfo.updatedBy = req.attribute("email"); + fileInfo.updatedBy = UserPermissions.from(req).email; // Store changes to the file info fileCollection.update(fileInfo); diff --git a/src/main/java/com/conveyal/analysis/controllers/GTFSGraphQLController.java b/src/main/java/com/conveyal/analysis/controllers/GTFSGraphQLController.java deleted file mode 100644 index a3ba0a264..000000000 --- a/src/main/java/com/conveyal/analysis/controllers/GTFSGraphQLController.java +++ /dev/null @@ -1,208 +0,0 @@ -package com.conveyal.analysis.controllers; - -import com.conveyal.analysis.AnalysisServerException; -import com.conveyal.analysis.models.Bundle; -import com.conveyal.analysis.persistence.Persistence; -import com.conveyal.analysis.util.JsonUtil; -import com.conveyal.gtfs.GTFSCache; -import com.conveyal.gtfs.GTFSFeed; -import com.conveyal.gtfs.api.graphql.WrappedGTFSEntity; -import com.conveyal.gtfs.api.graphql.fetchers.RouteFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.StopFetcher; -import com.conveyal.gtfs.model.FeedInfo; -import com.fasterxml.jackson.core.type.TypeReference; -import com.google.common.util.concurrent.UncheckedExecutionException; -import com.mongodb.QueryBuilder; -import graphql.ExceptionWhileDataFetching; -import graphql.ExecutionResult; -import graphql.GraphQL; -import graphql.GraphQLError; -import graphql.execution.ExecutionContext; -import graphql.schema.DataFetchingEnvironment; -import graphql.schema.GraphQLEnumType; -import graphql.schema.GraphQLList; -import graphql.schema.GraphQLObjectType; -import graphql.schema.GraphQLSchema; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import spark.Request; -import spark.Response; - -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import static com.conveyal.analysis.controllers.BundleController.setBundleServiceDates; -import static com.conveyal.analysis.util.JsonUtil.toJson; -import static com.conveyal.gtfs.api.graphql.GraphQLGtfsSchema.routeType; -import static com.conveyal.gtfs.api.graphql.GraphQLGtfsSchema.stopType; -import static com.conveyal.gtfs.api.util.GraphQLUtil.multiStringArg; -import static 
com.conveyal.gtfs.api.util.GraphQLUtil.string; -import static graphql.Scalars.GraphQLLong; -import static graphql.schema.GraphQLEnumType.newEnum; -import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; -import static graphql.schema.GraphQLObjectType.newObject; - -/** - * GraphQL interface for fetching GTFS feed contents (generally used for scenario editing). - * For now it just wraps the GTFS API graphql response with a bundle object. - */ -public class GTFSGraphQLController implements HttpController { - - private static final Logger LOG = LoggerFactory.getLogger(GTFSGraphQLController.class); - - private final GTFSCache gtfsCache; - - public GTFSGraphQLController (GTFSCache gtfsCache) { - this.gtfsCache = gtfsCache; - } - - private Object handleQuery (Request req, Response res) throws IOException { - res.type("application/json"); - - Map variables = JsonUtil.objectMapper.readValue(req.queryParams("variables"), new TypeReference>() { - }); - - QueryContext context = new QueryContext(); - context.accessGroup = req.attribute("accessGroup"); - - ExecutionResult er = graphql.execute(req.queryParams("query"), null, context, variables); - - List errs = er.getErrors(); - errs.addAll(context.getErrors()); - if (!errs.isEmpty()) { - throw AnalysisServerException.graphQL(errs); - } - - return er.getData(); - } - - /** Special feed type that also includes checksum */ - public GraphQLObjectType feedType = newObject() - .name("feed") - .field(string("feed_id")) - .field(string("feed_publisher_name")) - .field(string("feed_publisher_url")) - .field(string("feed_lang")) - .field(string("feed_version")) - // We have a custom wrapped GTFS Entity type for FeedInfo that includes feed checksum - .field(newFieldDefinition() - .name("checksum") - .type(GraphQLLong) - .dataFetcher(env -> ((WrappedFeedInfo) env.getSource()).checksum) - .build() - ) - .field(newFieldDefinition() - .name("routes") - .type(new GraphQLList(routeType)) - .argument(multiStringArg("route_id")) - .dataFetcher(RouteFetcher::fromFeed) - .build() - ) - .field(newFieldDefinition() - .name("stops") - .type(new GraphQLList(stopType)) - .dataFetcher(StopFetcher::fromFeed) - .build() - ) - .build(); - - private GraphQLEnumType bundleStatus = newEnum() - .name("status") - .value("PROCESSING_GTFS", Bundle.Status.PROCESSING_GTFS) - .value("PROCESSING_OSM", Bundle.Status.PROCESSING_OSM) - .value("ERROR", Bundle.Status.ERROR) - .value("DONE", Bundle.Status.DONE) - .build(); - - private GraphQLObjectType bundleType = newObject() - .name("bundle") - .field(string("_id")) - .field(string("name")) - .field(newFieldDefinition() - .name("status") - .type(bundleStatus) - .dataFetcher((env) -> ((Bundle) env.getSource()).status) - .build() - ) - .field(newFieldDefinition() - .name("feeds") - .type(new GraphQLList(feedType)) - .dataFetcher(this::fetchFeeds) - .build() - ) - .build(); - - private GraphQLObjectType bundleQuery = newObject() - .name("bundleQuery") - .field(newFieldDefinition() - .name("bundle") - .type(new GraphQLList(bundleType)) - .argument(multiStringArg("bundle_id")) - .dataFetcher(this::fetchBundle) - .build() - ) - .build(); - - public GraphQLSchema schema = GraphQLSchema.newSchema().query(bundleQuery).build(); - private GraphQL graphql = new GraphQL(schema); - - private Collection fetchBundle(DataFetchingEnvironment environment) { - QueryContext context = (QueryContext) environment.getContext(); - return Persistence.bundles.findPermitted( - QueryBuilder.start("_id").in(environment.getArgument("bundle_id")).get(), - 
context.accessGroup - ); - } - - /** - * Returns a list of wrapped FeedInfo objects. These objects have all fields null (default), except feed_id, which - * is set from the cached copy of each requested feed. This method previously returned the fully populated FeedInfo - * objects, but incremental changes led to incompatibilities with Analysis (see analysis-internal #102). The - * current implementation is a stopgap for backward compatibility. - */ - private List> fetchFeeds(DataFetchingEnvironment environment) { - Bundle bundle = (Bundle) environment.getSource(); - ExecutionContext context = (ExecutionContext) environment.getContext(); - - // Old bundles were created without computing the service start and end dates. Will only compute if needed. - try { - setBundleServiceDates(bundle, gtfsCache); - } catch (Exception e) { - context.addError(new ExceptionWhileDataFetching(e)); - } - - return bundle.feeds.stream() - .map(summary -> { - String bundleScopedFeedId = Bundle.bundleScopeFeedId(summary.feedId, bundle.feedGroupId); - try { - GTFSFeed feed = gtfsCache.get(bundleScopedFeedId); - FeedInfo ret = new FeedInfo(); - ret.feed_id = feed.feedId; - return new WrappedFeedInfo(summary.bundleScopedFeedId, ret, summary.checksum); - } catch (UncheckedExecutionException nsee) { - Exception e = new Exception(String.format("Feed %s does not exist in the cache.", summary.name), nsee); - context.addError(new ExceptionWhileDataFetching(e)); - return null; - } catch (Exception e) { - context.addError(new ExceptionWhileDataFetching(e)); - return null; - } - }) - .collect(Collectors.toList()); - } - - @Override - public void registerEndpoints (spark.Service sparkService) { - // TODO make this `post` as per GraphQL convention - sparkService.get("/api/graphql", this::handleQuery, toJson); - } - - /** Context for a graphql query. Currently contains authorization info. */ - public static class QueryContext extends ExecutionContext { - public String accessGroup; - } - -} diff --git a/src/main/java/com/conveyal/analysis/controllers/GtfsController.java b/src/main/java/com/conveyal/analysis/controllers/GtfsController.java new file mode 100644 index 000000000..1552e2314 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/controllers/GtfsController.java @@ -0,0 +1,242 @@ +package com.conveyal.analysis.controllers; + +import com.conveyal.analysis.AnalysisServerException; +import com.conveyal.analysis.models.Bundle; +import com.conveyal.analysis.persistence.Persistence; +import com.conveyal.gtfs.GTFSCache; +import com.conveyal.gtfs.GTFSFeed; +import com.conveyal.gtfs.model.Pattern; +import com.conveyal.gtfs.model.Route; +import com.conveyal.gtfs.model.Stop; +import com.conveyal.gtfs.model.Trip; +import com.mongodb.QueryBuilder; +import org.mapdb.Fun; +import org.mongojack.DBCursor; +import spark.Request; +import spark.Response; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static com.conveyal.analysis.util.JsonUtil.toJson; + +/** + * Controller for retrieving data from the GTFS cache. + * + * Each endpoint starts with it's `feedGroupId` and `feedId` for retrieving the feed from the cache. No database + * interaction is done. This assumes that if the user is logged in and retrieving the feed by the appropriate ID then + * they have access to it, without checking the access group. This setup will allow for putting this endpoint behind a + * CDN in the future. 
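
For context, here is a hypothetical client call against one of the endpoints this controller registers (see registerEndpoints further down). Host, port and IDs are placeholders; the expected header value comes from the cacheControlImmutable constant below.

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class GtfsApiClientExample {
        public static void main (String[] args) throws Exception {
            HttpClient client = HttpClient.newHttpClient();
            // Placeholder host, port and IDs; the path shape matches the routes registered by GtfsController.
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create("http://localhost:7070/api/gtfs/FEED_GROUP_ID/FEED_ID/routes"))
                    .build();
            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
            // Responses are served with "public, max-age=2592000, immutable", so a browser or CDN may cache
            // them for up to a month without revalidating.
            System.out.println(response.headers().firstValue("Cache-Control").orElse("(no header)"));
            System.out.println(response.body()); // JSON array of route objects: _id, name, type, color
        }
    }
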
Everything retrieved is immutable. Once it's retrieved and stored in the CDN, it doesn't need to + * be pulled from the cache again. + */ +public class GtfsController implements HttpController { + private final GTFSCache gtfsCache; + public GtfsController(GTFSCache gtfsCache) { + this.gtfsCache = gtfsCache; + } + + /** + * Use the same Cache-Control header for each endpoint here. 2,592,000 seconds is one month. + * https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control + */ + private final String cacheControlImmutable = "public, max-age=2592000, immutable"; + + /** + * Extracted into a common method to allow turning off during development. + */ + private void addImmutableResponseHeader (Response res) { + res.header("Cache-Control", cacheControlImmutable); + } + + private static class BaseApiResponse { + public final String _id; + public final String name; + + BaseApiResponse(String _id, String name) { + this._id = _id; + this.name = name; + } + } + + private static class GeoJsonLineString { + public final String type = "LineString"; + public double[][] coordinates; + } + + private GTFSFeed getFeedFromRequest (Request req) { + String bundleScopedFeedId = Bundle.bundleScopeFeedId(req.params("feedId"), req.params("feedGroupId")); + return gtfsCache.get(bundleScopedFeedId); + } + + static class RouteApiResponse extends BaseApiResponse { + public final int type; + public final String color; + + static String getRouteName (Route route) { + String tempName = ""; + if (route.route_short_name != null) tempName += route.route_short_name; + if (route.route_long_name != null) tempName += " " + route.route_long_name; + return tempName.trim(); + } + + RouteApiResponse(Route route) { + super(route.route_id, getRouteName(route)); + color = route.route_color; + type = route.route_type; + } + } + + private RouteApiResponse getRoute(Request req, Response res) { + addImmutableResponseHeader(res); + GTFSFeed feed = getFeedFromRequest(req); + return new RouteApiResponse(feed.routes.get(req.params("routeId"))); + } + + private List getRoutes(Request req, Response res) { + addImmutableResponseHeader(res); + GTFSFeed feed = getFeedFromRequest(req); + return feed.routes + .values() + .stream() + .map(RouteApiResponse::new) + .collect(Collectors.toList()); + } + + static class PatternApiResponse extends BaseApiResponse { + public final GeoJsonLineString geometry; + public final List orderedStopIds; + public final List associatedTripIds; + + PatternApiResponse(Pattern pattern) { + super(pattern.pattern_id, pattern.name); + geometry = serialize(pattern.geometry); + orderedStopIds = pattern.orderedStops; + associatedTripIds = pattern.associatedTrips; + } + + static GeoJsonLineString serialize (com.vividsolutions.jts.geom.LineString geometry) { + GeoJsonLineString ret = new GeoJsonLineString(); + ret.coordinates = Stream.of(geometry.getCoordinates()) + .map(c -> new double[] { c.x, c.y }) + .toArray(double[][]::new); + + return ret; + } + } + + private List getPatternsForRoute (Request req, Response res) { + addImmutableResponseHeader(res); + GTFSFeed feed = getFeedFromRequest(req); + final String routeId = req.params("routeId"); + return feed.patterns + .values() + .stream() + .filter(p -> Objects.equals(p.route_id, routeId)) + .map(PatternApiResponse::new) + .collect(Collectors.toList()); + } + + static class StopApiResponse extends BaseApiResponse { + public final double lat; + public final double lon; + + StopApiResponse(Stop stop) { + super(stop.stop_id, stop.stop_name); + lat = stop.stop_lat; + 
lon = stop.stop_lon; + } + } + /** + * Return StopApiResponse values for GTFS stops (location_type = 0) in a single feed + */ + private List getAllStopsForOneFeed(Request req, Response res) { + addImmutableResponseHeader(res); + GTFSFeed feed = getFeedFromRequest(req); + return feed.stops.values().stream().filter(s -> s.location_type == 0) + .map(StopApiResponse::new).collect(Collectors.toList()); + } + + /** + * Groups the feedId and stops (location_type = 0; not parent stations, entrances/exits, generic nodes, etc.) for a + * given GTFS feed + */ + static class FeedGroupStopsApiResponse { + public final String feedId; + public final List stops; + + FeedGroupStopsApiResponse(GTFSFeed feed) { + this.feedId = feed.feedId; + this.stops = + feed.stops.values().stream().filter(s -> s.location_type == 0). + map(StopApiResponse::new).collect(Collectors.toList()); + } + } + + private List getAllStopsForFeedGroup(Request req, Response res) { + addImmutableResponseHeader(res); + String feedGroupId = req.params("feedGroupId"); + DBCursor cursor = Persistence.bundles.find(QueryBuilder.start("feedGroupId").is(feedGroupId).get()); + if (!cursor.hasNext()) { + throw AnalysisServerException.notFound("Bundle could not be found for the given feed group ID."); + } + + List allStopsByFeed = new ArrayList<>(); + Bundle bundle = cursor.next(); + for (Bundle.FeedSummary feedSummary : bundle.feeds) { + String bundleScopedFeedId = Bundle.bundleScopeFeedId(feedSummary.feedId, feedGroupId); + GTFSFeed feed = gtfsCache.get(bundleScopedFeedId); + allStopsByFeed.add(new FeedGroupStopsApiResponse(feed)); + } + return allStopsByFeed; + } + + static class TripApiResponse extends BaseApiResponse { + public final String headsign; + public final Integer startTime; + public final Integer duration; + public final int directionId; + + TripApiResponse(GTFSFeed feed, Trip trip) { + super(trip.trip_id, trip.trip_short_name); + headsign = trip.trip_headsign; + directionId = trip.direction_id; + + var st = feed.stop_times.ceilingEntry(new Fun.Tuple2(trip.trip_id, null)); + var endStopTime = feed.stop_times.floorEntry(new Fun.Tuple2(trip.trip_id, Fun.HI)); + + startTime = st != null ? 
st.getValue().departure_time : null; + + if (startTime == null || endStopTime == null || endStopTime.getValue().arrival_time < startTime) { + duration = null; + } else { + duration = endStopTime.getValue().arrival_time - startTime; + } + } + } + + private List getTripsForRoute (Request req, Response res) { + addImmutableResponseHeader(res); + final GTFSFeed feed = getFeedFromRequest(req); + final String routeId = req.params("routeId"); + return feed.trips + .values().stream() + .filter(t -> Objects.equals(t.route_id, routeId)) + .map(t -> new TripApiResponse(feed, t)) + .sorted(Comparator.comparingInt(t -> t.startTime)) + .collect(Collectors.toList()); + } + + @Override + public void registerEndpoints (spark.Service sparkService) { + sparkService.get("/api/gtfs/:feedGroupId/stops", this::getAllStopsForFeedGroup, toJson); + sparkService.get("/api/gtfs/:feedGroupId/:feedId/routes", this::getRoutes, toJson); + sparkService.get("/api/gtfs/:feedGroupId/:feedId/routes/:routeId", this::getRoute, toJson); + sparkService.get("/api/gtfs/:feedGroupId/:feedId/routes/:routeId/patterns", this::getPatternsForRoute, toJson); + sparkService.get("/api/gtfs/:feedGroupId/:feedId/routes/:routeId/trips", this::getTripsForRoute, toJson); + sparkService.get("/api/gtfs/:feedGroupId/:feedId/stops", this::getAllStopsForOneFeed, toJson); + } +} diff --git a/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java b/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java index ed7d5b38d..1541773ad 100644 --- a/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java +++ b/src/main/java/com/conveyal/analysis/controllers/LocalFilesController.java @@ -5,6 +5,7 @@ import com.conveyal.file.FileStorageFormat; import com.conveyal.file.FileStorageKey; import com.conveyal.file.FileUtils; +import com.conveyal.file.LocalFileStorage; import spark.Request; import spark.Response; import spark.Service; @@ -20,14 +21,13 @@ */ public class LocalFilesController implements HttpController { - // Something feels whack here, this should more specifically be a LocalFileStorage - private final FileStorage fileStorage; + private final LocalFileStorage fileStorage; public LocalFilesController (FileStorage fileStorage) { - this.fileStorage = fileStorage; + this.fileStorage = (LocalFileStorage) fileStorage; } - private InputStream getFile (Request req, Response res) throws Exception { + private Object getFile (Request req, Response res) throws Exception { String filename = req.splat()[0]; FileCategory category = FileCategory.valueOf(req.params("category").toUpperCase(Locale.ROOT)); FileStorageKey key = new FileStorageKey(category, filename); @@ -35,15 +35,21 @@ private InputStream getFile (Request req, Response res) throws Exception { FileStorageFormat format = FileStorageFormat.fromFilename(filename); res.type(format.mimeType); - // If the content-encoding is set to gzip, Spark automatically gzips the response. This mangles data - // that was already gzipped. Therefore, check if it's gzipped and pipe directly to the raw OutputStream. + // If the content-encoding is set to gzip, Spark automatically gzips the response. This double-gzips anything + // that was already gzipped. Some of our files are already gzipped, and we rely on the client browser to + // decompress them upon receiving them. Therefore, when serving a file that's already gzipped we bypass Spark, + // piping it directly to the raw Jetty OutputStream.
As soon as transferFromFileTo completes it closes the + // output stream, which completes the HTTP response to the client. We must then return something to Spark. We + // can't return null because Spark will spew errors about the endpoint being "not mapped" and try to replace + // the response with a 404, so we return an empty String. res.header("Content-Encoding", "gzip"); if (FileUtils.isGzip(file)) { // TODO Trace in debug: how does this actually work? // Verify what this is transferring into - a buffer? In another reading thread? // Is Jetty ServletOutputStream implementation automatically threaded or buffered? + // It appears to be buffered because the response has a Content-Length header. FileUtils.transferFromFileTo(file, res.raw().getOutputStream()); - return null; + return ""; } else { return FileUtils.getInputStream(file); } diff --git a/src/main/java/com/conveyal/analysis/controllers/ModificationController.java b/src/main/java/com/conveyal/analysis/controllers/ModificationController.java deleted file mode 100644 index b4b3c68e8..000000000 --- a/src/main/java/com/conveyal/analysis/controllers/ModificationController.java +++ /dev/null @@ -1,96 +0,0 @@ -package com.conveyal.analysis.controllers; - -import com.conveyal.analysis.models.AbstractTimetable; -import com.conveyal.analysis.models.AddTripPattern; -import com.conveyal.analysis.models.ConvertToFrequency; -import com.conveyal.analysis.models.Modification; -import com.conveyal.analysis.persistence.Persistence; -import org.bson.types.ObjectId; -import spark.Request; -import spark.Response; - -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static com.conveyal.analysis.util.JsonUtil.toJson; - -public class ModificationController implements HttpController { - - public ModificationController () { - // NO COMPONENT DEPENDENCIES - // Eventually Persistence will be a component (AnalysisDatabase) instead of static. 
- } - - private Modification getModification (Request req, Response res) { - return Persistence.modifications.findByIdFromRequestIfPermitted(req); - } - - private Modification create (Request request, Response response) throws IOException { - return Persistence.modifications.createFromJSONRequest(request); - } - - private Modification update (Request request, Response response) throws IOException { - return Persistence.modifications.updateFromJSONRequest(request); - } - - private Modification deleteModification (Request req, Response res) { - return Persistence.modifications.removeIfPermitted(req.params("_id"), req.attribute("accessGroup")); - } - - private void mapPhaseIds (List timetables, String oldModificationId, String newModificationId) { - Map idPairs = new HashMap(); - timetables.forEach(tt -> { - String newId = ObjectId.get().toString(); - idPairs.put(tt._id, newId); - tt._id = newId; - }); - - timetables - .stream() - .filter(tt -> tt.phaseFromTimetable != null && tt.phaseFromTimetable.length() > 0) - .filter(tt -> tt.phaseFromTimetable.contains(oldModificationId)) - .forEach(tt -> { - String oldTTId = tt.phaseFromTimetable.split(":")[1]; - tt.phaseFromTimetable = newModificationId + ":" + idPairs.get(oldTTId); - }); - } - - private Modification copyModification (Request req, Response res) { - Modification modification = Persistence.modifications.findByIdFromRequestIfPermitted(req); - - String oldId = modification._id; - Modification clone = Persistence.modifications.create(modification); - - // Matched up the phased entries and timetables - if (modification.getType().equals(AddTripPattern.type)) { - mapPhaseIds((List)(List)((AddTripPattern) clone).timetables, oldId, clone._id); - } else if (modification.getType().equals(ConvertToFrequency.type)) { - mapPhaseIds((List)(List)((ConvertToFrequency) clone).entries, oldId, clone._id); - } - - // Set `name` to include "(copy)" - clone.name = clone.name + " (copy)"; - - // Set `updateBy` manually, `createdBy` stays with the original modification author - clone.updatedBy = req.attribute("email"); - - // Update the clone - return Persistence.modifications.put(clone); - } - - @Override - public void registerEndpoints (spark.Service sparkService) { - sparkService.path("/api/modification", () -> { - sparkService.get("/:_id", this::getModification, toJson); - sparkService.post("/:_id/copy", this::copyModification, toJson); - sparkService.post("", this::create, toJson); - // Handle HTTP OPTIONS request to provide any configured CORS headers. 
- sparkService.options("", (q, s) -> ""); - sparkService.put("/:_id", this::update, toJson); - sparkService.options("/:_id", (q, s) -> ""); - sparkService.delete("/:_id", this::deleteModification, toJson); - }); - } -} diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java index 5a6532453..012309ee1 100644 --- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java +++ b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java @@ -1,12 +1,19 @@ package com.conveyal.analysis.controllers; import com.conveyal.analysis.AnalysisServerException; +import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.components.TaskScheduler; import com.conveyal.analysis.grids.SeamlessCensusGridExtractor; +import com.conveyal.analysis.models.DataGroup; import com.conveyal.analysis.models.OpportunityDataset; import com.conveyal.analysis.models.Region; +import com.conveyal.analysis.models.SpatialDataSource; +import com.conveyal.analysis.persistence.AnalysisCollection; +import com.conveyal.analysis.persistence.AnalysisDB; import com.conveyal.analysis.persistence.Persistence; import com.conveyal.analysis.util.FileItemInputStreamProvider; +import com.conveyal.analysis.util.HttpUtils; +import com.conveyal.analysis.util.JsonUtil; import com.conveyal.file.FileStorage; import com.conveyal.file.FileStorageFormat; import com.conveyal.file.FileStorageKey; @@ -14,11 +21,14 @@ import com.conveyal.r5.analyst.FreeFormPointSet; import com.conveyal.r5.analyst.Grid; import com.conveyal.r5.analyst.PointSet; -import com.conveyal.r5.analyst.WebMercatorExtents; +import com.conveyal.r5.analyst.progress.NoopProgressListener; +import com.conveyal.r5.analyst.progress.Task; +import com.conveyal.r5.analyst.progress.WorkProduct; +import com.conveyal.r5.analyst.progress.WorkProductType; import com.conveyal.r5.util.ExceptionUtils; import com.conveyal.r5.util.InputStreamProvider; import com.conveyal.r5.util.ProgressListener; -import com.google.common.collect.Sets; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.io.Files; import com.mongodb.QueryBuilder; import org.apache.commons.fileupload.FileItem; @@ -28,7 +38,6 @@ import org.apache.commons.fileupload.servlet.ServletFileUpload; import org.apache.commons.io.FilenameUtils; import org.bson.types.ObjectId; -import org.json.simple.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import spark.Request; @@ -39,7 +48,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.io.UnsupportedEncodingException; import java.time.LocalDateTime; import java.time.ZoneId; import java.util.ArrayList; @@ -47,17 +55,17 @@ import java.util.Collection; import java.util.Date; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; +import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate; import static com.conveyal.analysis.util.JsonUtil.toJson; import static com.conveyal.file.FileCategory.GRIDS; import static com.conveyal.r5.analyst.WebMercatorGridPointSet.parseZoom; +import static com.conveyal.r5.analyst.progress.WorkProductType.OPPORTUNITY_DATASET; /** * Controller that handles fetching opportunity datasets 
(grids and other pointset formats). @@ -66,32 +74,33 @@ public class OpportunityDatasetController implements HttpController { private static final Logger LOG = LoggerFactory.getLogger(OpportunityDatasetController.class); - private static final FileItemFactory fileItemFactory = new DiskFileItemFactory(); - // Component Dependencies private final FileStorage fileStorage; private final TaskScheduler taskScheduler; private final SeamlessCensusGridExtractor extractor; + // Database tables + + private final AnalysisCollection dataGroupCollection; + public OpportunityDatasetController ( FileStorage fileStorage, TaskScheduler taskScheduler, - SeamlessCensusGridExtractor extractor + SeamlessCensusGridExtractor extractor, + AnalysisDB database ) { this.fileStorage = fileStorage; this.taskScheduler = taskScheduler; this.extractor = extractor; + this.dataGroupCollection = database.getAnalysisCollection("dataGroups", DataGroup.class); } /** Store upload status objects FIXME trivial Javadoc */ private final List uploadStatuses = new ArrayList<>(); - private JSONObject getJSONURL (FileStorageKey key) { - JSONObject json = new JSONObject(); - String url = fileStorage.getURL(key); - json.put("url", url); - return json; + private ObjectNode getJsonUrl (FileStorageKey key) { + return JsonUtil.objectNode().put("url", fileStorage.getURL(key)); } private void addStatusAndRemoveOldStatuses(OpportunityDatasetUploadStatus status) { @@ -105,14 +114,14 @@ private void addStatusAndRemoveOldStatuses(OpportunityDatasetUploadStatus status private Collection getRegionDatasets(Request req, Response res) { return Persistence.opportunityDatasets.findPermitted( QueryBuilder.start("regionId").is(req.params("regionId")).get(), - req.attribute("accessGroup") + UserPermissions.from(req) ); } private Object getOpportunityDataset(Request req, Response res) { OpportunityDataset dataset = Persistence.opportunityDatasets.findByIdFromRequestIfPermitted(req); if (dataset.format == FileStorageFormat.GRID) { - return getJSONURL(dataset.getStorageKey()); + return getJsonUrl(dataset.getStorageKey()); } else { // Currently the UI can only visualize grids, not other kinds of datasets (freeform points). // We do generate a rasterized grid for each of the freeform pointsets we create, so ideally we'd redirect @@ -136,33 +145,37 @@ private boolean clearStatus(Request req, Response res) { return uploadStatuses.removeIf(s -> s.id.equals(statusId)); } - private OpportunityDatasetUploadStatus downloadLODES(Request req, Response res) { + private OpportunityDatasetUploadStatus downloadLODES (Request req, Response res) { final String regionId = req.params("regionId"); final int zoom = parseZoom(req.queryParams("zoom")); - - // default - final String accessGroup = req.attribute("accessGroup"); - final String email = req.attribute("email"); - final Region region = Persistence.regions.findByIdIfPermitted(regionId, accessGroup); + final UserPermissions userPermissions = UserPermissions.from(req); + final Region region = Persistence.regions.findByIdIfPermitted(regionId, userPermissions); // Common UUID for all LODES datasets created in this download (e.g. so they can be grouped together and - // deleted as a batch using deleteSourceSet) - final String downloadBatchId = new ObjectId().toString(); + // deleted as a batch using deleteSourceSet) TODO use DataGroup and DataSource (creating only one DataSource per region). 
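
The LODES extraction below, like the bundle and data source uploads earlier in this diff, is now submitted through the Task builder and reports progress through the ProgressListener passed to its action. Here is a schematic of that pattern, with a made-up work loop standing in for the real extraction; it assumes a TaskScheduler (taskScheduler) and UserPermissions (userPermissions) in scope, as in these controllers.

    // Schematic only: the builder calls mirror those used in this diff, the loop body is a placeholder.
    taskScheduler.enqueue(Task.create("Example background job")
            .forUser(userPermissions)
            .setHeavy(true)
            .withAction(progressListener -> {
                int nItems = 10; // placeholder amount of work
                progressListener.beginTask("Processing items", nItems);
                for (int i = 0; i < nItems; i++) {
                    // ... one unit of real work would go here ...
                    progressListener.increment();
                }
            }));
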
// The bucket name contains the specific lodes data set and year so works as an appropriate name final OpportunityDatasetUploadStatus status = new OpportunityDatasetUploadStatus(regionId, extractor.sourceName); addStatusAndRemoveOldStatuses(status); - taskScheduler.enqueueHeavyTask(() -> { - try { - status.message = "Extracting census data for region"; - List grids = extractor.censusDataForBounds(region.bounds, zoom); - createDatasetsFromPointSets( - email, accessGroup, extractor.sourceName, downloadBatchId, regionId, status, grids - ); - } catch (IOException e) { - status.completeWithError(e); - LOG.error("Exception processing LODES data: " + ExceptionUtils.stackTraceString(e)); - } - }); + // TODO we should be reusing the same source from Mongo, not making new ephemeral ones on each extract operation + SpatialDataSource source = new SpatialDataSource(userPermissions, extractor.sourceName); + source.regionId = regionId; + // Make a new group that will contain the N OpportunityDatasets we're saving. + String description = String.format("Import %s to %s", extractor.sourceName, region.name); + DataGroup dataGroup = new DataGroup(userPermissions, source._id.toString(), description); + + taskScheduler.enqueue(Task.create("Extracting LODES data") + .forUser(userPermissions) + .setHeavy(true) + .withAction((progressListener) -> { + try { + status.message = "Extracting census data for region"; + List grids = extractor.censusDataForBounds(region.bounds, zoom, progressListener); + updateAndStoreDatasets(source, dataGroup, status, grids, progressListener); + } catch (IOException e) { + status.completeWithError(e); + LOG.error("Exception processing LODES data: " + ExceptionUtils.stackTraceString(e)); + } + })); return status; } @@ -171,47 +184,33 @@ private OpportunityDatasetUploadStatus downloadLODES(Request req, Response res) * Given a list of new PointSets, serialize each PointSet and save it to S3, then create a metadata object about * that PointSet and store it in Mongo. */ - private List createDatasetsFromPointSets(String email, - String accessGroup, - String sourceName, - String sourceId, - String regionId, - OpportunityDatasetUploadStatus status, - List pointSets) { + private void updateAndStoreDatasets (SpatialDataSource source, + DataGroup dataGroup, + OpportunityDatasetUploadStatus status, + List pointSets, + com.conveyal.r5.analyst.progress.ProgressListener progressListener) { status.status = Status.UPLOADING; status.totalGrids = pointSets.size(); + progressListener.beginTask("Storing opportunity data", pointSets.size()); // Create an OpportunityDataset holding some metadata about each PointSet (Grid or FreeForm). final List datasets = new ArrayList<>(); for (PointSet pointSet : pointSets) { - - // Make new PointSet metadata objects. - // Unfortunately we can't pull this step out into a method because there are so many parameters. - // Some of that metadata could be consolidated e.g. user email and access group.
OpportunityDataset dataset = new OpportunityDataset(); - dataset.sourceName = sourceName; - dataset.sourceId = sourceId; + dataset.sourceName = source.name; + dataset.sourceId = source._id.toString(); + dataset.dataGroupId = dataGroup._id.toString(); + dataset.createdBy = source.createdBy; + dataset.accessGroup = source.accessGroup; + dataset.regionId = source.regionId; dataset.name = pointSet.name; - dataset.createdBy = email; - dataset.accessGroup = accessGroup; dataset.totalPoints = pointSet.featureCount(); - dataset.regionId = regionId; dataset.totalOpportunities = pointSet.sumTotalOpportunities(); dataset.format = getFormatCode(pointSet); if (dataset.format == FileStorageFormat.FREEFORM) { dataset.name = String.join(" ", pointSet.name, "(freeform)"); } - // These bounds are currently in web Mercator pixels, which are relevant to Grids but are not natural units - // for FreeformPointSets. There are only unique minimal web Mercator bounds for FreeformPointSets because - // the zoom level is fixed in OpportunityDataset (there is not even a field for it). - // Perhaps these metadata bounds should be WGS84 instead, it depends how the UI uses them. - { - WebMercatorExtents webMercatorExtents = pointSet.getWebMercatorExtents(); - dataset.north = webMercatorExtents.north; - dataset.west = webMercatorExtents.west; - dataset.width = webMercatorExtents.width; - dataset.height = webMercatorExtents.height; - } + dataset.setWebMercatorExtents(pointSet); // TODO make origin and destination pointsets reference each other and indicate they are suitable // for one-to-one analyses @@ -232,7 +231,8 @@ private List createDatasetsFromPointSets(String email, fileStorage.moveIntoStorage(dataset.getStorageKey(FileStorageFormat.GRID), gridFile); } else if (pointSet instanceof FreeFormPointSet) { // Upload serialized freeform pointset back to S3 - FileStorageKey fileStorageKey = new FileStorageKey(GRIDS, regionId + "/" + dataset._id + ".pointset"); + FileStorageKey fileStorageKey = new FileStorageKey(GRIDS, source.regionId + "/" + dataset._id + + ".pointset"); File pointsetFile = FileUtils.createScratchFile("pointset"); OutputStream os = new GZIPOutputStream(new FileOutputStream(pointsetFile)); @@ -246,15 +246,18 @@ private List createDatasetsFromPointSets(String email, if (status.uploadedGrids == status.totalGrids) { status.completeSuccessfully(); } - LOG.info("Completed {}/{} uploads for {}", status.uploadedGrids, status.totalGrids, status.name); + LOG.info("Moved {}/{} files into storage for {}", status.uploadedGrids, status.totalGrids, status.name); } catch (NumberFormatException e) { throw new AnalysisServerException("Error attempting to parse number in uploaded file: " + e.toString()); } catch (Exception e) { status.completeWithError(e); throw AnalysisServerException.unknown(e); } + progressListener.increment(); } - return datasets; + // Set the workProduct - TODO update UI so it can handle a link to a group of OPPORTUNITY_DATASET + dataGroupCollection.insert(dataGroup); + progressListener.setWorkProduct(WorkProduct.forDataGroup(OPPORTUNITY_DATASET, dataGroup, source.regionId)); } private static FileStorageFormat getFormatCode (PointSet pointSet) { @@ -312,140 +315,19 @@ private List createFreeFormPointSetsFromCsv(FileItem csvFileIt } - /** - * Get the specified field from a map representing a multipart/form-data POST request, as a UTF-8 String. - * FileItems represent any form item that was received within a multipart/form-data POST request, not just files. 
- */ - private String getFormField(Map> formFields, String fieldName, boolean required) { - try { - List fileItems = formFields.get(fieldName); - if (fileItems == null || fileItems.isEmpty()) { - if (required) { - throw AnalysisServerException.badRequest("Missing required field: " + fieldName); - } else { - return null; - } - } - String value = fileItems.get(0).getString("UTF-8"); - return value; - } catch (UnsupportedEncodingException e) { - throw AnalysisServerException.badRequest(String.format("Multipart form field '%s' had unsupported encoding", - fieldName)); - } - } - - private enum UploadFormat { - SHAPEFILE, GRID, CSV - } - - /** - * Detect from a batch of uploaded files whether the user has uploaded a Shapefile, a CSV, or one or more binary - * grids. In the process we validate the list of uploaded files, making sure certain preconditions are met. - * Some kinds of uploads must contain multiple files (.shp) or can contain multiple files (.grid) while others - * must have only a single file (.csv). Scan the list of uploaded files to ensure it makes sense before acting. - * @throws AnalysisServerException if the type of the upload can't be detected or preconditions are violated. - * @return the expected type of the uploaded file or files, never null. - */ - private UploadFormat detectUploadFormatAndValidate (List fileItems) { - if (fileItems == null || fileItems.isEmpty()) { - throw AnalysisServerException.fileUpload("You must include some files to create an opportunity dataset."); - } - - Set fileExtensions = extractFileExtensions(fileItems); - - // There was at least one file with an extension, the set must now contain at least one extension. - if (fileExtensions.isEmpty()) { - throw AnalysisServerException.fileUpload("No file extensions seen, cannot detect upload type."); - } - - UploadFormat uploadFormat = null; - - // Check that if upload contains any of the Shapefile sidecar files, it contains all of the required ones. - final Set shapefileExtensions = Sets.newHashSet("SHP", "DBF", "PRJ"); - if ( ! Sets.intersection(fileExtensions, shapefileExtensions).isEmpty()) { - if (fileExtensions.containsAll(shapefileExtensions)) { - uploadFormat = UploadFormat.SHAPEFILE; - verifyBaseNamesSame(fileItems); - // TODO check that any additional file is SHX, and that there are no more than 4 files. - } else { - final String message = "You must multi-select at least SHP, DBF, and PRJ files for shapefile upload."; - throw AnalysisServerException.fileUpload(message); - } - } - - // Even if we've already detected a shapefile, run the other tests to check for a bad mixture of file types. 
- if (fileExtensions.contains("GRID")) { - if (fileExtensions.size() == 1) { - uploadFormat = UploadFormat.GRID; - } else { - String message = "When uploading grids you may upload multiple files, but they must all be grids."; - throw AnalysisServerException.fileUpload(message); - } - } else if (fileExtensions.contains("CSV")) { - if (fileItems.size() == 1) { - uploadFormat = UploadFormat.CSV; - } else { - String message = "When uploading CSV you may only upload one file at a time."; - throw AnalysisServerException.fileUpload(message); - } - } - - if (uploadFormat == null) { - throw AnalysisServerException.fileUpload("Could not detect format of opportunity dataset upload."); - } - return uploadFormat; - } - - private Set extractFileExtensions (List fileItems) { - - Set fileExtensions = new HashSet<>(); - - for (FileItem fileItem : fileItems) { - String fileName = fileItem.getName(); - String extension = FilenameUtils.getExtension(fileName); - if (extension.isEmpty()) { - throw AnalysisServerException.fileUpload("Filename has no extension: " + fileName); - } - fileExtensions.add(extension.toUpperCase()); - } - - return fileExtensions; - } - - private void verifyBaseNamesSame (List fileItems) { - String firstBaseName = null; - for (FileItem fileItem : fileItems) { - String baseName = FilenameUtils.getBaseName(fileItem.getName()); - if (firstBaseName == null) { - firstBaseName = baseName; - } - if (!firstBaseName.equals(baseName)) { - String message = "In a shapefile upload, all files must have the same base name."; - throw AnalysisServerException.fileUpload(message); - } - } - } - /** * Handle many types of file upload. Returns a OpportunityDatasetUploadStatus which has a handle to request status. * The request should be a multipart/form-data POST request, containing uploaded files and associated parameters. */ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Response res) { - final String accessGroup = req.attribute("accessGroup"); - final String email = req.attribute("email"); - final Map> formFields; - try { - ServletFileUpload sfu = new ServletFileUpload(fileItemFactory); - formFields = sfu.parseParameterMap(req.raw()); - } catch (FileUploadException e) { - // We can't even get enough information to create a status tracking object. Re-throw an exception. - throw AnalysisServerException.fileUpload("Unable to parse opportunity dataset. " + ExceptionUtils.stackTraceString(e)); - } + // Extract user info, uploaded files and form fields from the incoming request. + final UserPermissions userPermissions = UserPermissions.from(req); + final Map> formFields = HttpUtils.getRequestFiles(req.raw()); // Parse required fields. Will throw a ServerException on failure. - final String sourceName = getFormField(formFields, "Name", true); - final String regionId = getFormField(formFields, "regionId", true); - final int zoom = parseZoom(getFormField(formFields, "zoom", false)); + final String sourceName = HttpUtils.getFormField(formFields, "Name", true); + final String regionId = HttpUtils.getFormField(formFields, "regionId", true); + final int zoom = parseZoom(HttpUtils.getFormField(formFields, "zoom", false)); // Create a region-wide status object tracking the processing of opportunity data. 
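
The calls above to HttpUtils.getRequestFiles and HttpUtils.getFormField replace the per-controller helpers deleted earlier in this file. A sketch of what such a shared form-field helper looks like, reconstructed from the deleted getFormField code rather than from the actual HttpUtils source:

    // Approximation of a shared helper like HttpUtils.getFormField; the real implementation in
    // com.conveyal.analysis.util.HttpUtils may differ in details.
    public static String getFormField (Map<String, List<FileItem>> formFields, String fieldName, boolean required) {
        List<FileItem> items = formFields.get(fieldName);
        if (items == null || items.isEmpty()) {
            if (required) {
                throw AnalysisServerException.badRequest("Missing required field: " + fieldName);
            }
            return null;
        }
        try {
            // In a multipart/form-data POST, plain form fields also arrive as FileItems.
            return items.get(0).getString("UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw AnalysisServerException.badRequest(
                    String.format("Multipart form field '%s' had unsupported encoding", fieldName));
        }
    }
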
// Create the status object before doing anything including input and parameter validation, so that any problems @@ -455,7 +337,7 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res addStatusAndRemoveOldStatuses(status); final List fileItems; - final UploadFormat uploadFormat; + final FileStorageFormat uploadFormat; final Map parameters; try { // Validate inputs and parameters, which will throw an exception if there's anything wrong with them. @@ -475,13 +357,13 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res try { // A place to accumulate all the PointSets created, both FreeForm and Grids. List pointsets = new ArrayList<>(); - if (uploadFormat == UploadFormat.GRID) { + if (uploadFormat == FileStorageFormat.GRID) { LOG.info("Detected opportunity dataset stored in Conveyal binary format."); pointsets.addAll(createGridsFromBinaryGridFiles(fileItems, status)); - } else if (uploadFormat == UploadFormat.SHAPEFILE) { + } else if (uploadFormat == FileStorageFormat.SHP) { LOG.info("Detected opportunity dataset stored as ESRI shapefile."); pointsets.addAll(createGridsFromShapefile(fileItems, zoom, status)); - } else if (uploadFormat == UploadFormat.CSV) { + } else if (uploadFormat == FileStorageFormat.CSV) { LOG.info("Detected opportunity dataset stored as CSV"); // Create a grid even when user has requested a freeform pointset so we have something to visualize. FileItem csvFileItem = fileItems.get(0); @@ -511,13 +393,18 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res if (pointsets.isEmpty()) { throw new RuntimeException("No opportunity dataset was created from the files uploaded."); } - LOG.info("Uploading opportunity datasets to S3 and storing metadata in database."); + LOG.info("Moving opportunity datasets into storage and adding metadata to database."); // Create a single unique ID string that will be referenced by all opportunity datasets produced by // this upload. This allows us to group together datasets from the same source and associate them with // the file(s) that produced them. - final String sourceFileId = new ObjectId().toString(); - createDatasetsFromPointSets(email, accessGroup, sourceName, sourceFileId, regionId, status, pointsets); + // Currently we are creating the DataSource document in Mongo but not actually saving the source files. + // Some methods like createGridsFromShapefile above "consume" those files by moving them into a tempdir. 
+ SpatialDataSource source = new SpatialDataSource(userPermissions, sourceName); + source.regionId = regionId; + DataGroup dataGroup = new DataGroup(userPermissions, source._id.toString(), "Import opportunity data"); + updateAndStoreDatasets(source, dataGroup, status, pointsets, new NoopProgressListener()); } catch (Exception e) { + e.printStackTrace(); status.completeWithError(e); } }); @@ -557,34 +444,31 @@ private OpportunityDataset editOpportunityDataset(Request request, Response resp private Collection deleteSourceSet(Request request, Response response) { String sourceId = request.params("sourceId"); - String accessGroup = request.attribute("accessGroup"); + UserPermissions userPermissions = UserPermissions.from(request); Collection datasets = Persistence.opportunityDatasets.findPermitted( - QueryBuilder.start("sourceId").is(sourceId).get(), accessGroup); - - datasets.forEach(dataset -> deleteDataset(dataset._id, accessGroup)); - + QueryBuilder.start("sourceId").is(sourceId).get(), userPermissions); + datasets.forEach(dataset -> deleteDataset(dataset._id, userPermissions)); return datasets; } private OpportunityDataset deleteOpportunityDataset(Request request, Response response) { String opportunityDatasetId = request.params("_id"); - return deleteDataset(opportunityDatasetId, request.attribute("accessGroup")); + return deleteDataset(opportunityDatasetId, UserPermissions.from(request)); } /** * Delete an Opportunity Dataset from the database and all formats from the file store. */ - private OpportunityDataset deleteDataset(String id, String accessGroup) { - OpportunityDataset dataset = Persistence.opportunityDatasets.removeIfPermitted(id, accessGroup); - + private OpportunityDataset deleteDataset(String id, UserPermissions userPermissions) { + OpportunityDataset dataset = Persistence.opportunityDatasets.removeIfPermitted(id, userPermissions); if (dataset == null) { throw AnalysisServerException.notFound("Opportunity dataset could not be found."); } else { + // Several of these files may not exist. FileStorage::delete contract states this will be handled cleanly. fileStorage.delete(dataset.getStorageKey(FileStorageFormat.GRID)); fileStorage.delete(dataset.getStorageKey(FileStorageFormat.PNG)); - fileStorage.delete(dataset.getStorageKey(FileStorageFormat.TIFF)); + fileStorage.delete(dataset.getStorageKey(FileStorageFormat.GEOTIFF)); } - return dataset; } @@ -599,14 +483,14 @@ private List createGridsFromCsv(FileItem csvFileItem, int zoom, OpportunityDatasetUploadStatus status) throws Exception { - String latField = getFormField(query, "latField", true); - String lonField = getFormField(query, "lonField", true); - String idField = getFormField(query, "idField", false); + String latField = HttpUtils.getFormField(query, "latField", true); + String lonField = HttpUtils.getFormField(query, "lonField", true); + String idField = HttpUtils.getFormField(query, "idField", false); // Optional fields to run grid construction twice with two different sets of points. // This is only really useful when creating grids to visualize freeform pointsets for one-to-one analyses. 
- String latField2 = getFormField(query, "latField2", false); - String lonField2 = getFormField(query, "lonField2", false); + String latField2 = HttpUtils.getFormField(query, "latField2", false); + String lonField2 = HttpUtils.getFormField(query, "lonField2", false); List ignoreFields = Arrays.asList(idField, latField2, lonField2); InputStreamProvider csvStreamProvider = new FileItemInputStreamProvider(csvFileItem); @@ -687,15 +571,17 @@ private List createGridsFromShapefile(List fileItems, */ private Object downloadOpportunityDataset (Request req, Response res) throws IOException { FileStorageFormat downloadFormat; + String format = req.params("format"); try { - downloadFormat = FileStorageFormat.valueOf(req.params("format").toUpperCase()); + downloadFormat = FileStorageFormat.valueOf(format.toUpperCase()); } catch (IllegalArgumentException iae) { - // This code handles the deprecated endpoint for retrieving opportunity datasets - // get("/api/opportunities/:regionId/:gridKey") is the same signature as this endpoint. + LOG.warn("Unable to interpret format path parameter '{}', using legacy code path.", format); + // This code handles the deprecated endpoint for retrieving opportunity datasets. + // get("/api/opportunities/:regionId/:gridKey") has the same path pattern as this endpoint. String regionId = req.params("_id"); - String gridKey = req.params("format"); + String gridKey = format; FileStorageKey storageKey = new FileStorageKey(GRIDS, String.format("%s/%s.grid", regionId, gridKey)); - return getJSONURL(storageKey); + return getJsonUrl(storageKey); } if (FileStorageFormat.GRID.equals(downloadFormat)) return getOpportunityDataset(req, res); @@ -719,14 +605,14 @@ private Object downloadOpportunityDataset (Request req, Response res) throws IOE if (FileStorageFormat.PNG.equals(downloadFormat)) { grid.writePng(fos); - } else if (FileStorageFormat.TIFF.equals(downloadFormat)) { + } else if (FileStorageFormat.GEOTIFF.equals(downloadFormat)) { grid.writeGeotiff(fos); } fileStorage.moveIntoStorage(formatKey, localFile); } - return getJSONURL(formatKey); + return getJsonUrl(formatKey); } /** diff --git a/src/main/java/com/conveyal/analysis/controllers/ProjectController.java b/src/main/java/com/conveyal/analysis/controllers/ProjectController.java deleted file mode 100644 index cd7cb0440..000000000 --- a/src/main/java/com/conveyal/analysis/controllers/ProjectController.java +++ /dev/null @@ -1,175 +0,0 @@ -package com.conveyal.analysis.controllers; - -import com.conveyal.analysis.models.AddTripPattern; -import com.conveyal.analysis.models.ConvertToFrequency; -import com.conveyal.analysis.models.Modification; -import com.conveyal.analysis.models.Project; -import com.conveyal.analysis.persistence.Persistence; -import com.mongodb.QueryBuilder; -import org.bson.types.ObjectId; -import spark.Request; -import spark.Response; - -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.stream.Collectors; - -import static com.conveyal.analysis.util.JsonUtil.toJson; - -public class ProjectController implements HttpController { - - public ProjectController () { - // NO COMPONENT DEPENDENCIES - // Eventually persistence will be a component (AnalysisDatabase) instead of static. 
- } - - private Project findById(Request req, Response res) { - return Persistence.projects.findByIdFromRequestIfPermitted(req); - } - - private Collection getAllProjects (Request req, Response res) { - return Persistence.projects.findPermitted( - QueryBuilder.start("regionId").is(req.params("region")).get(), - req.attribute("accessGroup") - ); - } - - private Project create(Request req, Response res) throws IOException { - return Persistence.projects.createFromJSONRequest(req); - } - - private Project update(Request req, Response res) throws IOException { - return Persistence.projects.updateFromJSONRequest(req); - } - - private Collection modifications (Request req, Response res) { - return Persistence.modifications.findPermitted( - QueryBuilder.start("projectId").is(req.params("_id")).get(), - req.attribute("accessGroup") - ); - } - - private Collection importModifications (Request req, Response res) { - final String importId = req.params("_importId"); - final String newId = req.params("_id"); - final String accessGroup = req.attribute("accessGroup"); - final Project project = Persistence.projects.findByIdIfPermitted(newId, accessGroup); - final Project importProject = Persistence.projects.findByIdIfPermitted(importId, accessGroup); - final boolean bundlesAreNotEqual = !project.bundleId.equals(importProject.bundleId); - - QueryBuilder query = QueryBuilder.start("projectId").is(importId); - if (bundlesAreNotEqual) { - // Different bundle? Only copy add trip modifications - query = query.and("type").is("add-trip-pattern"); - } - final Collection modifications = Persistence.modifications.findPermitted(query.get(), accessGroup); - - // This would be a lot easier if we just used the actual `_id`s and dealt with it elsewhere when searching. They - // should be unique anyways. Hmmmmmmmmmmmm. Trade offs. - // Need to make two passes to create all the pairs and rematch for phasing - final Map modificationIdPairs = new HashMap<>(); - final Map timetableIdPairs = new HashMap<>(); - - return modifications - .stream() - .map(modification -> { - String oldModificationId = modification._id; - Modification clone = Persistence.modifications.create(modification); - modificationIdPairs.put(oldModificationId, clone._id); - - // Change the projectId, most important part! 
- clone.projectId = newId; - - // Set `name` to include "(import)" - clone.name = clone.name + " (import)"; - - // Set `updatedBy` by manually, `createdBy` stays with the original author - clone.updatedBy = req.attribute("email"); - - // Matched up the phased entries and timetables - if (modification.getType().equals(AddTripPattern.type)) { - if (bundlesAreNotEqual) { - // Remove references to real stops in the old bundle - ((AddTripPattern) clone).segments.forEach(segment -> { - segment.fromStopId = null; - segment.toStopId = null; - }); - - // Remove all phasing - ((AddTripPattern) clone).timetables.forEach(tt -> { - tt.phaseFromTimetable = null; - tt.phaseAtStop = null; - tt.phaseFromStop = null; - }); - } - - ((AddTripPattern) clone).timetables.forEach(tt -> { - String oldTTId = tt._id; - tt._id = new ObjectId().toString(); - timetableIdPairs.put(oldTTId, tt._id); - }); - } else if (modification.getType().equals(ConvertToFrequency.type)) { - ((ConvertToFrequency) clone).entries.forEach(tt -> { - String oldTTId = tt._id; - tt._id = new ObjectId().toString(); - timetableIdPairs.put(oldTTId, tt._id); - }); - } - - return clone; - }) - .collect(Collectors.toList()) - .stream() - .map(modification -> { - // A second pass is needed to map the phase pairs - if (modification.getType().equals(AddTripPattern.type)) { - ((AddTripPattern) modification).timetables.forEach(tt -> { - String pft = tt.phaseFromTimetable; - if (pft != null && pft.length() > 0) { - String[] pfts = pft.split(":"); - tt.phaseFromTimetable = modificationIdPairs.get(pfts[0]) + ":" + timetableIdPairs.get(pfts[1]); - } - }); - } else if (modification.getType().equals(ConvertToFrequency.type)) { - ((ConvertToFrequency) modification).entries.forEach(tt -> { - String pft = tt.phaseFromTimetable; - if (pft != null && pft.length() > 0) { - String[] pfts = pft.split(":"); - tt.phaseFromTimetable = modificationIdPairs.get(pfts[0]) + ":" + timetableIdPairs.get(pfts[1]); - } - }); - } - - return Persistence.modifications.put(modification); - }) - .collect(Collectors.toList()); - } - - private Project deleteProject (Request req, Response res) { - return Persistence.projects.removeIfPermitted(req.params("_id"), req.attribute("accessGroup")); - } - - public Collection getProjects (Request req, Response res) { - return Persistence.projects.findPermittedForQuery(req); - } - - @Override - public void registerEndpoints (spark.Service sparkService) { - sparkService.path("/api/project", () -> { - sparkService.get("", this::getProjects, toJson); - sparkService.get("/:_id", this::findById, toJson); - sparkService.get("/:_id/modifications", this::modifications, toJson); - sparkService.post("/:_id/import/:_importId", this::importModifications, toJson); - sparkService.post("", this::create, toJson); - sparkService.options("", (q, s) -> ""); - sparkService.put("/:_id", this::update, toJson); - sparkService.delete("/:_id", this::deleteProject, toJson); - sparkService.options("/:_id", (q, s) -> ""); - }); - // Note this one is under the /api/region path, not /api/project - sparkService.get("/api/region/:region/projects", this::getAllProjects); // TODO response transformer? 
- } - -} diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java index 894c62ac0..60197114b 100644 --- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java +++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java @@ -2,28 +2,29 @@ import com.conveyal.analysis.AnalysisServerException; import com.conveyal.analysis.SelectingGridReducer; +import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.components.broker.Broker; +import com.conveyal.analysis.components.broker.Job; import com.conveyal.analysis.components.broker.JobStatus; import com.conveyal.analysis.models.AnalysisRequest; import com.conveyal.analysis.models.OpportunityDataset; -import com.conveyal.analysis.models.Project; import com.conveyal.analysis.models.RegionalAnalysis; import com.conveyal.analysis.persistence.Persistence; import com.conveyal.analysis.results.CsvResultType; import com.conveyal.analysis.util.JsonUtil; -import com.conveyal.file.FileCategory; import com.conveyal.file.FileStorage; import com.conveyal.file.FileStorageFormat; import com.conveyal.file.FileStorageKey; import com.conveyal.file.FileUtils; +import com.conveyal.r5.analyst.FreeFormPointSet; import com.conveyal.r5.analyst.Grid; import com.conveyal.r5.analyst.PointSet; import com.conveyal.r5.analyst.PointSetCache; import com.conveyal.r5.analyst.cluster.RegionalTask; +import com.fasterxml.jackson.databind.JsonNode; import com.google.common.primitives.Ints; import com.mongodb.QueryBuilder; import gnu.trove.list.array.TIntArrayList; -import org.json.simple.JSONObject; import org.mongojack.DBProjection; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,7 +44,9 @@ import java.util.zip.GZIPOutputStream; import static com.conveyal.analysis.util.JsonUtil.toJson; +import static com.conveyal.file.FileCategory.BUNDLES; import static com.conveyal.file.FileCategory.RESULTS; +import static com.conveyal.r5.transit.TransportNetworkCache.getScenarioFilename; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; @@ -73,24 +76,24 @@ public RegionalAnalysisController (Broker broker, FileStorage fileStorage) { this.fileStorage = fileStorage; } - private Collection getRegionalAnalysesForRegion(String regionId, String accessGroup) { + private Collection getRegionalAnalysesForRegion(String regionId, UserPermissions userPermissions) { return Persistence.regionalAnalyses.findPermitted( QueryBuilder.start().and( QueryBuilder.start("regionId").is(regionId).get(), QueryBuilder.start("deleted").is(false).get() ).get(), DBProjection.exclude("request.scenario.modifications"), - accessGroup + userPermissions ); } private Collection getRegionalAnalysesForRegion(Request req, Response res) { - return getRegionalAnalysesForRegion(req.params("regionId"), req.attribute("accessGroup")); + return getRegionalAnalysesForRegion(req.params("regionId"), UserPermissions.from(req)); } // Note: this includes the modifications object which can be very large private RegionalAnalysis getRegionalAnalysis(Request req, Response res) { - return Persistence.regionalAnalyses.findByIdIfPermitted(req.params("_id"), req.attribute("accessGroup")); + return Persistence.regionalAnalyses.findByIdIfPermitted(req.params("_id"), UserPermissions.from(req)); } /** @@ -99,7 +102,7 @@ 
private RegionalAnalysis getRegionalAnalysis(Request req, Response res) { * @return JobStatues with associated regional analysis embedded */ private Collection getRunningAnalyses(Request req, Response res) { - Collection allAnalysesInRegion = getRegionalAnalysesForRegion(req.params("regionId"), req.attribute("accessGroup")); + Collection allAnalysesInRegion = getRegionalAnalysesForRegion(req.params("regionId"), UserPermissions.from(req)); List runningStatusesForRegion = new ArrayList<>(); Collection allJobStatuses = broker.getAllJobStatuses(); for (RegionalAnalysis ra : allAnalysesInRegion) { @@ -114,19 +117,17 @@ private Collection getRunningAnalyses(Request req, Response res) { } private RegionalAnalysis deleteRegionalAnalysis (Request req, Response res) { - String accessGroup = req.attribute("accessGroup"); - String email = req.attribute("email"); - + UserPermissions userPermissions = UserPermissions.from(req); RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted( QueryBuilder.start().and( QueryBuilder.start("_id").is(req.params("_id")).get(), QueryBuilder.start("deleted").is(false).get() ).get(), DBProjection.exclude("request.scenario.modifications"), - accessGroup + userPermissions ).iterator().next(); analysis.deleted = true; - Persistence.regionalAnalyses.updateByUserIfPermitted(analysis, email, accessGroup); + Persistence.regionalAnalyses.updateByUserIfPermitted(analysis, userPermissions); // clear it from the broker if (!analysis.complete) { @@ -173,7 +174,7 @@ private Object getRegionalResults (Request req, Response res) throws IOException RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted( QueryBuilder.start("_id").is(req.params("_id")).get(), DBProjection.exclude("request.scenario.modifications"), - req.attribute("accessGroup") + UserPermissions.from(req) ).iterator().next(); if (analysis == null || analysis.deleted) { @@ -230,99 +231,85 @@ private Object getRegionalResults (Request req, Response res) throws IOException String.join(",", analysis.destinationPointSetIds)); } - // It seems like you would check regionalAnalysis.complete to choose between redirecting to s3 and fetching - // the partially completed local file. But this field is never set to true - it's on a UI model object that - // isn't readily accessible to the internal Job-tracking mechanism of the back end. Instead, just try to fetch - // the partially completed results file, which includes an O(1) check whether the job is still being processed. - File partialRegionalAnalysisResultFile = broker.getPartialRegionalAnalysisResults(regionalAnalysisId); - - if (partialRegionalAnalysisResultFile != null) { - // FIXME we need to do the equivalent of the SelectingGridReducer here. - // The job is still being processed. There is a probably harmless race condition if the job happens to be - // completed at the very moment we're in this block, because the file will be deleted at that moment. 
- LOG.debug("Analysis {} is not complete, attempting to return the partial results grid.", regionalAnalysisId); - if (!"GRID".equalsIgnoreCase(fileFormatExtension)) { - throw AnalysisServerException.badRequest( - "For partially completed regional analyses, we can only return grid files, not images."); - } - if (partialRegionalAnalysisResultFile == null) { - throw AnalysisServerException.unknown( - "Could not find partial result grid for incomplete regional analysis on server."); - } - try { - res.header("content-type", "application/octet-stream"); - // This will cause Spark Framework to gzip the data automatically if requested by the client. - res.header("Content-Encoding", "gzip"); - // Spark has default serializers for InputStream and Bytes, and calls toString() on everything else. - return new FileInputStream(partialRegionalAnalysisResultFile); - } catch (FileNotFoundException e) { - // The job must have finished and the file was deleted upon upload to S3. This should be very rare. - throw AnalysisServerException.unknown( - "Could not find partial result grid for incomplete regional analysis on server."); - } - } else { - // The analysis has already completed, results should be stored and retrieved from S3 via redirects. - LOG.debug("Returning {} minute accessibility to pointset {} (percentile {}) for regional analysis {}.", - cutoffMinutes, destinationPointSetId, percentile, regionalAnalysisId); - FileStorageFormat format = FileStorageFormat.valueOf(fileFormatExtension.toUpperCase()); - if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.TIFF.equals(format)) { - throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"tiff\"."); - } + // We started implementing the ability to retrieve and display partially completed analyses. + // We eventually decided these should not be available here at the same endpoint as complete, immutable results. - // Analysis grids now have the percentile and cutoff in their S3 key, because there can be many of each. - // We do this even for results generated by older workers, so they will be re-extracted with the new name. - // These grids are reasonably small, we may be able to just send all cutoffs to the UI instead of selecting. - String singleCutoffKey = - String.format("%s_%s_P%d_C%d.%s", regionalAnalysisId, destinationPointSetId, percentile, cutoffMinutes, fileFormatExtension); - - // A lot of overhead here - UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. - FileStorageKey singleCutoffFileStorageKey = new FileStorageKey(RESULTS, singleCutoffKey); - if (!fileStorage.exists(singleCutoffFileStorageKey)) { - // An accessibility grid for this particular cutoff has apparently never been extracted from the - // regional results file before. Extract one and save it for future reuse. Older regional analyses - // may not have arrays allowing multiple cutoffs, percentiles, or destination pointsets. The - // filenames of such regional accessibility results will not have a percentile or pointset ID. - String multiCutoffKey; - if (analysis.travelTimePercentiles == null) { - // Oldest form of results, single-percentile, single grid. 
- multiCutoffKey = regionalAnalysisId + ".access"; + if (broker.findJob(regionalAnalysisId) != null) { + throw AnalysisServerException.notFound("Analysis is incomplete, no results file is available."); + } + + // FIXME It is possible that regional analysis is complete, but UI is trying to fetch gridded results when there + // aren't any (only CSV, because origins are freeform). + // How can we determine whether this analysis is expected to have no gridded results and cleanly return a 404? + + // The analysis has already completed, results should be stored and retrieved from S3 via redirects. + LOG.debug("Returning {} minute accessibility to pointset {} (percentile {}) for regional analysis {}.", + cutoffMinutes, destinationPointSetId, percentile, regionalAnalysisId); + FileStorageFormat format = FileStorageFormat.valueOf(fileFormatExtension.toUpperCase()); + if (!FileStorageFormat.GRID.equals(format) && !FileStorageFormat.PNG.equals(format) && !FileStorageFormat.GEOTIFF.equals(format)) { + throw AnalysisServerException.badRequest("Format \"" + format + "\" is invalid. Request format must be \"grid\", \"png\", or \"tiff\"."); + } + + // Analysis grids now have the percentile and cutoff in their S3 key, because there can be many of each. + // We do this even for results generated by older workers, so they will be re-extracted with the new name. + // These grids are reasonably small, we may be able to just send all cutoffs to the UI instead of selecting. + String singleCutoffKey = + String.format("%s_%s_P%d_C%d.%s", regionalAnalysisId, destinationPointSetId, percentile, cutoffMinutes, fileFormatExtension); + + // A lot of overhead here - UI contacts backend, backend calls S3, backend responds to UI, UI contacts S3. + FileStorageKey singleCutoffFileStorageKey = new FileStorageKey(RESULTS, singleCutoffKey); + if (!fileStorage.exists(singleCutoffFileStorageKey)) { + // An accessibility grid for this particular cutoff has apparently never been extracted from the + // regional results file before. Extract one and save it for future reuse. Older regional analyses + // did not have arrays allowing multiple cutoffs, percentiles, or destination pointsets. The + // filenames of such regional accessibility results will not have a percentile or pointset ID. + // First try the newest form of regional results: multi-percentile, multi-destination-grid. + String multiCutoffKey = String.format("%s_%s_P%d.access", regionalAnalysisId, destinationPointSetId, percentile); + FileStorageKey multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); + if (!fileStorage.exists(multiCutoffFileStorageKey)) { + LOG.warn("Falling back to older file name formats for regional results file: " + multiCutoffKey); + // Fall back to second-oldest form: multi-percentile, single destination grid. + multiCutoffKey = String.format("%s_P%d.access", regionalAnalysisId, percentile); + multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); + if (fileStorage.exists(multiCutoffFileStorageKey)) { + checkArgument(analysis.destinationPointSetIds.length == 1); } else { - if (analysis.destinationPointSetIds == null) { - // Newer form of regional results: multi-percentile, single grid. - multiCutoffKey = String.format("%s_P%d.access", regionalAnalysisId, percentile); + // Fall back on oldest form of results, single-percentile, single-destination-grid. 
+ multiCutoffKey = regionalAnalysisId + ".access"; + multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); + if (fileStorage.exists(multiCutoffFileStorageKey)) { + checkArgument(analysis.travelTimePercentiles.length == 1); + checkArgument(analysis.destinationPointSetIds.length == 1); } else { - // Newest form of regional results: multi-percentile, multi-grid. - multiCutoffKey = String.format("%s_%s_P%d.access", regionalAnalysisId, destinationPointSetId, percentile); + throw AnalysisServerException.notFound("Cannot find original source regional analysis output."); } } - LOG.debug("Single-cutoff grid {} not found on S3, deriving it from {}.", singleCutoffKey, multiCutoffKey); - FileStorageKey multiCutoffFileStorageKey = new FileStorageKey(RESULTS, multiCutoffKey); - - InputStream multiCutoffInputStream = new FileInputStream(fileStorage.getFile(multiCutoffFileStorageKey)); - Grid grid = new SelectingGridReducer(cutoffIndex).compute(multiCutoffInputStream); - - File localFile = FileUtils.createScratchFile(format.toString()); - FileOutputStream fos = new FileOutputStream(localFile); - - switch (format) { - case GRID: - grid.write(new GZIPOutputStream(fos)); - break; - case PNG: - grid.writePng(fos); - break; - case TIFF: - grid.writeGeotiff(fos); - break; - } - - fileStorage.moveIntoStorage(singleCutoffFileStorageKey, localFile); + } + LOG.debug("Single-cutoff grid {} not found on S3, deriving it from {}.", singleCutoffKey, multiCutoffKey); + + InputStream multiCutoffInputStream = new FileInputStream(fileStorage.getFile(multiCutoffFileStorageKey)); + Grid grid = new SelectingGridReducer(cutoffIndex).compute(multiCutoffInputStream); + + File localFile = FileUtils.createScratchFile(format.toString()); + FileOutputStream fos = new FileOutputStream(localFile); + + switch (format) { + case GRID: + grid.write(new GZIPOutputStream(fos)); + break; + case PNG: + grid.writePng(fos); + break; + case GEOTIFF: + grid.writeGeotiff(fos); + break; } - JSONObject json = new JSONObject(); - json.put("url", fileStorage.getURL(singleCutoffFileStorageKey)); - return json.toJSONString(); + fileStorage.moveIntoStorage(singleCutoffFileStorageKey, localFile); } + return JsonUtil.toJsonString( + JsonUtil.objectNode().put("url", fileStorage.getURL(singleCutoffFileStorageKey)) + ); } private String getCsvResults (Request req, Response res) { @@ -333,7 +320,7 @@ private String getCsvResults (Request req, Response res) { RegionalAnalysis analysis = Persistence.regionalAnalyses.findPermitted( QueryBuilder.start("_id").is(regionalAnalysisId).get(), DBProjection.exclude("request.scenario.modifications"), - req.attribute("accessGroup") + UserPermissions.from(req) ).iterator().next(); if (analysis == null || analysis.deleted) { @@ -357,8 +344,7 @@ private String getCsvResults (Request req, Response res) { * in the body of the HTTP response. */ private RegionalAnalysis createRegionalAnalysis (Request req, Response res) throws IOException { - final String accessGroup = req.attribute("accessGroup"); - final String email = req.attribute("email"); + final UserPermissions userPermissions = UserPermissions.from(req); AnalysisRequest analysisRequest = JsonUtil.objectMapper.readValue(req.body(), AnalysisRequest.class); @@ -378,13 +364,13 @@ private RegionalAnalysis createRegionalAnalysis (Request req, Response res) thro } // Create an internal RegionalTask and RegionalAnalysis from the AnalysisRequest sent by the client. 
- Project project = Persistence.projects.findByIdIfPermitted(analysisRequest.projectId, accessGroup); // TODO now this is setting cutoffs and percentiles in the regional (template) task. // why is some stuff set in this populate method, and other things set here in the caller? - RegionalTask task = (RegionalTask) analysisRequest.populateTask(new RegionalTask(), project); + RegionalTask task = new RegionalTask(); + analysisRequest.populateTask(task, userPermissions); // Set the destination PointSets, which are required for all non-Taui regional requests. - if (! analysisRequest.makeTauiSite) { + if (!analysisRequest.makeTauiSite) { checkNotNull(analysisRequest.destinationPointSetIds); checkState(analysisRequest.destinationPointSetIds.length > 0, "At least one destination pointset ID must be supplied."); @@ -395,7 +381,7 @@ private RegionalAnalysis createRegionalAnalysis (Request req, Response res) thro String destinationPointSetId = analysisRequest.destinationPointSetIds[i]; OpportunityDataset opportunityDataset = Persistence.opportunityDatasets.findByIdIfPermitted( destinationPointSetId, - accessGroup + userPermissions ); checkNotNull(opportunityDataset, "Opportunity dataset could not be found in database."); opportunityDatasets.add(opportunityDataset); @@ -430,7 +416,7 @@ private RegionalAnalysis createRegionalAnalysis (Request req, Response res) thro // Also load this freeform origin pointset instance itself, so broker can see point coordinates, ids etc. if (analysisRequest.originPointSetId != null) { task.originPointSetKey = Persistence.opportunityDatasets - .findByIdIfPermitted(analysisRequest.originPointSetId, accessGroup).storageLocation(); + .findByIdIfPermitted(analysisRequest.originPointSetId, userPermissions).storageLocation(); task.originPointSet = PointSetCache.readFreeFormFromFileStore(task.originPointSetKey); } @@ -456,6 +442,14 @@ private RegionalAnalysis createRegionalAnalysis (Request req, Response res) thro PointSetCache.readFreeFormFromFileStore(task.destinationPointSetKeys[0]) }; } + if (task.recordTimes) { + checkArgument( + task.destinationPointSets != null && + task.destinationPointSets.length == 1 && + task.destinationPointSets[0] instanceof FreeFormPointSet, + "recordTimes can only be used with a single destination pointset, which must be freeform (non-grid)." + ); + } // TODO remove duplicate fields from RegionalAnalysis that are already in the nested task. // The RegionalAnalysis should just be a minimal wrapper around the template task, adding the origin point set. 
@@ -470,14 +464,14 @@ private RegionalAnalysis createRegionalAnalysis (Request req, Response res) thro regionalAnalysis.west = task.west; regionalAnalysis.width = task.width; - regionalAnalysis.accessGroup = accessGroup; - regionalAnalysis.bundleId = project.bundleId; - regionalAnalysis.createdBy = email; + regionalAnalysis.accessGroup = userPermissions.accessGroup; + regionalAnalysis.bundleId = analysisRequest.bundleId; + regionalAnalysis.createdBy = userPermissions.email; regionalAnalysis.destinationPointSetIds = analysisRequest.destinationPointSetIds; regionalAnalysis.name = analysisRequest.name; regionalAnalysis.projectId = analysisRequest.projectId; - regionalAnalysis.regionId = project.regionId; - regionalAnalysis.variant = analysisRequest.variantIndex; + regionalAnalysis.regionId = analysisRequest.regionId; + regionalAnalysis.scenarioId = analysisRequest.scenarioId; regionalAnalysis.workerVersion = analysisRequest.workerVersion; regionalAnalysis.zoom = task.zoom; @@ -525,11 +519,26 @@ private RegionalAnalysis createRegionalAnalysis (Request req, Response res) thro return regionalAnalysis; } - private RegionalAnalysis updateRegionalAnalysis(Request request, Response response) throws IOException { - final String accessGroup = request.attribute("accessGroup"); - final String email = request.attribute("email"); + private RegionalAnalysis updateRegionalAnalysis (Request request, Response response) throws IOException { RegionalAnalysis regionalAnalysis = JsonUtil.objectMapper.readValue(request.body(), RegionalAnalysis.class); - return Persistence.regionalAnalyses.updateByUserIfPermitted(regionalAnalysis, email, accessGroup); + return Persistence.regionalAnalyses.updateByUserIfPermitted(regionalAnalysis, UserPermissions.from(request)); + } + + /** + * Return a JSON-wrapped URL for the file in FileStorage containing the JSON representation of the scenario for + * the given regional analysis. + */ + private JsonNode getScenarioJsonUrl (Request request, Response response) { + RegionalAnalysis regionalAnalysis = Persistence.regionalAnalyses + .findByIdIfPermitted(request.params("_id"), UserPermissions.from(request)); + // In the persisted objects, regionalAnalysis.scenarioId seems to be null. Get it from the embedded request. 
+ final String networkId = regionalAnalysis.bundleId; + final String scenarioId = regionalAnalysis.request.scenarioId; + checkNotNull(networkId, "RegionalAnalysis did not contain a network ID."); + checkNotNull(scenarioId, "RegionalAnalysis did not contain an embedded request with scenario ID."); + String scenarioUrl = fileStorage.getURL( + new FileStorageKey(BUNDLES, getScenarioFilename(regionalAnalysis.bundleId, scenarioId))); + return JsonUtil.objectNode().put("url", scenarioUrl); } @Override @@ -543,6 +552,7 @@ public void registerEndpoints (spark.Service sparkService) { sparkService.get("/:_id", this::getRegionalAnalysis); sparkService.get("/:_id/grid/:format", this::getRegionalResults); sparkService.get("/:_id/csv/:resultType", this::getCsvResults); + sparkService.get("/:_id/scenarioJsonUrl", this::getScenarioJsonUrl); sparkService.delete("/:_id", this::deleteRegionalAnalysis, toJson); sparkService.post("", this::createRegionalAnalysis, toJson); sparkService.put("/:_id", this::updateRegionalAnalysis, toJson); diff --git a/src/main/java/com/conveyal/analysis/controllers/TimetableController.java b/src/main/java/com/conveyal/analysis/controllers/TimetableController.java deleted file mode 100644 index 4bccee924..000000000 --- a/src/main/java/com/conveyal/analysis/controllers/TimetableController.java +++ /dev/null @@ -1,86 +0,0 @@ -package com.conveyal.analysis.controllers; - -import com.conveyal.analysis.models.AddTripPattern; -import com.conveyal.analysis.models.Modification; -import com.conveyal.analysis.models.Project; -import com.conveyal.analysis.models.Region; -import com.conveyal.analysis.persistence.Persistence; -import com.conveyal.analysis.util.JsonUtil; -import com.mongodb.QueryBuilder; -import org.json.simple.JSONArray; -import org.json.simple.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import spark.Request; -import spark.Response; - -import java.util.Collection; -import java.util.List; - -/** - * Created by evan siroky on 5/3/18. - */ -public class TimetableController implements HttpController { - - private static final Logger LOG = LoggerFactory.getLogger(TimetableController.class); - - public TimetableController () { - // NO COMPONENT DEPENDENCIES - // Eventually persistence will be a component (AnalysisDatabase) instead of static. - } - - // Unlike many other methods, rather than serializing a Java type to JSON, - // this builds up the JSON using a map-like API. It looks like we're using org.json.simple here - // instead of Jackson which we're using elsewhere. We should use one or the other. 
- private String getTimetables (Request req, Response res) { - JSONArray json = new JSONArray(); - Collection regions = Persistence.regions.findAllForRequest(req); - - for (Region region : regions) { - JSONObject r = new JSONObject(); - r.put("_id", region._id); - r.put("name", region.name); - JSONArray regionProjects = new JSONArray(); - List projects = Persistence.projects.find(QueryBuilder.start("regionId").is(region._id).get()).toArray(); - for (Project project : projects) { - JSONObject p = new JSONObject(); - p.put("_id", project._id); - p.put("name", project.name); - JSONArray projectModifications = new JSONArray(); - List modifications = Persistence.modifications.find( - QueryBuilder.start("projectId").is(project._id).and("type").is("add-trip-pattern").get() - ).toArray(); - for (Modification modification : modifications) { - AddTripPattern tripPattern = (AddTripPattern) modification; - JSONObject m = new JSONObject(); - m.put("_id", modification._id); - m.put("name", modification.name); - m.put("segments", JsonUtil.objectMapper.valueToTree(tripPattern.segments)); - JSONArray modificationTimetables = new JSONArray(); - for (AddTripPattern.Timetable timetable : tripPattern.timetables) { - modificationTimetables.add(JsonUtil.objectMapper.valueToTree(timetable)); - } - m.put("timetables", modificationTimetables); - if (modificationTimetables.size() > 0) { - projectModifications.add(m); - } - } - p.put("modifications", projectModifications); - if (projectModifications.size() > 0) { - regionProjects.add(p); - } - } - r.put("projects", regionProjects); - if (regionProjects.size() > 0) { - json.add(r); - } - } - - return json.toString(); - } - - @Override - public void registerEndpoints (spark.Service sparkService) { - sparkService.get("/api/timetables", this::getTimetables); - } -} diff --git a/src/main/java/com/conveyal/analysis/controllers/UserActivityController.java b/src/main/java/com/conveyal/analysis/controllers/UserActivityController.java index c72220079..ac7b2ecb4 100644 --- a/src/main/java/com/conveyal/analysis/controllers/UserActivityController.java +++ b/src/main/java/com/conveyal/analysis/controllers/UserActivityController.java @@ -3,13 +3,15 @@ import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.components.TaskScheduler; import com.conveyal.r5.analyst.progress.ApiTask; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import spark.Request; import spark.Response; import spark.Service; +import java.util.ArrayList; import java.util.List; -import static com.conveyal.analysis.components.HttpApi.USER_PERMISSIONS_ATTRIBUTE; import static com.conveyal.analysis.util.JsonUtil.toJson; /** @@ -41,11 +43,13 @@ public void registerEndpoints (Service sparkService) { } private ResponseModel getActivity (Request req, Response res) { - UserPermissions userPermissions = req.attribute(USER_PERMISSIONS_ATTRIBUTE); + UserPermissions userPermissions = UserPermissions.from(req); ResponseModel responseModel = new ResponseModel(); responseModel.systemStatusMessages = List.of(); responseModel.taskBacklog = taskScheduler.getBacklog(); - responseModel.taskProgress = taskScheduler.getTasksForUser(userPermissions.email); + boolean system = Boolean.parseBoolean(req.queryParams("system")); // false if param not present + String user = system ? 
"SYSTEM" : userPermissions.email; + responseModel.taskProgress = taskScheduler.getTasksForUser(user); return responseModel; } diff --git a/src/main/java/com/conveyal/analysis/controllers/WorkerProxyController.java b/src/main/java/com/conveyal/analysis/controllers/WorkerProxyController.java index 84b0d34e9..2e9a36932 100644 --- a/src/main/java/com/conveyal/analysis/controllers/WorkerProxyController.java +++ b/src/main/java/com/conveyal/analysis/controllers/WorkerProxyController.java @@ -1,5 +1,6 @@ package com.conveyal.analysis.controllers; +import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.components.broker.Broker; import com.conveyal.analysis.components.broker.WorkerTags; import com.conveyal.analysis.models.Bundle; @@ -60,9 +61,7 @@ private Object proxyGet (Request request, Response response) { if (address == null) { Bundle bundle = null; // There are no workers that can handle this request. Request one and ask the UI to retry later. - final String accessGroup = request.attribute("accessGroup"); - final String userEmail = request.attribute("email"); - WorkerTags workerTags = new WorkerTags(accessGroup, userEmail, "anyProjectId", bundle.regionId); + WorkerTags workerTags = new WorkerTags(UserPermissions.from(request), bundle.regionId); broker.createOnDemandWorkerInCategory(workerCategory, workerTags); response.status(HttpStatus.ACCEPTED_202); response.header("Retry-After", "30"); diff --git a/src/main/java/com/conveyal/analysis/controllers/WrappedFeedInfo.java b/src/main/java/com/conveyal/analysis/controllers/WrappedFeedInfo.java deleted file mode 100644 index 08f5c5ed3..000000000 --- a/src/main/java/com/conveyal/analysis/controllers/WrappedFeedInfo.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.conveyal.analysis.controllers; - -import com.conveyal.gtfs.api.graphql.WrappedGTFSEntity; -import com.conveyal.gtfs.model.FeedInfo; - -/** - * Wrap feed info with GTFS feed checksum and feed unique ID. - */ -public class WrappedFeedInfo extends WrappedGTFSEntity { - public long checksum; - - - /** - * Wrap the given GTFS entity with the unique Feed ID specified (this is not generally a GTFS feed ID as they - * are not unique between different versions of the same feed. Also pass in feed checksum. - */ - public WrappedFeedInfo(String feedUniqueID, FeedInfo entity, long checksum) { - super(feedUniqueID, entity); - this.checksum = checksum; - } -} diff --git a/src/main/java/com/conveyal/analysis/datasource/CsvDataSourceIngester.java b/src/main/java/com/conveyal/analysis/datasource/CsvDataSourceIngester.java new file mode 100644 index 000000000..1fd3c25a6 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/CsvDataSourceIngester.java @@ -0,0 +1,40 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.analysis.models.Bounds; +import com.conveyal.analysis.models.SpatialDataSource; +import com.conveyal.file.FileStorageFormat; +import com.conveyal.r5.analyst.FreeFormPointSet; +import com.conveyal.r5.analyst.progress.ProgressListener; +import com.conveyal.r5.util.ShapefileReader; +import org.locationtech.jts.geom.Envelope; +import org.opengis.referencing.FactoryException; +import org.opengis.referencing.operation.TransformException; + +import java.io.File; + + +/** + * Logic to create SpatialDataSource metadata from a comma separated file. + * Eventually we may want to support other separators like semicolon, tab, vertical bar etc. + * Eventually this could also import non-spatial delimited text files. 
+ */ +public class CsvDataSourceIngester extends DataSourceIngester { + + private final SpatialDataSource dataSource; + + @Override + public SpatialDataSource dataSource () { + return dataSource; + } + + public CsvDataSourceIngester () { + this.dataSource = new SpatialDataSource(); + dataSource.fileFormat = FileStorageFormat.CSV; + } + + @Override + public void ingest (File file, ProgressListener progressListener) { + throw new UnsupportedOperationException(); + } + +} diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceException.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceException.java new file mode 100644 index 000000000..31c9bbf4e --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceException.java @@ -0,0 +1,13 @@ +package com.conveyal.analysis.datasource; + +public class DataSourceException extends RuntimeException { + + public DataSourceException (String message) { + super(message); + } + + public DataSourceException (String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceIngester.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceIngester.java new file mode 100644 index 000000000..9e3943c99 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceIngester.java @@ -0,0 +1,77 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.analysis.UserPermissions; +import com.conveyal.analysis.models.DataSource; +import com.conveyal.file.FileStorageFormat; +import com.conveyal.r5.analyst.progress.ProgressListener; +import org.bson.types.ObjectId; + +import java.io.File; + +import static com.conveyal.file.FileStorageFormat.GEOJSON; +import static com.conveyal.file.FileStorageFormat.GEOPACKAGE; +import static com.conveyal.file.FileStorageFormat.SHP; +import static com.conveyal.file.FileStorageFormat.GEOTIFF; + +/** + * Logic for loading and validating a specific kind of input file, yielding a specific subclass of DataSource. + * This plugs into DataSourceUploadAction, which handles the general parts of processing any new DataSource. + */ +public abstract class DataSourceIngester { + + /** + * An accessor method that gives the general purpose DataSourceUploadAction and DataSourceIngester code a view of + * the DataSource being constructed. This allows to DataSourceUploadAction to set all the shared general properties + * of a DataSource and insert it into the database, leaving the DataSourceIngester to handle only the details + * specific to its input format and DataSource subclass. Concrete subclasses should ensure that this method can + * return an object immediately after they're constructed. + */ + protected abstract DataSource dataSource (); + + /** + * This method is implemented on concrete subclasses to provide logic for interpreting a particular file type. + * This is potentially the slowest part of DataSource creation so is called asynchronously (in a background task). + * A single File is passed in here (rather than in the subclass constructors) because the file is moved into + * storage before ingestion. Some supported formats (only shapefile for now) are made up of more than one file, + * which must all be in the same directory. Moving them into storage ensures they're all in the same directory with + * the same base name as required, and only one of their complete file names must be provided. 
+ */ + public abstract void ingest (File file, ProgressListener progressListener); + + /** + * This method takes care of setting the fields common to all kinds of DataSource, with the specific concrete + * DataSourceIngester taking care of the rest. + * Our no-arg BaseModel constructors are used for deserialization so they don't create an _id or nonce ObjectId(); + */ + public void initializeDataSource ( + String name, String originalFileNames, String regionId, UserPermissions userPermissions + ) { + DataSource dataSource = dataSource(); + dataSource._id = new ObjectId(); + dataSource.nonce = new ObjectId(); + dataSource.name = name; + dataSource.regionId = regionId; + dataSource.createdBy = userPermissions.email; + dataSource.updatedBy = userPermissions.email; + dataSource.accessGroup = userPermissions.accessGroup; + dataSource.originalFileName = originalFileNames; + dataSource.description = "From uploaded files: " + originalFileNames; + } + + /** + * Factory method to return an instance of the appropriate concrete subclass for the given file format. + */ + public static DataSourceIngester forFormat (FileStorageFormat format) { + if (format == SHP) { + return new ShapefileDataSourceIngester(); + } else if (format == GEOJSON) { + return new GeoJsonDataSourceIngester(); + } else if (format == GEOTIFF) { + return new GeoTiffDataSourceIngester(); + } else if (format == GEOPACKAGE) { + return new GeoPackageDataSourceIngester(); + } + throw new UnsupportedOperationException("Unknown file format: " + format.name()); + } + +} diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java new file mode 100644 index 000000000..39f6740ab --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java @@ -0,0 +1,143 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.analysis.UserPermissions; +import com.conveyal.analysis.models.DataSource; +import com.conveyal.analysis.persistence.AnalysisCollection; +import com.conveyal.file.FileStorage; +import com.conveyal.file.FileStorageFormat; +import com.conveyal.file.FileStorageKey; +import com.conveyal.r5.analyst.progress.ProgressListener; +import com.conveyal.r5.analyst.progress.TaskAction; +import org.apache.commons.fileupload.FileItem; +import org.apache.commons.fileupload.disk.DiskFileItem; +import org.apache.commons.io.FilenameUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.stream.Collectors; + +import static com.conveyal.analysis.util.HttpUtils.getFormField; +import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate; +import static com.conveyal.file.FileCategory.DATASOURCES; +import static com.conveyal.file.FileStorageFormat.SHP; +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; + +/** + * Given a batch of uploaded files, put them into FileStorage, categorize and validate them, and record metadata as + * some specific subclass of DataSource. This implements TaskAction so it can be run in the background without blocking + * the HTTP request and handler thread. 
+ */ +public class DataSourceUploadAction implements TaskAction { + + private static final Logger LOG = LoggerFactory.getLogger(DataSourceUploadAction.class); + + // The Components used by this background task, which were formerly captured by an anonymous closure. + // Using named and well-defined classes for these background actions makes data flow and depdendencies clearer. + private final FileStorage fileStorage; + private final AnalysisCollection dataSourceCollection; + + /** The files provided in the HTTP post form. These will be moved into storage. */ + private final List fileItems; + + /** + * This DataSourceIngester provides encapsulated loading and validation logic for a single format, by composition + * rather than subclassing. Format ingestion does not require access to the fileStorage or the database collection. + */ + private DataSourceIngester ingester; + + /** + * The file to be ingested, after it has been moved into storage. For Shapefiles and other such "sidecar" formats, + * this is the main file (.shp), with the same base name and in the same directory as all its sidecar files. + */ + private File file; + + // This method is a stopgaps - it seems like this should be done differently. + public String getDataSourceName () { + return ingester.dataSource().name; + } + + public DataSourceUploadAction ( + FileStorage fileStorage, + AnalysisCollection dataSourceCollection, + List fileItems, + DataSourceIngester ingester + ) { + this.fileStorage = fileStorage; + this.dataSourceCollection = dataSourceCollection; + this.fileItems = fileItems; + this.ingester = ingester; + } + + @Override + public final void action (ProgressListener progressListener) throws Exception { + progressListener.setWorkProduct(ingester.dataSource().toWorkProduct()); + moveFilesIntoStorage(progressListener); + ingester.ingest(file, progressListener); + dataSourceCollection.insert(ingester.dataSource()); + } + + /** + * Move all files uploaded in the HTTP post form into (cloud) FileStorage from their temp upload location. + * Called asynchronously (in a background task) because when using cloud storage, this transfer could be slow. + * We could do this after processing instead of before, but consider the shapefile case: we can't be completely + * sure the source temp files are all in the same directory. Better to process them after moving into one directory. + * We should also consider whether preprocessing like conversion of GTFS to MapDBs should happen at this upload + * stage. If so, then this logic needs to change a bit. + */ + private final void moveFilesIntoStorage (ProgressListener progressListener) { + // Loop through uploaded files, registering the extensions and writing to storage + // (with filenames that correspond to the source id) + progressListener.beginTask("Moving files into storage...", 1); + final String dataSourceId = ingester.dataSource()._id.toString(); + for (FileItem fileItem : fileItems) { + DiskFileItem dfi = (DiskFileItem) fileItem; + // Use canonical extension from file type - files may be uploaded with e.g. tif instead of tiff or geotiff. + String extension = ingester.dataSource().fileFormat.extension; + if (fileItems.size() > 1) { + // If we have multiple files, as with Shapefile, just keep the original extension for each file. + // This could lead to orphaned files after a deletion, we might want to implement wildcard deletion. 
+ extension = FilenameUtils.getExtension(fileItem.getName()).toLowerCase(Locale.ROOT); + } + FileStorageKey key = new FileStorageKey(DATASOURCES, dataSourceId, extension); + fileStorage.moveIntoStorage(key, dfi.getStoreLocation()); + if (fileItems.size() == 1 || extension.equalsIgnoreCase(SHP.extension)) { + file = fileStorage.getFile(key); + } + } + checkNotNull(file); + checkState(file.exists()); + } + + /** + * Given the HTTP post form fields from our data source creation endpoint, return a DataSourceUploadAction + * instance set up to process the uploaded data in the background. This will fail fast on data files that we can't + * recognize or have obvious problems. Care should be taken that this method contains no slow actions. + */ + public static DataSourceUploadAction forFormFields ( + FileStorage fileStorage, + AnalysisCollection dataSourceCollection, + Map> formFields, + UserPermissions userPermissions + ) { + // Extract required parameters. Throws AnalysisServerException on failure, e.g. if a field is missing. + final String sourceName = getFormField(formFields, "sourceName", true); + final String regionId = getFormField(formFields, "regionId", true); + final List fileItems = formFields.get("sourceFiles"); + + FileStorageFormat format = detectUploadFormatAndValidate(fileItems); + DataSourceIngester ingester = DataSourceIngester.forFormat(format); + + String originalFileNames = fileItems.stream().map(FileItem::getName).collect(Collectors.joining(", ")); + ingester.initializeDataSource(sourceName, originalFileNames, regionId, userPermissions); + DataSourceUploadAction dataSourceUploadAction = + new DataSourceUploadAction(fileStorage, dataSourceCollection, fileItems, ingester); + + return dataSourceUploadAction; + } + +} diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java new file mode 100644 index 000000000..d85b883e9 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java @@ -0,0 +1,121 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.file.FileStorageFormat; +import com.google.common.collect.Sets; +import org.apache.commons.fileupload.FileItem; +import org.apache.commons.fileupload.disk.DiskFileItem; +import org.apache.commons.io.FilenameUtils; + +import java.io.File; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; + +import static com.conveyal.r5.common.Util.isNullOrEmpty; +import static com.google.common.base.Preconditions.checkState; + +/** + * Utility class with common static methods for validating and processing uploaded spatial data files. + */ +public abstract class DataSourceUtil { + + /** + * Detect the format of a batch of user-uploaded files. Once the intended file type has been established, we + * validate the list of uploaded files, making sure certain preconditions are met. Some kinds of uploads must + * contain multiple files (.shp) while most others must contain only a single file (.csv, .gpkg etc.). + * Note that this does not perform structural or semantic validation of file contents, just the high-level + * characteristics of the set of file names. + * @throws DataSourceException if the type of the upload can't be detected or preconditions are violated. + * @return the expected type of the uploaded file or files, never null. 
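[Editor's note: illustrative sketch, not part of the diff.] The pieces introduced above fit together roughly as follows in an HTTP handler. The calls to HttpUtils.getRequestFiles, UserPermissions.from, and DataSourceUploadAction.forFormFields appear elsewhere in this diff; the fileStorage, dataSourceCollection, and progressListener variables, the enclosing method declared to throw Exception, and running the action synchronously instead of enqueuing it on the backend's task scheduler are all assumptions made for brevity.

    // Parse the multipart form and identify the user, as elsewhere in this diff.
    Map<String, List<FileItem>> formFields = HttpUtils.getRequestFiles(req.raw());
    UserPermissions userPermissions = UserPermissions.from(req);
    // Detect the upload format, build the matching ingester, and bundle everything into a background action.
    DataSourceUploadAction uploadAction = DataSourceUploadAction.forFormFields(
            fileStorage, dataSourceCollection, formFields, userPermissions
    );
    // In the real backend this would be handed to the TaskScheduler; called directly here for illustration.
    uploadAction.action(progressListener);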
+ */ + public static FileStorageFormat detectUploadFormatAndValidate (List fileItems) { + if (isNullOrEmpty(fileItems)) { + throw new DataSourceException("You must select some files to upload."); + } + Set fileExtensions = extractFileExtensions(fileItems); + if (fileExtensions.isEmpty()) { + throw new DataSourceException("No file extensions seen, cannot detect upload type."); + } + checkFileCharacteristics(fileItems); + if (fileExtensions.contains("zip")) { + throw new DataSourceException("Upload of spatial .zip files not yet supported"); + // TODO unzip and process unzipped files - will need to peek inside to detect GTFS uploads first. + // detectUploadFormatAndValidate(unzipped) + } + // Check that if upload contains any of the Shapefile sidecar files, it contains all of the required ones. + final Set shapefileExtensions = Sets.newHashSet("shp", "dbf", "prj"); + if ( ! Sets.intersection(fileExtensions, shapefileExtensions).isEmpty()) { + if (fileExtensions.containsAll(shapefileExtensions)) { + verifyBaseNamesSame(fileItems); + // TODO check that any additional file is .shx, and that there are no more than 4 files. + } else { + throw new DataSourceException("You must multi-select at least SHP, DBF, and PRJ files for shapefile upload."); + } + return FileStorageFormat.SHP; + } + // The upload was not a Shapefile. All other formats should contain one single file. + if (fileExtensions.size() != 1) { + throw new DataSourceException("For any format but Shapefile, upload only one file at a time."); + } + final String extension = fileExtensions.stream().findFirst().get(); + // TODO replace with iteration over FileStorageFormat.values() and their lists of extensions + if (extension.equals("grid")) { + return FileStorageFormat.GRID; + } else if (extension.equals("csv")) { + return FileStorageFormat.CSV; + } else if (extension.equals("geojson") || extension.equals("json")) { + return FileStorageFormat.GEOJSON; + } else if (extension.equals("gpkg")) { + return FileStorageFormat.GEOPACKAGE; + } else if (extension.equals("tif") || extension.equals("tiff") || extension.equals("geotiff")) { + return FileStorageFormat.GEOTIFF; + } + throw new DataSourceException("Could not detect format of uploaded spatial data."); + } + + /** + * Check that all FileItems supplied are stored in disk files (not memory), that they are all readable and all + * have nonzero size. + */ + private static void checkFileCharacteristics (List fileItems) { + for (FileItem fileItem : fileItems) { + checkState(fileItem instanceof DiskFileItem, "Uploaded file was not stored to disk."); + File diskFile = ((DiskFileItem)fileItem).getStoreLocation(); + checkState(diskFile.exists(), "Uploaded file does not exist on filesystem as expected."); + checkState(diskFile.canRead(), "Read permissions were not granted on uploaded file."); + checkState(diskFile.length() > 0, "Uploaded file was empty (contained no data)."); + } + } + + /** + * Given a list of FileItems, return a set of all unique file extensions present, normalized to lower case. + * Always returns a set instance which may be empty, but never null. 
+ */ + private static Set extractFileExtensions (List fileItems) { + Set fileExtensions = new HashSet<>(); + for (FileItem fileItem : fileItems) { + String fileName = fileItem.getName(); + String extension = FilenameUtils.getExtension(fileName); + if (extension.isEmpty()) { + throw new DataSourceException("Filename has no extension: " + fileName); + } + fileExtensions.add(extension.toLowerCase(Locale.ROOT)); + } + return fileExtensions; + } + + /** In uploads containing more than one file, all files are expected to have the same name before the extension. */ + private static void verifyBaseNamesSame (List fileItems) { + String firstBaseName = null; + for (FileItem fileItem : fileItems) { + String baseName = FilenameUtils.getBaseName(fileItem.getName()); + if (firstBaseName == null) { + firstBaseName = baseName; + } else if (!firstBaseName.equals(baseName)) { + throw new DataSourceException("In a shapefile upload, all files must have the same base name."); + } + } + } + +} diff --git a/src/main/java/com/conveyal/analysis/datasource/GeoJsonDataSourceIngester.java b/src/main/java/com/conveyal/analysis/datasource/GeoJsonDataSourceIngester.java new file mode 100644 index 000000000..49f182860 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/GeoJsonDataSourceIngester.java @@ -0,0 +1,181 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.analysis.models.Bounds; +import com.conveyal.analysis.models.DataSource; +import com.conveyal.analysis.models.SpatialDataSource; +import com.conveyal.file.FileStorageFormat; +import com.conveyal.r5.analyst.progress.ProgressListener; +import com.conveyal.r5.util.ShapefileReader.GeometryType; +import org.geotools.data.Query; +import org.geotools.data.geojson.GeoJSONDataStore; +import org.geotools.data.simple.SimpleFeatureSource; +import org.geotools.feature.FeatureCollection; +import org.geotools.feature.FeatureIterator; +import org.geotools.referencing.CRS; +import org.geotools.referencing.crs.DefaultGeographicCRS; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.geom.Geometry; +import org.opengis.feature.simple.SimpleFeature; +import org.opengis.feature.simple.SimpleFeatureType; +import org.opengis.feature.type.AttributeDescriptor; +import org.opengis.feature.type.FeatureType; +import org.opengis.referencing.FactoryException; +import org.opengis.referencing.crs.CoordinateReferenceSystem; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import static com.conveyal.analysis.models.DataSourceValidationIssue.Level.ERROR; +import static com.conveyal.r5.common.GeometryUtils.checkWgsEnvelopeSize; + +/** + * Logic to create SpatialDataSource metadata from an uploaded GeoJSON file and perform validation. + * + * GeoJSON geometries are JSON objects with a type property (Point, LineString, Polygon, MultiPoint, MultiPolygon, + * or MultiLineString) and an array of coordinates. The "multi" types simply have another level of nested arrays. + * Geometries are usually nested into objects of type "Feature", which allows attaching properties. Features can be + * further nested into a top-level object of type FeatureCollection. We only support GeoJSON whose top level object is + * a FeatureCollection (not a single Feature or a single Geometry), and where every geometry is of the same type. 
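To make the constraint above concrete, here is a minimal sketch (not part of this patch, with a hypothetical file and property names) of the only GeoJSON shape this ingester is meant to accept: a top-level FeatureCollection whose features all share one geometry type and one property schema. It uses only the gt-geojsondatastore calls that appear in the class below.

    import org.geotools.data.geojson.GeoJSONDataStore;
    import org.geotools.data.simple.SimpleFeatureIterator;
    import org.geotools.data.simple.SimpleFeatureSource;
    import org.opengis.feature.simple.SimpleFeature;

    import java.io.File;
    import java.nio.file.Files;

    public class GeoJsonSketch {
        public static void main (String[] args) throws Exception {
            // Uniform schema: every feature is a Point with the same two properties.
            String json = "{\"type\":\"FeatureCollection\",\"features\":["
                    + "{\"type\":\"Feature\",\"properties\":{\"name\":\"a\",\"count\":1},"
                    + "\"geometry\":{\"type\":\"Point\",\"coordinates\":[-73.99,40.73]}},"
                    + "{\"type\":\"Feature\",\"properties\":{\"name\":\"b\",\"count\":2},"
                    + "\"geometry\":{\"type\":\"Point\",\"coordinates\":[-73.98,40.74]}}]}";
            File file = Files.createTempFile("sketch", ".geojson").toFile();
            Files.writeString(file.toPath(), json);
            // Same reader used by GeoJsonDataSourceIngester below.
            GeoJSONDataStore dataStore = new GeoJSONDataStore(file);
            SimpleFeatureSource featureSource = dataStore.getFeatureSource();
            try (SimpleFeatureIterator features = featureSource.getFeatures().features()) {
                while (features.hasNext()) {
                    SimpleFeature feature = features.next();
                    // Every feature should report the same concrete geometry class (Point here).
                    System.out.println(feature.getDefaultGeometry().getClass().getSimpleName());
                }
            }
        }
    }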
+ * + * For consistency with other vector data sources and our internal geometry representation, we are using the + * (unsupported) GeoTools module gt-geojsondatasource for loading GeoJSON into a FeatureCollection of OpenGIS Features. + * + * This is somewhat problematic because GeoJSON does not adhere to some common GIS principles. For example, in a + * GeoJSON feature collection, every single object can have a different geometry type and different properties, or + * even properties with the same name but different data types. For simplicity we only support GeoJSON inputs that + * have a consistent schema across all features - the same geometry type and attribute types on every feature. We + * still need to verify these constraints ourselves, as GeoTools does not enforce them. + * + * It is notable that GeoTools will not work correctly with GeoJSON input that does not respect these constraints, but + * it does not detect or report those problems - it just fails silently. For example, GeoTools will report the same + * property schema for every feature in a FeatureCollection. If a certain property is reported as having an integer + * numeric type, but a certain feature has text in the attribute of that same name, the reported value will be an + * Integer object with a value of zero, not a String. + * + * This behavior is odd, but remember that the gt-geojsondatastore module is unsupported (though apparently on the path + * to being supported) and was only recently included in Geotools releases. We may want to make code contributions to + * Geotools to improve JSON validation and error reporting. + * + * Section 4 of the GeoJSON RFC at https://datatracker.ietf.org/doc/html/rfc7946#section-4 defines the only acceptable + * coordinate reference system as WGS84. You may notice older versions of the GeoTools GeoJSON handler have CRS parsing + * capabilities. This is just support for an obsolete feature and should not be invoked. We instead range check all + * incoming coordinates (via a total bounding box check) to ensure they look reasonable in WGS84. + * + * Note that QGIS will happily and silently export GeoJSON with a crs field, which gt-geojsondatastore will happily + * read and report that it's in WGS84 without ever looking at the crs field. This is another case where Geotools would + * seriously benefit from added validation and error reporting, and where we need to add stopgap validation of our own. + * + * In GeoTools, FeatureSource is a read-only mechanism but it can apparently only return FeatureCollections which load + * everything into memory. FeatureReader provides iterator-style access, but seems quite low-level and not intended + * for regular use. Because we limit the size of file uploads we can be fairly sure it will be harmless for the backend + * to load any data fully into memory. Feature streaming capabilities and/or streaming JSON decoding can be added later + * if the need arises. The use of FeatureReader and FeatureSource are explained well at: + * https://docs.geotools.org/stable/userguide/tutorial/datastore/read.html + */ +public class GeoJsonDataSourceIngester extends DataSourceIngester { + + public static final int MIN_GEOJSON_FILE_LENGTH = "{'type':'FeatureCollection','features':[]}".length(); + + private final SpatialDataSource dataSource; + + @Override + public DataSource dataSource () { + return dataSource; + } + + public GeoJsonDataSourceIngester () { + // Note we're using the no-arg constructor creating a totally empty object. 
+        // Its ID and other general fields will be set later by the enclosing DataSourceUploadAction.
+        this.dataSource = new SpatialDataSource();
+        dataSource.fileFormat = FileStorageFormat.GEOJSON;
+    }
+
+
+    @Override
+    public void ingest (File file, ProgressListener progressListener) {
+        progressListener.beginTask("Processing and validating uploaded GeoJSON", 1);
+        progressListener.setWorkProduct(dataSource.toWorkProduct());
+        // Check that file exists and is not empty. Geotools reader fails with stack overflow on empty/missing file.
+        if (!file.exists()) {
+            throw new IllegalArgumentException("File does not exist: " + file.getPath());
+        }
+        if (file.length() < MIN_GEOJSON_FILE_LENGTH) {
+            throw new DataSourceException("File is too short to be GeoJSON, length is: " + file.length());
+        }
+        try {
+            // Note that most of this logic is identical to Shapefile and GeoPackage, extract common code.
+            GeoJSONDataStore dataStore = new GeoJSONDataStore(file);
+            SimpleFeatureSource featureSource = dataStore.getFeatureSource();
+            // This loads the whole thing into memory. That should be harmless given our file upload size limits.
+            Query query = new Query(Query.ALL);
+            query.setCoordinateSystemReproject(DefaultGeographicCRS.WGS84);
+            FeatureCollection<SimpleFeatureType, SimpleFeature> wgsFeatureCollection = featureSource.getFeatures(query);
+            // The schema of the FeatureCollection does seem to reflect all attributes present on all features.
+            // However the type of those attributes seems to be restricted to that of the first value encountered.
+            // Conversions may fail silently on any successive instances of that property with a different type.
+            SimpleFeatureType featureType = wgsFeatureCollection.getSchema();
+            // Note: this somewhat duplicates ShapefileReader.attributes, code should be reusable across formats
+            // But look into the null checking and duplicate attribute checks there.
+            dataSource.attributes = new ArrayList<>();
+            for (AttributeDescriptor descriptor : featureType.getAttributeDescriptors()) {
+                dataSource.attributes.add(new SpatialAttribute(descriptor));
+            }
+            // The schema always reports the geometry type as the very generic "Geometry" class.
+            // Check that all features have the same concrete Geometry type.
+            Set<Class<? extends Geometry>> geometryClasses = new HashSet<>();
+            FeatureIterator<SimpleFeature> iterator = wgsFeatureCollection.features();
+            while (iterator.hasNext()) {
+                SimpleFeature feature = iterator.next();
+                Geometry geometry = (Geometry) feature.getDefaultGeometry();
+                if (geometry == null) {
+                    dataSource.addIssue(ERROR, "Geometry is null on feature: " + feature.getID());
+                    continue;
+                }
+                geometryClasses.add(geometry.getClass());
+            }
+            checkCrs(featureType);
+            Envelope wgsEnvelope = wgsFeatureCollection.getBounds();
+            checkWgsEnvelopeSize(wgsEnvelope, "GeoJSON");
+
+            // Set SpatialDataSource fields (Conveyal metadata) from the GeoTools model
+            dataSource.wgsBounds = Bounds.fromWgsEnvelope(wgsEnvelope);
+            dataSource.featureCount = wgsFeatureCollection.size();
+            // Cannot set geometry type based on FeatureType.getGeometryDescriptor() because it's always just Geometry
+            // for GeoJSON. We will leave the type null if there are zero or multiple geometry types present.
+ List geometryTypes = + geometryClasses.stream().map(GeometryType::forBindingClass).collect(Collectors.toList()); + if (geometryTypes.isEmpty()) { + dataSource.addIssue(ERROR, "No geometry types are present."); + } else if (geometryTypes.size() > 1) { + dataSource.addIssue(ERROR, "Multiple geometry types present: " + geometryTypes); + } else { + dataSource.geometryType = geometryTypes.get(0); + } + dataSource.coordinateSystem = DefaultGeographicCRS.WGS84.getName().getCode(); + progressListener.increment(); + } catch (FactoryException | IOException e) { + // Catch only checked exceptions to avoid excessive wrapping of root cause exception when possible. + throw new DataSourceException("Error parsing GeoJSON. Please ensure the files you uploaded are valid."); + } + } + + /** + * GeoJSON used to allow CRS, but the RFC now says GeoJSON is always in WGS84 and no other CRS are allowed. + * QGIS and GeoTools both seem to support crs fields, but it's an obsolete feature. + */ + private static void checkCrs (FeatureType featureType) throws FactoryException { + // FIXME newer GeoTools always reports WGS84 even when crs field is present. + // It doesn't report the problem or attempt any reprojection. + CoordinateReferenceSystem crs = featureType.getCoordinateReferenceSystem(); + if (crs != null && !DefaultGeographicCRS.WGS84.equals(crs) && !CRS.decode("CRS:84").equals(crs)) { + throw new DataSourceException("GeoJSON should specify no coordinate reference system, and contain " + + "unprojected WGS84 coordinates. CRS is: " + crs.toString()); + } + + } +} diff --git a/src/main/java/com/conveyal/analysis/datasource/GeoPackageDataSourceIngester.java b/src/main/java/com/conveyal/analysis/datasource/GeoPackageDataSourceIngester.java new file mode 100644 index 000000000..64ba8d525 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/GeoPackageDataSourceIngester.java @@ -0,0 +1,102 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.analysis.models.Bounds; +import com.conveyal.analysis.models.DataSource; +import com.conveyal.analysis.models.SpatialDataSource; +import com.conveyal.file.FileStorageFormat; +import com.conveyal.r5.analyst.progress.ProgressListener; +import com.conveyal.r5.util.ShapefileReader; +import org.geotools.data.DataStore; +import org.geotools.data.DataStoreFinder; +import org.geotools.data.FeatureSource; +import org.geotools.data.Query; +import org.geotools.data.geojson.GeoJSONDataStore; +import org.geotools.data.simple.SimpleFeatureSource; +import org.geotools.feature.FeatureCollection; +import org.geotools.feature.FeatureIterator; +import org.geotools.geometry.jts.ReferencedEnvelope; +import org.geotools.referencing.CRS; +import org.geotools.referencing.crs.DefaultGeographicCRS; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.geom.Geometry; +import org.opengis.feature.simple.SimpleFeature; +import org.opengis.feature.simple.SimpleFeatureType; +import org.opengis.referencing.FactoryException; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static com.conveyal.r5.common.GeometryUtils.checkWgsEnvelopeSize; +import static com.conveyal.r5.util.ShapefileReader.attributes; +import static com.conveyal.r5.util.ShapefileReader.geometryType; +import static com.google.common.base.Preconditions.checkState; + +/** + * Logic to create SpatialDataSource metadata from an uploaded GeoPackage file and perform validation. 
+ * We are using the (unsupported) GeoTools module for loading GeoPackages into OpenGIS Features. + * + * Note that a GeoPackage can be a vector or a raster (coverage). We should handle both cases. + */ +public class GeoPackageDataSourceIngester extends DataSourceIngester { + + private final SpatialDataSource dataSource; + + @Override + public DataSource dataSource () { + return dataSource; + } + + public GeoPackageDataSourceIngester () { + // Note we're using the no-arg constructor creating a totally empty object. + // Its ID and other general fields will be set later by the enclosing DataSourceUploadAction. + this.dataSource = new SpatialDataSource(); + dataSource.fileFormat = FileStorageFormat.GEOPACKAGE; + } + + @Override + public void ingest (File file, ProgressListener progressListener) { + progressListener.beginTask("Validating uploaded GeoPackage", 2); + progressListener.setWorkProduct(dataSource.toWorkProduct()); + try { + Map params = new HashMap(); + params.put("dbtype", "geopkg"); + params.put("database", file.getAbsolutePath()); + DataStore datastore = DataStoreFinder.getDataStore(params); + // TODO Remaining logic should be similar to Shapefile and GeoJson + // Some GeoTools DataStores have multiple tables ("type names") available. GeoPackage seems to allow this. + // Shapefile has only one per DataStore, so the ShapefileDataStore provides a convenience method that does + // this automatically. + String[] typeNames = datastore.getTypeNames(); + if (typeNames.length != 1) { + throw new RuntimeException("GeoPackage must contain only one table, this file has " + typeNames.length); + } + FeatureSource featureSource = datastore.getFeatureSource(typeNames[0]); + Query query = new Query(Query.ALL); + query.setCoordinateSystemReproject(DefaultGeographicCRS.WGS84); + FeatureCollection wgsFeatureCollection = featureSource.getFeatures(query); + Envelope wgsEnvelope = wgsFeatureCollection.getBounds(); + checkWgsEnvelopeSize(wgsEnvelope, "GeoPackage"); + progressListener.increment(); + FeatureIterator wgsFeatureIterator = wgsFeatureCollection.features(); + while (wgsFeatureIterator.hasNext()) { + Geometry wgsFeatureGeometry = (Geometry)(wgsFeatureIterator.next().getDefaultGeometry()); + // FIXME GeoTools seems to be returning an envelope slightly smaller than the projected shapes. + // maybe it's giving us projection(envelope(shapes)) instead of envelope(projection(shapes))? + // As a stopgap, test that they intersect. 
+ checkState(wgsEnvelope.intersects(wgsFeatureGeometry.getEnvelopeInternal())); + } + dataSource.wgsBounds = Bounds.fromWgsEnvelope(wgsEnvelope); + dataSource.attributes = attributes(wgsFeatureCollection.getSchema()); + dataSource.geometryType = geometryType(wgsFeatureCollection); + dataSource.featureCount = wgsFeatureCollection.size(); + dataSource.coordinateSystem = + featureSource.getSchema().getCoordinateReferenceSystem().getName().getCode(); + progressListener.increment(); + } catch (IOException e) { + throw new RuntimeException("Error reading GeoPackage.", e); + } + } + +} diff --git a/src/main/java/com/conveyal/analysis/datasource/GeoTiffDataSourceIngester.java b/src/main/java/com/conveyal/analysis/datasource/GeoTiffDataSourceIngester.java new file mode 100644 index 000000000..c1692460e --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/GeoTiffDataSourceIngester.java @@ -0,0 +1,93 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.analysis.models.Bounds; +import com.conveyal.analysis.models.DataSource; +import com.conveyal.analysis.models.SpatialDataSource; +import com.conveyal.r5.analyst.progress.ProgressListener; +import org.geotools.coverage.GridSampleDimension; +import org.geotools.coverage.grid.GridCoverage2D; +import org.geotools.coverage.grid.GridEnvelope2D; +import org.geotools.coverage.grid.io.AbstractGridFormat; +import org.geotools.coverage.grid.io.GridFormatFinder; +import org.geotools.geometry.jts.ReferencedEnvelope; +import org.geotools.referencing.CRS; +import org.geotools.referencing.crs.DefaultGeographicCRS; +import org.geotools.util.factory.Hints; +import org.opengis.coverage.SampleDimensionType; +import org.opengis.referencing.FactoryException; +import org.opengis.referencing.crs.CoordinateReferenceSystem; +import org.opengis.referencing.operation.MathTransform; +import org.opengis.referencing.operation.TransformException; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static com.conveyal.file.FileStorageFormat.GEOTIFF; +import static com.conveyal.r5.util.ShapefileReader.GeometryType.PIXEL; + +/** + * GoeTIFFs are used as inputs in network building as digital elevation profiles, and eventually expected to + * serve as impedance or cost fields (e.g. shade bonus and pollution malus). + */ +public class GeoTiffDataSourceIngester extends DataSourceIngester { + + private final SpatialDataSource dataSource; + + public GeoTiffDataSourceIngester () { + this.dataSource = new SpatialDataSource(); + dataSource.geometryType = PIXEL; + dataSource.fileFormat = GEOTIFF; // Should be GEOTIFF specifically + } + + @Override + protected DataSource dataSource () { + return dataSource; + } + + @Override + public void ingest (File file, ProgressListener progressListener) { + progressListener.beginTask("Processing uploaded GeoTIFF", 1); + AbstractGridFormat format = GridFormatFinder.findFormat(file); + Hints hints = new Hints(Hints.FORCE_LONGITUDE_FIRST_AXIS_ORDER, Boolean.TRUE); + var coverageReader = format.getReader(file, hints); + GridCoverage2D coverage; + try { + coverage = coverageReader.read(null); + } catch (IOException e) { + throw new DataSourceException("Could not read GeoTiff.", e); + } + // Transform to WGS84 to ensure this will not trigger any errors downstream. 
+ CoordinateReferenceSystem coverageCrs = coverage.getCoordinateReferenceSystem2D(); + MathTransform wgsToCoverage, coverageToWgs; + ReferencedEnvelope wgsEnvelope; + try { + // These two transforms are not currently used - find them just to fail fast if GeoTools does not understand. + wgsToCoverage = CRS.findMathTransform(DefaultGeographicCRS.WGS84, coverageCrs); + coverageToWgs = wgsToCoverage.inverse(); + // Envelope in coverage CRS is not necessarily aligned with axes when transformed to WGS84. + // As far as I can tell those cases are handled by this call, but I'm not completely sure. + wgsEnvelope = new ReferencedEnvelope(coverage.getEnvelope2D().toBounds(DefaultGeographicCRS.WGS84)); + } catch (FactoryException | TransformException e) { + throw new DataSourceException("Could not create coordinate transform to and from WGS84."); + } + + List attributes = new ArrayList<>(); + for (int d = 0; d < coverage.getNumSampleDimensions(); d++) { + GridSampleDimension sampleDimension = coverage.getSampleDimension(d); + SampleDimensionType type = sampleDimension.getSampleDimensionType(); + attributes.add(new SpatialAttribute(type, d)); + } + // Get the dimensions of the pixel grid so we can record the number of pixels. + // Total number of pixels can be huge, cast it to 64 bits. + GridEnvelope2D gridEnv = coverage.getGridGeometry().getGridRange2D(); + dataSource.wgsBounds = Bounds.fromWgsEnvelope(wgsEnvelope); + dataSource.featureCount = (long)gridEnv.width * (long)gridEnv.height; + dataSource.geometryType = PIXEL; + dataSource.attributes = attributes; + dataSource.coordinateSystem = coverage.getCoordinateReferenceSystem().getName().getCode(); + progressListener.increment(); + } + +} diff --git a/src/main/java/com/conveyal/analysis/datasource/ShapefileDataSourceIngester.java b/src/main/java/com/conveyal/analysis/datasource/ShapefileDataSourceIngester.java new file mode 100644 index 000000000..03e61c6b7 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/ShapefileDataSourceIngester.java @@ -0,0 +1,71 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.analysis.models.Bounds; +import com.conveyal.analysis.models.DataSource; +import com.conveyal.analysis.models.SpatialDataSource; +import com.conveyal.file.FileStorageFormat; +import com.conveyal.r5.analyst.progress.ProgressListener; +import com.conveyal.r5.util.ShapefileReader; +import org.geotools.referencing.CRS; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.geom.Geometry; +import org.opengis.referencing.FactoryException; +import org.opengis.referencing.operation.TransformException; + +import java.io.File; +import java.io.IOException; + +import static com.conveyal.r5.common.GeometryUtils.checkWgsEnvelopeSize; +import static com.google.common.base.Preconditions.checkState; + +/** + * Logic to create SpatialDataSource metadata from a Shapefile. + */ +public class ShapefileDataSourceIngester extends DataSourceIngester { + + private final SpatialDataSource dataSource; + + @Override + public DataSource dataSource () { + return dataSource; + } + + public ShapefileDataSourceIngester () { + // Note we're using the no-arg constructor creating a totally empty object. + // Its fields will be set later by the enclosing DataSourceUploadAction. 
+ this.dataSource = new SpatialDataSource(); + dataSource.fileFormat = FileStorageFormat.SHP; + } + + @Override + public void ingest (File file, ProgressListener progressListener) { + progressListener.beginTask("Validating uploaded shapefile", 2); + progressListener.setWorkProduct(dataSource.toWorkProduct()); + try { + ShapefileReader reader = new ShapefileReader(file); + // Iterate over all features to ensure file is readable, geometries are valid, and can be reprojected. + Envelope envelope = reader.wgs84Bounds(); + checkWgsEnvelopeSize(envelope, "Shapefile"); + reader.wgs84Stream().forEach(f -> { + checkState(envelope.contains(((Geometry)f.getDefaultGeometry()).getEnvelopeInternal())); + }); + reader.close(); + progressListener.increment(); + dataSource.wgsBounds = Bounds.fromWgsEnvelope(envelope); + dataSource.attributes = reader.attributes(); + dataSource.geometryType = reader.geometryType(); + dataSource.featureCount = reader.featureCount(); + dataSource.coordinateSystem = + reader.crs.getName().getCode(); + + progressListener.increment(); + } catch (FactoryException | TransformException e) { + throw new DataSourceException("Shapefile transform error. " + + "Try uploading an unprojected WGS84 (EPSG:4326) file.", e); + } catch (IOException e) { + // ShapefileReader throws a checked IOException. + throw new DataSourceException("Error parsing shapefile. Ensure the files you uploaded are valid.", e); + } + } + +} diff --git a/src/main/java/com/conveyal/analysis/datasource/SpatialAttribute.java b/src/main/java/com/conveyal/analysis/datasource/SpatialAttribute.java new file mode 100644 index 000000000..b33aa2348 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/SpatialAttribute.java @@ -0,0 +1,63 @@ +package com.conveyal.analysis.datasource; + +import org.locationtech.jts.geom.Geometry; +import org.opengis.coverage.SampleDimensionType; +import org.opengis.feature.type.AttributeDescriptor; +import org.opengis.feature.type.AttributeType; + +/** + * In OpenGIS terminology, SpatialDataSources contain features, each of which has attributes. This class represents a + * single attribute present on all the features in a resource - it's basically the schema metadata for a GIS layer. + * Users can specify their own name for any attribute in the source file, so this also associates these user-specified + * names with the original attribute name. + */ +public class SpatialAttribute { + + /** The name of the attribute (CSV column, Shapefile attribute, etc.) in the uploaded source file. */ + public String name; + + /** The editable label specified by the end user. */ + public String label; + + /** The data type of the attribute - for our purposes primarily distinguishing between numbers and text. */ + public Type type; + + /** On how many features does this attribute occur? */ + public int occurrances = 0; + + public enum Type { + NUMBER, // internally, we generally work with doubles so all numeric GIS types can be up-converted + TEXT, + GEOM, + ERROR; + public static Type forBindingClass (Class binding) { + if (Number.class.isAssignableFrom(binding)) return Type.NUMBER; + else if (String.class.isAssignableFrom(binding)) return Type.TEXT; + else if (Geometry.class.isAssignableFrom(binding)) return Type.GEOM; + else return Type.ERROR; + } + } + + /** + * Given an OpenGIS AttributeType, create a new Conveyal attribute metadata object reflecting it. 
+ */ + public SpatialAttribute(String name, AttributeType type) { + this.name = name; + this.label = name; + this.type = Type.forBindingClass(type.getBinding()); + } + + public SpatialAttribute (AttributeDescriptor descriptor) { + this(descriptor.getLocalName(), descriptor.getType()); + } + + public SpatialAttribute (SampleDimensionType dimensionType, int bandNumber) { + name = "Band " + bandNumber; + label = String.format("%s (%s)", name, dimensionType.name()); + type = Type.NUMBER; + } + + /** No-arg constructor required for Mongo POJO deserialization. */ + public SpatialAttribute () { } + +} diff --git a/src/main/java/com/conveyal/analysis/datasource/derivation/AggregationAreaDerivation.java b/src/main/java/com/conveyal/analysis/datasource/derivation/AggregationAreaDerivation.java new file mode 100644 index 000000000..2bdd61369 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/derivation/AggregationAreaDerivation.java @@ -0,0 +1,249 @@ +package com.conveyal.analysis.datasource.derivation; + +import com.conveyal.analysis.AnalysisServerException; +import com.conveyal.analysis.UserPermissions; +import com.conveyal.analysis.datasource.DataSourceException; +import com.conveyal.analysis.datasource.SpatialAttribute; +import com.conveyal.analysis.models.AggregationArea; +import com.conveyal.analysis.models.DataGroup; +import com.conveyal.analysis.models.DataSource; +import com.conveyal.analysis.models.SpatialDataSource; +import com.conveyal.analysis.persistence.AnalysisCollection; +import com.conveyal.analysis.persistence.AnalysisDB; +import com.conveyal.file.FileCategory; +import com.conveyal.file.FileStorage; +import com.conveyal.file.FileStorageKey; +import com.conveyal.file.FileUtils; +import com.conveyal.r5.analyst.Grid; +import com.conveyal.r5.analyst.progress.ProgressListener; +import com.conveyal.r5.analyst.progress.WorkProduct; +import com.conveyal.r5.util.ShapefileReader; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.operation.union.UnaryUnionOp; +import org.opengis.feature.simple.SimpleFeature; +import org.opengis.referencing.FactoryException; +import org.opengis.referencing.operation.TransformException; +import spark.Request; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.zip.GZIPOutputStream; + +import static com.conveyal.file.FileStorageFormat.GEOJSON; +import static com.conveyal.file.FileStorageFormat.SHP; +import static com.conveyal.r5.analyst.WebMercatorGridPointSet.parseZoom; +import static com.conveyal.r5.analyst.progress.WorkProductType.AGGREGATION_AREA; +import static com.conveyal.r5.util.ShapefileReader.GeometryType.POLYGON; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; + +/** + * Created by abyrd on 2021-09-03 + */ +public class AggregationAreaDerivation implements DataDerivation { + + /** + * Arbitrary limit to prevent UI clutter from many aggregation areas (e.g. if someone uploads thousands of blocks). + * Someone might reasonably request an aggregation area for each of Chicago's 50 wards, so that's a good approximate + * limit for now. 
+ */ + private static final int MAX_FEATURES = 100; + + private final FileStorage fileStorage; + private final UserPermissions userPermissions; + private final String dataSourceId; + private final String nameProperty; + private final boolean mergePolygons; + private final int zoom; + private final SpatialDataSource spatialDataSource; + private final List finalFeatures; + + // TODO derivations could return their model objects and DataGroups so they don't need direct database and fileStorage access. + // A DerivationProduct could be a collection of File, a Collection and a DataGroup. + private final AnalysisCollection aggregationAreaCollection; + private final AnalysisCollection dataGroupCollection; + + /** + * Extraction, validation and range checking of parameters. + * It's kind of a red flag that we're passing Components in here. The products should probably be returned by the + * Derivation and stored by some more general purpose wrapper so we can avoid direct file and database access here. + * It's also not great to pass in the full request - we only need to extract and validate query parameters. + */ + private AggregationAreaDerivation (FileStorage fileStorage, AnalysisDB database, Request req) { + + // Before kicking off asynchronous processing, range check inputs to fail fast on obvious problems. + userPermissions = UserPermissions.from(req); + dataSourceId = req.queryParams("dataSourceId"); + nameProperty = req.queryParams("nameProperty"); //"dist_name"; // + zoom = parseZoom(req.queryParams("zoom")); + mergePolygons = Boolean.parseBoolean(req.queryParams("mergePolygons")); + checkNotNull(dataSourceId); + + AnalysisCollection dataSourceCollection = + database.getAnalysisCollection("dataSources", DataSource.class); + DataSource dataSource = dataSourceCollection.findById(dataSourceId); + checkArgument(dataSource instanceof SpatialDataSource, + "Only spatial data sets can be converted to aggregation areas."); + spatialDataSource = (SpatialDataSource) dataSource; + checkArgument(POLYGON.equals(spatialDataSource.geometryType), + "Only polygons can be converted to aggregation areas. DataSource is: " + spatialDataSource.geometryType); + checkArgument(SHP.equals(spatialDataSource.fileFormat), + "Currently, only shapefiles can be converted to aggregation areas."); + + if (!mergePolygons) { + checkNotNull(nameProperty, "You must supply a nameProperty if mergePolygons is not true."); + SpatialAttribute sa = spatialDataSource.attributes.stream() + .filter(a -> a.name.equals(nameProperty)) + .findFirst().orElseThrow(() -> + new IllegalArgumentException("nameProperty does not exist: " + nameProperty)); + if (sa.type == SpatialAttribute.Type.GEOM) { + throw new IllegalArgumentException("nameProperty must be of type TEXT or NUMBER, not GEOM."); + } + } + + this.fileStorage = fileStorage; + // Do not retain AnalysisDB reference, but grab the collections we need. + // TODO cache AnalysisCollection instances and reuse? Are they threadsafe? + aggregationAreaCollection = database.getAnalysisCollection("aggregationAreas", AggregationArea.class); + dataGroupCollection = database.getAnalysisCollection("dataGroups", DataGroup.class); + + /* + Implementation notes: + Collecting all the Features to a List is a red flag for scalability, but the UnaryUnionOp used below (and the + CascadedPolygonUnion it depends on) appear to only operate on in-memory lists. The ShapefileReader and the + FeatureSource it contains also seem to always load all features at once. 
So for now we just have to tolerate + loading the whole files into memory at once. + If we do need to pre-process the file (here reading it and converting it to WGS84) that's not a + constant-time operation, so it should probably be done in the async task below instead of this synchronous + HTTP controller code. + We may not need to union the features at all. We could just iteratively rasterize all the polygons into a + single grid which would effectively union them. This would allow both the union and non-union case to be + handled in a streaming fashion (in constant memory). + This whole process needs to be audited though, it's strangely slow. + */ + File sourceFile; + if (SHP.equals(spatialDataSource.fileFormat)) { + // On a newly started backend, we can't be sure any sidecar files are on the local filesystem. + // We may want to factor this out when we use shapefile DataSources in other derivations. + String baseName = spatialDataSource._id.toString(); + prefetchDataSource(baseName, "dbf"); + prefetchDataSource(baseName, "shx"); + prefetchDataSource(baseName, "prj"); + sourceFile = fileStorage.getFile(spatialDataSource.storageKey()); + // Reading the shapefile into a list may actually take a moment, should this be done in the async part? + try (ShapefileReader reader = new ShapefileReader(sourceFile)) { + finalFeatures = reader.wgs84Stream().collect(Collectors.toList()); + } catch (Exception e) { + throw new DataSourceException("Failed to load shapefile.", e); + } + } else { + // GeoJSON, GeoPackage etc. + throw new UnsupportedOperationException("To be implemented."); + } + if (!mergePolygons && finalFeatures.size() > MAX_FEATURES) { + String message = MessageFormat.format( + "The uploaded shapefile has {0} features, exceeding the limit of {1}", + finalFeatures.size(), MAX_FEATURES + ); + throw new DataSourceException(message); + } + } + + /** Used primarily for shapefiles where we can't be sure whether all sidecar files have been synced locally. */ + private void prefetchDataSource (String baseName, String extension) { + FileStorageKey key = new FileStorageKey(FileCategory.DATASOURCES, baseName, extension); + // We need to clarify the FileStorage API on which calls cause the file to be synced locally, and whether these + // getFile tolerates getting files that do not exist. This may all become irrelevant if we use NFS. + if (fileStorage.exists(key)) { + fileStorage.getFile(key); + } + } + + @Override + public void action (ProgressListener progressListener) throws Exception { + + ArrayList aggregationAreas = new ArrayList<>(); + String groupDescription = "Aggregation areas from polygons"; + DataGroup dataGroup = new DataGroup(userPermissions, spatialDataSource._id.toString(), groupDescription); + + progressListener.beginTask("Reading data source", finalFeatures.size() + 1); + Map areaGeometries = new HashMap<>(); + + if (mergePolygons) { + // Union (single combined aggregation area) requested + List geometries = finalFeatures.stream().map(f -> + (Geometry) f.getDefaultGeometry()).collect(Collectors.toList() + ); + UnaryUnionOp union = new UnaryUnionOp(geometries); + // Name the area using the name in the request directly + areaGeometries.put(spatialDataSource.name, union.union()); + } else { + // Don't union. Name each area by looking up its value for the name property in the request. + finalFeatures.forEach(f -> areaGeometries.put( + readProperty(f, nameProperty), (Geometry) f.getDefaultGeometry()) + ); + } + + // Convert to raster grids, then store them. 
+ areaGeometries.forEach((String name, Geometry geometry) -> { + if (geometry == null) throw new AnalysisServerException("Invalid geometry uploaded."); + Envelope env = geometry.getEnvelopeInternal(); + Grid maskGrid = new Grid(zoom, env); + progressListener.beginTask("Creating grid for " + name, 0); + + // Store the percentage each cell overlaps the mask, scaled as 0 to 100,000 + List weights = maskGrid.getPixelWeights(geometry, true); + weights.forEach(pixel -> { + maskGrid.grid[pixel.x][pixel.y] = pixel.weight * 100_000; + }); + + AggregationArea aggregationArea = new AggregationArea(userPermissions, name, spatialDataSource); + + try { + File gridFile = FileUtils.createScratchFile("grid"); + OutputStream os = new GZIPOutputStream(FileUtils.getOutputStream(gridFile)); + maskGrid.write(os); + os.close(); + aggregationArea.dataGroupId = dataGroup._id.toString(); + aggregationAreas.add(aggregationArea); + fileStorage.moveIntoStorage(aggregationArea.getStorageKey(), gridFile); + } catch (IOException e) { + throw new AnalysisServerException("Error processing/uploading aggregation area"); + } + progressListener.increment(); + }); + aggregationAreaCollection.insertMany(aggregationAreas); + dataGroupCollection.insert(dataGroup); + progressListener.setWorkProduct(WorkProduct.forDataGroup(AGGREGATION_AREA, dataGroup, spatialDataSource.regionId)); + progressListener.increment(); + + } + + private static String readProperty (SimpleFeature feature, String propertyName) { + try { + return feature.getProperty(propertyName).getValue().toString(); + } catch (NullPointerException e) { + String message = String.format("The specified property '%s' was not present on the uploaded features. " + + "Please verify that '%s' corresponds to a shapefile column.", propertyName, propertyName); + throw new AnalysisServerException(message); + } + } + + public static AggregationAreaDerivation fromRequest (Request req, FileStorage fileStorage, AnalysisDB database) { + return new AggregationAreaDerivation(fileStorage, database, req); + } + + @Override + public SpatialDataSource dataSource () { + return spatialDataSource; + } + +} diff --git a/src/main/java/com/conveyal/analysis/datasource/derivation/DataDerivation.java b/src/main/java/com/conveyal/analysis/datasource/derivation/DataDerivation.java new file mode 100644 index 000000000..9e0ea8515 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/datasource/derivation/DataDerivation.java @@ -0,0 +1,24 @@ +package com.conveyal.analysis.datasource.derivation; + +import com.conveyal.analysis.models.BaseModel; +import com.conveyal.analysis.models.DataGroup; +import com.conveyal.analysis.models.DataSource; +import com.conveyal.r5.analyst.progress.TaskAction; + +import java.util.Collection; + +/** + * An interface for unary operators mapping DataSources into other data sets represented in our Mongo database. + * An asynchronous function from D to M. 
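The grids written above store each pixel's overlap with the mask polygon scaled to the 0 to 100,000 integer range. A minimal sketch of the read side, assuming the accessibility surface has already been resampled to the same extents as the mask and that both arrays are indexed [x][y] as in the code above; the method and variable names here are hypothetical, not part of this patch.

    /** Average an accessibility surface over a mask grid produced above (both arrays indexed [x][y]). */
    public static double maskedAverage (double[][] mask, double[][] accessibility) {
        double weightedSum = 0;
        double totalWeight = 0;
        for (int x = 0; x < mask.length; x++) {
            for (int y = 0; y < mask[x].length; y++) {
                // Undo the 0..100_000 integer scaling applied when the mask grid was written.
                double weight = mask[x][y] / 100_000d;
                weightedSum += weight * accessibility[x][y];
                totalWeight += weight;
            }
        }
        return totalWeight > 0 ? weightedSum / totalWeight : 0;
    }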
+ */
+public interface DataDerivation<D extends DataSource, M extends BaseModel> extends TaskAction {
+
+    public D dataSource ();
+
+// public Collection<M> outputs();
+
+// public DataGroup outputGroup();
+
+// or single output method: public DataGroup output();
+
+}
diff --git a/src/main/java/com/conveyal/analysis/grids/SeamlessCensusGridExtractor.java b/src/main/java/com/conveyal/analysis/grids/SeamlessCensusGridExtractor.java
index 6bf0b3095..289b9987f 100644
--- a/src/main/java/com/conveyal/analysis/grids/SeamlessCensusGridExtractor.java
+++ b/src/main/java/com/conveyal/analysis/grids/SeamlessCensusGridExtractor.java
@@ -5,6 +5,7 @@
 import com.conveyal.data.census.S3SeamlessSource;
 import com.conveyal.data.geobuf.GeobufFeature;
 import com.conveyal.r5.analyst.Grid;
+import com.conveyal.r5.analyst.progress.ProgressListener;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -48,17 +49,20 @@ public SeamlessCensusGridExtractor (Config config) {
     /**
      * Retrieve data for bounds and save to a bucket under a given key
      */
-    public List<Grid> censusDataForBounds (Bounds bounds, int zoom) throws IOException {
+    public List<Grid> censusDataForBounds (Bounds bounds, int zoom, ProgressListener progressListener) throws IOException {
         long startTime = System.currentTimeMillis();
         // All the features are buffered in a Map in memory. This could be problematic on large areas.
-        Map<Long, GeobufFeature> features = source.extract(bounds.north, bounds.east, bounds.south, bounds.west, false);
+        Map<Long, GeobufFeature> features =
+                source.extract(bounds.north, bounds.east, bounds.south, bounds.west, false, progressListener);
         if (features.isEmpty()) {
             LOG.info("No seamless census data found here, not pre-populating grids");
             return new ArrayList<>();
         }
+        progressListener.beginTask("Processing census blocks", features.size());
+
         // One string naming each attribute (column) in the incoming census data.
         Map<String, Grid> grids = new HashMap<>();
         for (GeobufFeature feature : features.values()) {
@@ -81,6 +85,7 @@
             }
             grid.incrementFromPixelWeights(weights, value.doubleValue());
         }
+        progressListener.increment();
     }
     long endTime = System.currentTimeMillis();
diff --git a/src/main/java/com/conveyal/analysis/models/AggregationArea.java b/src/main/java/com/conveyal/analysis/models/AggregationArea.java
index 253e0fe3b..a0a34be19 100644
--- a/src/main/java/com/conveyal/analysis/models/AggregationArea.java
+++ b/src/main/java/com/conveyal/analysis/models/AggregationArea.java
@@ -1,17 +1,47 @@
 package com.conveyal.analysis.models;
+import com.conveyal.analysis.UserPermissions;
+import com.conveyal.file.FileStorageKey;
 import com.fasterxml.jackson.annotation.JsonIgnore;
+import org.bson.codecs.pojo.annotations.BsonIgnore;
+
+import static com.conveyal.file.FileCategory.GRIDS;
 /**
- * An aggregation area defines a set of origin points to be averaged together to produce an aggregate accessibility figure.
- * It is defined by a geometry that is rasterized and stored as a grid, with pixels with values between 0 and 100,000
- * depending on how much of that pixel is overlapped by the mask.
+ * An aggregation area defines a set of origin points that can be combined to produce an aggregate accessibility figure.
+ * For example, if we have accessibility results for an entire city, we might want to calculate 25th percentile
+ * population-weighted accessibility for each administrative district. Each district would be an aggregation area.
+ * + * An aggregation area is defined by a polygon that has been rasterized and stored as a grid, with each pixel value + * expressing how much of that pixel falls within the mask polygon. These values are in the range of 0 to 100,000 + * (rather than 0 to 1) because our serialized (on-disk) grid format can only store integers. */ -public class AggregationArea extends Model { +public class AggregationArea extends BaseModel { + public String regionId; + public String dataSourceId; + public String dataGroupId; + + /** Zero-argument constructor required for Mongo automatic POJO deserialization. */ + public AggregationArea () { } + + public AggregationArea(UserPermissions user, String name, SpatialDataSource dataSource) { + super(user, name); + this.regionId = dataSource.regionId; + this.dataSourceId = dataSource._id.toString(); + } @JsonIgnore + @BsonIgnore public String getS3Key () { return String.format("%s/mask/%s.grid", regionId, _id); } + + @JsonIgnore + @BsonIgnore + public FileStorageKey getStorageKey () { + // These in the GRIDS file storage category because aggregation areas are masks represented as binary grids. + return new FileStorageKey(GRIDS, getS3Key()); + } + } diff --git a/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java b/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java index b118a793a..1bfdabca2 100644 --- a/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java +++ b/src/main/java/com/conveyal/analysis/models/AnalysisRequest.java @@ -1,39 +1,48 @@ package com.conveyal.analysis.models; import com.conveyal.analysis.AnalysisServerException; +import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.persistence.Persistence; -import com.conveyal.r5.analyst.Grid; +import com.conveyal.r5.analyst.WebMercatorExtents; import com.conveyal.r5.analyst.cluster.AnalysisWorkerTask; import com.conveyal.r5.analyst.decay.DecayFunction; import com.conveyal.r5.analyst.decay.StepDecayFunction; import com.conveyal.r5.analyst.fare.InRoutingFareCalculator; -import com.conveyal.r5.analyst.scenario.Modification; import com.conveyal.r5.analyst.scenario.Scenario; import com.conveyal.r5.api.util.LegMode; import com.conveyal.r5.api.util.TransitModes; -import com.conveyal.r5.common.JsonUtilities; import com.mongodb.QueryBuilder; +import org.apache.commons.codec.digest.DigestUtils; import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.EnumSet; import java.util.List; import java.util.stream.Collectors; -import java.util.zip.CRC32; /** * Request sent from the UI to the backend. It is actually distinct from the task that the broker * sends/forwards to R5 workers (see {@link AnalysisWorkerTask}), though it has many of the same fields. */ public class AnalysisRequest { - private static int MIN_ZOOM = 9; private static int MAX_ZOOM = 12; private static int MAX_GRID_CELLS = 5_000_000; + /** + * These three IDs are redundant, and just help reduce the number of database lookups necessary. + * The bundleId and modificationIds should be considered the definitive source of truth (regionId and projectId are + * implied by the bundleId and the modification Ids). Behavior is undefined if the API caller sends inconsistent + * information (a different regionId or projectId than the one the bundleId belongs to). 
+ */ + public String regionId; public String projectId; - public int variantIndex; + public String scenarioId; + + public String bundleId; + public List modificationIds = new ArrayList<>(); public String workerVersion; public String accessModes; @@ -149,20 +158,20 @@ public class AnalysisRequest { public DecayFunction decayFunction; /** - * Get all of the modifications for a project id that are in the Variant and map them to their - * corresponding r5 mod + * Create the R5 `Scenario` from this request. */ - private static List modificationsForProject ( - String accessGroup, - String projectId, - int variantIndex) - { - return Persistence.modifications - .findPermitted(QueryBuilder.start("projectId").is(projectId).get(), accessGroup) - .stream() - .filter(m -> variantIndex < m.variants.length && m.variants[variantIndex]) - .map(com.conveyal.analysis.models.Modification::toR5) - .collect(Collectors.toList()); + public Scenario createScenario (UserPermissions userPermissions) { + QueryBuilder query = "all".equals(scenarioId) + ? QueryBuilder.start("projectId").is(projectId) + : QueryBuilder.start("_id").in(modificationIds); + Collection modifications = Persistence.modifications.findPermitted(query.get(), userPermissions); + // `findPermitted` sorts by creation time by default. Nonces will be in the same order each time. + String nonces = Arrays.toString(modifications.stream().map(m -> m.nonce).toArray()); + String scenarioId = String.format("%s-%s", bundleId, DigestUtils.sha1Hex(nonces)); + Scenario scenario = new Scenario(); + scenario.id = scenarioId; + scenario.modifications = modifications.stream().map(com.conveyal.analysis.models.Modification::toR5).collect(Collectors.toList()); + return scenario; } /** @@ -178,58 +187,23 @@ private static List modificationsForProject ( * TODO arguably this should be done by a method on the task classes themselves, with common parts factored out * to the same method on the superclass. */ - public AnalysisWorkerTask populateTask (AnalysisWorkerTask task, Project project) { - - // Fetch the modifications associated with this project, filtering for the selected scenario - // (denoted here as "variant"). There are no modifications in the baseline scenario - // (which is denoted by special index -1). - List modifications = new ArrayList<>(); - String scenarioName; - if (variantIndex > -1) { - modifications = modificationsForProject(project.accessGroup, projectId, variantIndex); - scenarioName = project.variants[variantIndex]; - } else { - scenarioName = "Baseline"; - } + public void populateTask (AnalysisWorkerTask task, UserPermissions userPermissions) { + if (bounds == null) throw AnalysisServerException.badRequest("Analysis bounds must be set."); - // The CRC of the modifications in this scenario is appended to the scenario ID to - // identify a unique revision of the scenario (still denoted here as variant) allowing - // the worker to cache and reuse networks built by applying that exact revision of the - // scenario to a base network. - CRC32 crc = new CRC32(); - crc.update(JsonUtilities.objectToJsonBytes(modifications)); - long crcValue = crc.getValue(); - - task.scenario = new Scenario(); - // FIXME Job IDs need to be unique. Why are we setting this to the project and variant? - // This only works because the job ID is overwritten when the job is enqueued. - // Its main effect is to cause the scenario ID to have this same pattern! - // We should probably leave the JobID null on single point tasks. Needed: polymorphic task initialization. 
- task.jobId = String.format("%s-%s-%s", projectId, variantIndex, crcValue); - task.scenario.id = task.scenarioId = task.jobId; - task.scenario.modifications = modifications; - task.scenario.description = scenarioName; - task.graphId = project.bundleId; + task.scenario = createScenario(userPermissions); + task.graphId = bundleId; task.workerVersion = workerVersion; - task.maxFare = this.maxFare; - task.inRoutingFareCalculator = this.inRoutingFareCalculator; - - Bounds bounds = this.bounds; - if (bounds == null) { - // If no bounds were specified, fall back on the bounds of the entire region. - Region region = Persistence.regions.findByIdIfPermitted(project.regionId, project.accessGroup); - bounds = region.bounds; - } + task.maxFare = maxFare; + task.inRoutingFareCalculator = inRoutingFareCalculator; // TODO define class with static factory function WebMercatorGridBounds.fromLatLonBounds(). // Also include getIndex(x, y), getX(index), getY(index), totalTasks() - - Grid grid = new Grid(zoom, bounds.envelope()); - checkZoom(grid); - task.height = grid.height; - task.north = grid.north; - task.west = grid.west; - task.width = grid.width; + WebMercatorExtents extents = WebMercatorExtents.forWgsEnvelope(bounds.envelope(), zoom); + checkGridSize(extents); + task.height = extents.height; + task.north = extents.north; + task.west = extents.west; + task.width = extents.width; task.zoom = zoom; task.date = date; @@ -279,21 +253,19 @@ public AnalysisWorkerTask populateTask (AnalysisWorkerTask task, Project project if (task.decayFunction == null) { task.decayFunction = new StepDecayFunction(); } - - return task; } - private static void checkZoom(Grid grid) { - if (grid.zoom < MIN_ZOOM || grid.zoom > MAX_ZOOM) { + private static void checkGridSize (WebMercatorExtents extents) { + if (extents.zoom < MIN_ZOOM || extents.zoom > MAX_ZOOM) { throw AnalysisServerException.badRequest(String.format( - "Requested zoom (%s) is outside valid range (%s - %s)", grid.zoom, MIN_ZOOM, MAX_ZOOM + "Requested zoom (%s) is outside valid range (%s - %s)", extents.zoom, MIN_ZOOM, MAX_ZOOM )); } - if (grid.height * grid.width > MAX_GRID_CELLS) { + if (extents.height * extents.width > MAX_GRID_CELLS) { throw AnalysisServerException.badRequest(String.format( "Requested number of destinations (%s) exceeds limit (%s). " + "Set smaller custom geographic bounds or a lower zoom level.", - grid.height * grid.width, MAX_GRID_CELLS + extents.height * extents.width, MAX_GRID_CELLS )); } } diff --git a/src/main/java/com/conveyal/analysis/models/BaseModel.java b/src/main/java/com/conveyal/analysis/models/BaseModel.java index d3bfec99d..37562f7df 100644 --- a/src/main/java/com/conveyal/analysis/models/BaseModel.java +++ b/src/main/java/com/conveyal/analysis/models/BaseModel.java @@ -1,13 +1,15 @@ package com.conveyal.analysis.models; +import com.conveyal.analysis.UserPermissions; import org.bson.types.ObjectId; +/** The base type for objects stored in our newer AnalysisDB using the Mongo Java driver's POJO functionality. */ public class BaseModel { // Can retrieve `createdAt` from here public ObjectId _id; // For version management. ObjectId's contain a timestamp, so can retrieve `updatedAt` from here. 
- public ObjectId nonce = new ObjectId(); + public ObjectId nonce; public String createdBy = null; public String updatedBy = null; @@ -17,4 +19,18 @@ public class BaseModel { // Everything has a name public String name = null; + + // package private to encourage use of static factory methods + BaseModel (UserPermissions user, String name) { + this._id = new ObjectId(); + this.nonce = new ObjectId(); + this.createdBy = user.email; + this.updatedBy = user.email; + this.accessGroup = user.accessGroup; + this.name = name; + } + + /** Zero argument constructor required for MongoDB driver automatic POJO deserialization. */ + public BaseModel () { } + } diff --git a/src/main/java/com/conveyal/analysis/models/Bounds.java b/src/main/java/com/conveyal/analysis/models/Bounds.java index fce7673fd..87240befe 100644 --- a/src/main/java/com/conveyal/analysis/models/Bounds.java +++ b/src/main/java/com/conveyal/analysis/models/Bounds.java @@ -26,4 +26,12 @@ public Envelope envelope () { return new Envelope(this.west, this.east, this.south, this.north); } + public static Bounds fromWgsEnvelope (Envelope envelope) { + Bounds bounds = new Bounds(); + bounds.west = envelope.getMinX(); + bounds.east = envelope.getMaxX(); + bounds.south = envelope.getMinY(); + bounds.north = envelope.getMaxY(); + return bounds; + } } diff --git a/src/main/java/com/conveyal/analysis/models/Bundle.java b/src/main/java/com/conveyal/analysis/models/Bundle.java index 1f17abd93..912f066b4 100644 --- a/src/main/java/com/conveyal/analysis/models/Bundle.java +++ b/src/main/java/com/conveyal/analysis/models/Bundle.java @@ -82,8 +82,8 @@ public static class GtfsErrorTypeSummary { public Priority priority; public GtfsErrorTypeSummary () { /* For deserialization. */ } public GtfsErrorTypeSummary (GTFSError error) { - this.priority = error.priority; this.type = error.errorType; + this.priority = error.getPriority(); } } diff --git a/src/main/java/com/conveyal/analysis/models/DataGroup.java b/src/main/java/com/conveyal/analysis/models/DataGroup.java new file mode 100644 index 000000000..6bad6307a --- /dev/null +++ b/src/main/java/com/conveyal/analysis/models/DataGroup.java @@ -0,0 +1,23 @@ +package com.conveyal.analysis.models; + +import com.conveyal.analysis.UserPermissions; +import org.bson.types.ObjectId; + +/** + * When deriving data (layers, networks, etc.) from a DataSource, we sometimes produce many outputs at once from + * the same source and configuration options. We group all those derived products together using a DataGroup. + * The grouping is achieved simply by multiple other entities of the same type referencing the same dataGroupId. + * The DataGroups don't have many other characteristics of their own. They are materialized and stored in Mongo just + * to provide a user-editable name/description for the group. + */ +public class DataGroup extends BaseModel { + + /** The data source this group of products was derived from. 
*/
+    public String dataSourceId;
+
+    public DataGroup (UserPermissions user, String dataSourceId, String description) {
+        super(user, description);
+        this.dataSourceId = dataSourceId;
+    }
+
+}
diff --git a/src/main/java/com/conveyal/analysis/models/DataSource.java b/src/main/java/com/conveyal/analysis/models/DataSource.java
new file mode 100644
index 000000000..c6fbbff8f
--- /dev/null
+++ b/src/main/java/com/conveyal/analysis/models/DataSource.java
@@ -0,0 +1,79 @@
+package com.conveyal.analysis.models;
+
+import com.conveyal.analysis.UserPermissions;
+import com.conveyal.file.FileCategory;
+import com.conveyal.file.FileStorageFormat;
+import com.conveyal.file.FileStorageKey;
+import com.conveyal.r5.analyst.progress.WorkProduct;
+import org.bson.codecs.pojo.annotations.BsonDiscriminator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static com.conveyal.r5.analyst.progress.WorkProductType.DATA_SOURCE;
+
+/**
+ * This represents a file which was uploaded by the user and validated by the backend. Instances are persisted to Mongo.
+ * DataSources can be processed into derived products like aggregation areas, destination grids, and transport networks.
+ * Subtypes exist to allow additional fields on certain kinds of data sources. The attribute "type" of instances
+ * serialized into Mongo is a "discriminator" which determines the corresponding Java class on deserialization.
+ *
+ * Given the existence of discriminators in the Mongo driver, for now we're trying full Java typing with inheritance.
+ * It is debatable whether we get any advantages from this DataSource class hierarchy as opposed to a single class with
+ * some fields left null in certain cases (e.g. feature schema is null on OSM DataSources). Juggling different classes
+ * may not be worth the trouble unless we get some utility out of polymorphic methods. For example, Mongo collections
+ * will return the shared supertype, leaving any specialized fields inaccessible except via overridden methods.
+ * Usefulness will depend on how different the different subtypes are, and we haven't really seen that yet.
+ * The main action we take on DataSources is to process them into derived data. That can't easily use polymorphism.
+ */
+@BsonDiscriminator(key="type")
+public abstract class DataSource extends BaseModel {
+
+    public String regionId;
+
+    /** Description editable by end users */
+    public String description;
+
+    /**
+     * Internally we store all files with the same ID as their database entry, but we retain the file name to help the
+     * user recognize files they uploaded. We could also just put that info in the description.
+     */
+    public String originalFileName;
+
+    /** The size of the uploaded file, including any sidecar files. */
+    public int fileSizeBytes;
+
+    public FileStorageFormat fileFormat;
+
+    /**
+     * The geographic bounds of this data set in WGS84 coordinates (independent of the original CRS of the uploaded file).
+     * This type uses (north, south, east, west); ideally for consistency we'd use (minLon, minLat, maxLon, maxLat).
+     */
+    public Bounds wgsBounds;
+
+    /**
+     * Problems encountered while loading.
+     * TODO should this be a separate json file in storage? Should it be a Set to deduplicate?
+     */
+    public List<DataSourceValidationIssue> issues = new ArrayList<>();
+
+    public DataSource (UserPermissions user, String name) {
+        super(user, name);
+    }
+
+    /** Zero-argument constructor required for Mongo automatic POJO deserialization.
*/ + public DataSource () { } + + public WorkProduct toWorkProduct () { + return new WorkProduct(DATA_SOURCE, _id.toString(), regionId); + }; + + public void addIssue (DataSourceValidationIssue.Level level, String message) { + issues.add(new DataSourceValidationIssue(level, message)); + } + + public FileStorageKey fileStorageKey () { + return new FileStorageKey(FileCategory.DATASOURCES, _id.toString(), fileFormat.extension); + } + +} diff --git a/src/main/java/com/conveyal/analysis/models/DataSourceValidationIssue.java b/src/main/java/com/conveyal/analysis/models/DataSourceValidationIssue.java new file mode 100644 index 000000000..515abc5a9 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/models/DataSourceValidationIssue.java @@ -0,0 +1,21 @@ +package com.conveyal.analysis.models; + +/** + * Represents problems encountered while validating a newly uploaded DataSource. + */ +public class DataSourceValidationIssue { + + public Level level; + + public String description; + + public enum Level { + ERROR, WARN, INFO + } + + public DataSourceValidationIssue (Level level, String description) { + this.level = level; + this.description = description; + } + +} diff --git a/src/main/java/com/conveyal/analysis/models/FileInfo.java b/src/main/java/com/conveyal/analysis/models/FileInfo.java index 8c38cbf96..351058899 100644 --- a/src/main/java/com/conveyal/analysis/models/FileInfo.java +++ b/src/main/java/com/conveyal/analysis/models/FileInfo.java @@ -6,6 +6,9 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import org.bson.types.ObjectId; +/** + * Metadata about files uploaded to Conveyal + */ public class FileInfo extends BaseModel { public String regionId = null; diff --git a/src/main/java/com/conveyal/analysis/models/GtfsDataSource.java b/src/main/java/com/conveyal/analysis/models/GtfsDataSource.java new file mode 100644 index 000000000..7b40e0053 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/models/GtfsDataSource.java @@ -0,0 +1,11 @@ +package com.conveyal.analysis.models; + +import org.bson.codecs.pojo.annotations.BsonDiscriminator; + +/** + * Placeholder for representing uploaded GTFS data. + */ +@BsonDiscriminator(key="type", value="gtfs") +public class GtfsDataSource extends DataSource { + +} diff --git a/src/main/java/com/conveyal/analysis/models/OpportunityDataset.java b/src/main/java/com/conveyal/analysis/models/OpportunityDataset.java index dc977b586..773c4efd2 100644 --- a/src/main/java/com/conveyal/analysis/models/OpportunityDataset.java +++ b/src/main/java/com/conveyal/analysis/models/OpportunityDataset.java @@ -2,6 +2,7 @@ import com.conveyal.file.FileStorageFormat; import com.conveyal.file.FileStorageKey; +import com.conveyal.r5.analyst.PointSet; import com.conveyal.r5.analyst.WebMercatorExtents; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -22,6 +23,9 @@ public class OpportunityDataset extends Model { /** The unique id for the data source (CSV file, Shapefile etc.) from which this dataset was derived. */ public String sourceId; + /** The ID of the DataGroup that this OpportunityDataset belongs to (all created at once from a single source). */ + public String dataGroupId; + /** * Bucket name on S3 where the opportunity data itself is persisted. Deprecated: as of April 2021, the FileStorage * system encapsulates how local or remote storage coordinates are derived from the FileCategory. 
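    // Illustrative sketch (hypothetical controller code, not part of this patch) of how the dataGroupId field added
    // above might be used when a single upload yields several OpportunityDatasets, e.g. one per numeric attribute of
    // a shapefile; variable names are assumptions:
    //   DataGroup group = new DataGroup(user, sourceId, "Opportunities from " + originalFileName);
    //   for (OpportunityDataset dataset : datasetsFromUpload) {
    //       dataset.dataGroupId = group._id.toString();
    //   }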
@@ -103,6 +107,19 @@ public WebMercatorExtents getWebMercatorExtents () { return new WebMercatorExtents(west, north, width, height, DEFAULT_ZOOM); } + @JsonIgnore + public void setWebMercatorExtents (PointSet pointset) { + // These bounds are currently in web Mercator pixels, which are relevant to Grids but are not natural units + // for FreeformPointSets. There are only unique minimal web Mercator bounds for FreeformPointSets if + // the zoom level is fixed in OpportunityDataset (FIXME we may change this soon). + // Perhaps these metadata bounds should be WGS84 instead, it depends how the UI uses them. + WebMercatorExtents extents = pointset.getWebMercatorExtents(); + this.west = extents.west; + this.north = extents.north; + this.width = extents.width; + this.height = extents.height; + } + /** Analysis region this dataset was uploaded in. */ public String regionId; } diff --git a/src/main/java/com/conveyal/analysis/models/OsmDataSource.java b/src/main/java/com/conveyal/analysis/models/OsmDataSource.java new file mode 100644 index 000000000..e7abca37f --- /dev/null +++ b/src/main/java/com/conveyal/analysis/models/OsmDataSource.java @@ -0,0 +1,11 @@ +package com.conveyal.analysis.models; + +import org.bson.codecs.pojo.annotations.BsonDiscriminator; + +/** + * Placeholder for representing uploaded OSM data. + */ +@BsonDiscriminator(key="type", value="osm") +public class OsmDataSource extends DataSource { + +} diff --git a/src/main/java/com/conveyal/analysis/models/Project.java b/src/main/java/com/conveyal/analysis/models/Project.java index 9f633c209..1e3993b1b 100644 --- a/src/main/java/com/conveyal/analysis/models/Project.java +++ b/src/main/java/com/conveyal/analysis/models/Project.java @@ -22,4 +22,4 @@ public Project clone () { throw AnalysisServerException.unknown(e); } } -} +} \ No newline at end of file diff --git a/src/main/java/com/conveyal/analysis/models/RegionalAnalysis.java b/src/main/java/com/conveyal/analysis/models/RegionalAnalysis.java index 932ac5218..670833251 100644 --- a/src/main/java/com/conveyal/analysis/models/RegionalAnalysis.java +++ b/src/main/java/com/conveyal/analysis/models/RegionalAnalysis.java @@ -16,6 +16,7 @@ public class RegionalAnalysis extends Model implements Cloneable { public String regionId; public String bundleId; public String projectId; + public String scenarioId; public int variant; diff --git a/src/main/java/com/conveyal/analysis/models/Reroute.java b/src/main/java/com/conveyal/analysis/models/Reroute.java index 163b80fc1..4f7499ab9 100644 --- a/src/main/java/com/conveyal/analysis/models/Reroute.java +++ b/src/main/java/com/conveyal/analysis/models/Reroute.java @@ -10,6 +10,7 @@ public String getType() { return "reroute"; } + /** The _id of the gtfs feed, providing a scope for any unscoped identifiers in this Modification. 
*/ public String feed; public String[] routes; public String[] trips; diff --git a/src/main/java/com/conveyal/analysis/models/SpatialDataSource.java b/src/main/java/com/conveyal/analysis/models/SpatialDataSource.java new file mode 100644 index 000000000..0be627908 --- /dev/null +++ b/src/main/java/com/conveyal/analysis/models/SpatialDataSource.java @@ -0,0 +1,47 @@ +package com.conveyal.analysis.models; + +import com.conveyal.analysis.UserPermissions; +import com.conveyal.analysis.datasource.SpatialAttribute; +import com.conveyal.file.FileStorageKey; +import com.conveyal.r5.util.ShapefileReader; +import org.bson.codecs.pojo.annotations.BsonDiscriminator; + +import java.util.List; + +import static com.conveyal.file.FileCategory.DATASOURCES; + +/** + * A SpatialDataSource is metadata about a user-uploaded file containing geospatial features (e.g. shapefile, GeoJSON, + * or CSV containing point coordinates) that has been validated and is ready to be processed into specific Conveyal + * formats (e.g. grids and other spatial layers). + * The defining characteristic of a SpatialDataSource is that it contains a set of "features" which all share a schema: + * they all have the same set of attributes (named and typed fields), and one of these attributes is a geometry of a + * dataset-wide type (polygon, linestring etc.) in a coordinate system referencing geographic space. + */ +@BsonDiscriminator(key="type", value="spatial") +public class SpatialDataSource extends DataSource { + + /** The number of features in this SpatialDataSource. */ + public long featureCount; + + /** All features in this SpatialDataSource have an attached geometry of this type. */ + public ShapefileReader.GeometryType geometryType; + + /** An EPSG code for the source's native coordinate system, or a WKT projection string. */ + public String coordinateSystem; + + /** Every feature has this set of Attributes - this is essentially a schema giving attribute names and types. */ + public List attributes; + + public SpatialDataSource (UserPermissions userPermissions, String name) { + super(userPermissions, name); + } + + /** Zero-argument constructor required for Mongo automatic POJO deserialization. */ + public SpatialDataSource () { } + + public FileStorageKey storageKey () { + return new FileStorageKey(DATASOURCES, this._id.toString(), fileFormat.extension); + } + +} diff --git a/src/main/java/com/conveyal/analysis/persistence/AnalysisCollection.java b/src/main/java/com/conveyal/analysis/persistence/AnalysisCollection.java index fe7e43984..7de3f8a60 100644 --- a/src/main/java/com/conveyal/analysis/persistence/AnalysisCollection.java +++ b/src/main/java/com/conveyal/analysis/persistence/AnalysisCollection.java @@ -1,6 +1,7 @@ package com.conveyal.analysis.persistence; import com.conveyal.analysis.AnalysisServerException; +import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.models.BaseModel; import com.conveyal.analysis.util.JsonUtil; import com.mongodb.client.MongoCollection; @@ -21,12 +22,10 @@ public class AnalysisCollection { - public MongoCollection collection; - private Class type; + public static final String MONGO_PROP_ACCESS_GROUP = "accessGroup"; - private String getAccessGroup (Request req) { - return req.attribute("accessGroup"); - } + public final MongoCollection collection; + private final Class type; private AnalysisServerException invalidAccessGroup() { return AnalysisServerException.forbidden("Permission denied. 
Invalid access group."); @@ -41,8 +40,14 @@ public DeleteResult delete (T value) { return collection.deleteOne(eq("_id", value._id)); } - public List findPermitted(Bson query, String accessGroup) { - return find(and(eq("accessGroup", accessGroup), query)); + public DeleteResult deleteByIdParamIfPermitted (Request request) { + String _id = request.params("_id"); + UserPermissions user = UserPermissions.from(request); + return collection.deleteOne(and(eq("_id", new ObjectId(_id)), eq("accessGroup", user.accessGroup))); + } + + public List findPermitted(Bson query, UserPermissions userPermissions) { + return find(and(eq(MONGO_PROP_ACCESS_GROUP, userPermissions.accessGroup), query)); } public List find(Bson query) { @@ -62,17 +67,42 @@ public T findById(ObjectId _id) { return collection.find(eq("_id", _id)).first(); } - public T create(T newModel, String accessGroup, String creatorEmail) { - newModel.accessGroup = accessGroup; - newModel.createdBy = creatorEmail; - newModel.updatedBy = creatorEmail; + public T findByIdIfPermitted (String _id, UserPermissions userPermissions) { + T item = findById(_id); + if (item.accessGroup.equals(userPermissions.accessGroup)) { + return item; + } else { + // TODO: To simplify stack traces this should be refactored to "throw new InvalidAccessGroupException()" + // which should be a subtype of AnalysisServerException with methods like getHttpCode(). + throw invalidAccessGroup(); + } + } + + public T create(T newModel, UserPermissions userPermissions) { + newModel.accessGroup = userPermissions.accessGroup; + newModel.createdBy = userPermissions.email; + newModel.updatedBy = userPermissions.email; - // This creates the `_id` automatically + // This creates the `_id` automatically if it is missing collection.insertOne(newModel); return newModel; } + /** + * Note that if the supplied model has _id = null, the Mongo insertOne method will overwrite it with a new + * ObjectId(). We consider it good practice to set the _id for any model object ourselves, avoiding this behavior. + * It looks like we could remove the OBJECT_ID_GENERATORS convention to force explicit ID creation. + * https://mongodb.github.io/mongo-java-driver/3.11/bson/pojos/#conventions + */ + public void insert (T model) { + collection.insertOne(model); + } + + public void insertMany (List models) { + collection.insertMany(models); + } + public T update(T value) { return update(value, value.accessGroup); } @@ -86,7 +116,7 @@ public T update(T value, String accessGroup) { UpdateResult result = collection.replaceOne(and( eq("_id", value._id), eq("nonce", oldNonce), - eq("accessGroup", accessGroup) + eq(MONGO_PROP_ACCESS_GROUP, accessGroup) ), value); // If no documents were modified try to find the document to find out why @@ -106,29 +136,27 @@ public T update(T value, String accessGroup) { return value; } + // TODO should all below be static helpers on HttpController? Passing the whole request in seems to defy encapsulation. + // On the other hand, making them instance methods reduces the number of parameters and gives access to Class. + /** * Controller creation helper. */ public T create(Request req, Response res) throws IOException { T value = JsonUtil.objectMapper.readValue(req.body(), type); - - String accessGroup = getAccessGroup(req); - String email = req.attribute("email"); - return create(value, accessGroup, email); + return create(value, UserPermissions.from(req)); } /** - * Controller find by id helper. 
+ * Helper for HttpControllers - find a document by the _id path parameter in the request, checking permissions. */ - public T findPermittedByRequestParamId(Request req, Response res) { - String accessGroup = getAccessGroup(req); + public T findPermittedByRequestParamId (Request req) { + UserPermissions user = UserPermissions.from(req); T value = findById(req.params("_id")); - // Throw if or does not have permission - if (!value.accessGroup.equals(accessGroup)) { + if (!value.accessGroup.equals(user.accessGroup)) { throw invalidAccessGroup(); } - return value; } @@ -137,12 +165,12 @@ public T findPermittedByRequestParamId(Request req, Response res) { */ public T update(Request req, Response res) throws IOException { T value = JsonUtil.objectMapper.readValue(req.body(), type); - - String accessGroup = getAccessGroup(req); - value.updatedBy = req.attribute("email"); - - if (!value.accessGroup.equals(accessGroup)) throw invalidAccessGroup(); - - return update(value, accessGroup); + final UserPermissions user = UserPermissions.from(req); + value.updatedBy = user.email; + if (!value.accessGroup.equals(user.accessGroup)) { + throw invalidAccessGroup(); + } + return update(value, user.accessGroup); } + } diff --git a/src/main/java/com/conveyal/analysis/persistence/AnalysisDB.java b/src/main/java/com/conveyal/analysis/persistence/AnalysisDB.java index 4c31e75fd..ababb330a 100644 --- a/src/main/java/com/conveyal/analysis/persistence/AnalysisDB.java +++ b/src/main/java/com/conveyal/analysis/persistence/AnalysisDB.java @@ -1,18 +1,22 @@ package com.conveyal.analysis.persistence; -import com.mongodb.MongoClientSettings; +import com.conveyal.analysis.models.BaseModel; import com.mongodb.client.MongoClient; import com.mongodb.client.MongoClients; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; +import org.bson.codecs.configuration.CodecProvider; import org.bson.codecs.configuration.CodecRegistry; +import org.bson.codecs.pojo.Conventions; import org.bson.codecs.pojo.PojoCodecProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static com.mongodb.MongoClientSettings.getDefaultCodecRegistry; import static org.bson.codecs.configuration.CodecRegistries.fromProviders; import static org.bson.codecs.configuration.CodecRegistries.fromRegistries; +/** TODO should we pre-create all the AnalysisCollections and fetch them by Class? */ public class AnalysisDB { private final Logger LOG = LoggerFactory.getLogger(AnalysisDB.class); @@ -27,15 +31,9 @@ public AnalysisDB (Config config) { LOG.info("Connecting to local MongoDB instance..."); mongo = MongoClients.create(); } + database = mongo.getDatabase(config.databaseName()).withCodecRegistry(makeCodecRegistry()); - // Create codec registry for POJOs - CodecRegistry pojoCodecRegistry = fromRegistries( - MongoClientSettings.getDefaultCodecRegistry(), - fromProviders(PojoCodecProvider.builder().automatic(true).build())); - - database = mongo.getDatabase(config.databaseName()).withCodecRegistry(pojoCodecRegistry); - - // Reqeust that the JVM clean up database connections in all cases - exiting cleanly or by being terminated. + // Request that the JVM clean up database connections in all cases - exiting cleanly or by being terminated. // We should probably register such hooks for other components to shut down more cleanly. 
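        // Hypothetical sketch of what such a hook could look like for another component exposing a close() method
        // (no additional hook is registered in this patch):
        //   Runtime.getRuntime().addShutdownHook(new Thread(otherCloseableComponent::close));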
Runtime.getRuntime().addShutdownHook(new Thread(() -> { Persistence.mongo.close(); @@ -43,10 +41,48 @@ public AnalysisDB (Config config) { })); } - public AnalysisCollection getAnalysisCollection (String name, Class clazz) { - return new AnalysisCollection<>(database.getCollection(name, clazz), clazz); + /** + * Create a codec registry that has all the default codecs (dates, geojson, etc.) and falls back to a provider + * that automatically generates codecs for any other Java class it encounters, based on their public getter and + * setter methods and public fields, skipping any properties whose underlying fields are transient or static. + * These classes must have an empty public or protected zero-argument constructor. + * An automatic PojoCodecProvider can create class models and codecs on the fly as it encounters the classes + * during writing. However, upon restart it will need to re-register those same classes before it can decode + * them. This is apparently done automatically when calling database.getCollection(), but gets a little tricky + * when decoding subclasses whose discriminators are not fully qualified class names with package. See Javadoc + * on getAnalysisCollection() for how we register such subclasses. + * We could register all these subclasses here via the PojoCodecProvider.Builder, but that separates their + * registration from the place they're used. The builder has methods for registering whole packages, but these + * methods do not auto-scan, they just provide the same behavior as automatic() but limited to specific packages. + */ + private CodecRegistry makeCodecRegistry () { + CodecProvider automaticPojoCodecProvider = PojoCodecProvider.builder().automatic(true).build(); + CodecRegistry pojoCodecRegistry = fromRegistries( + getDefaultCodecRegistry(), + fromProviders(automaticPojoCodecProvider) + ); + return pojoCodecRegistry; + } + + /** + * If the optional subclasses are supplied, the codec registry will be hit to cause it to build class models and + * codecs for them. This is necessary when these subclasses specify short discriminators, as opposed to the + * verbose default discriminator of a fully qualified class name, because the Mongo driver does not auto-scan for + * classes it has not encountered in a write operation or in a request for a collection. + */ + public AnalysisCollection getAnalysisCollection ( + String name, Class clazz, Class... subclasses + ){ + for (Class subclass : subclasses) { + database.getCodecRegistry().get(subclass); + } + return new AnalysisCollection(database.getCollection(name, clazz), clazz); } + /** + * Lower-level access to Mongo collections without the user-oriented functionality of BaseModel (accessGroup etc.) + * This is useful when storing server monitoring data (time series or event data) in a cloud environment. 
+ */ public MongoCollection getMongoCollection (String name, Class clazz) { return database.getCollection(name, clazz); } diff --git a/src/main/java/com/conveyal/analysis/persistence/MongoMap.java b/src/main/java/com/conveyal/analysis/persistence/MongoMap.java index 80fd07072..6001a82d2 100644 --- a/src/main/java/com/conveyal/analysis/persistence/MongoMap.java +++ b/src/main/java/com/conveyal/analysis/persistence/MongoMap.java @@ -1,6 +1,7 @@ package com.conveyal.analysis.persistence; import com.conveyal.analysis.AnalysisServerException; +import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.models.Model; import com.conveyal.r5.common.JsonUtilities; import com.mongodb.BasicDBObject; @@ -18,6 +19,8 @@ import java.io.IOException; import java.util.Collection; +import static com.conveyal.analysis.persistence.AnalysisCollection.MONGO_PROP_ACCESS_GROUP; + /** * An attempt at simulating a MapDB-style interface, for storing Java objects in MongoDB. * Note this used to implement Map, but the Map interface predates generics in Java, so it is more typesafe not @@ -31,10 +34,6 @@ public class MongoMap { private JacksonDBCollection wrappedCollection; private Class type; - private String getAccessGroup (Request req) { - return req.attribute("accessGroup"); - } - public MongoMap (JacksonDBCollection wrappedCollection, Class type) { this.type = type; this.wrappedCollection = wrappedCollection; @@ -45,17 +44,17 @@ public int size() { } public V findByIdFromRequestIfPermitted(Request request) { - return findByIdIfPermitted(request.params("_id"), getAccessGroup(request)); + return findByIdIfPermitted(request.params("_id"), UserPermissions.from(request)); } - public V findByIdIfPermitted(String id, String accessGroup) { + public V findByIdIfPermitted(String id, UserPermissions userPermissions) { V result = wrappedCollection.findOneById(id); if (result == null) { throw AnalysisServerException.notFound(String.format( "The resource you requested (_id %s) could not be found. Has it been deleted?", id )); - } else if (!accessGroup.equals(result.accessGroup)) { + } else if (!userPermissions.accessGroup.equals(result.accessGroup)) { throw AnalysisServerException.forbidden("You do not have permission to access this data."); } else { return result; @@ -67,27 +66,27 @@ public V get(String key) { } public Collection findAllForRequest(Request req) { - return find(QueryBuilder.start("accessGroup").is(getAccessGroup(req)).get()).toArray(); + return find(QueryBuilder.start(MONGO_PROP_ACCESS_GROUP).is(UserPermissions.from(req).accessGroup).get()).toArray(); } /** * Helper function that adds the `accessGroup` to the query if the user is not an admin. If you want to query using * the `accessGroup` as an admin it must be added to the query. */ - public Collection findPermitted(DBObject query, String accessGroup) { + public Collection findPermitted(DBObject query, UserPermissions userPermissions) { DBCursor cursor = find(QueryBuilder.start().and( query, - QueryBuilder.start("accessGroup").is(accessGroup).get() + QueryBuilder.start(MONGO_PROP_ACCESS_GROUP).is(userPermissions.accessGroup).get() ).get()); return cursor.toArray(); } // See comments for `findPermitted` above. This helper also adds a projection. 
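    // Illustrative usage from a hypothetical HTTP controller, combining a caller-supplied query with the
    // access-group restriction added by this helper while projecting away a bulky field (field and variable names
    // are assumptions):
    //   DBObject query = QueryBuilder.start("regionId").is(regionId).get();
    //   DBObject projection = new BasicDBObject("request", 0);
    //   Collection<RegionalAnalysis> results =
    //       Persistence.regionalAnalyses.findPermitted(query, projection, UserPermissions.from(req));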
- public Collection findPermitted(DBObject query, DBObject project, String accessGroup) { + public Collection findPermitted(DBObject query, DBObject project, UserPermissions userPermissions) { DBCursor cursor = find(QueryBuilder.start().and( query, - QueryBuilder.start("accessGroup").is(accessGroup).get() + QueryBuilder.start(MONGO_PROP_ACCESS_GROUP).is(userPermissions.accessGroup).get() ).get(), project); return cursor.toArray(); @@ -102,8 +101,7 @@ public Collection findPermittedForQuery (Request req) { req.queryParams().forEach(name -> { query.and(name).is(req.queryParams(name)); }); - - return findPermitted(query.get(), getAccessGroup(req)); + return findPermitted(query.get(), UserPermissions.from(req)); } /** @@ -124,13 +122,9 @@ public Collection getByProperty (String property, Object value) { public V createFromJSONRequest(Request request) throws IOException { V json = JsonUtilities.objectMapper.readValue(request.body(), this.type); - - // Set access group - json.accessGroup = getAccessGroup(request); - - // Set `createdBy` from the user's email - json.createdBy = request.attribute("email"); - + UserPermissions userPermissions = UserPermissions.from(request); + json.accessGroup = userPermissions.accessGroup; + json.createdBy = userPermissions.email; return create(json); } @@ -159,18 +153,18 @@ public V create(V value) { public V updateFromJSONRequest(Request request) throws IOException { V json = JsonUtilities.objectMapper.readValue(request.body(), this.type); // Add the additional check for the same access group - return updateByUserIfPermitted(json, request.attribute("email"), getAccessGroup(request)); + return updateByUserIfPermitted(json, UserPermissions.from(request)); } - public V updateByUserIfPermitted(V value, String updatedBy, String accessGroup) { + public V updateByUserIfPermitted(V value, UserPermissions userPermissions) { // Set `updatedBy` - value.updatedBy = updatedBy; + value.updatedBy = userPermissions.email; - return put(value, QueryBuilder.start("accessGroup").is(accessGroup).get()); + return put(value, QueryBuilder.start(MONGO_PROP_ACCESS_GROUP).is(userPermissions.accessGroup).get()); } public V put(String key, V value) { - if (key != value._id) throw AnalysisServerException.badRequest("ID does not match"); + if (!key.equals(value._id)) throw AnalysisServerException.badRequest("ID does not match"); return put(value, null); } @@ -228,10 +222,10 @@ public V modifiyWithoutUpdatingLock (V value) { return value; } - public V removeIfPermitted(String key, String accessGroup) { + public V removeIfPermitted(String key, UserPermissions userPermissions) { DBObject query = QueryBuilder.start().and( QueryBuilder.start("_id").is(key).get(), - QueryBuilder.start("accessGroup").is(accessGroup).get() + QueryBuilder.start(MONGO_PROP_ACCESS_GROUP).is(userPermissions.accessGroup).get() ).get(); V result = wrappedCollection.findAndRemove(query); diff --git a/src/main/java/com/conveyal/analysis/persistence/Persistence.java b/src/main/java/com/conveyal/analysis/persistence/Persistence.java index 71129c0a9..85e8be964 100644 --- a/src/main/java/com/conveyal/analysis/persistence/Persistence.java +++ b/src/main/java/com/conveyal/analysis/persistence/Persistence.java @@ -1,6 +1,5 @@ package com.conveyal.analysis.persistence; -import com.conveyal.analysis.models.AggregationArea; import com.conveyal.analysis.models.Bundle; import com.conveyal.analysis.models.JsonViews; import com.conveyal.analysis.models.Model; @@ -40,7 +39,6 @@ public class Persistence { public static MongoMap bundles; 
public static MongoMap regions; public static MongoMap regionalAnalyses; - public static MongoMap aggregationAreas; public static MongoMap opportunityDatasets; // TODO progressively migrate to AnalysisDB which is non-static @@ -59,7 +57,6 @@ public static void initializeStatically (AnalysisDB.Config config) { bundles = getTable("bundles", Bundle.class); regions = getTable("regions", Region.class); regionalAnalyses = getTable("regional-analyses", RegionalAnalysis.class); - aggregationAreas = getTable("aggregationAreas", AggregationArea.class); opportunityDatasets = getTable("opportunityDatasets", OpportunityDataset.class); } diff --git a/src/main/java/com/conveyal/analysis/results/TimeCsvResultWriter.java b/src/main/java/com/conveyal/analysis/results/TimeCsvResultWriter.java index 6d9137f5a..144da7713 100644 --- a/src/main/java/com/conveyal/analysis/results/TimeCsvResultWriter.java +++ b/src/main/java/com/conveyal/analysis/results/TimeCsvResultWriter.java @@ -1,6 +1,7 @@ package com.conveyal.analysis.results; import com.conveyal.file.FileStorage; +import com.conveyal.r5.analyst.FreeFormPointSet; import com.conveyal.r5.analyst.cluster.RegionalTask; import com.conveyal.r5.analyst.cluster.RegionalWorkResult; @@ -8,6 +9,7 @@ import java.util.ArrayList; import java.util.List; +import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; public class TimeCsvResultWriter extends CsvResultWriter { @@ -31,9 +33,13 @@ public String[] columnHeaders () { */ @Override protected void checkDimension (RegionalWorkResult workResult) { - // This CSV writer expects only a single freeform destination pointset. // TODO handle multiple destination pointsets at once? - checkState(task.destinationPointSets.length == 1); + checkState( + task.destinationPointSets != null && + task.destinationPointSets.length == 1 && + task.destinationPointSets[0] instanceof FreeFormPointSet, + "Time CSV writer expects only a single freeform destination pointset." + ); // In one-to-one mode, we expect only one value per origin, the destination point at the same pointset index as // the origin point. Otherwise, for each origin, we expect one value per destination. final int nDestinations = task.oneToOne ? 1 : task.destinationPointSets[0].featureCount(); diff --git a/src/main/java/com/conveyal/analysis/util/HttpUtils.java b/src/main/java/com/conveyal/analysis/util/HttpUtils.java index 6226c7d8d..de3ea3b49 100644 --- a/src/main/java/com/conveyal/analysis/util/HttpUtils.java +++ b/src/main/java/com/conveyal/analysis/util/HttpUtils.java @@ -8,23 +8,56 @@ import org.apache.commons.fileupload.servlet.ServletFileUpload; import javax.servlet.http.HttpServletRequest; +import java.io.UnsupportedEncodingException; import java.util.List; import java.util.Map; public abstract class HttpUtils { - /** Extract files from a Spark Request containing RFC 1867 multipart form-based file upload data. */ + + /** + * Extract files from a Spark Request containing RFC 1867 multipart form-based file upload data. + */ public static Map> getRequestFiles (HttpServletRequest req) { // The Javadoc on this factory class doesn't say anything about thread safety. Looking at the source code it // all looks threadsafe. But also very lightweight to instantiate, so in this code run by multiple threads // we play it safe and always create a new factory. 
// Setting a size threshold of 0 causes all files to be written to disk, which allows processing them in a - // uniform way in other threads, after the request handler has returned. - FileItemFactory fileItemFactory = new DiskFileItemFactory(0, null); - ServletFileUpload sfu = new ServletFileUpload(fileItemFactory); + // uniform way in other threads, after the request handler has returned. This does however cause some very + // small form fields to be written to disk files. Ideally we'd identify the smallest actual file we'll ever + // handle and set the threshold a little higher. The downside is that if a tiny file is actually uploaded even + // by accident, our code will not be able to get a file handle for it and fail. Some legitimate files like + // Shapefile .prj sidecars can be really small. + // If we always saved the FileItems via write() or read them with getInputStream() they would not all need to + // be on disk. try { + FileItemFactory fileItemFactory = new DiskFileItemFactory(0, null); + ServletFileUpload sfu = new ServletFileUpload(fileItemFactory); return sfu.parseParameterMap(req); } catch (Exception e) { - throw AnalysisServerException.badRequest(ExceptionUtils.stackTraceString(e)); + throw AnalysisServerException.fileUpload(ExceptionUtils.stackTraceString(e)); + } + } + + /** + * Get the specified field from a map representing a multipart/form-data POST request, as a UTF-8 String. + * FileItems represent any form item that was received within a multipart/form-data POST request, not just files. + * This is a static utility method that should be reusable across different HttpControllers. + */ + public static String getFormField(Map> formFields, String fieldName, boolean required) { + try { + List fileItems = formFields.get(fieldName); + if (fileItems == null || fileItems.isEmpty()) { + if (required) { + throw AnalysisServerException.badRequest("Missing required field: " + fieldName); + } else { + return null; + } + } + String value = fileItems.get(0).getString("UTF-8"); + return value; + } catch (UnsupportedEncodingException e) { + throw AnalysisServerException.badRequest(String.format("Multipart form field '%s' had unsupported encoding", + fieldName)); } } } diff --git a/src/main/java/com/conveyal/analysis/util/JsonUtil.java b/src/main/java/com/conveyal/analysis/util/JsonUtil.java index 88c435842..04f11ef8d 100644 --- a/src/main/java/com/conveyal/analysis/util/JsonUtil.java +++ b/src/main/java/com/conveyal/analysis/util/JsonUtil.java @@ -3,8 +3,12 @@ import com.conveyal.analysis.models.JsonViews; import com.conveyal.geojson.GeoJsonModule; import com.conveyal.r5.model.json_serialization.JavaLocalDateSerializer; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import org.mongojack.internal.MongoJackModule; import spark.ResponseTransformer; @@ -33,4 +37,20 @@ public static ObjectMapper getObjectMapper(Class view, boolean configureMongoJac return objectMapper; } + public static String toJsonString (JsonNode node) { + try { + return objectMapper.writeValueAsString(node); + } catch (JsonProcessingException e) { + throw new RuntimeException("Failed to write JSON.", e); + } + } + + public static ObjectNode objectNode () { + return objectMapper.createObjectNode(); + } + + public static ArrayNode 
arrayNode () { + return objectMapper.createArrayNode(); + } + } diff --git a/src/main/java/com/conveyal/data/census/CensusExtractor.java b/src/main/java/com/conveyal/data/census/CensusExtractor.java index 8a89b25b9..86918c2d7 100644 --- a/src/main/java/com/conveyal/data/census/CensusExtractor.java +++ b/src/main/java/com/conveyal/data/census/CensusExtractor.java @@ -3,6 +3,7 @@ import com.conveyal.data.geobuf.GeobufEncoder; import com.conveyal.data.geobuf.GeobufFeature; import com.conveyal.geojson.GeoJsonModule; +import com.conveyal.r5.analyst.progress.NoopProgressListener; import com.fasterxml.jackson.databind.ObjectMapper; import org.locationtech.jts.geom.Geometry; @@ -47,7 +48,8 @@ public static void main (String... args) throws IOException { Double.parseDouble(args[2]), Double.parseDouble(args[3]), Double.parseDouble(args[4]), - false + false, + new NoopProgressListener() ); } else { @@ -57,8 +59,7 @@ public static void main (String... args) throws IOException { FileInputStream fis = new FileInputStream(new File(args[1])); FeatureCollection fc = om.readValue(fis, FeatureCollection.class); fis.close(); - - features = source.extract(fc.features.get(0).geometry, false); + features = source.extract(fc.features.get(0).geometry, false, new NoopProgressListener()); } OutputStream out; diff --git a/src/main/java/com/conveyal/data/census/SeamlessSource.java b/src/main/java/com/conveyal/data/census/SeamlessSource.java index 4824a268e..7d912eccd 100644 --- a/src/main/java/com/conveyal/data/census/SeamlessSource.java +++ b/src/main/java/com/conveyal/data/census/SeamlessSource.java @@ -2,6 +2,7 @@ import com.conveyal.data.geobuf.GeobufDecoder; import com.conveyal.data.geobuf.GeobufFeature; +import com.conveyal.r5.analyst.progress.ProgressListener; import org.locationtech.jts.geom.Coordinate; import org.locationtech.jts.geom.Envelope; import org.locationtech.jts.geom.Geometry; @@ -34,18 +35,22 @@ public abstract class SeamlessSource { private static final GeometryFactory geometryFactory = new GeometryFactory(); /** Extract features by bounding box */ - public Map extract(double north, double east, double south, double west, boolean onDisk) throws - IOException { + public Map extract( + double north, double east, double south, double west, boolean onDisk, ProgressListener progressListener + ) throws IOException { GeometricShapeFactory factory = new GeometricShapeFactory(geometryFactory); factory.setCentre(new Coordinate((east + west) / 2, (north + south) / 2)); factory.setWidth(east - west); factory.setHeight(north - south); Polygon rect = factory.createRectangle(); - return extract(rect, onDisk); + return extract(rect, onDisk, progressListener); } /** Extract features by arbitrary polygons */ - public Map extract(Geometry bounds, boolean onDisk) throws IOException { + public Map extract ( + Geometry bounds, boolean onDisk, ProgressListener progressListener + ) throws IOException { + Map ret; if (onDisk) @@ -65,6 +70,7 @@ public Map extract(Geometry bounds, boolean onDisk) throws int tcount = (maxX - minX + 1) * (maxY - minY + 1); LOG.info("Requesting {} tiles", tcount); + progressListener.beginTask("Reading census tiles", tcount); int fcount = 0; @@ -72,14 +78,13 @@ public Map extract(Geometry bounds, boolean onDisk) throws for (int x = minX; x <= maxX; x++) { for (int y = minY; y <= maxY; y++) { InputStream is = getInputStream(x, y); - - if (is == null) + if (is == null) { // no data in this tile + progressListener.increment(); continue; - + } // decoder closes input stream as soon as it has read 
the tile GeobufDecoder decoder = new GeobufDecoder(new GZIPInputStream(new BufferedInputStream(is))); - while (decoder.hasNext()) { GeobufFeature f = decoder.next(); // blocks are duplicated at the edges of tiles, no need to import twice @@ -94,9 +99,9 @@ public Map extract(Geometry bounds, boolean onDisk) throws LOG.info("Read {} features", fcount); } } + progressListener.increment(); } } - return ret; } diff --git a/src/main/java/com/conveyal/file/FileCategory.java b/src/main/java/com/conveyal/file/FileCategory.java index 7533a5110..02ac8c856 100644 --- a/src/main/java/com/conveyal/file/FileCategory.java +++ b/src/main/java/com/conveyal/file/FileCategory.java @@ -8,7 +8,7 @@ */ public enum FileCategory { - BUNDLES, GRIDS, RESULTS, RESOURCES, POLYGONS, TAUI; + BUNDLES, GRIDS, RESULTS, DATASOURCES, TAUI; /** @return a String for the directory or sub-bucket name containing all files in this category. */ public String directoryName () { diff --git a/src/main/java/com/conveyal/file/FileStorage.java b/src/main/java/com/conveyal/file/FileStorage.java index 41a417a44..d3dc55299 100644 --- a/src/main/java/com/conveyal/file/FileStorage.java +++ b/src/main/java/com/conveyal/file/FileStorage.java @@ -60,6 +60,8 @@ public interface FileStorage { /** * Delete the File identified by the FileStorageKey, in both the local cache and any remote mirror. + * Due to some pre-existing code, implementations must tolerate calling this method on files that don't exist + * without throwing exceptions. */ void delete(FileStorageKey fileStorageKey); diff --git a/src/main/java/com/conveyal/file/FileStorageFormat.java b/src/main/java/com/conveyal/file/FileStorageFormat.java index 90cd8826d..7d9cbf7c1 100644 --- a/src/main/java/com/conveyal/file/FileStorageFormat.java +++ b/src/main/java/com/conveyal/file/FileStorageFormat.java @@ -1,22 +1,39 @@ package com.conveyal.file; +import org.bson.codecs.pojo.annotations.BsonIgnore; + +/** + * An enumeration of all the file types we handle as uploads, derived internal data, or work products. + * Really this should be a union of several enumerated types (upload/internal/product) but Java does not allow this. + */ public enum FileStorageFormat { FREEFORM("pointset", "application/octet-stream"), GRID("grid", "application/octet-stream"), - POINTSET("pointset", "application/octet-stream"), + POINTSET("pointset", "application/octet-stream"), // Why is this "pointset" extension duplicated? PNG("png", "image/png"), - TIFF("tiff", "image/tiff"), - CSV("csv", "text/csv"); + GEOTIFF("tif", "image/tiff"), + CSV("csv", "text/csv"), + + // SHP implies .dbf and .prj, and optionally .shx + SHP("shp", "application/octet-stream"), - // These are not currently used but plan to be in the future. Exact types need to be determined - // GTFS("zip", "application/zip"), - // PBF("pbf", "application/octet-stream"), - // SHP("shp", "application/octet-stream") // This type does not work as is, it should be a zip? + // Some of these are not yet used. + // In our internal storage, we may want to force less ambiguous .gtfs.zip .osm.pbf and .geojson. + GTFS("zip", "application/zip"), + OSMPBF("pbf", "application/octet-stream"), + // Also can be application/geo+json, see https://www.iana.org/assignments/media-types/application/geo+json + // The extension used to be defined as .json TODO ensure that changing it to .geojson hasn't broken anything. 
+ GEOJSON("geojson", "application/json"), + // See requirement 3 http://www.geopackage.org/spec130/#_file_extension_name + GEOPACKAGE("gpkg", "application/geopackage+sqlite3"); + // These fields will not be serialized into Mongo. + // The default codec to serialize Enums into BSON for Mongo uses String name() and valueOf(String). + // TODO array of file extensions, with the first one used canonically in FileStorage and the others for detection. public final String extension; public final String mimeType; - FileStorageFormat(String extension, String mimeType) { + FileStorageFormat (String extension, String mimeType) { this.extension = extension; this.mimeType = mimeType; } diff --git a/src/main/java/com/conveyal/file/FileStorageKey.java b/src/main/java/com/conveyal/file/FileStorageKey.java index 22d504f3c..e9a1c5244 100644 --- a/src/main/java/com/conveyal/file/FileStorageKey.java +++ b/src/main/java/com/conveyal/file/FileStorageKey.java @@ -4,7 +4,7 @@ * A unique identifier for a file within a namespace drawn from an enum of known categories. * This maps to a subdirectory and filename in local storage, and a bucket and object key in S3-style cloud storage. * While keeping stored files in multiple distinct categories, this avoids passing around a lot of directory/bucket - * names as strings, and avoids mistakes where such strings are mismatched accross different function calls. + * names as strings, and avoids mistakes where such strings are mismatched across different function calls. */ public class FileStorageKey { diff --git a/src/main/java/com/conveyal/file/LocalFileStorage.java b/src/main/java/com/conveyal/file/LocalFileStorage.java index b0b398356..2731d4e71 100644 --- a/src/main/java/com/conveyal/file/LocalFileStorage.java +++ b/src/main/java/com/conveyal/file/LocalFileStorage.java @@ -9,6 +9,12 @@ import java.nio.file.FileSystemException; import java.nio.file.Files; import java.nio.file.StandardCopyOption; +import java.nio.file.attribute.PosixFilePermission; +import java.util.EnumSet; +import java.util.Set; + +import static java.nio.file.attribute.PosixFilePermission.OWNER_READ; +import static java.nio.file.attribute.PosixFilePermission.OWNER_WRITE; /** * This implementation of FileStorage stores files in a local directory hierarchy and does not mirror anything to @@ -36,26 +42,28 @@ public LocalFileStorage (Config config) { /** * Move the File into the FileStorage by moving the passed in file to the Path represented by the FileStorageKey. + * It is possible that on some systems (Windows) the file cannot be moved and it will be copied instead, leaving + * the source file in place. */ @Override - public void moveIntoStorage(FileStorageKey key, File file) { - // Get a pointer to the local file + public void moveIntoStorage(FileStorageKey key, File sourceFile) { + // Get the destination file path inside FileStorage, and ensure all its parent directories exist. File storedFile = getFile(key); - // Ensure the directories exist storedFile.getParentFile().mkdirs(); try { try { // Move the temporary file to the permanent file location. - Files.move(file.toPath(), storedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + Files.move(sourceFile.toPath(), storedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); } catch (FileSystemException e) { // The default Windows filesystem (NTFS) does not unlock memory-mapped files, so certain files (e.g. - // mapdb) cannot be moved or deleted. 
This workaround may cause temporary files to accumulate, but it - // should not be triggered for default Linux filesystems (ext). + // mapdb Write Ahead Log) cannot be moved or deleted. This workaround may cause temporary files + // to accumulate, but it should not be triggered for default Linux filesystems (ext). // See https://github.com/jankotek/MapDB/issues/326 - Files.copy(file.toPath(), storedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - LOG.info("Could not move {} because of FileSystem restrictions (probably NTFS). Copying instead.", - file.getName()); + Files.copy(sourceFile.toPath(), storedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + LOG.info("Could not move {} because of FileSystem restrictions (probably NTFS). Copied instead.", + sourceFile.getName()); } + setReadOnly(storedFile); } catch (IOException e) { throw new RuntimeException(e); } @@ -83,7 +91,18 @@ public String getURL (FileStorageKey key) { @Override public void delete (FileStorageKey key) { - getFile(key).delete(); + try { + File storedFile = getFile(key); + if (storedFile.exists()) { + // File permissions are set read-only to prevent corruption, so must be changed to allow deletion. + Files.setPosixFilePermissions(storedFile.toPath(), Set.of(OWNER_READ, OWNER_WRITE)); + storedFile.delete(); + } else { + LOG.warn("Attempted to delete non-existing file: " + storedFile); + } + } catch (Exception e) { + throw new RuntimeException("Exception while deleting stored file.", e); + } } @Override @@ -91,4 +110,35 @@ public boolean exists(FileStorageKey key) { return getFile(key).exists(); } + /** + * Set the file to be read-only and accessible only by the current user. + * All files in our FileStorage are set to read-only as a safeguard against corruption under concurrent access. + * Because the method Files.setPosixFilePermissions fails on Windows with an UnsupportedOperationException, + * we attempted to use the portable File.setReadable and File.setWritable methods to cover both POSIX and Windows + * filesystems, but these require multiple calls in succession to achieve fine grained control on POSIX filesystems. + * Specifically, there is no way to atomically set a file readable by its owner but non-readable by all other users. + * The setReadable/Writable ownerOnly parameter just leaves group and others permissions untouched and unchanged. + * To get the desired result on systems with user-group-other permissions granularity, you have to do something like: + * success &= file.setReadable(false, false); + * success &= file.setWritable(false, false); + * success &= file.setReadable(true, true); + * + * Instead, we first do the POSIX atomic call, which should cover all deployment environments, then fall back on the + * NIO call to cover any development environments using other filesystems. + */ + public static void setReadOnly (File file) { + try { + try { + Files.setPosixFilePermissions(file.toPath(), EnumSet.of(PosixFilePermission.OWNER_READ)); + } catch (UnsupportedOperationException e) { + LOG.warn("POSIX permissions unsupported on this filesystem. 
Falling back on portable NIO methods."); + if (!(file.setReadable(true) && file.setWritable(false))) { + LOG.error("Could not set read-only permissions on file {}", file); + } + } + } catch (Exception e) { + LOG.error("Could not set read-only permissions on file {}", file, e); + } + } + } diff --git a/src/main/java/com/conveyal/gtfs/ExtractGTFSMode.java b/src/main/java/com/conveyal/gtfs/ExtractGTFSMode.java new file mode 100644 index 000000000..58474451c --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/ExtractGTFSMode.java @@ -0,0 +1,121 @@ +package com.conveyal.gtfs; + +import com.conveyal.gtfs.model.Frequency; +import com.conveyal.gtfs.model.Route; +import com.conveyal.gtfs.model.Stop; +import com.conveyal.gtfs.model.StopTime; +import com.conveyal.gtfs.model.Transfer; +import com.conveyal.gtfs.model.Trip; +import com.google.common.base.Strings; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.Geometry; +import org.mapdb.Fun; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + +/** + * This main method will filter an input GTFS file, retaining only the given route_type (mode of transport). + * All routes, trips, stops, and frequencies for other route_types will be removed. This is useful for preparing + * minimal GTFS inputs for tests. For example, we have extracted only the subway / metro routes from the STM + * Montreal feed - they are useful for testing Monte Carlo code because they have many frequency entries per trip. + */ +public class ExtractGTFSMode { + + private static final String inputFile = "/Users/abyrd/geodata/stm.gtfs.zip"; + private static final String outputFile = "/Users/abyrd/geodata/stm-metro.gtfs.zip"; + + // Remove all shapes from the GTFS to make it simpler to render in a web UI + private static final boolean REMOVE_SHAPES = true; + + private static final int RETAIN_ROUTE_TYPE = Route.SUBWAY; + + public static void main (String[] args) { + + GTFSFeed feed = GTFSFeed.writableTempFileFromGtfs(inputFile); + + System.out.println("Removing routes that are not on mode " + RETAIN_ROUTE_TYPE); + Set retainRoutes = new HashSet<>(); + Iterator routeIterator = feed.routes.values().iterator(); + while (routeIterator.hasNext()) { + Route route = routeIterator.next(); + if (route.route_type == RETAIN_ROUTE_TYPE) { + retainRoutes.add(route.route_id); + } else { + routeIterator.remove(); + } + } + + System.out.println("Removing trips that are not on mode " + RETAIN_ROUTE_TYPE); + Set retainTrips = new HashSet<>(); + Iterator tripIterator = feed.trips.values().iterator(); + while (tripIterator.hasNext()) { + Trip trip = tripIterator.next(); + if (retainRoutes.contains(trip.route_id)) { + retainTrips.add(trip.trip_id); + } else { + tripIterator.remove(); + } + } + + System.out.println("Removing frequencies that are not on mode " + RETAIN_ROUTE_TYPE); + Iterator> freqIterator = feed.frequencies.iterator(); + while (freqIterator.hasNext()) { + Frequency frequency = freqIterator.next().b; + if (!retainTrips.contains(frequency.trip_id)) { + freqIterator.remove(); + } + } + + System.out.println("Removing stop_times that are not on mode " + RETAIN_ROUTE_TYPE); + Set referencedStops = new HashSet<>(); + Iterator stIterator = feed.stop_times.values().iterator(); + while (stIterator.hasNext()) { + StopTime stopTime = stIterator.next(); + if (retainTrips.contains(stopTime.trip_id)) { + referencedStops.add(stopTime.stop_id); 
+ } else { + stIterator.remove(); + } + } + + System.out.println("Removing unreferenced stops..."); + Iterator stopIterator = feed.stops.values().iterator(); + while (stopIterator.hasNext()) { + Stop stop = stopIterator.next(); + if (!referencedStops.contains(stop.stop_id)) { + stopIterator.remove(); + } + } + + if (REMOVE_SHAPES) { + System.out.println("Removing shapes table and removing shape IDs from trips..."); + feed.shape_points.clear(); + for (String tripId : feed.trips.keySet()) { + Trip trip = feed.trips.get(tripId); + trip.shape_id = null; + // Entry.setValue is an unsupported operation in MapDB, just re-put the trip. + feed.trips.put(tripId, trip); + } + } + + System.out.println("Filtering transfers for removed stops..."); + Iterator ti = feed.transfers.values().iterator(); + while (ti.hasNext()) { + Transfer t = ti.next(); + if ( ! (referencedStops.contains(t.from_stop_id) && referencedStops.contains(t.to_stop_id))) { + ti.remove(); + } + } + + System.out.println("Writing GTFS..."); + feed.toFile(outputFile); + feed.close(); + } + +} diff --git a/src/main/java/com/conveyal/gtfs/GTFSCache.java b/src/main/java/com/conveyal/gtfs/GTFSCache.java index cc945b9ef..5593b77a5 100644 --- a/src/main/java/com/conveyal/gtfs/GTFSCache.java +++ b/src/main/java/com/conveyal/gtfs/GTFSCache.java @@ -14,6 +14,7 @@ import java.util.concurrent.TimeUnit; import static com.conveyal.file.FileCategory.BUNDLES; +import static com.google.common.base.Preconditions.checkState; /** * Cache for GTFSFeed objects, a disk-backed (MapDB) representation of data from one GTFS feed. The source GTFS @@ -99,24 +100,47 @@ public FileStorageKey getFileKey (String id, String extension) { } /** This method should only ever be called by the cache loader. */ - private @Nonnull GTFSFeed retrieveAndProcessFeed(String id) throws GtfsLibException { - FileStorageKey dbKey = getFileKey(id, "db"); - FileStorageKey dbpKey = getFileKey(id, "db.p"); + private @Nonnull GTFSFeed retrieveAndProcessFeed(String bundleScopedFeedId) throws GtfsLibException { + FileStorageKey dbKey = getFileKey(bundleScopedFeedId, "db"); + FileStorageKey dbpKey = getFileKey(bundleScopedFeedId, "db.p"); if (fileStorage.exists(dbKey) && fileStorage.exists(dbpKey)) { // Ensure both MapDB files are local, pulling them down from remote storage as needed. fileStorage.getFile(dbKey); fileStorage.getFile(dbpKey); - return GTFSFeed.reopenReadOnly(fileStorage.getFile(dbKey)); + try { + return GTFSFeed.reopenReadOnly(fileStorage.getFile(dbKey)); + } catch (GtfsLibException e) { + if (e.getCause().getMessage().contains("Could not set field value: priority")) { + // Swallow exception and fall through - rebuild bad MapDB and upload to S3. + LOG.warn("Detected poisoned MapDB containing GTFSError.priority serializer. Rebuilding."); + } else { + throw e; + } + } } - FileStorageKey zipKey = getFileKey(id, "zip"); + FileStorageKey zipKey = getFileKey(bundleScopedFeedId, "zip"); if (!fileStorage.exists(zipKey)) { throw new GtfsLibException("Original GTFS zip file could not be found: " + zipKey); } + // This code path is rarely run because we usually pre-build GTFS MapDBs in bundleController and cache them. + // This will only be run when the resultant MapDB has been deleted or is otherwise unavailable. 
LOG.debug("Building or rebuilding MapDB from original GTFS ZIP file at {}...", zipKey); try { File tempDbFile = FileUtils.createScratchFile("db"); File tempDbpFile = new File(tempDbFile.getAbsolutePath() + ".p"); - GTFSFeed.newFileFromGtfs(tempDbFile, fileStorage.getFile(zipKey)); + // An unpleasant hack since we do not have separate references to the GTFS feed ID and Bundle ID here, + // only a concatenation of the two joined with an underscore. We have to force-override feed ID because + // references to its contents (e.g. in scenarios) are scoped only by the feed ID not the bundle ID. + // The bundle ID is expected to always be an underscore-free UUID, but old feed IDs may contain underscores + // (yielding filenames like old_feed_id_bundleuuid) so we look for the last underscore as a split point. + // GTFS feeds may now be referenced by multiple bundles with different IDs, so the last part of the file + // name is rather arbitrary - it's just the bundleId with which this feed was first associated. + // We don't really need to scope newer feeds by bundleId since they now have globally unique UUIDs. + int splitIndex = bundleScopedFeedId.lastIndexOf("_"); + checkState(splitIndex > 0 && splitIndex < bundleScopedFeedId.length() - 1, + "Expected underscore-joined feedId and bundleId."); + String feedId = bundleScopedFeedId.substring(0, splitIndex); + GTFSFeed.newFileFromGtfs(tempDbFile, fileStorage.getFile(zipKey), feedId); // The DB file should already be closed and flushed to disk. // Put the DB and DB.p files in local cache, and mirror to remote storage if configured. fileStorage.moveIntoStorage(dbKey, tempDbFile); diff --git a/src/main/java/com/conveyal/gtfs/GTFSFeed.java b/src/main/java/com/conveyal/gtfs/GTFSFeed.java index 77c6c3bac..49b4840bd 100644 --- a/src/main/java/com/conveyal/gtfs/GTFSFeed.java +++ b/src/main/java/com/conveyal/gtfs/GTFSFeed.java @@ -150,10 +150,11 @@ public class GTFSFeed implements Cloneable, Closeable { /** * A place to accumulate errors while the feed is loaded. Tolerate as many errors as possible and keep on loading. - * TODO store these outside the mapdb for size? If we just don't create this map, old workers should not fail. - * Ideally we'd report the errors to the backend when it first builds the MapDB. + * Note that this set is in memory, not in the MapDB file. We save these into a JSON file to avoid shuttling + * all the serialized errors back and forth to workers. Older workers do have a MapDB table here, but when they + * try to reopen newer MapDB files without that table, even in read-only mode, they'll just receive an empty Set. */ - public final NavigableSet errors; + public final Set errors; // TODO eliminate if not used by Analysis /** Merged stop buffers polygon built lazily by getMergedBuffers() */ @@ -187,6 +188,8 @@ public class GTFSFeed implements Cloneable, Closeable { * Interestingly, all references are resolvable when tables are loaded in alphabetical order. * * @param zip the source ZIP file to load, which will be closed when done loading. + * @param fid the feedId to be set on the feed. If null, any feedId declared in the feed will be used, falling back + * on the filename without any .zip extension. 
*/ public void loadFromFile(ZipFile zip, String fid) throws Exception { if (this.loaded) throw new UnsupportedOperationException("Attempt to load GTFS into existing database"); @@ -207,13 +210,11 @@ public void loadFromFile(ZipFile zip, String fid) throws Exception { // maybe we should just point to the feed object itself instead of its ID, and null out its stoptimes map after loading if (fid != null) { feedId = fid; - LOG.info("Feed ID is undefined, pester maintainers to include a feed ID. Using file name {}.", feedId); // TODO log an error, ideally feeds should include a feedID - } - else if (feedId == null || feedId.isEmpty()) { + LOG.info("Forcing feedId in MapDB to supplied string: {}.", feedId); + } else if (feedId == null || feedId.isEmpty()) { feedId = new File(zip.getName()).getName().replaceAll("\\.zip$", ""); - LOG.info("Feed ID is undefined, pester maintainers to include a feed ID. Using file name {}.", feedId); // TODO log an error, ideally feeds should include a feedID - } - else { + LOG.info("No feedId supplied and feed does not declare one. Using file name {}.", feedId); + } else { LOG.info("Feed ID is '{}'.", feedId); } @@ -259,10 +260,6 @@ else if (feedId == null || feedId.isEmpty()) { LOG.info("Detected {} errors in feed.", errors.size()); } - public void loadFromFile(ZipFile zip) throws Exception { - loadFromFile(zip, null); - } - public void toFile (String file) { try { File out = new File(file); @@ -783,7 +780,8 @@ private GTFSFeed (DB db) { patternForTrip = db.getTreeMap("patternForTrip"); - errors = db.getTreeSet("errors"); + // Note that this is an in-memory Java HashSet instead of MapDB table (as it was in past versions). + errors = new HashSet<>(); } // One critical point when constructing the MapDB is the instance cache type and size. @@ -796,7 +794,7 @@ private GTFSFeed (DB db) { // Initial tests show similar speeds for the default hashtable cache of 64k or 32k size and the hardRef cache. // By not calling any of the cacheEnable or cacheSize methods on the DB builder, we use the default values // that seem to perform well. - private static DB constructMapDb (File dbFile, boolean readOnly) { + private static DB constructMapDb (File dbFile, boolean writable) { DBMaker dbMaker; // TODO also allow for in-memory if (dbFile == null) { @@ -804,10 +802,10 @@ private static DB constructMapDb (File dbFile, boolean readOnly) { } else { dbMaker = DBMaker.newFileDB(dbFile); } - if (readOnly) { - dbMaker.readOnly(); - } else { + if (writable) { dbMaker.asyncWriteEnable(); + } else { + dbMaker.readOnly(); } try{ return dbMaker @@ -828,7 +826,7 @@ private static DB constructMapDb (File dbFile, boolean readOnly) { public static GTFSFeed reopenReadOnly (File file) { if (file.exists()) { - return new GTFSFeed(file, true); + return new GTFSFeed(file, false); } else { throw new GtfsLibException("Cannot reopen file, it does not exist."); } @@ -838,14 +836,16 @@ public static GTFSFeed reopenReadOnly (File file) { * Create a new DB file and load the specified GTFS ZIP into it. The resulting writable feed object is not returned * and must be reopened for subsequent read-only access. * @param dbFile the new file in which to store the database, or null to use a temporary file + * @param feedId the feedId to be set on the feed. If null, any feedId declared in the feed will be used, falling + * back on the filename without its .zip extension. 
*/ - public static void newFileFromGtfs (File dbFile, File gtfsFile) { + public static void newFileFromGtfs (File dbFile, File gtfsFile, String feedId) { if (gtfsFile == null || !gtfsFile.exists()) { throw new GtfsLibException("Cannot load from GTFS feed, file does not exist."); } try { GTFSFeed feed = newWritableFile(dbFile); - feed.loadFromFile(new ZipFile(gtfsFile)); + feed.loadFromFile(new ZipFile(gtfsFile), feedId); feed.close(); } catch (Exception e) { throw new GtfsLibException("Cannot load GTFS from feed ZIP.", e); @@ -863,7 +863,7 @@ public static GTFSFeed newWritableFile (File dbFile) { if (dbFile != null && dbFile.exists() && dbFile.length() > 0) { throw new GtfsLibException("Cannot create new file, it already exists."); } - return new GTFSFeed(dbFile, false); + return new GTFSFeed(dbFile, true); } /** @@ -871,10 +871,10 @@ public static GTFSFeed newWritableFile (File dbFile) { * the GTFS file at the supplied filesystem path. This could probably be combined with some other factory methods. */ public static GTFSFeed writableTempFileFromGtfs (String file) { - GTFSFeed feed = new GTFSFeed(null, false); + GTFSFeed feed = new GTFSFeed(null, true); try { ZipFile zip = new ZipFile(file); - feed.loadFromFile(zip); + feed.loadFromFile(zip, null); zip.close(); return feed; } catch (Exception e) { @@ -883,11 +883,13 @@ public static GTFSFeed writableTempFileFromGtfs (String file) { } } + // NOTE the feedId within the MapDB created here will be the one declared by the feed or based on its filename. + // This method makes no effort to impose the more unique feed IDs created by the Analysis backend. public static GTFSFeed readOnlyTempFileFromGtfs (String fileName) { try { File tempFile = File.createTempFile("com.conveyal.gtfs.", ".db"); tempFile.deleteOnExit(); - GTFSFeed.newFileFromGtfs(tempFile, new File(fileName)); + GTFSFeed.newFileFromGtfs(tempFile, new File(fileName), null); return GTFSFeed.reopenReadOnly(tempFile); } catch (Exception e) { throw new GtfsLibException("Error loading GTFS.", e); diff --git a/src/main/java/com/conveyal/gtfs/api/ApiMain.java b/src/main/java/com/conveyal/gtfs/api/ApiMain.java deleted file mode 100644 index 494b35033..000000000 --- a/src/main/java/com/conveyal/gtfs/api/ApiMain.java +++ /dev/null @@ -1,58 +0,0 @@ -package com.conveyal.gtfs.api; - -import com.conveyal.gtfs.GTFSCache; -import com.conveyal.gtfs.GTFSFeed; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.List; -import java.util.stream.Collectors; - -/** - * Created by landon on 2/3/16. - * TODO convert ApiMain into a Component once it's very simple. - */ -public class ApiMain { - - private static GTFSCache cache; - - public static final Logger LOG = LoggerFactory.getLogger(ApiMain.class); - - public static void initialize (GTFSCache cache) { - ApiMain.cache = cache; - } - - // TODO rename methods, we no longer have FeedSource. - private static GTFSFeed getFeedSource (String uniqueId) { - GTFSFeed feed = cache.get(uniqueId); - // The feedId of the GTFSFeed objects may not be unique - we can have multiple versions of the same feed - // covering different time periods, uploaded by different users. Therefore we record another ID here that is - // known to be unique across the whole application - the ID used to fetch the feed. - // TODO setting this field could be pushed down into cache.get() or even into the CacheLoader, but I'm doing - // it here to keep this a pure refactor for now. 
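(Reviewer note, not part of the patch: a usage sketch of the revised factory methods, under the assumption that callers build the MapDB once and then reopen it read-only. The file paths and feed ID are hypothetical.)

import java.io.File;
import com.conveyal.gtfs.GTFSFeed;

public class GtfsFeedLoadExample {
    public static void main(String[] args) {
        // Hypothetical locations for the MapDB file and the source GTFS zip.
        File dbFile = new File("/tmp/example-feed.db");
        File gtfsZip = new File("/tmp/example-feed.zip");

        // Load the zip into a new writable MapDB, force-overriding any feedId declared in the feed.
        GTFSFeed.newFileFromGtfs(dbFile, gtfsZip, "example-feed-id");

        // The writable feed is closed inside newFileFromGtfs; reopen read-only for queries.
        GTFSFeed feed = GTFSFeed.reopenReadOnly(dbFile);
        System.out.println("Loaded feed with ID: " + feed.feedId);
    }
}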
- feed.uniqueId = uniqueId; - return feed; - } - - /** - * Convenience function to get a feed source without throwing checked exceptions, for example for use in lambdas. - * @return the GTFSFeed for the given ID, or null if an exception occurs. - */ - public static GTFSFeed getFeedSourceWithoutExceptions (String id) { - try { - return getFeedSource(id); - } catch (Exception e) { - LOG.error("Error retrieving from cache feed " + id, e); - return null; - } - } - - // TODO verify that this is not used to fetch so many feeds that it will cause some of them to be closed by eviction - // TODO introduce checks on quantity of feeds, against max cache size, and fail hard if too many are requested. - public static List getFeedSources (List feedIds) { - return feedIds.stream() - .map(ApiMain::getFeedSourceWithoutExceptions) - .filter(fs -> fs != null) - .collect(Collectors.toList()); - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/GeoJsonCoercing.java b/src/main/java/com/conveyal/gtfs/api/graphql/GeoJsonCoercing.java deleted file mode 100644 index 65f297967..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/GeoJsonCoercing.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.conveyal.gtfs.api.graphql; - -import graphql.schema.Coercing; -import org.locationtech.jts.geom.LineString; -import org.locationtech.jts.geom.MultiPolygon; - -import java.util.stream.Stream; - -/** - * Created by matthewc on 3/9/16. - */ -public class GeoJsonCoercing implements Coercing { - @Override - public Object serialize(Object input) { - // Handle newer org.locationtech JTS LineStrings - if (input instanceof LineString) { - GeoJsonLineString ret = new GeoJsonLineString(); - ret.coordinates = Stream.of(((LineString)input).getCoordinates()) - .map(c -> new double[] { c.x, c.y }) - .toArray(i -> new double[i][]); - - return ret; - } - // Also handle legacy com.vividsolutons JTS LineStrings, which are serialized into our MapDBs - else if (input instanceof com.vividsolutions.jts.geom.LineString) { - GeoJsonLineString ret = new GeoJsonLineString(); - ret.coordinates = Stream.of(((com.vividsolutions.jts.geom.LineString) input).getCoordinates()) - .map(c -> new double[] { c.x, c.y }) - .toArray(i -> new double[i][]); - - return ret; - } - else if (input instanceof MultiPolygon) { - MultiPolygon g = (MultiPolygon) input; - GeoJsonMultiPolygon ret = new GeoJsonMultiPolygon(); - ret.coordinates = Stream.of(g.getCoordinates()) - .map(c -> new double[] { c.x, c.y }) - .toArray(i -> new double[i][]); - - return ret; - } - else return null; - } - - @Override - public Object parseValue(Object o) { - return null; - } - - @Override - public Object parseLiteral(Object o) { - return null; - } - - private static class GeoJsonLineString { - public final String type = "LineString"; - public double[][] coordinates; - } - - private static class GeoJsonMultiPolygon { - public final String type = "MultiPolygon"; - public double[][] coordinates; - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/GraphQLGtfsSchema.java b/src/main/java/com/conveyal/gtfs/api/graphql/GraphQLGtfsSchema.java deleted file mode 100644 index ccd1274b3..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/GraphQLGtfsSchema.java +++ /dev/null @@ -1,113 +0,0 @@ -package com.conveyal.gtfs.api.graphql; - -import com.conveyal.gtfs.api.graphql.fetchers.FeedFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.PatternFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.RouteFetcher; -import 
com.conveyal.gtfs.api.graphql.fetchers.StopFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.StopTimeFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.TripDataFetcher; -import com.conveyal.gtfs.api.graphql.types.FeedType; -import com.conveyal.gtfs.api.graphql.types.PatternType; -import com.conveyal.gtfs.api.graphql.types.RouteType; -import com.conveyal.gtfs.api.graphql.types.StopTimeType; -import com.conveyal.gtfs.api.graphql.types.StopType; -import com.conveyal.gtfs.api.graphql.types.TripType; -import graphql.schema.*; - -import static com.conveyal.gtfs.api.util.GraphQLUtil.*; -import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; -import static graphql.schema.GraphQLObjectType.newObject; - -/** - * Created by matthewc on 3/9/16. - */ -public class GraphQLGtfsSchema { - public static GraphQLObjectType stopType = StopType.build(); - - public static GraphQLObjectType stopTimeType = StopTimeType.build(); - - public static GraphQLObjectType tripType = TripType.build(); - - public static GraphQLObjectType patternType = PatternType.build(); - - public static GraphQLObjectType routeType = RouteType.build(); - - public static GraphQLObjectType feedType = FeedType.build(); - - public static GraphQLObjectType rootQuery = newObject() - .name("rootQuery") - .description("Root level query for routes, stops, feeds, patterns, trips, and stopTimes within GTFS feeds.") - .field(newFieldDefinition() - .name("routes") - .description("List of GTFS routes optionally queried by route_id (feed_id required).") - .type(new GraphQLList(routeType)) - .argument(multiStringArg("route_id")) - .argument(multiStringArg("feed_id")) - .dataFetcher(RouteFetcher::apex) - .build() - ) - .field(newFieldDefinition() - .name("stops") - .type(new GraphQLList(stopType)) - .argument(multiStringArg("feed_id")) - .argument(multiStringArg("stop_id")) - .argument(multiStringArg("route_id")) - .argument(multiStringArg("pattern_id")) - .argument(floatArg("lat")) - .argument(floatArg("lon")) - .argument(floatArg("radius")) - .argument(floatArg("max_lat")) - .argument(floatArg("max_lon")) - .argument(floatArg("min_lat")) - .argument(floatArg("min_lon")) - .dataFetcher(StopFetcher::apex) - .build() - ) - .field(newFieldDefinition() - .name("feeds") - .argument(multiStringArg("feed_id")) - .dataFetcher(FeedFetcher::apex) - .type(new GraphQLList(feedType)) - .build() - ) - // TODO: determine if there's a better way to get at the refs for patterns, trips, and stopTimes than injecting them at the root. 
- .field(newFieldDefinition() - .name("patterns") - .type(new GraphQLList(patternType)) - .argument(multiStringArg("feed_id")) - .argument(multiStringArg("pattern_id")) - .argument(floatArg("lat")) - .argument(floatArg("lon")) - .argument(floatArg("radius")) - .argument(floatArg("max_lat")) - .argument(floatArg("max_lon")) - .argument(floatArg("min_lat")) - .argument(floatArg("min_lon")) - .dataFetcher(PatternFetcher::apex) - .build() - ) - .field(newFieldDefinition() - .name("trips") - .argument(multiStringArg("feed_id")) - .argument(multiStringArg("trip_id")) - .argument(multiStringArg("route_id")) - .dataFetcher(TripDataFetcher::apex) - .type(new GraphQLList(tripType)) - .build() - ) - .field(newFieldDefinition() - .name("stopTimes") - .argument(multiStringArg("feed_id")) - .argument(multiStringArg("stop_id")) - .argument(multiStringArg("trip_id")) - .dataFetcher(StopTimeFetcher::apex) - .type(new GraphQLList(stopTimeType)) - .build() - ) - .build(); - - - - public static GraphQLSchema schema = GraphQLSchema.newSchema().query(rootQuery).build(); - -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/WrappedEntityFieldFetcher.java b/src/main/java/com/conveyal/gtfs/api/graphql/WrappedEntityFieldFetcher.java deleted file mode 100644 index dc707f9fd..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/WrappedEntityFieldFetcher.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.conveyal.gtfs.api.graphql; - -import graphql.schema.DataFetcher; -import graphql.schema.DataFetchingEnvironment; - -import java.lang.reflect.Field; - -/** - * Fetch data from wrapped GTFS entities. Modeled after graphql-java FieldDataFetcher. - */ -public class WrappedEntityFieldFetcher implements DataFetcher { - private final String field; - - public WrappedEntityFieldFetcher (String field) { - this.field = field; - } - - @Override - public Object get(DataFetchingEnvironment dataFetchingEnvironment) { - Object source = dataFetchingEnvironment.getSource(); - - if (source instanceof WrappedGTFSEntity) source = ((WrappedGTFSEntity) source).entity; - - Field field = null; - try { - field = source.getClass().getField(this.field); - } catch (NoSuchFieldException e) { - return null; - } - - try { - return field.get(source); - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/WrappedGTFSEntity.java b/src/main/java/com/conveyal/gtfs/api/graphql/WrappedGTFSEntity.java deleted file mode 100644 index 70763e7b6..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/WrappedGTFSEntity.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.conveyal.gtfs.api.graphql; - -/** - * Wraps a GTFS entity, whose own ID may only be unique within the feed, decorating it with the unique ID of the feed - * it came from. - */ -public class WrappedGTFSEntity { - public T entity; - public String feedUniqueId; - - /** - * Wrap the given GTFS entity with the unique Feed ID specified (this is not generally a GTFS feed ID as they - * are not unique between different versions of the same feed. 
- */ - public WrappedGTFSEntity (String feedUniqueID, T entity) { - this.feedUniqueId = feedUniqueID; - this.entity = entity; - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/FeedFetcher.java b/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/FeedFetcher.java deleted file mode 100644 index 61bf9339a..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/FeedFetcher.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.conveyal.gtfs.api.graphql.fetchers; - -import com.conveyal.gtfs.GTFSFeed; -import com.conveyal.gtfs.api.ApiMain; -import com.conveyal.gtfs.api.graphql.WrappedGTFSEntity; -import com.conveyal.gtfs.model.FeedInfo; -import graphql.schema.DataFetchingEnvironment; -import org.locationtech.jts.geom.Geometry; - -import java.util.List; -import java.util.stream.Collectors; - -/** - * Created by matthewc on 3/10/16. - */ -public class FeedFetcher { - public static List> apex(DataFetchingEnvironment environment) { - List feedId = environment.getArgument("feed_id"); - return ApiMain.getFeedSources(feedId).stream() - .map(fs -> getFeedInfo(fs)) - .collect(Collectors.toList()); - - } - - private static WrappedGTFSEntity getFeedInfo(GTFSFeed feed) { - FeedInfo ret; - if (feed.feedInfo.size() > 0) { - ret = feed.feedInfo.values().iterator().next(); - } else { - ret = new FeedInfo(); - } - // NONE is a special value used in GTFS Lib feed info - if (ret.feed_id == null || "NONE".equals(ret.feed_id)) { - ret = ret.clone(); - ret.feed_id = feed.feedId; - } - return new WrappedGTFSEntity<>(feed.uniqueId, ret); - } - - public static Geometry getMergedBuffer(DataFetchingEnvironment env) { - WrappedGTFSEntity feedInfo = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(feedInfo.feedUniqueId); - if (feed == null) return null; - return feed.getMergedBuffers(); - } - - public static WrappedGTFSEntity forWrappedGtfsEntity (DataFetchingEnvironment env) { - WrappedGTFSEntity feedInfo = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(feedInfo.feedUniqueId); - if (feed == null) return null; - return getFeedInfo(feed); - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/PatternFetcher.java b/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/PatternFetcher.java deleted file mode 100644 index 43f960ac3..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/PatternFetcher.java +++ /dev/null @@ -1,100 +0,0 @@ -package com.conveyal.gtfs.api.graphql.fetchers; - -import com.conveyal.gtfs.GTFSFeed; -import com.conveyal.gtfs.api.ApiMain; -import com.conveyal.gtfs.api.graphql.WrappedGTFSEntity; -import com.conveyal.gtfs.model.Pattern; -import com.conveyal.gtfs.model.Route; -import com.conveyal.gtfs.model.Trip; -import graphql.schema.DataFetchingEnvironment; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -/** - * - * Created by matthewc on 3/9/16. 
- */ -public class PatternFetcher { - private static final Double DEFAULT_RADIUS = 1.0; // default 1 km search radius - - public static List> apex(DataFetchingEnvironment env) { - Collection feeds; - - List feedId = env.getArgument("feed_id"); - feeds = ApiMain.getFeedSources(feedId); - Map args = env.getArguments(); - List> patterns = new ArrayList<>(); - - for (GTFSFeed feed : feeds) { - if (env.getArgument("pattern_id") != null) { - List patternId = env.getArgument("pattern_id"); - patternId.stream() - .filter(feed.patterns::containsKey) - .map(feed.patterns::get) - .map(pattern -> new WrappedGTFSEntity(feed.uniqueId, pattern)) - .forEach(patterns::add); - } - else if (env.getArgument("route_id") != null) { - List routeId = (List) env.getArgument("route_id"); - feed.patterns.values().stream() - .filter(p -> routeId.contains(p.route_id)) - .map(pattern -> new WrappedGTFSEntity(feed.uniqueId, pattern)) - .forEach(patterns::add); - } - } - - return patterns; - } - public static List> fromRoute(DataFetchingEnvironment env) { - WrappedGTFSEntity route = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(route.feedUniqueId); - if (feed == null) return null; - - List stopIds = env.getArgument("stop_id"); - List patternId = env.getArgument("pattern_id"); - Long limit = env.getArgument("limit"); - - List> patterns = feed.patterns.values().stream() - .filter(p -> p.route_id.equals(route.entity.route_id)) - .map(p -> new WrappedGTFSEntity<>(feed.uniqueId, p)) - .collect(Collectors.toList()); - if (patternId != null) { - patterns.stream() - .filter(p -> patternId.contains(p.entity.pattern_id)) - .collect(Collectors.toList()); - } - if (stopIds != null) { - patterns.stream() - .filter(p -> !Collections.disjoint(p.entity.orderedStops, stopIds)) // disjoint returns true if no elements in common - .collect(Collectors.toList()); - } - if (limit != null) { - return patterns.stream().limit(limit).collect(Collectors.toList()); - } - return patterns; - } - - public static Long fromRouteCount(DataFetchingEnvironment env) { - WrappedGTFSEntity route = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(route.feedUniqueId); - if (feed == null) return null; - - return feed.patterns.values().stream() - .filter(p -> p.route_id.equals(route.entity.route_id)) - .count(); - } - - public static WrappedGTFSEntity fromTrip(DataFetchingEnvironment env) { - WrappedGTFSEntity trip = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(trip.feedUniqueId); - if (feed == null) return null; - - Pattern patt = feed.patterns.get(feed.patternForTrip.get(trip.entity.trip_id)); - return new WrappedGTFSEntity<>(feed.uniqueId, patt); - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/RouteFetcher.java b/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/RouteFetcher.java deleted file mode 100644 index 6c989e3a5..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/RouteFetcher.java +++ /dev/null @@ -1,113 +0,0 @@ -package com.conveyal.gtfs.api.graphql.fetchers; - -import com.conveyal.gtfs.GTFSFeed; -import com.conveyal.gtfs.api.ApiMain; -import com.conveyal.gtfs.api.graphql.WrappedGTFSEntity; -import com.conveyal.gtfs.model.FeedInfo; -import com.conveyal.gtfs.model.Pattern; -import com.conveyal.gtfs.model.Route; -import com.conveyal.gtfs.model.Stop; -import graphql.schema.DataFetchingEnvironment; - -import java.util.ArrayList; -import java.util.Collection; -import 
java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -/** - * Created by matthewc on 3/10/16. - */ -public class RouteFetcher { - public static List> apex (DataFetchingEnvironment environment) { - Map args = environment.getArguments(); - - Collection feeds; - - List feedId = (List) args.get("feed_id"); - feeds = ApiMain.getFeedSources(feedId); - - List> routes = new ArrayList<>(); - - // TODO: clear up possible scope issues feed and route IDs - for (GTFSFeed feed : feeds) { - if (args.get("route_id") != null) { - List routeId = (List) args.get("route_id"); - routeId.stream() - .filter(feed.routes::containsKey) - .map(feed.routes::get) - .map(r -> new WrappedGTFSEntity(feed.uniqueId, r)) - .forEach(routes::add); - } - else { - feed.routes.values().stream().map(r -> new WrappedGTFSEntity<>(feed.uniqueId, r)).forEach(routes::add); - } - } - - return routes; - } - - public static List> fromStop(DataFetchingEnvironment environment) { - WrappedGTFSEntity stop = (WrappedGTFSEntity) environment.getSource(); - List routeIds = environment.getArgument("route_id"); - - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(stop.feedUniqueId); - if (feed == null) return null; - - List> routes = feed.patterns.values().stream() - .filter(p -> p.orderedStops.contains(stop.entity.stop_id)) - .map(p -> feed.routes.get(p.route_id)) - .distinct() - .map(r -> new WrappedGTFSEntity<>(feed.uniqueId, r)) - .collect(Collectors.toList()); - - if (routeIds != null) { - return routes.stream() - .filter(r -> routeIds.contains(r.entity.route_id)) - .collect(Collectors.toList()); - } - else { - return routes; - } - } - - public static WrappedGTFSEntity fromPattern(DataFetchingEnvironment env) { - WrappedGTFSEntity pattern = (WrappedGTFSEntity) env.getSource(); - List routeIds = env.getArgument("route_id"); - - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(pattern.feedUniqueId); - if (feed == null) return null; - - return new WrappedGTFSEntity<>(feed.uniqueId, feed.routes.get(pattern.entity.route_id)); - } - - public static List> fromFeed(DataFetchingEnvironment environment) { - WrappedGTFSEntity fi = (WrappedGTFSEntity) environment.getSource(); - List routeIds = environment.getArgument("route_id"); - - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(fi.feedUniqueId); - if (feed == null) return null; - - if (routeIds != null) { - return routeIds.stream() - .filter(id -> id != null && feed.routes.containsKey(id)) - .map(feed.routes::get) - .map(r -> new WrappedGTFSEntity<>(feed.uniqueId, r)) - .collect(Collectors.toList()); - } - else { - return feed.routes.values().stream() - .map(r -> new WrappedGTFSEntity<>(feed.uniqueId, r)) - .collect(Collectors.toList()); - } - } - - public static Long fromFeedCount(DataFetchingEnvironment environment) { - WrappedGTFSEntity fi = (WrappedGTFSEntity) environment.getSource(); - - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(fi.feedUniqueId); - if (feed == null) return null; - - return feed.routes.values().stream().count(); - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/StopFetcher.java b/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/StopFetcher.java deleted file mode 100644 index 3f7cb3b35..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/StopFetcher.java +++ /dev/null @@ -1,135 +0,0 @@ -package com.conveyal.gtfs.api.graphql.fetchers; - -import com.conveyal.gtfs.GTFSFeed; -import com.conveyal.gtfs.api.ApiMain; -import com.conveyal.gtfs.api.graphql.WrappedGTFSEntity; -import 
com.conveyal.gtfs.model.FeedInfo; -import com.conveyal.gtfs.model.Pattern; -import com.conveyal.gtfs.model.Stop; -import graphql.schema.DataFetchingEnvironment; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -/** - * Created by matthewc on 3/9/16. - */ -public class StopFetcher { - private static final Logger LOG = LoggerFactory.getLogger(StopFetcher.class); - private static final Double DEFAULT_RADIUS = 1.0; // default 1 km search radius - - /** top level stops query (i.e. not inside a stoptime etc) */ - public static List> apex(DataFetchingEnvironment env) { - Map args = env.getArguments(); - - Collection feeds; - - List feedId = (List) args.get("feed_id"); - feeds = ApiMain.getFeedSources(feedId); - - List> stops = new ArrayList<>(); - - // TODO: clear up possible scope issues feed and stop IDs - for (GTFSFeed feed : feeds) { - if (args.get("stop_id") != null) { - List stopId = (List) args.get("stop_id"); - stopId.stream() - .filter(id -> id != null && feed.stops.containsKey(id)) - .map(feed.stops::get) - .map(s -> new WrappedGTFSEntity(feed.uniqueId, s)) - .forEach(stops::add); - } - // TODO: should pattern pre-empt route or should they operate together? - else if (args.get("pattern_id") != null) { - List patternId = (List) args.get("pattern_id"); - feed.patterns.values().stream() - .filter(p -> patternId.contains(p.pattern_id)) - .map(p -> feed.getOrderedStopListForTrip(p.associatedTrips.get(0))) - .flatMap(List::stream) - .map(feed.stops::get) - .distinct() - .map(stop -> new WrappedGTFSEntity(feed.uniqueId, stop)) - .forEach(stops::add); - } - else if (args.get("route_id") != null) { - List routeId = (List) args.get("route_id"); - feed.patterns.values().stream() - .filter(p -> routeId.contains(p.route_id)) - .map(p -> feed.getOrderedStopListForTrip(p.associatedTrips.get(0))) - .flatMap(List::stream) - .map(feed.stops::get) - .distinct() - .map(stop -> new WrappedGTFSEntity(feed.uniqueId, stop)) - .forEach(stops::add); - } - } - return stops; - } - - public static List> fromPattern(DataFetchingEnvironment environment) { - WrappedGTFSEntity pattern = (WrappedGTFSEntity) environment.getSource(); - - if (pattern.entity.associatedTrips.isEmpty()) { - LOG.warn("Empty pattern!"); - return Collections.emptyList(); - } - - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(pattern.feedUniqueId); - if (feed == null) return null; - - return feed.getOrderedStopListForTrip(pattern.entity.associatedTrips.get(0)) - .stream() - .map(feed.stops::get) - .map(s -> new WrappedGTFSEntity<>(feed.uniqueId, s)) - .collect(Collectors.toList()); - } - - public static Long fromPatternCount(DataFetchingEnvironment environment) { - WrappedGTFSEntity pattern = (WrappedGTFSEntity) environment.getSource(); - - if (pattern.entity.associatedTrips.isEmpty()) { - LOG.warn("Empty pattern!"); - return 0L; - } - - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(pattern.feedUniqueId); - if (feed == null) return null; - - return feed.getOrderedStopListForTrip(pattern.entity.associatedTrips.get(0)) - .stream().count(); - } - - public static List> fromFeed(DataFetchingEnvironment env) { - WrappedGTFSEntity fi = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(fi.feedUniqueId); - if (feed == null) return null; - - Collection stops = feed.stops.values(); - List stopIds = 
env.getArgument("stop_id"); - - if (stopIds != null) { - return stopIds.stream() - .filter(id -> id != null && feed.stops.containsKey(id)) - .map(feed.stops::get) - .map(s -> new WrappedGTFSEntity<>(feed.uniqueId, s)) - .collect(Collectors.toList()); - } - return stops.stream() - .map(s -> new WrappedGTFSEntity<>(feed.uniqueId, s)) - .collect(Collectors.toList()); - } - - public static Long fromFeedCount(DataFetchingEnvironment env) { - WrappedGTFSEntity fi = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(fi.feedUniqueId); - if (feed == null) return null; - Collection stops = feed.stops.values(); - return stops.stream().count(); - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/StopTimeFetcher.java b/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/StopTimeFetcher.java deleted file mode 100644 index 2ce2c90e1..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/StopTimeFetcher.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.conveyal.gtfs.api.graphql.fetchers; - -import com.conveyal.gtfs.GTFSFeed; -import com.conveyal.gtfs.api.ApiMain; -import com.conveyal.gtfs.api.graphql.WrappedGTFSEntity; -import com.conveyal.gtfs.model.StopTime; -import com.conveyal.gtfs.model.Trip; -import graphql.schema.DataFetchingEnvironment; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import java.util.stream.StreamSupport; - -/** - * Created by matthewc on 3/9/16. - */ -public class StopTimeFetcher { - public static List> apex(DataFetchingEnvironment env) { - Collection feeds; - - List feedId = env.getArgument("feed_id"); - feeds = ApiMain.getFeedSources(feedId); - - List> stopTimes = new ArrayList<>(); - - // TODO: clear up possible scope issues feed and stop IDs - for (GTFSFeed feed : feeds) { - if (env.getArgument("trip_id") != null) { - List tripId = env.getArgument("trip_id"); - tripId.stream() - .map(id -> feed.getOrderedStopTimesForTrip(id)) - .map(st -> new WrappedGTFSEntity(feed.uniqueId, st)) - .forEach(stopTimes::add); - } - } - - return stopTimes; - } - public static List> fromTrip(DataFetchingEnvironment env) { - WrappedGTFSEntity trip = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(trip.feedUniqueId); - if (feed == null) return null; - - List stopIds = env.getArgument("stop_id"); - - // get stop_times in order - Stream stopTimes = StreamSupport.stream(feed.getOrderedStopTimesForTrip(trip.entity.trip_id).spliterator(), false); - if (stopIds != null) { - return stopTimes - .filter(stopTime -> stopIds.contains(stopTime.stop_id)) - .map(st -> new WrappedGTFSEntity<>(feed.uniqueId, st)) - .collect(Collectors.toList()); - } - else { - return stopTimes - .map(st -> new WrappedGTFSEntity<>(feed.uniqueId, st)) - .collect(Collectors.toList()); - } - } - -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/TripDataFetcher.java b/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/TripDataFetcher.java deleted file mode 100644 index 6b242e938..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/fetchers/TripDataFetcher.java +++ /dev/null @@ -1,171 +0,0 @@ -package com.conveyal.gtfs.api.graphql.fetchers; - -import com.conveyal.gtfs.GTFSFeed; -import com.conveyal.gtfs.api.ApiMain; -import com.conveyal.gtfs.api.graphql.WrappedGTFSEntity; -import com.conveyal.gtfs.model.Agency; -import com.conveyal.gtfs.model.Pattern; -import 
com.conveyal.gtfs.model.Route; -import com.conveyal.gtfs.model.StopTime; -import com.conveyal.gtfs.model.Trip; -import graphql.schema.DataFetchingEnvironment; -import org.mapdb.Fun; - -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.ZoneId; -import java.time.ZoneOffset; -import java.time.temporal.ChronoUnit; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -import static spark.Spark.halt; - -public class TripDataFetcher { - public static List> apex(DataFetchingEnvironment env) { - Collection feeds; - - List feedId = (List) env.getArgument("feed_id"); - feeds = ApiMain.getFeedSources(feedId); - - List> trips = new ArrayList<>(); - - // TODO: clear up possible scope issues feed and trip IDs - for (GTFSFeed feed : feeds) { - if (env.getArgument("trip_id") != null) { - List tripId = (List) env.getArgument("trip_id"); - tripId.stream() - .filter(feed.trips::containsKey) - .map(feed.trips::get) - .map(trip -> new WrappedGTFSEntity(feed.uniqueId, trip)) - .forEach(trips::add); - } - else if (env.getArgument("route_id") != null) { - List routeId = (List) env.getArgument("route_id"); - feed.trips.values().stream() - .filter(t -> routeId.contains(t.route_id)) - .map(trip -> new WrappedGTFSEntity(feed.uniqueId, trip)) - .forEach(trips::add); - } - else { - feed.trips.values().stream() - .map(trip -> new WrappedGTFSEntity(feed.uniqueId, trip)) - .forEach(trips::add); - } - } - - return trips; - } - - /** - * Fetch trip data given a route. - */ - public static List> fromRoute(DataFetchingEnvironment dataFetchingEnvironment) { - WrappedGTFSEntity route = (WrappedGTFSEntity) dataFetchingEnvironment.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(route.feedUniqueId); - if (feed == null) return null; - - return feed.trips.values().stream() - .filter(t -> t.route_id.equals(route.entity.route_id)) - .map(t -> new WrappedGTFSEntity<>(feed.uniqueId, t)) - .collect(Collectors.toList()); - } - - public static Long fromRouteCount(DataFetchingEnvironment dataFetchingEnvironment) { - WrappedGTFSEntity route = (WrappedGTFSEntity) dataFetchingEnvironment.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(route.feedUniqueId); - if (feed == null) return null; - - return feed.trips.values().stream() - .filter(t -> t.route_id.equals(route.entity.route_id)) - .count(); - } - - public static WrappedGTFSEntity fromStopTime (DataFetchingEnvironment env) { - WrappedGTFSEntity stopTime = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(stopTime.feedUniqueId); - if (feed == null) return null; - - Trip trip = feed.trips.get(stopTime.entity.trip_id); - - return new WrappedGTFSEntity<>(stopTime.feedUniqueId, trip); - } - - public static List> fromPattern (DataFetchingEnvironment env) { - WrappedGTFSEntity pattern = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(pattern.feedUniqueId); - if (feed == null) return null; - - Long beginTime = env.getArgument("begin_time"); - Long endTime = env.getArgument("end_time"); - - if (beginTime != null && endTime != null) { - String agencyId = feed.routes.get(pattern.entity.route_id).agency_id; - Agency agency = agencyId != null ? 
feed.agency.get(agencyId) : null; - if (beginTime >= endTime) { - halt(404, "end_time must be greater than begin_time."); - } - LocalDateTime beginDateTime = LocalDateTime.ofEpochSecond(beginTime, 0, ZoneOffset.UTC); - int beginSeconds = beginDateTime.getSecond(); - LocalDateTime endDateTime = LocalDateTime.ofEpochSecond(endTime, 0, ZoneOffset.UTC); - int endSeconds = endDateTime.getSecond(); - long days = ChronoUnit.DAYS.between(beginDateTime, endDateTime); - ZoneId zone = agency != null ? ZoneId.of(agency.agency_timezone) : ZoneId.systemDefault(); - Set services = feed.services.values().stream() - .filter(s -> { - for (int i = 0; i < days; i++) { - LocalDate date = beginDateTime.toLocalDate().plusDays(i); - if (s.activeOn(date)) { - return true; - } - } - return false; - }) - .map(s -> s.service_id) - .collect(Collectors.toSet()); - return pattern.entity.associatedTrips.stream().map(feed.trips::get) - .filter(t -> services.contains(t.service_id)) - .map(t -> new WrappedGTFSEntity<>(feed.uniqueId, t)) - .collect(Collectors.toList()); - } - else { - return pattern.entity.associatedTrips.stream().map(feed.trips::get) - .map(t -> new WrappedGTFSEntity<>(feed.uniqueId, t)) - .collect(Collectors.toList()); - } - } - - public static Long fromPatternCount (DataFetchingEnvironment env) { - WrappedGTFSEntity pattern = (WrappedGTFSEntity) env.getSource(); - - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(pattern.feedUniqueId); - if (feed == null) return null; - - return pattern.entity.associatedTrips.stream().map(feed.trips::get).count(); - } - - public static Integer getStartTime(DataFetchingEnvironment env) { - WrappedGTFSEntity trip = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(trip.feedUniqueId); - if (feed == null) return null; - - Map.Entry st = feed.stop_times.ceilingEntry(new Fun.Tuple2(trip.entity.trip_id, null)); - return st != null ? 
st.getValue().departure_time : null; - } - - public static Integer getDuration(DataFetchingEnvironment env) { - WrappedGTFSEntity trip = (WrappedGTFSEntity) env.getSource(); - GTFSFeed feed = ApiMain.getFeedSourceWithoutExceptions(trip.feedUniqueId); - if (feed == null) return null; - - Integer startTime = getStartTime(env); - Map.Entry endStopTime = feed.stop_times.floorEntry(new Fun.Tuple2(trip.entity.trip_id, Fun.HI)); - - if (startTime == null || endStopTime == null || endStopTime.getValue().arrival_time < startTime) return null; - else return endStopTime.getValue().arrival_time - startTime; - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/types/FeedType.java b/src/main/java/com/conveyal/gtfs/api/graphql/types/FeedType.java deleted file mode 100644 index 2385b0dd6..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/types/FeedType.java +++ /dev/null @@ -1,65 +0,0 @@ -package com.conveyal.gtfs.api.graphql.types; - -import com.conveyal.gtfs.api.graphql.fetchers.FeedFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.RouteFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.StopFetcher; -import graphql.schema.GraphQLList; -import graphql.schema.GraphQLObjectType; -import graphql.schema.GraphQLTypeReference; - -import static com.conveyal.gtfs.api.util.GraphQLUtil.lineString; -import static com.conveyal.gtfs.api.util.GraphQLUtil.multiStringArg; -import static com.conveyal.gtfs.api.util.GraphQLUtil.string; -import static graphql.Scalars.GraphQLLong; -import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; -import static graphql.schema.GraphQLObjectType.newObject; - -/** - * Created by landon on 10/3/16. - */ -public class FeedType { - public static GraphQLObjectType build () { - return newObject() - .name("feed") - .description("Provides information for a GTFS feed and access to the entities it contains") - .field(string("feed_id")) - .field(string("feed_publisher_name")) - .field(string("feed_publisher_url")) - .field(string("feed_lang")) - .field(string("feed_version")) - .field(newFieldDefinition() - .name("routes") - .type(new GraphQLList(new GraphQLTypeReference("route"))) - .argument(multiStringArg("route_id")) - .dataFetcher(RouteFetcher::fromFeed) - .build() - ) - .field(newFieldDefinition() - .type(GraphQLLong) - .name("route_count") - .dataFetcher(RouteFetcher::fromFeedCount) - .build() - ) - .field(newFieldDefinition() - .name("stops") - .type(new GraphQLList(new GraphQLTypeReference("stop"))) - .argument(multiStringArg("stop_id")) - .dataFetcher(StopFetcher::fromFeed) - .build() - ) - .field(newFieldDefinition() - .type(GraphQLLong) - .name("stop_count") - .dataFetcher(StopFetcher::fromFeedCount) - .build() - ) - .field(newFieldDefinition() - .name("mergedBuffer") - .type(lineString()) - .description("Merged buffers around all stops in feed") - .dataFetcher(FeedFetcher::getMergedBuffer) - .build() - ) - .build(); - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/types/PatternType.java b/src/main/java/com/conveyal/gtfs/api/graphql/types/PatternType.java deleted file mode 100644 index ccf054eed..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/types/PatternType.java +++ /dev/null @@ -1,86 +0,0 @@ -package com.conveyal.gtfs.api.graphql.types; - -import com.conveyal.gtfs.api.graphql.WrappedEntityFieldFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.RouteFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.StopFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.TripDataFetcher; -import 
graphql.schema.GraphQLList; -import graphql.schema.GraphQLObjectType; -import graphql.schema.GraphQLTypeReference; - -import static com.conveyal.gtfs.api.util.GraphQLUtil.doublee; -import static com.conveyal.gtfs.api.util.GraphQLUtil.feed; -import static com.conveyal.gtfs.api.util.GraphQLUtil.lineString; -import static com.conveyal.gtfs.api.util.GraphQLUtil.longArg; -import static com.conveyal.gtfs.api.util.GraphQLUtil.string; -import static graphql.Scalars.GraphQLLong; -import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; -import static graphql.schema.GraphQLObjectType.newObject; - -/** - * Created by landon on 10/3/16. - */ -public class PatternType { - public static GraphQLObjectType build () { - GraphQLObjectType patternStats = newObject() - .name("patternStats") - .description("Statistics about a pattern") - .field(doublee("headway")) - .field(doublee("avgSpeed")) - .field(doublee("stopSpacing")) - .build(); - - return newObject() - .name("pattern") - .description("A unique sequence of stops that a GTFS route visits") - .field(string("pattern_id")) - .field(string("name")) - .field(string("route_id")) - .field(feed()) - .field(newFieldDefinition() - .name("route") - .description("Route that pattern operates along") - .dataFetcher(RouteFetcher::fromPattern) - .type(new GraphQLTypeReference("route")) - .build() - ) - .field(newFieldDefinition() - .name("stops") - .description("Stops that pattern serves") - .dataFetcher(StopFetcher::fromPattern) - .type(new GraphQLList(new GraphQLTypeReference("stop"))) - .build() - ) - .field(newFieldDefinition() - .type(GraphQLLong) - .description("Count of stops that pattern serves") - .name("stop_count") - .dataFetcher(StopFetcher::fromPatternCount) - .build() - ) - .field(newFieldDefinition() - .type(lineString()) - .description("Geometry that pattern operates along") - .dataFetcher(new WrappedEntityFieldFetcher("geometry")) - .name("geometry") - .build() - ) - .field(newFieldDefinition() - .name("trips") - .description("Trips associated with pattern") - .type(new GraphQLList(new GraphQLTypeReference("trip"))) - .dataFetcher(TripDataFetcher::fromPattern) - .argument(longArg("begin_time")) - .argument(longArg("end_time")) - .build() - ) - .field(newFieldDefinition() - .type(GraphQLLong) - .description("Count of trips associated with pattern") - .name("trip_count") - .dataFetcher(TripDataFetcher::fromPatternCount) - .build() - ) - .build(); - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/types/RouteType.java b/src/main/java/com/conveyal/gtfs/api/graphql/types/RouteType.java deleted file mode 100644 index 92fea837b..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/types/RouteType.java +++ /dev/null @@ -1,75 +0,0 @@ -package com.conveyal.gtfs.api.graphql.types; - -import com.conveyal.gtfs.api.graphql.fetchers.PatternFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.TripDataFetcher; -import graphql.schema.GraphQLList; -import graphql.schema.GraphQLObjectType; -import graphql.schema.GraphQLTypeReference; - -import static com.conveyal.gtfs.api.util.GraphQLUtil.doublee; -import static com.conveyal.gtfs.api.util.GraphQLUtil.feed; -import static com.conveyal.gtfs.api.util.GraphQLUtil.intt; -import static com.conveyal.gtfs.api.util.GraphQLUtil.longArg; -import static com.conveyal.gtfs.api.util.GraphQLUtil.multiStringArg; -import static com.conveyal.gtfs.api.util.GraphQLUtil.string; -import static graphql.Scalars.GraphQLLong; -import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; -import 
static graphql.schema.GraphQLObjectType.newObject; - -/** - * Created by landon on 10/3/16. - */ -public class RouteType { - public static GraphQLObjectType build () { - // routeStats should be modeled after com.conveyal.gtfs.stats.model.RouteStatistic - GraphQLObjectType routeStats = newObject() - .name("routeStats") - .description("Statistics about a route") - .field(doublee("headway")) - .field(doublee("avgSpeed")) - .field(doublee("stopSpacing")) - .build(); - - return newObject() - .name("route") - .description("A GTFS route object") - .field(string("route_id")) - // TODO agency - .field(string("route_short_name")) - .field(string("route_long_name")) - .field(string("route_desc")) - .field(string("route_url")) - // TODO route_type as enum - .field(intt("route_type")) - .field(string("route_color")) - .field(string("route_text_color")) - .field(feed()) - .field(newFieldDefinition() - .type(new GraphQLList(new GraphQLTypeReference("trip"))) - .name("trips") - .dataFetcher(TripDataFetcher::fromRoute) - .build() - ) - .field(newFieldDefinition() - .type(GraphQLLong) - .name("trip_count") - .dataFetcher(TripDataFetcher::fromRouteCount) - .build() - ) - .field(newFieldDefinition() - .type(new GraphQLList(new GraphQLTypeReference("pattern"))) - .name("patterns") - .argument(multiStringArg("stop_id")) - .argument(multiStringArg("pattern_id")) - .argument(longArg("limit")) - .dataFetcher(PatternFetcher::fromRoute) - .build() - ) - .field(newFieldDefinition() - .type(GraphQLLong) - .name("pattern_count") - .dataFetcher(PatternFetcher::fromRouteCount) - .build() - ) - .build(); - }} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/types/StopTimeType.java b/src/main/java/com/conveyal/gtfs/api/graphql/types/StopTimeType.java deleted file mode 100644 index c1877592d..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/types/StopTimeType.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.conveyal.gtfs.api.graphql.types; - -import com.conveyal.gtfs.api.graphql.fetchers.TripDataFetcher; -import graphql.schema.GraphQLObjectType; -import graphql.schema.GraphQLTypeReference; - -import static com.conveyal.gtfs.api.util.GraphQLUtil.*; -import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; -import static graphql.schema.GraphQLObjectType.newObject; - -/** - * Created by landon on 10/3/16. 
- */ -public class StopTimeType { - public static GraphQLObjectType build () { - return newObject() - .name("stopTime") - .field(intt("arrival_time")) - .field(intt("departure_time")) - .field(intt("stop_sequence")) - .field(string("stop_id")) - .field(string("stop_headsign")) - .field(doublee("shape_dist_traveled")) - .field(feed()) - .field(newFieldDefinition() - .name("trip") - .type(new GraphQLTypeReference("trip")) - .dataFetcher(TripDataFetcher::fromStopTime) - .argument(stringArg("date")) - .argument(longArg("from")) - .argument(longArg("to")) - .build() - ) - .build(); - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/types/StopType.java b/src/main/java/com/conveyal/gtfs/api/graphql/types/StopType.java deleted file mode 100644 index a14149179..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/types/StopType.java +++ /dev/null @@ -1,65 +0,0 @@ -package com.conveyal.gtfs.api.graphql.types; - -import com.conveyal.gtfs.api.graphql.fetchers.RouteFetcher; -import graphql.schema.GraphQLList; -import graphql.schema.GraphQLObjectType; -import graphql.schema.GraphQLTypeReference; - -import static com.conveyal.gtfs.api.util.GraphQLUtil.doublee; -import static com.conveyal.gtfs.api.util.GraphQLUtil.feed; -import static com.conveyal.gtfs.api.util.GraphQLUtil.intt; -import static com.conveyal.gtfs.api.util.GraphQLUtil.multiStringArg; -import static com.conveyal.gtfs.api.util.GraphQLUtil.string; -import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; -import static graphql.schema.GraphQLObjectType.newObject; - -/** - * Created by landon on 10/3/16. - */ -public class StopType { - public static GraphQLObjectType build () { - - // transferPerformance should be modeled after com.conveyal.gtfs.stats.model.TransferPerformanceSummary - GraphQLObjectType transferPerformance = newObject() - .name("transferPerformance") - .description("Transfer performance for a stop") - .field(string("fromRoute")) - .field(string("toRoute")) - .field(intt("bestCase")) - .field(intt("worstCase")) - .field(intt("typicalCase")) - .build(); - - // stopStats should be modeled after com.conveyal.gtfs.stats.model.StopStatistic - GraphQLObjectType stopStats = newObject() - .name("stopStats") - .description("Statistics about a stop") - .field(doublee("headway")) - .field(intt("tripCount")) - .field(intt("routeCount")) - .build(); - - return newObject() - .name("stop") - .description("A GTFS stop object") - .field(string("stop_id")) - .field(string("stop_name")) - .field(string("stop_code")) - .field(string("stop_desc")) - .field(doublee("stop_lon")) - .field(doublee("stop_lat")) - .field(string("zone_id")) - .field(string("stop_url")) - .field(string("stop_timezone")) - .field(feed()) - .field(newFieldDefinition() - .name("routes") - .description("The list of routes that serve a stop") - .type(new GraphQLList(new GraphQLTypeReference("route"))) - .argument(multiStringArg("route_id")) - .dataFetcher(RouteFetcher::fromStop) - .build() - ) - .build(); - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/graphql/types/TripType.java b/src/main/java/com/conveyal/gtfs/api/graphql/types/TripType.java deleted file mode 100644 index fcf1c2bad..000000000 --- a/src/main/java/com/conveyal/gtfs/api/graphql/types/TripType.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.conveyal.gtfs.api.graphql.types; - -import com.conveyal.gtfs.api.graphql.fetchers.PatternFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.StopTimeFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.TripDataFetcher; -import 
graphql.schema.GraphQLList; -import graphql.schema.GraphQLObjectType; -import graphql.schema.GraphQLTypeReference; - -import static com.conveyal.gtfs.api.util.GraphQLUtil.*; -import static graphql.Scalars.GraphQLInt; -import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; -import static graphql.schema.GraphQLObjectType.newObject; - -/** - * Created by landon on 10/3/16. - */ -public class TripType { - public static GraphQLObjectType build () { - return newObject() - .name("trip") - .field(string("trip_id")) - .field(string("trip_headsign")) - .field(string("trip_short_name")) - .field(string("block_id")) - .field(intt("direction_id")) - .field(string("route_id")) - .field(feed()) - .field(newFieldDefinition() - .name("pattern") - .type(new GraphQLTypeReference("pattern")) - .dataFetcher(PatternFetcher::fromTrip) - .build() - ) - .field(newFieldDefinition() - .name("stop_times") - .type(new GraphQLList(new GraphQLTypeReference("stopTime"))) - .argument(multiStringArg("stop_id")) - .dataFetcher(StopTimeFetcher::fromTrip) - .build() - ) - // some pseudo-fields to reduce the amount of data that has to be fetched - .field(newFieldDefinition() - .name("start_time") - .type(GraphQLInt) - .dataFetcher(TripDataFetcher::getStartTime) - .build() - ) - .field(newFieldDefinition() - .name("duration") - .type(GraphQLInt) - .dataFetcher(TripDataFetcher::getDuration) - .build() - ) - .build(); - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/util/GeomUtil.java b/src/main/java/com/conveyal/gtfs/api/util/GeomUtil.java deleted file mode 100644 index 731144cfa..000000000 --- a/src/main/java/com/conveyal/gtfs/api/util/GeomUtil.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.conveyal.gtfs.api.util; - -import org.locationtech.jts.geom.Coordinate; -import org.locationtech.jts.geom.CoordinateList; -import org.locationtech.jts.geom.Envelope; - -/** - * Created by landon on 2/8/16. - */ -public class GeomUtil { - public static Envelope getBoundingBox(Coordinate coordinate, Double radius){ - Envelope boundingBox; - - double R = 6371; // earth radius in km - - // radius argument is also in km - - double x1 = coordinate.x - Math.toDegrees(radius/R/Math.cos(Math.toRadians(coordinate.y))); - - double x2 = coordinate.x + Math.toDegrees(radius/R/Math.cos(Math.toRadians(coordinate.y))); - - double y1 = coordinate.y + Math.toDegrees(radius/R); - - double y2 = coordinate.y - Math.toDegrees(radius/R); - - boundingBox = new Envelope(x1, x2, y1, y2); - - return boundingBox; - } -} diff --git a/src/main/java/com/conveyal/gtfs/api/util/GraphQLUtil.java b/src/main/java/com/conveyal/gtfs/api/util/GraphQLUtil.java deleted file mode 100644 index de321b8bc..000000000 --- a/src/main/java/com/conveyal/gtfs/api/util/GraphQLUtil.java +++ /dev/null @@ -1,90 +0,0 @@ -package com.conveyal.gtfs.api.util; - -import com.conveyal.gtfs.api.graphql.GeoJsonCoercing; -import com.conveyal.gtfs.api.graphql.WrappedEntityFieldFetcher; -import com.conveyal.gtfs.api.graphql.fetchers.FeedFetcher; -import graphql.schema.DataFetchingEnvironment; -import graphql.schema.GraphQLArgument; -import graphql.schema.GraphQLFieldDefinition; -import graphql.schema.GraphQLList; -import graphql.schema.GraphQLScalarType; -import graphql.schema.GraphQLTypeReference; - -import static graphql.Scalars.*; -import static graphql.schema.GraphQLArgument.newArgument; -import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; - -/** - * Created by landon on 10/3/16. 
- */ -public class GraphQLUtil { - - public static GraphQLScalarType lineString () { - return new GraphQLScalarType("GeoJSON", "GeoJSON", new GeoJsonCoercing()); - } - - public static GraphQLFieldDefinition string (String name) { - return newFieldDefinition() - .name(name) - .type(GraphQLString) - .dataFetcher(new WrappedEntityFieldFetcher(name)) - .build(); - } - - public static GraphQLFieldDefinition intt (String name) { - return newFieldDefinition() - .name(name) - .type(GraphQLInt) - .dataFetcher(new WrappedEntityFieldFetcher(name)) - .build(); - } - - public static GraphQLFieldDefinition doublee (String name) { - return newFieldDefinition() - .name(name) - .type(GraphQLFloat) - .dataFetcher(new WrappedEntityFieldFetcher(name)) - .build(); - } - - public static GraphQLFieldDefinition feed () { - return newFieldDefinition() - .name("feed") - .description("Containing feed") - .dataFetcher(FeedFetcher::forWrappedGtfsEntity) - .type(new GraphQLTypeReference("feed")) - .build(); - } - - public static GraphQLArgument stringArg (String name) { - return newArgument() - .name(name) - .type(GraphQLString) - .build(); - } - - public static GraphQLArgument multiStringArg (String name) { - return newArgument() - .name(name) - .type(new GraphQLList(GraphQLString)) - .build(); - } - - public static GraphQLArgument floatArg (String name) { - return newArgument() - .name(name) - .type(GraphQLFloat) - .build(); - } - - public static GraphQLArgument longArg (String name) { - return newArgument() - .name(name) - .type(GraphQLLong) - .build(); - } - - public static boolean argumentDefined(DataFetchingEnvironment env, String name) { - return (env.containsArgument(name) && env.getArgument(name) != null); - } -} diff --git a/src/main/java/com/conveyal/gtfs/error/DateParseError.java b/src/main/java/com/conveyal/gtfs/error/DateParseError.java index a02d55c70..566dee393 100644 --- a/src/main/java/com/conveyal/gtfs/error/DateParseError.java +++ b/src/main/java/com/conveyal/gtfs/error/DateParseError.java @@ -9,10 +9,14 @@ public class DateParseError extends GTFSError implements Serializable { public static final long serialVersionUID = 1L; public DateParseError(String file, long line, String field) { - super(file, line, field, Priority.MEDIUM); + super(file, line, field); } @Override public String getMessage() { return "Could not parse date (format should be YYYYMMDD)."; } + + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/DuplicateKeyError.java b/src/main/java/com/conveyal/gtfs/error/DuplicateKeyError.java index fda04d977..9630449f4 100644 --- a/src/main/java/com/conveyal/gtfs/error/DuplicateKeyError.java +++ b/src/main/java/com/conveyal/gtfs/error/DuplicateKeyError.java @@ -9,10 +9,14 @@ public class DuplicateKeyError extends GTFSError implements Serializable { public static final long serialVersionUID = 1L; public DuplicateKeyError(String file, long line, String field) { - super(file, line, field, Priority.MEDIUM); + super(file, line, field); } @Override public String getMessage() { return "Duplicate primary key."; } + + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/DuplicateStopError.java b/src/main/java/com/conveyal/gtfs/error/DuplicateStopError.java index 85007c8ae..1d448a1fb 100644 --- a/src/main/java/com/conveyal/gtfs/error/DuplicateStopError.java +++ b/src/main/java/com/conveyal/gtfs/error/DuplicateStopError.java @@ -13,7 +13,7 @@ public class 
DuplicateStopError extends GTFSError implements Serializable { public final DuplicateStops duplicateStop; public DuplicateStopError(DuplicateStops duplicateStop) { - super("stop", duplicateStop.getDuplicatedStop().sourceFileLine, "stop_lat,stop_lon", Priority.MEDIUM, duplicateStop.getDuplicatedStop().stop_id); + super("stop", duplicateStop.getDuplicatedStop().sourceFileLine, "stop_lat,stop_lon", duplicateStop.getDuplicatedStop().stop_id); this.message = duplicateStop.toString(); this.duplicateStop = duplicateStop; } @@ -21,4 +21,8 @@ public DuplicateStopError(DuplicateStops duplicateStop) { @Override public String getMessage() { return message; } + + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/DuplicateTripError.java b/src/main/java/com/conveyal/gtfs/error/DuplicateTripError.java index 7c35a2d32..964b5d5f9 100644 --- a/src/main/java/com/conveyal/gtfs/error/DuplicateTripError.java +++ b/src/main/java/com/conveyal/gtfs/error/DuplicateTripError.java @@ -20,7 +20,7 @@ public class DuplicateTripError extends GTFSError implements Serializable { String lastArrival; public DuplicateTripError(Trip trip, long line, String duplicateTripId, String patternName, String firstDeparture, String lastArrival) { - super("trips", line, "trip_id", Priority.MEDIUM, trip.trip_id); + super("trips", line, "trip_id", trip.trip_id); this.duplicateTripId = duplicateTripId; this.patternName = patternName; this.routeId = trip.route_id; @@ -33,4 +33,8 @@ public DuplicateTripError(Trip trip, long line, String duplicateTripId, String p @Override public String getMessage() { return String.format("Trip Ids %s & %s (route %s) are duplicates (pattern: %s, calendar: %s, from %s to %s)", duplicateTripId, affectedEntityId, routeId, patternName, serviceId, firstDeparture, lastArrival); } + + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/EmptyFieldError.java b/src/main/java/com/conveyal/gtfs/error/EmptyFieldError.java index badfb0ab0..4703df572 100644 --- a/src/main/java/com/conveyal/gtfs/error/EmptyFieldError.java +++ b/src/main/java/com/conveyal/gtfs/error/EmptyFieldError.java @@ -9,11 +9,14 @@ public class EmptyFieldError extends GTFSError implements Serializable { public static final long serialVersionUID = 1L; public EmptyFieldError(String file, long line, String field) { - super(file, line, field, Priority.MEDIUM); + super(file, line, field); } @Override public String getMessage() { return String.format("No value supplied for a required column."); } + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/EmptyTableError.java b/src/main/java/com/conveyal/gtfs/error/EmptyTableError.java index f3fda5d4d..60f1e0187 100644 --- a/src/main/java/com/conveyal/gtfs/error/EmptyTableError.java +++ b/src/main/java/com/conveyal/gtfs/error/EmptyTableError.java @@ -11,10 +11,14 @@ public class EmptyTableError extends GTFSError implements Serializable { public static final long serialVersionUID = 1L; public EmptyTableError(String file) { - super(file, 0, null, Priority.MEDIUM); + super(file, 0, null); } @Override public String getMessage() { return String.format("Table is present in zip file, but it has no entries."); } + + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/GTFSError.java b/src/main/java/com/conveyal/gtfs/error/GTFSError.java 
index 95a41e4c6..84680a82d 100644 --- a/src/main/java/com/conveyal/gtfs/error/GTFSError.java +++ b/src/main/java/com/conveyal/gtfs/error/GTFSError.java @@ -18,21 +18,17 @@ public abstract class GTFSError implements Comparable, Serializable { public final String field; public final String affectedEntityId; public final String errorType; - // NOTE: Do not remove this field. Though this field is somewhat redundant (since every instance of each class has - // the same priority) we have old MapDB files around that contain serialized errors. They would all break. - public final Priority priority; - public GTFSError(String file, long line, String field, Priority priority) { - this(file, line, field, priority, null); + public GTFSError(String file, long line, String field) { + this(file, line, field, null); } - public GTFSError(String file, long line, String field, Priority priority, String affectedEntityId) { + public GTFSError(String file, long line, String field, String affectedEntityId) { this.file = file; this.line = line; this.field = field; this.affectedEntityId = affectedEntityId; this.errorType = this.getClass().getSimpleName(); - this.priority = priority; } /** @@ -44,7 +40,6 @@ public GTFSError (String entityId) { this.field = null; this.errorType = null; this.affectedEntityId = entityId; - this.priority = Priority.UNKNOWN; } /** @@ -55,6 +50,13 @@ public final String getErrorCode () { return this.getClass().getSimpleName(); } + /** + * @return The Error priority level associated with this class. + */ + public Priority getPriority() { + return Priority.UNKNOWN; + } + /** * @return a Class object for the class of GTFS entity in which the error was found, * which also implies a table in the GTFS feed. diff --git a/src/main/java/com/conveyal/gtfs/error/GeneralError.java b/src/main/java/com/conveyal/gtfs/error/GeneralError.java index 54eb270b3..35219eb21 100644 --- a/src/main/java/com/conveyal/gtfs/error/GeneralError.java +++ b/src/main/java/com/conveyal/gtfs/error/GeneralError.java @@ -11,7 +11,7 @@ public class GeneralError extends GTFSError implements Serializable { private String message; public GeneralError(String file, long line, String field, String message) { - super(file, line, field, Priority.UNKNOWN); + super(file, line, field); this.message = message; } @@ -19,4 +19,7 @@ public GeneralError(String file, long line, String field, String message) { return message; } + @Override public Priority getPriority() { + return Priority.UNKNOWN; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/MisplacedStopError.java b/src/main/java/com/conveyal/gtfs/error/MisplacedStopError.java index 99189aa79..991289ec6 100644 --- a/src/main/java/com/conveyal/gtfs/error/MisplacedStopError.java +++ b/src/main/java/com/conveyal/gtfs/error/MisplacedStopError.java @@ -11,16 +11,18 @@ public class MisplacedStopError extends GTFSError implements Serializable { public static final long serialVersionUID = 1L; - public final Priority priority; public final Stop stop; public MisplacedStopError(String affectedEntityId, long line, Stop stop) { - super("stops", line, "stop_id", Priority.MEDIUM, affectedEntityId); - this.priority = Priority.HIGH; + super("stops", line, "stop_id", affectedEntityId); this.stop = stop; } @Override public String getMessage() { return String.format("Stop Id %s is misplaced.", affectedEntityId); } + + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/MissingColumnError.java 
b/src/main/java/com/conveyal/gtfs/error/MissingColumnError.java index fe9c28f6b..24022a66a 100644 --- a/src/main/java/com/conveyal/gtfs/error/MissingColumnError.java +++ b/src/main/java/com/conveyal/gtfs/error/MissingColumnError.java @@ -9,11 +9,14 @@ public class MissingColumnError extends GTFSError implements Serializable { public static final long serialVersionUID = 1L; public MissingColumnError(String file, String field) { - super(file, 1, field, Priority.MEDIUM); + super(file, 1, field); } @Override public String getMessage() { return String.format("Missing required column."); } + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/MissingShapeError.java b/src/main/java/com/conveyal/gtfs/error/MissingShapeError.java index 3ddeb8c75..9487d8551 100644 --- a/src/main/java/com/conveyal/gtfs/error/MissingShapeError.java +++ b/src/main/java/com/conveyal/gtfs/error/MissingShapeError.java @@ -12,10 +12,14 @@ public class MissingShapeError extends GTFSError implements Serializable { public static final long serialVersionUID = 1L; public MissingShapeError(Trip trip) { - super("trips", trip.sourceFileLine, "shape_id", Priority.LOW, trip.trip_id); + super("trips", trip.sourceFileLine, "shape_id", trip.trip_id); } @Override public String getMessage() { return "Trip " + affectedEntityId + " is missing a shape"; } + + @Override public Priority getPriority() { + return Priority.LOW; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/MissingTableError.java b/src/main/java/com/conveyal/gtfs/error/MissingTableError.java index 0ff547881..6a756a8b2 100644 --- a/src/main/java/com/conveyal/gtfs/error/MissingTableError.java +++ b/src/main/java/com/conveyal/gtfs/error/MissingTableError.java @@ -9,11 +9,14 @@ public class MissingTableError extends GTFSError implements Serializable { public static final long serialVersionUID = 1L; public MissingTableError(String file) { - super(file, 0, null, Priority.MEDIUM); + super(file, 0, null); } @Override public String getMessage() { return String.format("This table is required by the GTFS specification but is missing."); } + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/NoAgencyInFeedError.java b/src/main/java/com/conveyal/gtfs/error/NoAgencyInFeedError.java index 1d06d727f..e82c6895d 100644 --- a/src/main/java/com/conveyal/gtfs/error/NoAgencyInFeedError.java +++ b/src/main/java/com/conveyal/gtfs/error/NoAgencyInFeedError.java @@ -7,10 +7,14 @@ */ public class NoAgencyInFeedError extends GTFSError { public NoAgencyInFeedError() { - super("agency", 0, "agency_id", Priority.HIGH); + super("agency", 0, "agency_id"); } @Override public String getMessage() { return String.format("No agency listed in feed (must have at least one)."); } + + @Override public Priority getPriority() { + return Priority.HIGH; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/NumberParseError.java b/src/main/java/com/conveyal/gtfs/error/NumberParseError.java index ff6bf6085..c691c911a 100644 --- a/src/main/java/com/conveyal/gtfs/error/NumberParseError.java +++ b/src/main/java/com/conveyal/gtfs/error/NumberParseError.java @@ -9,11 +9,14 @@ public class NumberParseError extends GTFSError implements Serializable { public static final long serialVersionUID = 1L; public NumberParseError(String file, long line, String field) { - super(file, line, field, Priority.HIGH); + super(file, line, field); } @Override public String getMessage() { 
return String.format("Error parsing a number from a string."); } + @Override public Priority getPriority() { + return Priority.HIGH; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/OverlappingTripsInBlockError.java b/src/main/java/com/conveyal/gtfs/error/OverlappingTripsInBlockError.java deleted file mode 100644 index e4e6ea627..000000000 --- a/src/main/java/com/conveyal/gtfs/error/OverlappingTripsInBlockError.java +++ /dev/null @@ -1,26 +0,0 @@ -package com.conveyal.gtfs.error; - -import com.conveyal.gtfs.validator.model.Priority; - -import java.io.Serializable; - -/** - * Created by landon on 5/6/16. - */ -public class OverlappingTripsInBlockError extends GTFSError implements Serializable { - public static final long serialVersionUID = 1L; - - public final String[] tripIds; - public final Priority priority = Priority.HIGH; - public final String routeId; - - public OverlappingTripsInBlockError(long line, String field, String affectedEntityId, String routeId, String[] tripIds) { - super("trips", line, field, Priority.MEDIUM, affectedEntityId); - this.tripIds = tripIds; - this.routeId = routeId; - } - - @Override public String getMessage() { - return String.format("Trip Ids %s overlap (route: %s) and share block ID %s", String.join(" & ", tripIds), routeId, affectedEntityId); - } -} diff --git a/src/main/java/com/conveyal/gtfs/error/RangeError.java b/src/main/java/com/conveyal/gtfs/error/RangeError.java index 2411f40d7..42a0f51e6 100644 --- a/src/main/java/com/conveyal/gtfs/error/RangeError.java +++ b/src/main/java/com/conveyal/gtfs/error/RangeError.java @@ -11,7 +11,7 @@ public class RangeError extends GTFSError implements Serializable { final double min, max, actual; public RangeError(String file, long line, String field, double min, double max, double actual) { - super(file, line, field, Priority.LOW); + super(file, line, field); this.min = min; this.max = max; this.actual = actual; @@ -21,4 +21,7 @@ public RangeError(String file, long line, String field, double min, double max, return String.format("Number %s outside of acceptable range [%s,%s].", actual, min, max); } + @Override public Priority getPriority() { + return Priority.LOW; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/ReferentialIntegrityError.java b/src/main/java/com/conveyal/gtfs/error/ReferentialIntegrityError.java index 5ada67d82..0ef7edad0 100644 --- a/src/main/java/com/conveyal/gtfs/error/ReferentialIntegrityError.java +++ b/src/main/java/com/conveyal/gtfs/error/ReferentialIntegrityError.java @@ -12,7 +12,7 @@ public class ReferentialIntegrityError extends GTFSError implements Serializable public final String badReference; public ReferentialIntegrityError(String tableName, long row, String field, String badReference) { - super(tableName, row, field, Priority.HIGH); + super(tableName, row, field); this.badReference = badReference; } @@ -27,4 +27,8 @@ public int compareTo (GTFSError o) { @Override public String getMessage() { return String.format(badReference); } + + @Override public Priority getPriority() { + return Priority.HIGH; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/ReversedTripShapeError.java b/src/main/java/com/conveyal/gtfs/error/ReversedTripShapeError.java index 9ce7c714f..c29e2439e 100644 --- a/src/main/java/com/conveyal/gtfs/error/ReversedTripShapeError.java +++ b/src/main/java/com/conveyal/gtfs/error/ReversedTripShapeError.java @@ -14,11 +14,15 @@ public class ReversedTripShapeError extends GTFSError implements Serializable { public final String shapeId; public 
ReversedTripShapeError(Trip trip) { - super("trips", trip.sourceFileLine, "shape_id", Priority.HIGH, trip.trip_id); + super("trips", trip.sourceFileLine, "shape_id", trip.trip_id); this.shapeId = trip.shape_id; } @Override public String getMessage() { return "Trip " + affectedEntityId + " references reversed shape " + shapeId; } + + @Override public Priority getPriority() { + return Priority.HIGH; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/ShapeMissingCoordinatesError.java b/src/main/java/com/conveyal/gtfs/error/ShapeMissingCoordinatesError.java index 3a7df095a..8ece94a15 100644 --- a/src/main/java/com/conveyal/gtfs/error/ShapeMissingCoordinatesError.java +++ b/src/main/java/com/conveyal/gtfs/error/ShapeMissingCoordinatesError.java @@ -14,11 +14,15 @@ public class ShapeMissingCoordinatesError extends GTFSError implements Serializa public final String[] tripIds; public ShapeMissingCoordinatesError(ShapePoint shapePoint, String[] tripIds) { - super("shapes", shapePoint.sourceFileLine, "shape_id", Priority.MEDIUM, shapePoint.shape_id); + super("shapes", shapePoint.sourceFileLine, "shape_id", shapePoint.shape_id); this.tripIds = tripIds; } @Override public String getMessage() { return "Shape " + affectedEntityId + " is missing coordinates (affects " + tripIds.length + " trips)"; } + + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } \ No newline at end of file diff --git a/src/main/java/com/conveyal/gtfs/error/TableInSubdirectoryError.java b/src/main/java/com/conveyal/gtfs/error/TableInSubdirectoryError.java index ed4ac0d84..e415ef6c8 100644 --- a/src/main/java/com/conveyal/gtfs/error/TableInSubdirectoryError.java +++ b/src/main/java/com/conveyal/gtfs/error/TableInSubdirectoryError.java @@ -13,11 +13,15 @@ public class TableInSubdirectoryError extends GTFSError implements Serializable public final String directory; public TableInSubdirectoryError(String file, String directory) { - super(file, 0, null, Priority.HIGH); + super(file, 0, null); this.directory = directory; } @Override public String getMessage() { return String.format("All GTFS files (including %s.txt) should be at root of zipfile, not nested in subdirectory (%s)", file, directory); } + + @Override public Priority getPriority() { + return Priority.HIGH; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/TimeParseError.java b/src/main/java/com/conveyal/gtfs/error/TimeParseError.java index d6579e715..6dcc4b282 100644 --- a/src/main/java/com/conveyal/gtfs/error/TimeParseError.java +++ b/src/main/java/com/conveyal/gtfs/error/TimeParseError.java @@ -9,11 +9,14 @@ public class TimeParseError extends GTFSError implements Serializable { public static final long serialVersionUID = 1L; public TimeParseError(String file, long line, String field) { - super(file, line, field, Priority.MEDIUM); + super(file, line, field); } @Override public String getMessage() { return "Could not parse time (format should be HH:MM:SS)."; } + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/TimeZoneError.java b/src/main/java/com/conveyal/gtfs/error/TimeZoneError.java index a1b9155c8..5a9710f2a 100644 --- a/src/main/java/com/conveyal/gtfs/error/TimeZoneError.java +++ b/src/main/java/com/conveyal/gtfs/error/TimeZoneError.java @@ -22,11 +22,15 @@ public class TimeZoneError extends GTFSError implements Serializable { * @param message description of issue with timezone reference */ public TimeZoneError(String tableName, long line, String field, 
String affectedEntityId, String message) { - super(tableName, line, field, Priority.MEDIUM, affectedEntityId); + super(tableName, line, field, affectedEntityId); this.message = message; } @Override public String getMessage() { return message + ". (" + field + ": " + affectedEntityId + ")"; } + + @Override public Priority getPriority() { + return Priority.MEDIUM; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/URLParseError.java b/src/main/java/com/conveyal/gtfs/error/URLParseError.java index 6a38818a5..6157e861c 100644 --- a/src/main/java/com/conveyal/gtfs/error/URLParseError.java +++ b/src/main/java/com/conveyal/gtfs/error/URLParseError.java @@ -9,11 +9,14 @@ public class URLParseError extends GTFSError implements Serializable { public static final long serialVersionUID = 1L; public URLParseError(String file, long line, String field) { - super(file, line, field, Priority.LOW); + super(file, line, field); } @Override public String getMessage() { return "Could not parse URL (format should be ://?#)."; } + @Override public Priority getPriority() { + return Priority.LOW; + } } diff --git a/src/main/java/com/conveyal/gtfs/error/UnusedStopError.java b/src/main/java/com/conveyal/gtfs/error/UnusedStopError.java index 16c237c93..be2c9ea47 100644 --- a/src/main/java/com/conveyal/gtfs/error/UnusedStopError.java +++ b/src/main/java/com/conveyal/gtfs/error/UnusedStopError.java @@ -12,11 +12,15 @@ public class UnusedStopError extends GTFSError implements Serializable { public final Stop stop; public UnusedStopError(Stop stop) { - super("stops", stop.sourceFileLine, "stop_id", Priority.LOW, stop.stop_id); + super("stops", stop.sourceFileLine, "stop_id", stop.stop_id); this.stop = stop; } @Override public String getMessage() { return String.format("Stop Id %s is not used in any trips.", affectedEntityId); } + + @Override public Priority getPriority() { + return Priority.LOW; + } } diff --git a/src/main/java/com/conveyal/osmlib/OSM.java b/src/main/java/com/conveyal/osmlib/OSM.java index f690bb18d..4d1972a20 100644 --- a/src/main/java/com/conveyal/osmlib/OSM.java +++ b/src/main/java/com/conveyal/osmlib/OSM.java @@ -69,7 +69,7 @@ public class OSM implements OSMEntitySource, OSMEntitySink { /* If true, track which nodes are referenced by more than one way. */ public boolean intersectionDetection = false; - /** If true we are reading already filled OSM mapdv **/ + /** If true we are reading already filled OSM mapdb **/ private boolean reading = false; /** diff --git a/src/main/java/com/conveyal/r5/SoftwareVersion.java b/src/main/java/com/conveyal/r5/SoftwareVersion.java index f1f0acc01..4389afa74 100644 --- a/src/main/java/com/conveyal/r5/SoftwareVersion.java +++ b/src/main/java/com/conveyal/r5/SoftwareVersion.java @@ -20,7 +20,7 @@ public class SoftwareVersion { private static final String UNKNOWN = "UNKNOWN"; // This could potentially be made into a Component so it's non-static - public static SoftwareVersion instance = new SoftwareVersion(); + public static SoftwareVersion instance = new SoftwareVersion("r5"); private final Properties properties = new Properties(); @@ -29,7 +29,10 @@ public class SoftwareVersion { public final String commit; public final String branch; - protected SoftwareVersion () { + // Which software product is this a version of? Provides a scope or context for the version and commit strings. 
+ public final String name; + + protected SoftwareVersion (String productName) { try (InputStream is = getClass().getResourceAsStream(VERSION_PROPERTIES_FILE)) { properties.load(is); } catch (IOException | NullPointerException e) { @@ -38,6 +41,7 @@ protected SoftwareVersion () { version = getPropertyOrUnknown("version"); commit = getPropertyOrUnknown("commit"); branch = getPropertyOrUnknown("branch"); + name = productName; } /** diff --git a/src/main/java/com/conveyal/r5/analyst/BootstrappingTravelTimeReducer.java b/src/main/java/com/conveyal/r5/analyst/BootstrappingTravelTimeReducer.java index b8a8db924..058644a6e 100644 --- a/src/main/java/com/conveyal/r5/analyst/BootstrappingTravelTimeReducer.java +++ b/src/main/java/com/conveyal/r5/analyst/BootstrappingTravelTimeReducer.java @@ -65,8 +65,8 @@ public void recordTravelTimesForTarget(int target, int[] travelTimesForTarget) { // We use the size of the grid to determine the number of destinations used in the linked point set in // TravelTimeComputer, therefore the target indices are relative to the grid, not the task. // TODO verify that the above is still accurate - int gridx = target % grid.width; - int gridy = target / grid.width; + int gridx = target % grid.extents.width; + int gridy = target / grid.extents.width; double opportunityCountAtTarget = grid.grid[gridx][gridy]; // As an optimization, don't even bother to check whether cells that contain no opportunities are reachable. diff --git a/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java b/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java index d580fae67..0cce92dd6 100644 --- a/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java +++ b/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java @@ -18,6 +18,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; +import static com.conveyal.r5.common.GeometryUtils.checkWgsEnvelopeSize; import static com.conveyal.r5.streets.VertexStore.fixedDegreesToFloating; /** @@ -118,7 +119,7 @@ public static FreeFormPointSet fromCsv ( // If count column was specified and present, use it. Otherwise, one opportunity per point. ret.counts[rec] = countCol < 0 ? 1D : Double.parseDouble(reader.get(countCol)); } - Grid.checkWgsEnvelopeSize(ret.getWgsEnvelope()); + checkWgsEnvelopeSize(ret.getWgsEnvelope(), "freeform pointset"); return ret; } catch (NumberFormatException nfe) { throw new ParameterException( diff --git a/src/main/java/com/conveyal/r5/analyst/Grid.java b/src/main/java/com/conveyal/r5/analyst/Grid.java index f741ca692..7a11f595e 100644 --- a/src/main/java/com/conveyal/r5/analyst/Grid.java +++ b/src/main/java/com/conveyal/r5/analyst/Grid.java @@ -1,5 +1,6 @@ package com.conveyal.r5.analyst; +import com.conveyal.analysis.datasource.DataSourceException; import com.conveyal.r5.common.GeometryUtils; import com.conveyal.r5.util.InputStreamProvider; import com.conveyal.r5.util.ProgressListener; @@ -62,6 +63,8 @@ import java.util.concurrent.atomic.AtomicInteger; import static com.conveyal.gtfs.util.Util.human; +import static com.conveyal.r5.common.GeometryUtils.checkWgsEnvelopeSize; +import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import static java.lang.Double.parseDouble; import static org.apache.commons.math3.util.FastMath.atan; @@ -85,51 +88,28 @@ public class Grid extends PointSet { public static final String COUNT_COLUMN_NAME = "[COUNT]"; - /** The web mercator zoom level for this grid. 
*/ - public final int zoom; - - /* The following fields establish the position of this sub-grid within the full worldwide web mercator grid. */ - - /** - * The pixel number of the northernmost pixel in this grid (smallest y value in web Mercator, - * because y increases from north to south in web Mercator). - */ - public final int north; - - /** The pixel number of the westernmost pixel in this grid (smallest x value). */ - public final int west; - - /** The width of the grid in web Mercator pixels. */ - public final int width; - - /** The height of the grid in web Mercator pixels. */ - public final int height; + public final WebMercatorExtents extents; /** * The data values for each pixel within this grid. Dimension order is (x, y), with range [0, width) and [0, height). */ public final double[][] grid; - /** Maximum area allowed for the bounding box of an uploaded shapefile -- large enough for New York State. */ - private static final double MAX_BOUNDING_BOX_AREA_SQ_KM = 250_000; - /** Maximum area allowed for features in a shapefile upload */ private static final double MAX_FEATURE_AREA_SQ_DEG = 2; - /** - * Used when reading a saved grid. - */ - public Grid (int zoom, int width, int height, int north, int west) { - this.zoom = zoom; - this.width = width; - this.height = height; - this.north = north; - this.west = west; - this.grid = new double[width][height]; + /** Limit on number of pixels, to prevent OOME when multiple large grids are being created (width * height * + * number of layers/attributes) */ + private static final int MAX_PIXELS = 10_000 * 10_000 * 10; + + /** Used when reading a saved grid. */ + public Grid (int west, int north, int width, int height, int zoom) { + this(new WebMercatorExtents(west, north, width, height, zoom)); } public Grid (WebMercatorExtents extents) { - this(extents.zoom, extents.width, extents.height, extents.north, extents.west); + this.extents = extents; + this.grid = new double[extents.width][extents.height]; } /** @@ -137,14 +117,7 @@ public Grid (WebMercatorExtents extents) { * @param wgsEnvelope Envelope of grid, in absolute WGS84 lat/lon coordinates */ public Grid (int zoom, Envelope wgsEnvelope) { - WebMercatorExtents webMercatorExtents = WebMercatorExtents.forWgsEnvelope(wgsEnvelope, zoom); - // TODO actually store a reference to an immutable WebMercatorExtents instead of inlining the fields in Grid. - this.zoom = webMercatorExtents.zoom; - this.west = webMercatorExtents.west; - this.north = webMercatorExtents.north; - this.width = webMercatorExtents.width; - this.height = webMercatorExtents.height; - this.grid = new double[width][height]; + this(WebMercatorExtents.forWgsEnvelope(wgsEnvelope, zoom)); } public static class PixelWeight { @@ -205,21 +178,21 @@ public List getPixelWeights (Geometry geometry, boolean relativeToP Envelope env = geometry.getEnvelopeInternal(); - for (int worldy = latToPixel(env.getMaxY(), zoom); worldy <= latToPixel(env.getMinY(), zoom); worldy++) { + for (int worldy = latToPixel(env.getMaxY(), extents.zoom); worldy <= latToPixel(env.getMinY(), extents.zoom); worldy++) { // NB web mercator Y is reversed relative to latitude. // Iterate over longitude (x) in the inner loop to avoid repeat calculations of pixel areas, which should be // equal at a given latitude (y) double pixelAreaAtLat = -1; //Set to -1 to recalculate pixelArea at each latitude. 
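With the position fields collapsed into a single `WebMercatorExtents`, all of the pixel arithmetic below goes through `extents`. A hedged sketch of the world-pixel to local-cell mapping these hunks switch to, mirroring `incrementPoint`; the numbers are made up for illustration:

```java
// Illustrative values; real extents usually come from WebMercatorExtents.forWgsEnvelope(envelope, zoom).
WebMercatorExtents extents = new WebMercatorExtents(330, 460, 10, 8, 9); // west, north, width, height, zoom
Grid grid = new Grid(extents);

int worldx = 334;                  // absolute web Mercator pixel column at extents.zoom
int worldy = 463;                  // absolute web Mercator pixel row (increases southward)
int x = worldx - extents.west;     // 4: local column within this grid
int y = worldy - extents.north;    // 3: local row within this grid
if (x >= 0 && x < extents.width && y >= 0 && y < extents.height) {
    grid.grid[x][y] += 1;          // burn one opportunity into the cell, as incrementPoint does
}
```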
- for (int worldx = lonToPixel(env.getMinX(), zoom); worldx <= lonToPixel(env.getMaxX(), zoom); worldx++) { + for (int worldx = lonToPixel(env.getMinX(), extents.zoom); worldx <= lonToPixel(env.getMaxX(), extents.zoom); worldx++) { - int x = worldx - west; - int y = worldy - north; + int x = worldx - extents.west; + int y = worldy - extents.north; - if (x < 0 || x >= width || y < 0 || y >= height) continue; // off the grid + if (x < 0 || x >= extents.width || y < 0 || y >= extents.height) continue; // off the grid - Geometry pixel = getPixelGeometry(x + west, y + north, zoom); + Geometry pixel = getPixelGeometry(x , y , extents); if (pixelAreaAtLat == -1) pixelAreaAtLat = pixel.getArea(); //Recalculate for a new latitude. // Pixel completely within feature: @@ -268,11 +241,11 @@ public void incrementFromPixelWeights (List weights, double value) * Burn point data into the grid. */ private void incrementPoint (double lat, double lon, double amount) { - int worldx = lonToPixel(lon, zoom); - int worldy = latToPixel(lat, zoom); - int x = worldx - west; - int y = worldy - north; - if (x >= 0 && x < width && y >= 0 && y < height) { + int worldx = lonToPixel(lon, extents.zoom); + int worldy = latToPixel(lat, extents.zoom); + int x = worldx - extents.west; + int y = worldy - extents.north; + if (x >= 0 && x < extents.width && y >= 0 && y < extents.height) { grid[x][y] += amount; } else { LOG.warn("{} opportunities are outside regional bounds, at {}, {}", amount, lon, lat); @@ -306,19 +279,19 @@ public void write (OutputStream outputStream) throws IOException { // On almost all current hardware this is little-endian. Guava saves us again. LittleEndianDataOutputStream out = new LittleEndianDataOutputStream(outputStream); // A header consisting of six 4-byte integers specifying the zoom level and bounds. - out.writeInt(zoom); - out.writeInt(west); - out.writeInt(north); - out.writeInt(width); - out.writeInt(height); + out.writeInt(extents.zoom); + out.writeInt(extents.west); + out.writeInt(extents.north); + out.writeInt(extents.width); + out.writeInt(extents.height); // The rest of the file is 32-bit integers in row-major order (x changes faster than y), delta-coded. // Delta coding and error diffusion are reset on each row to avoid wrapping. int prev = 0; - for (int y = 0; y < height; y++) { + for (int y = 0; y < extents.height; y++) { // Reset error on each row to avoid diffusing to distant locations. // An alternative is to use serpentine iteration or iterative diffusion. 
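Note that `getPixelGeometry` is now called with local grid coordinates plus the extents, rather than absolute world pixel numbers and a zoom level (the updated signature and javadoc appear in a later hunk of this file). A one-line usage sketch:

```java
// Outline of the grid's north-west cell (local x = 0, y = 0) as a closed WGS84 polygon (JTS Polygon).
Polygon northWestCell = Grid.getPixelGeometry(0, 0, grid.getWebMercatorExtents());
```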
double error = 0; - for (int x = 0; x < width; x++) { + for (int x = 0; x < extents.width; x++) { double val = grid[x][y]; checkState(val >= 0, "Opportunity density should never be negative."); val += error; @@ -336,9 +309,9 @@ public void write (OutputStream outputStream) throws IOException { /** Write this grid out in GeoTIFF format */ public void writeGeotiff (OutputStream out) { try { - float[][] data = new float[height][width]; - for (int x = 0; x < width; x++) { - for (int y = 0; y < height; y++) { + float[][] data = new float[extents.height][extents.width]; + for (int x = 0; x < extents.width; x++) { + for (int y = 0; y < extents.height; y++) { data[y][x] = (float) grid[x][y]; } } @@ -369,7 +342,7 @@ public static Grid read (InputStream inputStream) throws IOException { int width = data.readInt(); int height = data.readInt(); - Grid grid = new Grid(zoom, width, height, north, west); + Grid grid = new Grid(west, north, width, height, zoom); // loop in row-major order for (int y = 0, value = 0; y < height; y++) { @@ -395,11 +368,11 @@ public void writePng(OutputStream outputStream) throws IOException { } } - BufferedImage img = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY); + BufferedImage img = new BufferedImage(extents.width, extents.height, BufferedImage.TYPE_BYTE_GRAY); byte[] imgPixels = ((DataBufferByte) img.getRaster().getDataBuffer()).getData(); int p = 0; - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { + for (int y = 0; y < extents.height; y++) { + for (int x = 0; x < extents.width; x++) { double density = grid[x][y]; imgPixels[p++] = (byte)(density * 255 / maxPixel); } @@ -422,13 +395,13 @@ public void writeShapefile (String fileName, String fieldName) { store.createSchema(gridCell); Transaction transaction = new DefaultTransaction("Save Grid"); FeatureWriter writer = store.getFeatureWriterAppend(transaction); - for (int x = 0; x < width; x++) { - for (int y = 0; y < height; y++) { + for (int x = 0; x < extents.width; x++) { + for (int y = 0; y < extents.height; y++) { try { double value = grid[x][y]; if (value > 0) { SimpleFeature feature = (SimpleFeature) writer.next(); - Polygon pixelPolygon = getPixelGeometry(x + west, y + north, zoom); + Polygon pixelPolygon = getPixelGeometry(x, y, extents); feature.setDefaultGeometry(pixelPolygon); feature.setAttribute(fieldName, value); writer.write(); @@ -447,7 +420,7 @@ public void writeShapefile (String fileName, String fieldName) { } public boolean hasEqualExtents(Grid comparisonGrid){ - return this.zoom == comparisonGrid.zoom && this.west == comparisonGrid.west && this.north == comparisonGrid.north && this.width == comparisonGrid.width && this.height == comparisonGrid.height; + return this.extents.equals(comparisonGrid.extents); } /** @@ -460,8 +433,8 @@ public boolean hasEqualExtents(Grid comparisonGrid){ */ public double getLat(int i) { // Integer division of linear index to find vertical integer intra-grid pixel coordinate - int y = i / width; - return pixelToCenterLat(north + y, zoom); + int y = i / extents.width; + return pixelToCenterLat(extents.north + y, extents.zoom); } /** @@ -472,12 +445,12 @@ public double getLat(int i) { */ public double getLon(int i) { // Remainder of division yields horizontal integer intra-grid pixel coordinate - int x = i % width; - return pixelToCenterLon(west + x, zoom); + int x = i % extents.width; + return pixelToCenterLon(extents.west + x, extents.zoom); } public int featureCount() { - return width * height; + return extents.width * extents.height; 
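`getLat`, `getLon`, `featureCount`, and `getOpportunityCount` all share the same row-major indexing convention over `extents`: linear point index `i` maps to column `i % width` and row `i / width`. A small worked example with illustrative numbers:

```java
WebMercatorExtents extents = new WebMercatorExtents(330, 460, 10, 8, 9); // width 10, height 8
int i = 27;                      // linear index into the grid's points, valid range [0, width * height)
int x = i % extents.width;       // 7: intra-grid column
int y = i / extents.width;       // 2: intra-grid row
// getLon(i) and getLat(i) then return the WGS84 center of absolute pixel
// (extents.west + x, extents.north + y) at zoom level extents.zoom.
```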
} /* functions below from http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#Mathematics */ @@ -530,16 +503,20 @@ public static double pixelToLat (double yPixel, int zoom) { } /** - * @param x absolute (world) x pixel number at the given zoom level. - * @param y absolute (world) y pixel number at the given zoom level. - * @return a JTS Polygon in WGS84 coordinates for the given absolute (world) pixel. + * Given a pixel's local grid coordinates within the supplied WebMercatorExtents, return a closed + * polygon of that pixel's outline in WGS84 global geographic coordinates. + * @param localX x pixel number within the given extents. + * @param localY y pixel number within the given extents. + * @return a JTS Polygon in WGS84 coordinates for the given pixel. */ - public static Polygon getPixelGeometry (int x, int y, int zoom) { - double minLon = pixelToLon(x, zoom); - double maxLon = pixelToLon(x + 1, zoom); + public static Polygon getPixelGeometry (int localX, int localY, WebMercatorExtents extents) { + int x = localX + extents.west; + int y = localY + extents.north; + double minLon = pixelToLon(x, extents.zoom); + double maxLon = pixelToLon(x + 1, extents.zoom); // The y axis increases from north to south in web Mercator. - double minLat = pixelToLat(y + 1, zoom); - double maxLat = pixelToLat(y, zoom); + double minLat = pixelToLat(y + 1, extents.zoom); + double maxLat = pixelToLat(y, extents.zoom); return GeometryUtils.geometryFactory.createPolygon(new Coordinate[] { new Coordinate(minLon, minLat), new Coordinate(minLon, maxLat), @@ -606,11 +583,11 @@ public static List fromCsv(InputStreamProvider csvInputStreamProvider, for (Iterator it = numericColumns.iterator(); it.hasNext();) { String field = it.next(); String value = reader.get(field); - if (value == null || "".equals(value)) continue; // allow missing data + if (value == null || "".equals(value)) continue; // allow missing data TODO add "N/A" etc.? try { double dv = parseDouble(value); if (!(Double.isFinite(dv) || dv < 0)) { - it.remove(); + it.remove(); // TODO track removed columns and report to UI? } } catch (NumberFormatException e) { it.remove(); @@ -621,10 +598,9 @@ public static List fromCsv(InputStreamProvider csvInputStreamProvider, // This will also close the InputStreams. reader.close(); - if (numericColumns.isEmpty()) { - throw new IllegalArgumentException("CSV file contained no entirely finite, non-negative numeric columns."); - } - checkWgsEnvelopeSize(envelope); + checkWgsEnvelopeSize(envelope, "CSV points"); + WebMercatorExtents extents = WebMercatorExtents.forWgsEnvelope(envelope, zoom); + checkPixelCount(extents, numericColumns.size()); if (progressListener != null) { progressListener.setTotalItems(total); @@ -633,14 +609,14 @@ public static List fromCsv(InputStreamProvider csvInputStreamProvider, // We now have an envelope and know which columns are numeric. Make a grid for each numeric column. Map grids = new HashMap<>(); for (String columnName : numericColumns) { - Grid grid = new Grid(zoom, envelope); + Grid grid = new Grid(extents); grid.name = columnName; grids.put(grid.name, grid); } // Make one more Grid where every point will have a weight of 1, for counting points rather than opportunities. // This assumes there is no column called "[COUNT]" in the source file, which is validated above. 
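`fromCsv` now applies two guards before allocating any grids: `checkWgsEnvelopeSize` on the WGS84 envelope and `checkPixelCount` on the extents multiplied by the number of numeric columns (its implementation appears further down, with `MAX_PIXELS` = 10,000 × 10,000 × 10). A worked example with illustrative numbers:

```java
// MAX_PIXELS = 10_000 * 10_000 * 10 = 1_000_000_000 (see checkPixelCount below).
WebMercatorExtents extents = new WebMercatorExtents(0, 0, 25_000, 20_000, 12);
int numericColumns = 3;                                        // one Grid per numeric column, plus [COUNT]
int pixels = extents.width * extents.height * numericColumns;  // 1_500_000_000
// 1_500_000_000 > MAX_PIXELS, so Grid.checkPixelCount(extents, numericColumns) would throw a
// DataSourceException asking for a lower zoom level, smaller extents, or fewer numeric attributes.
```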
- Grid countGrid = new Grid(zoom, envelope); + Grid countGrid = new Grid(extents); countGrid.name = COUNT_COLUMN_NAME; grids.put(countGrid.name, countGrid); @@ -692,20 +668,24 @@ public static List fromShapefile (File shapefile, int zoom) throws IOExcep public static List fromShapefile (File shapefile, int zoom, ProgressListener progressListener) throws IOException, FactoryException, TransformException { - Map grids = new HashMap<>(); ShapefileReader reader = new ShapefileReader(shapefile); - - Envelope envelope = reader.wgs84Bounds(); - int total = reader.getFeatureCount(); - - checkWgsEnvelopeSize(envelope); + checkWgsEnvelopeSize(envelope, "Shapefile"); + WebMercatorExtents extents = WebMercatorExtents.forWgsEnvelope(envelope, zoom); + List numericAttributes = reader.numericAttributes(); + Set uniqueNumericAttributes = new HashSet<>(numericAttributes); + if (uniqueNumericAttributes.size() != numericAttributes.size()) { + throw new IllegalArgumentException("Shapefile has duplicate numeric attributes"); + } + checkPixelCount(extents, numericAttributes.size()); + int total = reader.featureCount(); if (progressListener != null) { progressListener.setTotalItems(total); } AtomicInteger count = new AtomicInteger(0); + Map grids = new HashMap<>(); reader.wgs84Stream().forEach(feat -> { Geometry geom = (Geometry) feat.getDefaultGeometry(); @@ -718,11 +698,10 @@ public static List fromShapefile (File shapefile, int zoom, ProgressListen if (numericVal == 0) continue; String attributeName = p.getName().getLocalPart(); - - // TODO this is assuming that each attribute name can only exist once. Shapefiles can contain duplicate attribute names. Validate to catch this. + Grid grid = grids.get(attributeName); if (grid == null) { - grid = new Grid(zoom, envelope); + grid = new Grid(extents); grid.name = attributeName; grids.put(attributeName, grid); } @@ -734,7 +713,7 @@ public static List fromShapefile (File shapefile, int zoom, ProgressListen } else if (geom instanceof Polygon || geom instanceof MultiPolygon) { grid.rasterize(geom, numericVal); } else { - throw new IllegalArgumentException("Unsupported geometry type"); + throw new IllegalArgumentException("Unsupported geometry type: " + geom); } } @@ -763,8 +742,8 @@ public double sumTotalOpportunities() { @Override public double getOpportunityCount (int i) { - int x = i % this.width; - int y = i / this.width; + int x = i % extents.width; + int y = i / extents.width; return grid[x][y]; } @@ -793,36 +772,18 @@ public Envelope getWgsEnvelope () { @Override public WebMercatorExtents getWebMercatorExtents () { - return new WebMercatorExtents(this.west, this.north, this.width, this.height, this.zoom); + return extents; } - /** - * @return the approximate area of an Envelope in WGS84 lat/lon coordinates, in square kilometers. - */ - public static double roughWgsEnvelopeArea (Envelope wgsEnvelope) { - double lon0 = wgsEnvelope.getMinX(); - double lon1 = wgsEnvelope.getMaxX(); - double lat0 = wgsEnvelope.getMinY(); - double lat1 = wgsEnvelope.getMaxY(); - double height = lat1 - lat0; - double width = lon1 - lon0; - final double KM_PER_DEGREE_LAT = 111.133; - // Scale the x direction as if the Earth was a sphere. - // Error above the middle latitude should approximately cancel out error below that latitude. 
- double averageLat = (lat0 + lat1) / 2; - double xScale = FastMath.cos(FastMath.toRadians(averageLat)); - double area = (height * KM_PER_DEGREE_LAT) * (width * KM_PER_DEGREE_LAT * xScale); - return area; - } - - /** - * Throw an exception if the provided envelope is too big for a reasonable destination grid. - */ - public static void checkWgsEnvelopeSize (Envelope envelope) { - if (roughWgsEnvelopeArea(envelope) > MAX_BOUNDING_BOX_AREA_SQ_KM) { - throw new IllegalArgumentException("Shapefile extent (" + roughWgsEnvelopeArea(envelope) + " sq. km.) " + - "exceeds limit (" + MAX_BOUNDING_BOX_AREA_SQ_KM + "sq. km.)."); + public static void checkPixelCount (WebMercatorExtents extents, int layers) { + int pixels = extents.width * extents.height * layers; + if (pixels > MAX_PIXELS) { + throw new DataSourceException("Number of zoom level " + extents.zoom + " pixels (" + pixels + ")" + + "exceeds limit (" + MAX_PIXELS +"). Reduce the zoom level or the file's extents or number of " + + "numeric attributes."); } } + + } diff --git a/src/main/java/com/conveyal/r5/analyst/GridTransformWrapper.java b/src/main/java/com/conveyal/r5/analyst/GridTransformWrapper.java index 8c18549cf..ddc5fcec5 100644 --- a/src/main/java/com/conveyal/r5/analyst/GridTransformWrapper.java +++ b/src/main/java/com/conveyal/r5/analyst/GridTransformWrapper.java @@ -26,7 +26,7 @@ public class GridTransformWrapper extends PointSet { * targetGrid cannot be indexed so are effectively zero for the purpose of accessibility calculations. */ public GridTransformWrapper (WebMercatorExtents targetGridExtents, Grid sourceGrid) { - checkArgument(targetGridExtents.zoom == sourceGrid.zoom, "Zoom levels must be identical."); + checkArgument(targetGridExtents.zoom == sourceGrid.extents.zoom, "Zoom levels must be identical."); // Make a pointset for these extents so we can defer to its methods for lat/lon lookup, size, etc. this.targetGrid = new WebMercatorGridPointSet(targetGridExtents); this.sourceGrid = sourceGrid; @@ -37,13 +37,13 @@ public GridTransformWrapper (WebMercatorExtents targetGridExtents, Grid sourceGr // This could certainly be made more efficient (but complex) by forcing sequential iteration over opportunity counts // and disallowing random access, using a new PointSetIterator class that allows reading lat, lon, and counts. private int transformIndex (int i) { - final int x = (i % targetGrid.width) + targetGrid.west - sourceGrid.west; - final int y = (i / targetGrid.width) + targetGrid.north - sourceGrid.north; - if (x < 0 || x >= sourceGrid.width || y < 0 || y >= sourceGrid.height) { + final int x = (i % targetGrid.width) + targetGrid.west - sourceGrid.extents.west; + final int y = (i / targetGrid.width) + targetGrid.north - sourceGrid.extents.north; + if (x < 0 || x >= sourceGrid.extents.width || y < 0 || y >= sourceGrid.extents.height) { // Point in target grid lies outside source grid, there is no valid index. Return special value. 
return -1; } - return y * sourceGrid.width + x; + return y * sourceGrid.extents.width + x; } @Override diff --git a/src/main/java/com/conveyal/r5/analyst/NetworkPreloader.java b/src/main/java/com/conveyal/r5/analyst/NetworkPreloader.java index b95c49fa5..6b4ad2e77 100644 --- a/src/main/java/com/conveyal/r5/analyst/NetworkPreloader.java +++ b/src/main/java/com/conveyal/r5/analyst/NetworkPreloader.java @@ -72,7 +72,7 @@ public class NetworkPreloader extends AsyncLoader preloadData (AnalysisWorkerTask task) { return get(Key.forTask(task)); } + /** + * A blocking way to ensure the network and all linkages and precomputed tables are prepared in advance of routing. + * Note that this does not perform any blocking or locking of its own - any synchronization will be that of the + * underlying caches (synchronized methods on TransportNetworkCache or LinkedPointSet). It also bypasses the + * AsyncLoader locking that would usually allow only one buildValue operation at a time. All threads that call with + * similar tasks will make interleaved calls to setProgress (with superficial map synchronization). Other than + * causing a value to briefly revert from PRESENT to BUILDING this doesn't seem deeply problematic. + * This is provided specifically for regional tasks, to ensure that they remain in preloading mode while all this + * data is prepared. + */ + public TransportNetwork synchronousPreload (AnalysisWorkerTask task) { + return buildValue(Key.forTask(task)); + } + @Override protected TransportNetwork buildValue(Key key) { @@ -97,12 +111,19 @@ protected TransportNetwork buildValue(Key key) { // Get the set of points to which we are measuring travel time. Any smaller sub-grids created here will // reference the scenarioNetwork's built-in full-extent pointset, so can reuse its linkage. // TODO handle multiple destination grids. + + if (key.destinationGridExtents == null) { + // Special (and ideally temporary) case for regional freeform destinations, where there is no grid to link. + // The null destinationGridExtents are created by the WebMercatorExtents#forPointsets else clause. + return scenarioNetwork; + } + setProgress(key, 0, "Fetching gridded point set..."); PointSet pointSet = AnalysisWorkerTask.gridPointSetCache.get(key.destinationGridExtents, scenarioNetwork.fullExtentGridPointSet); // Now rebuild grid linkages as needed. One linkage per mode, and one cost table per egress mode. // Cost tables are slow to compute and not needed for access or direct legs, only egress modes. - // Note that we're able to pass a progress listener down into the EgressCostTable contruction process, + // Note that we're able to pass a progress listener down into the EgressCostTable construction process, // but not into the linkage process, because the latter is encapsulated as a Google/Caffeine // LoadingCache. We'll need some way to get LoadingCache's per-key locking, while still allowing a // progress listener specific to the single request. Perhaps this will mean registering 0..N diff --git a/src/main/java/com/conveyal/r5/analyst/TravelTimeComputer.java b/src/main/java/com/conveyal/r5/analyst/TravelTimeComputer.java index f7ae15e08..b9f20a701 100644 --- a/src/main/java/com/conveyal/r5/analyst/TravelTimeComputer.java +++ b/src/main/java/com/conveyal/r5/analyst/TravelTimeComputer.java @@ -151,12 +151,17 @@ public OneOriginResult computeTravelTimes() { // egress end of the trip. We could probably reuse a method for both (getTravelTimesFromPoint). 
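The new `synchronousPreload` above is the blocking counterpart to the asynchronous `preloadData`/`get` path used for single-point requests. A hedged sketch of how a regional handler can use it (the helper method and variable names are illustrative; the actual call site appears in the `AnalysisWorker` hunk further down):

```java
// Hedged sketch: regional tasks block here until the network, linkages and egress
// cost tables are ready, instead of polling the async loader for "still loading" states.
TransportNetwork preloadForRegionalTask (NetworkPreloader networkPreloader, RegionalTask task) {
    return networkPreloader.synchronousPreload(task);
}
```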
// Note: Access searches (which minimize travel time) are asymmetric with the egress cost tables (which // often minimize distance to allow reuse at different speeds). + // Preserve past behavior: only apply bike or walk time limits when those modes are used to access transit. - if (request.hasTransit()) { - sr.timeLimitSeconds = request.getMaxTimeSeconds(accessMode); - } else { - sr.timeLimitSeconds = request.maxTripDurationMinutes * FastRaptorWorker.SECONDS_PER_MINUTE; + // The overall time limit specified in the request may further decrease that mode-specific limit. + { + int limitSeconds = request.maxTripDurationMinutes * FastRaptorWorker.SECONDS_PER_MINUTE; + if (request.hasTransit()) { + limitSeconds = Math.min(limitSeconds, request.getMaxTimeSeconds(accessMode)); + } + sr.timeLimitSeconds = limitSeconds; } + // Even if generalized cost tags were present on the input data, we always minimize travel time. // The generalized cost calculations currently increment time and weight by the same amount. sr.quantityToMinimize = StreetRouter.State.RoutingVariable.DURATION_SECONDS; diff --git a/src/main/java/com/conveyal/r5/analyst/WebMercatorExtents.java b/src/main/java/com/conveyal/r5/analyst/WebMercatorExtents.java index 79e8f6f5a..150043b91 100644 --- a/src/main/java/com/conveyal/r5/analyst/WebMercatorExtents.java +++ b/src/main/java/com/conveyal/r5/analyst/WebMercatorExtents.java @@ -26,10 +26,22 @@ */ public class WebMercatorExtents { + /** The pixel number of the westernmost pixel (smallest x value). */ public final int west; + + /** + * The pixel number of the northernmost pixel (smallest y value in web Mercator, because y increases from north to + * south in web Mercator). + */ public final int north; + + /** Width in web Mercator pixels */ public final int width; + + /** Height in web Mercator pixels */ public final int height; + + /** Web mercator zoom level. 
*/ public final int zoom; public WebMercatorExtents (int west, int north, int width, int height, int zoom) { diff --git a/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSet.java b/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSet.java index c3b585812..b6b614df7 100644 --- a/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSet.java +++ b/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSet.java @@ -10,6 +10,7 @@ import java.io.Serializable; +import static com.conveyal.r5.common.GeometryUtils.checkWgsEnvelopeSize; import static com.conveyal.r5.streets.VertexStore.fixedDegreesToFloating; import static com.google.common.base.Preconditions.checkArgument; @@ -28,6 +29,10 @@ public class WebMercatorGridPointSet extends PointSet implements Serializable { */ public static final int DEFAULT_ZOOM = 9; + public static final int MIN_ZOOM = 9; + + public static final int MAX_ZOOM = 12; + /** web mercator zoom level */ public final int zoom; @@ -75,6 +80,7 @@ public WebMercatorGridPointSet (TransportNetwork transportNetwork) { */ public WebMercatorGridPointSet (Envelope wgsEnvelope) { LOG.info("Creating WebMercatorGridPointSet with WGS84 extents {}", wgsEnvelope); + checkWgsEnvelopeSize(wgsEnvelope, "grid point set"); this.zoom = DEFAULT_ZOOM; int west = lonToPixel(wgsEnvelope.getMinX()); int east = lonToPixel(wgsEnvelope.getMaxX()); @@ -229,12 +235,10 @@ public WebMercatorExtents getWebMercatorExtents () { return new WebMercatorExtents(west, north, width, height, zoom); } - public static int parseZoom(String zoom) { - if (zoom != null) { - return Integer.parseInt(zoom); - } else { - return DEFAULT_ZOOM; - } + public static int parseZoom(String zoomString) { + int zoom = (zoomString == null) ? DEFAULT_ZOOM : Integer.parseInt(zoomString); + checkArgument(zoom >= MIN_ZOOM && zoom <= MAX_ZOOM); + return zoom; } } diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorker.java b/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorker.java index ee88a1272..c409151a9 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorker.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorker.java @@ -456,14 +456,8 @@ protected void handleOneRegionalTask (RegionalTask task) throws Throwable { // Get the graph object for the ID given in the task, fetching inputs and building as needed. // All requests handled together are for the same graph, and this call is synchronized so the graph will - // only be built once. - // Record the currently loaded network ID so we "stick" to this same graph on subsequent polls. + // only be built once. Record the currently loaded network ID to remain on this same graph on subsequent polls. networkId = task.graphId; - // Note we're completely bypassing the async loader here and relying on the older nested LoadingCaches. - // If those are ever removed, the async loader will need a synchronous mode with per-path blocking (kind of - // reinventing the wheel of LoadingCache) or we'll need to make preparation for regional tasks async. - TransportNetwork transportNetwork = networkPreloader.transportNetworkCache.getNetworkForScenario(task - .graphId, task.scenarioId); // Static site tasks do not specify destinations, but all other regional tasks should. // Load the PointSets based on the IDs (actually, full storage keys including IDs) in the task. 
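`parseZoom` now validates as well as parses: a null string still falls back to `DEFAULT_ZOOM`, but values outside `[MIN_ZOOM, MAX_ZOOM]` are rejected by `checkArgument`. A brief sketch of the resulting behavior:

```java
WebMercatorGridPointSet.parseZoom(null);   // returns DEFAULT_ZOOM (9)
WebMercatorGridPointSet.parseZoom("12");   // returns 12, within [MIN_ZOOM, MAX_ZOOM] = [9, 12]
WebMercatorGridPointSet.parseZoom("14");   // throws IllegalArgumentException from checkArgument
WebMercatorGridPointSet.parseZoom("abc");  // still throws NumberFormatException from Integer.parseInt
```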
@@ -472,6 +466,13 @@ protected void handleOneRegionalTask (RegionalTask task) throws Throwable { task.loadAndValidateDestinationPointSets(pointSetCache); } + // Pull all necessary inputs into cache in a blocking fashion, unlike single-point tasks where prep is async. + // Avoids auto-shutdown while preloading. Must be done after loading destination pointsets to establish extents. + // Note we're completely bypassing the async loader here and relying on the older nested LoadingCaches. + // If those are ever removed, the async loader will need a synchronous mode with per-path blocking (kind of + // reinventing the wheel of LoadingCache) or we'll need to make preparation for regional tasks async. + TransportNetwork transportNetwork = networkPreloader.synchronousPreload(task); + // If we are generating a static site, there must be a single metadata file for an entire batch of results. // Arbitrarily we create this metadata as part of the first task in the job. if (task.makeTauiSite && task.taskId == 0) { diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java index edf8ad057..dd268ce95 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java @@ -32,6 +32,14 @@ public class PathResult { + /** + * The maximum number of destinations for which we'll generate detailed path information in a single request. + * Detailed path information was added on to the original design, which returned a simple grid of travel times. + * These results are returned to the backend over an HTTP API so we don't want to risk making them too huge. + * This could be set to a higher number in cases where you know the result return channel can handle the size. + */ + public static int maxDestinations = 5000; + private final int nDestinations; /** * Array with one entry per destination. Each entry is a map from a "path template" to the associated iteration @@ -62,8 +70,8 @@ public PathResult(AnalysisWorkerTask task, TransitLayer transitLayer) { // In regional analyses, return paths to all destinations nDestinations = task.nTargetsPerOrigin(); // This limitation reflects the initial design, for use with freeform pointset destinations - if (nDestinations > 5000) { - throw new UnsupportedOperationException("Path results are limited to 5000 destinations"); + if (nDestinations > maxDestinations) { + throw new UnsupportedOperationException("Number of detailed path destinations exceeds limit of " + maxDestinations); } } iterationsForPathTemplates = new Multimap[nDestinations]; diff --git a/src/main/java/com/conveyal/r5/analyst/progress/ProgressListener.java b/src/main/java/com/conveyal/r5/analyst/progress/ProgressListener.java index 7fbb67b10..899ede322 100644 --- a/src/main/java/com/conveyal/r5/analyst/progress/ProgressListener.java +++ b/src/main/java/com/conveyal/r5/analyst/progress/ProgressListener.java @@ -8,7 +8,8 @@ public interface ProgressListener { /** * Call this method once at the beginning of a new task, specifying how many sub-units of work will be performed. - * This does not allow for subsequently starting sub-tasks that use the same ProgressListener while progress is + * If totalElements is zero or negative, any previously set total number of elements remains unchanged. + * This allows for subsequently starting named sub-tasks that use the same ProgressListener while progress is * still being reported. 
Any recursion launching sub-tasks will need to be head- or tail-recursion, launched before * you call beginTask or after the last unit of work is complete. * Rather than implementing some kind of push/pop mechanism, we may eventually have some kind of nested task system, @@ -25,4 +26,14 @@ default void increment () { increment(1); } + /** + * We want WorkProducts to be revealed by a TaskAction even in the case of exception or failure (to report complex + * structured error or validation information). Returning them from the action method will not work in case of an + * unexpected exception. Adding them to the background Task with a fluent method is also problematic as it requires + * the caller to construct or otherwise hold a reference to the product to get its ID before the action is run. It's + * preferable for the product to be fully encapsulated in the action, so it's reported as park of the task progress. + * On the other hand, creating the product within the TaskAction usually requires it to hold a UserPermissions. + */ + default void setWorkProduct (WorkProduct workProduct) { /* Default is no-op */ } + } diff --git a/src/main/java/com/conveyal/r5/analyst/progress/Task.java b/src/main/java/com/conveyal/r5/analyst/progress/Task.java index 949ad2d22..49990af7a 100644 --- a/src/main/java/com/conveyal/r5/analyst/progress/Task.java +++ b/src/main/java/com/conveyal/r5/analyst/progress/Task.java @@ -1,7 +1,7 @@ package com.conveyal.r5.analyst.progress; import com.conveyal.analysis.UserPermissions; -import com.conveyal.analysis.models.Model; +import com.conveyal.analysis.models.BaseModel; import com.conveyal.r5.util.ExceptionUtils; import java.time.Duration; @@ -11,9 +11,10 @@ import java.util.UUID; /** - * This is a draft for a more advanced task progress system. It is not yet complete. - * Task is intended for background tasks whose progress the end user should be aware of, such as file uploads. - * It should not be used for automatic internal actions (such as Events) which would clutter a user's active task list. + * This newer task progress system coexists with several older mechanisms, which it is intended to supersede. It is + * still evolving as we adapt it to new use cases. The Task class is intended to represent background tasks whose + * progress the end user should be aware of, such as processing uploaded files. It should not be used for automatic + * internal actions (such as Events) which would clutter a user's active task list. * * A Task (or some interface that it implements) could be used by the AsyncLoader to track progress. Together with some * AsyncLoader functionality it will be a bit like a Future with progress reporting. Use of AsyncLoader could then be @@ -32,7 +33,7 @@ public static enum State { QUEUED, ACTIVE, DONE, ERROR } - /** Every has an ID so the UI can update tasks it already knows about with new information after polling. */ + /** Every Task has an ID so the UI can update tasks it already knows about with new information after polling. */ public final UUID id = UUID.randomUUID(); // User and group are only relevant on the backend. 
On workers, we want to show network or cost table build progress @@ -106,7 +107,7 @@ public double getPercentComplete() { List subtasks = new ArrayList<>(); - // TODO find a better way to set this than directly inside a closure + // TODO find a better way to set this than directly inside a closure - possibly using ProgressListener public WorkProduct workProduct; public void addSubtask (Task subtask) { @@ -188,11 +189,13 @@ public void run () { @Override public void beginTask(String description, int totalElements) { - // In the absence of subtasks we can call this repeatedly on the same task, which will cause the UI progress - // bar to reset to zero at each stage, while keeping the same top level title. + // In the absence of a real subtask mechanism, we can call this repeatedly on the same task with totalElements + // of zero, allow the UI progress while changing the detail message without resetting progress to zero. this.description = description; - this.totalWorkUnits = totalElements; - this.currentWorkUnit = 0; + if (totalElements > 0) { + this.totalWorkUnits = totalElements; + this.currentWorkUnit = 0; + } } @Override @@ -204,6 +207,7 @@ public void increment (int n) { } // Methods for reporting elapsed times over API + // Durations are reported instead of times to avoid problems with clock skew between backend and client. public Duration durationInQueue () { Instant endTime = (began == null) ? Instant.now() : began; @@ -251,17 +255,28 @@ public Task withAction (TaskAction action) { // We can't return the WorkProduct from TaskAction, that would be disrupted by throwing exceptions. // It is also awkward to make a method to set it on ProgressListener - it's not really progress. // So we set it directly on the task before submitting it. Requires pre-setting (not necessarily storing) Model._id. - public Task withWorkProduct (Model model) { + // Update: I've started setting it via progressListener, it's just more encapsulated to create inside the TaskAction. + // But this then requires the TaskAction to hold a UserPermissions instance. + public Task withWorkProduct (BaseModel model) { this.workProduct = WorkProduct.forModel(model); return this; } - /** Ideally we'd just pass in a Model, but currently we have two base classes, also see WorkProduct.forModel(). */ + /** + * Ideally we'd just pass in a Model, but currently we have two base classes, also see WorkProduct.forModel(). + * This can now be reported via ProgressListener interface for better encapsulation, potentially only revealing the + * work product when it's acutally in the database. + */ public Task withWorkProduct (WorkProductType type, String id, String region) { this.workProduct = new WorkProduct(type, id, region); return this; } + @Override + public void setWorkProduct (WorkProduct workProduct) { + this.workProduct = workProduct; + } + public Task setHeavy (boolean heavy) { this.isHeavy = heavy; return this; diff --git a/src/main/java/com/conveyal/r5/analyst/progress/TaskAction.java b/src/main/java/com/conveyal/r5/analyst/progress/TaskAction.java index 9d0bb0d8c..79ccc661e 100644 --- a/src/main/java/com/conveyal/r5/analyst/progress/TaskAction.java +++ b/src/main/java/com/conveyal/r5/analyst/progress/TaskAction.java @@ -5,6 +5,9 @@ * It's a single-method interface so it can be defined with lambda functions, or other objects can implement it. * When the action is run, it will receive an object implementing an interface through which it can report progress * and errors. 
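Taken together, the `ProgressListener` javadoc change and the `Task.beginTask` implementation above mean that a `totalElements` of zero (or negative) changes the visible description without resetting the progress counters. A hedged sketch of the staged-progress usage this enables inside a `TaskAction` (names and counts illustrative):

```java
// Hedged sketch of staged progress reporting through a single ProgressListener.
int featureCount = 42;                                            // illustrative work unit count
progressListener.beginTask("Processing features", featureCount);  // sets the total work units
for (int i = 0; i < featureCount; i++) {
    // ... process feature i ...
    progressListener.increment();
}
// Passing 0 keeps the previous total and current counts; only the description changes.
progressListener.beginTask("Writing output", 0);
```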
+ * + * Alternatively, TaskAction could have more methods: return a title, heaviness, user details, etc. to be seen by Task, + * instead of having only one method to make it a functional interface. I'd like to discourage anonymous functions anyway. */ public interface TaskAction { diff --git a/src/main/java/com/conveyal/r5/analyst/progress/WorkProduct.java b/src/main/java/com/conveyal/r5/analyst/progress/WorkProduct.java index beff7e3e6..40dffa198 100644 --- a/src/main/java/com/conveyal/r5/analyst/progress/WorkProduct.java +++ b/src/main/java/com/conveyal/r5/analyst/progress/WorkProduct.java @@ -1,29 +1,38 @@ package com.conveyal.r5.analyst.progress; -import com.conveyal.analysis.controllers.UserActivityController; -import com.conveyal.analysis.models.Model; +import com.conveyal.analysis.models.BaseModel; +import com.conveyal.analysis.models.DataGroup; /** - * A unique identifier for the final product of a single task action. Currently this serves as both an - * internal data structure and an API model class, which should be harmless as it's an immutable data class. - * The id is unique within the type, so the regionId is redundant information, but facilitates prefectches on the UI. + * A unique identifier for the final product of a single TaskAction. Currently this serves as both an internal data + * structure and an API model class, which should be harmless as it's an immutable data class. The id is unique within + * the type, so the regionId is redundant information, but facilitates prefetches on the UI. If isGroup is true, the + * id is not that of an individual record, but the dataGroupId of several records created in a single operation. */ public class WorkProduct { public final WorkProductType type; public final String id; public final String regionId; + public final boolean isGroup; public WorkProduct (WorkProductType type, String id, String regionId) { + this(type, id, regionId, false); + } + + public WorkProduct (WorkProductType type, String id, String regionId, boolean isGroup) { this.type = type; this.id = id; this.regionId = regionId; + this.isGroup = isGroup; } // FIXME Not all Models have a regionId. Rather than pass that in as a String, refine the programming API.
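For orientation, here is a minimal sketch (not part of this changeset) of how these pieces are meant to fit together: a TaskAction written as a lambda reports determinate progress, switches the detail message with a zero-element beginTask call, and reveals its WorkProduct through the ProgressListener so the product is reported even if a later step throws. The ProgressListener methods, the WorkProduct constructor, and the DATA_SOURCE type are taken from this diff; the IDs, the feature count, and the assumption that TaskAction's single method accepts a ProgressListener are illustrative.

```java
import com.conveyal.r5.analyst.progress.TaskAction;
import com.conveyal.r5.analyst.progress.WorkProduct;
import com.conveyal.r5.analyst.progress.WorkProductType;

public class WorkProductReportingSketch {

    /** Build a TaskAction for a hypothetical ingest of nFeatures features into a data source. */
    public static TaskAction exampleIngestAction (int nFeatures, String dataSourceId, String regionId) {
        return progressListener -> {
            // Determinate stage: the UI can show a progress bar for nFeatures units of work.
            progressListener.beginTask("Validating uploaded features", nFeatures);
            for (int i = 0; i < nFeatures; i++) {
                // ... validate or transform one feature here ...
                progressListener.increment();
            }
            // Message-only update: totalElements of zero changes the detail text without resetting progress.
            progressListener.beginTask("Storing data source", 0);
            // Reveal the product through the listener so it is reported even if a later step fails.
            progressListener.setWorkProduct(new WorkProduct(WorkProductType.DATA_SOURCE, dataSourceId, regionId));
        };
    }
}
```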
- public static WorkProduct forModel (Model model) { - WorkProduct product = new WorkProduct(WorkProductType.forModel(model), model._id, null); - return product; + public static WorkProduct forModel (BaseModel model) { + return new WorkProduct(WorkProductType.forModel(model), model._id.toString(), null); } + public static WorkProduct forDataGroup (WorkProductType type, DataGroup dataGroup, String regionId) { + return new WorkProduct(type, dataGroup._id.toString(), regionId, true); + } } diff --git a/src/main/java/com/conveyal/r5/analyst/progress/WorkProductType.java b/src/main/java/com/conveyal/r5/analyst/progress/WorkProductType.java index bb71ba955..360abce08 100644 --- a/src/main/java/com/conveyal/r5/analyst/progress/WorkProductType.java +++ b/src/main/java/com/conveyal/r5/analyst/progress/WorkProductType.java @@ -1,25 +1,28 @@ package com.conveyal.r5.analyst.progress; -import com.conveyal.analysis.controllers.UserActivityController; +import com.conveyal.analysis.models.AggregationArea; import com.conveyal.analysis.models.Bundle; -import com.conveyal.analysis.models.Model; +import com.conveyal.analysis.models.DataSource; import com.conveyal.analysis.models.OpportunityDataset; import com.conveyal.analysis.models.RegionalAnalysis; +import com.conveyal.analysis.models.SpatialDataSource; /** * There is some implicit and unenforced correspondence between these values and those in FileCategory, as well - as the tables in Mongo. We should probably clearly state and enforce this parallelism. No background work is + as the tables in Mongo. We should probably clearly state and enforce this correspondence. No background work is * done creating regions, projects, or modifications so they don't need to be represented here. */ public enum WorkProductType { - BUNDLE, REGIONAL_ANALYSIS, AGGREGATION_AREA, OPPORTUNITY_DATASET; + BUNDLE, REGIONAL_ANALYSIS, AGGREGATION_AREA, OPPORTUNITY_DATASET, DATA_SOURCE; - // Currently we have two base classes for db objects so may need to use Object instead of BaseModel parameter - public static WorkProductType forModel (Model model) { + public static WorkProductType forModel (Object model) { if (model instanceof Bundle) return BUNDLE; if (model instanceof OpportunityDataset) return OPPORTUNITY_DATASET; if (model instanceof RegionalAnalysis) return REGIONAL_ANALYSIS; + if (model instanceof AggregationArea) return AGGREGATION_AREA; + if (model instanceof DataSource) return DATA_SOURCE; throw new IllegalArgumentException("Unrecognized work product type."); } + } diff --git a/src/main/java/com/conveyal/r5/analyst/scenario/IndexedPolygonCollection.java b/src/main/java/com/conveyal/r5/analyst/scenario/IndexedPolygonCollection.java index 339ef1149..d61f6066a 100644 --- a/src/main/java/com/conveyal/r5/analyst/scenario/IndexedPolygonCollection.java +++ b/src/main/java/com/conveyal/r5/analyst/scenario/IndexedPolygonCollection.java @@ -1,9 +1,11 @@ package com.conveyal.r5.analyst.scenario; import com.conveyal.analysis.components.WorkerComponents; +import com.conveyal.file.FileStorageKey; +import org.geotools.data.geojson.GeoJSONDataStore; +import org.geotools.data.simple.SimpleFeatureSource; import org.geotools.feature.FeatureCollection; import org.geotools.feature.FeatureIterator; -import org.geotools.geojson.feature.FeatureJSON; import org.geotools.referencing.CRS; import org.geotools.referencing.crs.DefaultGeographicCRS; import org.locationtech.jts.geom.Envelope; @@ -16,13 +18,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.InputStream;
+import java.io.File; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import static com.conveyal.file.FileCategory.POLYGONS; +import static com.conveyal.file.FileCategory.DATASOURCES; /** * This is an abstraction for the polygons used to configure the road congestion modification type and the ride hailing @@ -104,9 +106,11 @@ public IndexedPolygonCollection ( } public void loadFromS3GeoJson() throws Exception { - InputStream polygonInputStream = WorkerComponents.fileStorage.getInputStream(POLYGONS, polygonLayer); - FeatureJSON featureJSON = new FeatureJSON(); - FeatureCollection featureCollection = featureJSON.readFeatureCollection(polygonInputStream); + // FIXME How will we handle .gz data? + File polygonInputFile = WorkerComponents.fileStorage.getFile(new FileStorageKey(DATASOURCES, polygonLayer)); + GeoJSONDataStore dataStore = new GeoJSONDataStore(polygonInputFile); + SimpleFeatureSource featureSource = dataStore.getFeatureSource(); + FeatureCollection featureCollection = featureSource.getFeatures(); LOG.info("Validating features and creating spatial index..."); FeatureType featureType = featureCollection.getSchema(); CoordinateReferenceSystem crs = featureType.getCoordinateReferenceSystem(); diff --git a/src/main/java/com/conveyal/r5/analyst/scenario/RoadCongestion.java b/src/main/java/com/conveyal/r5/analyst/scenario/RoadCongestion.java index f113a0565..86d048c49 100644 --- a/src/main/java/com/conveyal/r5/analyst/scenario/RoadCongestion.java +++ b/src/main/java/com/conveyal/r5/analyst/scenario/RoadCongestion.java @@ -1,8 +1,7 @@ package com.conveyal.r5.analyst.scenario; import com.conveyal.analysis.components.WorkerComponents; -import com.conveyal.file.FileCategory; -import com.conveyal.r5.analyst.cluster.AnalysisWorker; +import com.conveyal.file.FileStorageKey; import com.conveyal.r5.streets.EdgeStore; import com.conveyal.r5.transit.TransportNetwork; import com.conveyal.r5.util.ExceptionUtils; @@ -10,9 +9,10 @@ import gnu.trove.list.array.TShortArrayList; import gnu.trove.map.TObjectIntMap; import gnu.trove.map.hash.TObjectIntHashMap; +import org.geotools.data.geojson.GeoJSONDataStore; +import org.geotools.data.simple.SimpleFeatureSource; import org.geotools.feature.FeatureCollection; import org.geotools.feature.FeatureIterator; -import org.geotools.geojson.feature.FeatureJSON; import org.geotools.referencing.CRS; import org.geotools.referencing.crs.DefaultGeographicCRS; import org.locationtech.jts.geom.Envelope; @@ -25,11 +25,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.InputStream; +import java.io.File; import java.util.List; -import java.util.zip.GZIPInputStream; -import static com.conveyal.file.FileCategory.POLYGONS; +import static com.conveyal.file.FileCategory.DATASOURCES; /** * To simulate traffic congestion, apply a slow-down (or speed-up) factor to roads, according to attributes of polygon @@ -110,10 +109,11 @@ public boolean resolve (TransportNetwork network) { // and errors can all be easily recorded and bubbled back up to the UI. // Polygon should only need to be fetched once when the scenario is applied, then the resulting network is cached. 
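The replacement of the removed gt-geojson FeatureJSON reader with the gt-geojsondatastore module changes how GeoJSON is opened. Below is a standalone sketch of the new read path, with a hypothetical local file standing in for the object fetched from FileStorage in the code above.

```java
import org.geotools.data.geojson.GeoJSONDataStore;
import org.geotools.data.simple.SimpleFeatureCollection;
import org.geotools.data.simple.SimpleFeatureSource;

import java.io.File;

public class GeoJsonReadSketch {
    public static void main (String[] args) throws Exception {
        // Hypothetical input path standing in for the file retrieved from FileStorage in this diff.
        File geoJsonFile = new File("polygons.geojson");
        GeoJSONDataStore dataStore = new GeoJSONDataStore(geoJsonFile);
        SimpleFeatureSource featureSource = dataStore.getFeatureSource();
        SimpleFeatureCollection featureCollection = featureSource.getFeatures();
        System.out.println("Read " + featureCollection.size() + " features of type "
                + featureCollection.getSchema().getTypeName());
    }
}
```

Note the FIXME above about gzipped data: unlike the old InputStream-based path, this reads directly from a File, so compressed inputs would need to be decompressed first.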
// this.features = polygonLayerCache.getPolygonFeatureCollection(this.polygonLayer); - // Note: Newer JTS now has GeoJsonReader - try (InputStream inputStream = WorkerComponents.fileStorage.getInputStream(POLYGONS, polygonLayer)) { - FeatureJSON featureJSON = new FeatureJSON(); - FeatureCollection featureCollection = featureJSON.readFeatureCollection(inputStream); + try { + File polygonInputFile = WorkerComponents.fileStorage.getFile(new FileStorageKey(DATASOURCES, polygonLayer)); + GeoJSONDataStore dataStore = new GeoJSONDataStore(polygonInputFile); + SimpleFeatureSource featureSource = dataStore.getFeatureSource(); + FeatureCollection featureCollection = featureSource.getFeatures(); LOG.info("Validating features and creating spatial index..."); polygonSpatialIndex = new STRtree(); FeatureType featureType = featureCollection.getSchema(); diff --git a/src/main/java/com/conveyal/r5/common/GeometryUtils.java b/src/main/java/com/conveyal/r5/common/GeometryUtils.java index a7c46ffd8..371798e1b 100644 --- a/src/main/java/com/conveyal/r5/common/GeometryUtils.java +++ b/src/main/java/com/conveyal/r5/common/GeometryUtils.java @@ -1,5 +1,6 @@ package com.conveyal.r5.common; +import com.conveyal.analysis.datasource.DataSourceException; import com.conveyal.r5.streets.VertexStore; import org.apache.commons.math3.util.FastMath; import org.locationtech.jts.geom.Coordinate; @@ -10,16 +11,21 @@ import static com.conveyal.r5.streets.VertexStore.fixedDegreesToFloating; import static com.conveyal.r5.streets.VertexStore.floatingDegreesToFixed; +import static com.google.common.base.Preconditions.checkArgument; /** * Reimplementation of OTP GeometryUtils, using copied code where there are not licensing concerns. + * Also contains reusable methods for validating WGS84 envelopes and latitude and longitude values. */ public class GeometryUtils { public static final GeometryFactory geometryFactory = new GeometryFactory(); - // average of polar and equatorial, https://en.wikipedia.org/wiki/Earth + /** Average of polar and equatorial radii, https://en.wikipedia.org/wiki/Earth */ public static final double RADIUS_OF_EARTH_M = 6_367_450; + /** Maximum area allowed for the bounding box of an uploaded shapefile -- large enough for New York State. */ + private static final double MAX_BOUNDING_BOX_AREA_SQ_KM = 250_000; + /** * Haversine formula for distance on the sphere. We used to have a fastDistance function that would estimate this * quickly, but I'm not convinced we actually need it. @@ -88,4 +94,82 @@ public static Envelope floatingWgsEnvelopeToFixed (Envelope floatingWgsEnvelope) return new Envelope(fixedMinX, fixedMaxX, fixedMinY, fixedMaxY); } + //// Methods for range-checking points and envelopes in WGS84 + + /** + * We have to range-check the envelope before checking its size. Large unprojected y values interpreted as latitudes + * can yield negative cosines, producing negative estimated areas, producing false negatives on size checks. 
+ */ + private static void checkWgsEnvelopeRange (Envelope envelope) { + checkLon(envelope.getMinX()); + checkLon(envelope.getMaxX()); + checkLat(envelope.getMinY()); + checkLat(envelope.getMaxY()); + } + + private static void checkLon (double longitude) { + if (!Double.isFinite(longitude) || Math.abs(longitude) > 180) { + throw new DataSourceException("Longitude is not a finite number with absolute value below 180."); + } + } + + private static void checkLat (double latitude) { + // Longyearbyen on the Svalbard archipelago is the world's northernmost permanent settlement (78 degrees N). + if (!Double.isFinite(latitude) || Math.abs(latitude) > 80) { + throw new DataSourceException("Latitude is not a finite number with absolute value below 80."); + } + } + + /** + * Throw an exception if the envelope appears to be constructed from points spanning the 180 degree meridian. + * We check whether the envelope becomes narrower when its left edge is expressed as a longitude greater than 180 + * (shifted east by 180 degrees) and has points anywhere near the 180 degree line. + * The envelope must already be validated with checkWgsEnvelopeRange to ensure meaningful results. + */ + private static void checkWgsEnvelopeAntimeridian (Envelope envelope, String thingBeingChecked) { + double widthAcrossAntimeridian = (envelope.getMinX() + 180) - envelope.getMaxX(); + boolean nearAntimeridian = + Math.abs(envelope.getMinX() - 180D) < 10 || Math.abs(envelope.getMaxX() - 180D) < 10; + checkArgument( + !nearAntimeridian || envelope.getWidth() < widthAcrossAntimeridian, + thingBeingChecked + " may not span the antimeridian (180 degrees longitude)." + ); + } + + /** + * @return the approximate area of an Envelope in WGS84 lat/lon coordinates, in square kilometers. + */ + public static double roughWgsEnvelopeArea (Envelope wgsEnvelope) { + double lon0 = wgsEnvelope.getMinX(); + double lon1 = wgsEnvelope.getMaxX(); + double lat0 = wgsEnvelope.getMinY(); + double lat1 = wgsEnvelope.getMaxY(); + double height = lat1 - lat0; + double width = lon1 - lon0; + final double KM_PER_DEGREE_LAT = 111.133; + // Scale the x direction as if the Earth was a sphere. + // Error above the middle latitude should approximately cancel out error below that latitude. + double averageLat = (lat0 + lat1) / 2; + double xScale = FastMath.cos(FastMath.toRadians(averageLat)); + double area = (height * KM_PER_DEGREE_LAT) * (width * KM_PER_DEGREE_LAT * xScale); + return area; + } + + /** + * Throw an exception if the provided envelope is too big for a reasonable destination grid. + * Should also catch cases where data sets include points on both sides of the 180 degree meridian. + * This static utility method can be reused to test other automatically determined bounds such as those + * from OSM or GTFS uploads.
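As a rough worked example of the area estimate above (a sketch, not part of the changeset): a one-degree square centered near 45 degrees north comes out to roughly 111.133 * 111.133 * cos(45°) ≈ 8,700 km², well under the 250,000 km² limit, so the size check defined just below would pass. The coordinates here are hypothetical; the two methods are the public static ones added in this diff.

```java
import org.locationtech.jts.geom.Envelope;

import static com.conveyal.r5.common.GeometryUtils.checkWgsEnvelopeSize;
import static com.conveyal.r5.common.GeometryUtils.roughWgsEnvelopeArea;

public class EnvelopeCheckSketch {
    public static void main (String[] args) {
        // JTS Envelope constructor order is (minX, maxX, minY, maxY), i.e. longitudes then latitudes.
        Envelope oneDegreeSquare = new Envelope(-73.5, -72.5, 44.5, 45.5);
        // Approximately 111.133 * 111.133 * cos(45 degrees), about 8.7 thousand square kilometers.
        System.out.printf("Rough area: %.0f km2%n", roughWgsEnvelopeArea(oneDegreeSquare));
        // Throws if coordinates are out of range, the envelope spans the antimeridian,
        // or the rough area exceeds MAX_BOUNDING_BOX_AREA_SQ_KM (250,000 km2).
        checkWgsEnvelopeSize(oneDegreeSquare, "example upload");
    }
}
```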
+ */ + public static void checkWgsEnvelopeSize (Envelope envelope, String thingBeingChecked) { + checkWgsEnvelopeRange(envelope); + checkWgsEnvelopeAntimeridian(envelope, thingBeingChecked); + if (roughWgsEnvelopeArea(envelope) > MAX_BOUNDING_BOX_AREA_SQ_KM) { + throw new IllegalArgumentException(String.format( + "Geographic extent of %s (%.0f km2) exceeds limit of %.0f km2.", + thingBeingChecked, roughWgsEnvelopeArea(envelope), MAX_BOUNDING_BOX_AREA_SQ_KM + )); + } + } + } diff --git a/src/main/java/com/conveyal/r5/labeling/SpeedLabeler.java b/src/main/java/com/conveyal/r5/labeling/SpeedLabeler.java index 6360da4d2..7bbb599d1 100644 --- a/src/main/java/com/conveyal/r5/labeling/SpeedLabeler.java +++ b/src/main/java/com/conveyal/r5/labeling/SpeedLabeler.java @@ -13,8 +13,8 @@ import static systems.uom.common.USCustomary.KNOT; import static systems.uom.common.USCustomary.MILE_PER_HOUR; -import static tec.uom.se.unit.Units.KILOMETRE_PER_HOUR; -import static tec.uom.se.unit.Units.METRE_PER_SECOND; +import static tech.units.indriya.unit.Units.KILOMETRE_PER_HOUR; +import static tech.units.indriya.unit.Units.METRE_PER_SECOND; /** * Gets information about max speeds based on highway tags from build-config diff --git a/src/main/java/com/conveyal/r5/point_to_point/PointToPointRouterServer.java b/src/main/java/com/conveyal/r5/point_to_point/PointToPointRouterServer.java index 6dc06c770..4393999e3 100644 --- a/src/main/java/com/conveyal/r5/point_to_point/PointToPointRouterServer.java +++ b/src/main/java/com/conveyal/r5/point_to_point/PointToPointRouterServer.java @@ -154,26 +154,6 @@ private static void run(TransportNetwork transportNetwork) { PointToPointQuery pointToPointQuery = new PointToPointQuery(transportNetwork); ParetoServer paretoServer = new ParetoServer(transportNetwork); - // add cors header - before((req, res) -> res.header("Access-Control-Allow-Origin", "*")); - - options("/*", (request, response) -> { - - String accessControlRequestHeaders = request - .headers("Access-Control-Request-Headers"); - if (accessControlRequestHeaders != null) { - response.header("Access-Control-Allow-Headers", accessControlRequestHeaders); - } - - String accessControlRequestMethod = request - .headers("Access-Control-Request-Method"); - if (accessControlRequestMethod != null) { - response.header("Access-Control-Allow-Methods", accessControlRequestMethod); - } - - return "OK"; - }); - get("/metadata", (request, response) -> { response.header("Content-Type", "application/json"); RouterInfo routerInfo = new RouterInfo(); diff --git a/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java b/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java index 833fbfcd1..0ac37b77f 100644 --- a/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java +++ b/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java @@ -623,9 +623,7 @@ private void doFrequencySearchForRound (RaptorState outputState, FrequencyBoardi ) { FilteredPattern filteredPattern = filteredPatterns.patterns.get(patternIndex); TripPattern pattern = transit.tripPatterns.get(patternIndex); - int tripScheduleIndex = -1; // First loop iteration will immediately increment to 0. for (TripSchedule schedule : filteredPattern.runningFrequencyTrips) { - tripScheduleIndex++; // Loop through all the entries for this trip (time windows with service at a given frequency).
for (int frequencyEntryIdx = 0; frequencyEntryIdx < schedule.headwaySeconds.length; @@ -671,7 +669,7 @@ private void doFrequencySearchForRound (RaptorState outputState, FrequencyBoardi // this looks like a good candidate for polymorphism (board time strategy passed in). // The offset could be looked up by the getDepartureTime method itself, not passed in. if (frequencyBoardingMode == MONTE_CARLO) { - int offset = offsets.offsets.get(patternIndex)[tripScheduleIndex][frequencyEntryIdx]; + int offset = offsets.getOffsetSeconds(schedule, frequencyEntryIdx); newBoardingDepartureTimeAtStop = getRandomFrequencyDepartureTime( schedule, stopPositionInPattern, @@ -736,7 +734,9 @@ public int getRandomFrequencyDepartureTime ( int frequencyEntryIdx, int earliestTime ) { - checkState(offset >= 0); + checkState(offset >= 0, "Offset should be non-negative."); + checkState(offset < schedule.headwaySeconds[frequencyEntryIdx], "Offset should be less than headway."); + // Find the time the first vehicle in this entry will depart from the current stop: // The start time of the entry window, plus travel time from first stop to current stop, plus phase offset. // TODO check that frequency trips' stop times are always normalized to zero at first departure. diff --git a/src/main/java/com/conveyal/r5/profile/FrequencyRandomOffsets.java b/src/main/java/com/conveyal/r5/profile/FrequencyRandomOffsets.java index 8a47fa61b..ccc04c704 100644 --- a/src/main/java/com/conveyal/r5/profile/FrequencyRandomOffsets.java +++ b/src/main/java/com/conveyal/r5/profile/FrequencyRandomOffsets.java @@ -9,8 +9,14 @@ import org.apache.commons.math3.random.MersenneTwister; import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; - /** +import static com.google.common.base.Preconditions.checkElementIndex; +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; + +/** * Generates and stores departure time offsets for every frequency-based set of trips. * This holds only one set of offsets at a time. It is re-randomized before each Monte Carlo iteration. * Therefore we have no memory of exactly which offsets were used in a particular Monte Carlo search. @@ -18,24 +24,39 @@ * we'd need to make alternate implementations that pre-generate the entire set or use deterministic seeded generators. */ public class FrequencyRandomOffsets { - /** map from trip pattern index to a list of offsets for trip i and frequency entry j on that pattern */ - public final TIntObjectMap offsets = new TIntObjectHashMap<>(); - public final TransitLayer data; + + /** + * Map from trip pattern index (which is the same between filtered and unfiltered patterns) to a list of offsets + * (in seconds) for each frequency entry on each trip of that pattern. Final dimension null for non-frequency trips. + * In other words, patternIndex -> offsetsSeconds[tripOnPattern][frequencyEntryInTrip]. + */ + private final TIntObjectMap offsets = new TIntObjectHashMap<>(); + + private final TransitLayer data; + + /** + * Secondary copy of the offsets keyed on TripSchedule objects. + * This allows lookups where patterns and trips have been filtered and int indexes no longer match unfiltered ones. + * This can't simply replace the other offsets map because we need to fetch stop sequences from TripPatterns + * to look up phase targets on the fly. It's awkward to store them if they're looked up in advance because the + * natural key is frequency entries, which are not objects but just array slots. 
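A small usage sketch (not from this changeset) of the new lookup path used above in FastRaptorWorker: callers iterate over a filtered list of frequency TripSchedules and ask FrequencyRandomOffsets for each entry's offset by object rather than by the trip's index in the unfiltered pattern. Only getOffsetSeconds and the public headwaySeconds array are taken from this diff; the surrounding method and package locations are assumed to match the repository layout.

```java
import com.conveyal.r5.profile.FrequencyRandomOffsets;
import com.conveyal.r5.transit.TripSchedule;

import java.util.List;

public class OffsetLookupSketch {
    /** Print the randomized phase offset for every frequency entry of every running trip. */
    static void printOffsets (List<TripSchedule> runningFrequencyTrips, FrequencyRandomOffsets offsets) {
        for (TripSchedule schedule : runningFrequencyTrips) {
            for (int entry = 0; entry < schedule.headwaySeconds.length; entry++) {
                // Lookup is keyed on the TripSchedule object, so it still works after trips are filtered.
                int offsetSeconds = offsets.getOffsetSeconds(schedule, entry);
                System.out.println("entry " + entry + ": offset " + offsetSeconds + " s");
            }
        }
    }
}
```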
+ */ + private final Map offsetsForTripSchedule = new HashMap<>(); /** The mersenne twister is a higher quality random number generator than the one included with Java */ private MersenneTwister mt = new MersenneTwister(); public FrequencyRandomOffsets(TransitLayer data) { this.data = data; - - if (!data.hasFrequencies) + if (!data.hasFrequencies) { return; - + } + // Create skeleton empty data structure with slots for all offsets that will be generated. for (int pattIdx = 0; pattIdx < data.tripPatterns.size(); pattIdx++) { TripPattern tp = data.tripPatterns.get(pattIdx); - - if (!tp.hasFrequencies) continue; - + if (!tp.hasFrequencies) { + continue; + } int[][] offsetsThisPattern = new int[tp.tripSchedules.size()][]; for (int tripIdx = 0; tripIdx < tp.tripSchedules.size(); tripIdx++) { @@ -47,15 +68,31 @@ public FrequencyRandomOffsets(TransitLayer data) { } } + /** + * Return the random offset ("phase") in seconds generated for the given frequency entry of the given TripSchedule. + * Lookup is now by TripSchedule object as trips are filtered, losing track of their int indexes in unfiltered lists. + */ + public int getOffsetSeconds (TripSchedule tripSchedule, int freqEntryIndex) { + int[] offsetsPerEntry = offsetsForTripSchedule.get(tripSchedule); + checkState( + tripSchedule.nFrequencyEntries() == offsetsPerEntry.length, + "Offsets array length should exactly match number of freq entries in TripSchedule." + ); + int offset = offsetsPerEntry[freqEntryIndex]; + checkState(offset >= 0, "Frequency entry offset was not randomized."); + return offset; + } + /** * Take a new Monte Carlo draw if requested (i.e. if boarding assumption is not half-headway): for each * frequency-based route, choose how long after service starts the first vehicle leaves (the route's "phase"). * We run all Raptor rounds with one draw before proceeding to the next draw. */ public void randomize () { + // The number of TripSchedules for which we still need to generate a random offset. int remaining = 0; - // First, initialize all offsets for all trips and entries on this pattern with -1s + // First, initialize all offsets for all trips and entries on this pattern with -1 ("not yet randomized"). for (TIntObjectIterator it = offsets.iterator(); it.hasNext(); ) { it.advance(); for (int[] offsetsPerEntry : it.value()) { @@ -67,12 +104,16 @@ public void randomize () { } } + // If some randomized schedules are synchronized with other schedules ("phased") we perform multiple passes. In + // each pass we randomize only schedules whose phasing target is already known (randomized in a previous pass). + // This will loop forever if the phasing dependency graph has cycles - we must catch stalled progress. This is + // essentially performing depth-first traversal of the dependency graph iteratively without materializing it. while (remaining > 0) { - int remainingAfterPreviousRound = remaining; + int remainingAfterPreviousPass = remaining; for (TIntObjectIterator it = offsets.iterator(); it.hasNext(); ) { it.advance(); - + // The only thing we need from the TripPattern is the stop sequence, which is used only in phase solving. TripPattern pattern = data.tripPatterns.get(it.key()); int[][] val = it.value(); @@ -86,20 +127,24 @@ public void randomize () { } else { for (int frequencyEntryIndex = 0; frequencyEntryIndex < val[tripScheduleIndex].length; frequencyEntryIndex++) { if (schedule.phaseFromId == null || schedule.phaseFromId[frequencyEntryIndex] == null) { - // not phased. 
also, don't overwrite with new random number on each iteration, as other - trips may be phased from this one + // This trip is not phased so does not require solving. Generate a random offset + // immediately. Do this only once - don't overwrite with a new random number on each + // phase solving pass, as other trips may be phased from this one. if (val[tripScheduleIndex][frequencyEntryIndex] == -1) { val[tripScheduleIndex][frequencyEntryIndex] = mt.nextInt(schedule.headwaySeconds[frequencyEntryIndex]); remaining--; } } else { - if (val[tripScheduleIndex][frequencyEntryIndex] != -1) continue; // already randomized - - // find source phase information + // This trip is phased from another. + if (val[tripScheduleIndex][frequencyEntryIndex] != -1) { + continue; // Offset has already been generated. + } + // No random offset has been generated for this trip yet. + // Find source phase information. TODO refactor to use references instead of ints. int[] source = data.frequencyEntryIndexForId.get(schedule.phaseFromId[frequencyEntryIndex]); // Throw a meaningful error when invalid IDs are encountered instead of NPE. - // Really this should be done when applying the modifications rather than during the search. + // Really this should be done when resolving or applying the modifications rather than during search. if (source == null) { throw new RuntimeException("This pattern ID specified in a scenario does not exist: " + schedule.phaseFromId[frequencyEntryIndex]); @@ -120,7 +165,7 @@ public void randomize () { int sourceStopIndexInNetwork = data.indexForStopId.get(schedule.phaseFromStop[frequencyEntryIndex]); // TODO check that stop IDs were found. - + // TODO find all stop IDs in advance when resolving/applying modifications or constructing FrequencyRandomOffsets. while (sourceStopIndexInPattern < phaseFromPattern.stops.length && phaseFromPattern.stops[sourceStopIndexInPattern] != sourceStopIndexInNetwork) { sourceStopIndexInPattern++; @@ -179,10 +224,27 @@ public void randomize () { } } } - - if (remainingAfterPreviousRound == remaining && remaining > 0) { + if (remainingAfterPreviousPass == remaining && remaining > 0) { throw new IllegalArgumentException("Cannot solve phasing, you may have a circular reference!"); } + // Copy results of randomization to a Map keyed on TripSchedules (instead of TripPattern index ints). This + // allows looking up offsets in a context where we only have a filtered list of running frequency trips and + // don't know the original unfiltered index of the trip within the pattern. Ideally we'd just build the + // original map keyed on TripSchedules (or hypothetical FreqEntries) but that requires a lot of refactoring. + offsetsForTripSchedule.clear(); + for (TIntObjectIterator it = offsets.iterator(); it.hasNext(); ) { + it.advance(); + TripPattern tripPattern = data.tripPatterns.get(it.key()); + int[][] offsetsForTrip = it.value(); + for (int ts = 0; ts < tripPattern.tripSchedules.size(); ts++) { + TripSchedule tripSchedule = tripPattern.tripSchedules.get(ts); + // On patterns with mixed scheduled and frequency trips, scheduled trip slots will be null. + // Maps can store null values, but there's no point in storing them. We only store non-null arrays.
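The multi-pass solving strategy described in these comments can be illustrated with a generic, self-contained sketch (this is not the R5 code): each pass resolves only items whose phase target was resolved in an earlier pass, and a pass that makes no progress signals a circular reference. The Item record and the derived values are purely illustrative.

```java
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PhaseSolveSketch {

    /** An item that may be "phased" from (dependent on) another item, referenced by id. */
    record Item (String id, String phasedFromId) { }

    /** Resolve a stand-in value for every item over multiple passes, failing fast on cycles. */
    static Map<String, Integer> resolveAll (List<Item> items) {
        Map<String, Integer> resolved = new HashMap<>();
        int remaining = items.size();
        while (remaining > 0) {
            int remainingAfterPreviousPass = remaining;
            for (Item item : items) {
                if (resolved.containsKey(item.id())) continue;
                if (item.phasedFromId() == null || resolved.containsKey(item.phasedFromId())) {
                    // Stand-in for deriving this item's offset from its already-resolved phase target.
                    int base = (item.phasedFromId() == null) ? 0 : resolved.get(item.phasedFromId());
                    resolved.put(item.id(), base + 1);
                    remaining -= 1;
                }
            }
            if (remaining == remainingAfterPreviousPass) {
                throw new IllegalArgumentException("Cannot solve phasing, circular reference suspected.");
            }
        }
        return resolved;
    }

    public static void main (String[] args) {
        // b is phased from a, c from b; a is unphased. Everything resolves within three passes.
        System.out.println(resolveAll(List.of(
                new Item("c", "b"), new Item("b", "a"), new Item("a", null))));
    }
}
```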
+ if (offsetsForTrip[ts] != null) { + offsetsForTripSchedule.put(tripSchedule, offsetsForTrip[ts]); + } + } + } } } } diff --git a/src/main/java/com/conveyal/r5/profile/McRaptorSuboptimalPathProfileRouter.java b/src/main/java/com/conveyal/r5/profile/McRaptorSuboptimalPathProfileRouter.java index 181a8a845..b38380060 100644 --- a/src/main/java/com/conveyal/r5/profile/McRaptorSuboptimalPathProfileRouter.java +++ b/src/main/java/com/conveyal/r5/profile/McRaptorSuboptimalPathProfileRouter.java @@ -445,7 +445,7 @@ private boolean doOneRound () { // we have to check all trips and frequency entries because, unlike // schedule-based trips, these are not sorted int departure = tripSchedule.startTimes[frequencyEntry] + - offsets.offsets.get(patIdx)[currentTrip][frequencyEntry] + + offsets.getOffsetSeconds(tripSchedule, frequencyEntry) + tripSchedule.departures[stopPositionInPattern]; int latestDeparture = tripSchedule.endTimes[frequencyEntry] + diff --git a/src/main/java/com/conveyal/r5/streets/StreetLayer.java b/src/main/java/com/conveyal/r5/streets/StreetLayer.java index 65c7c7cfc..0372f5ddc 100644 --- a/src/main/java/com/conveyal/r5/streets/StreetLayer.java +++ b/src/main/java/com/conveyal/r5/streets/StreetLayer.java @@ -29,7 +29,6 @@ import gnu.trove.map.hash.TIntObjectHashMap; import gnu.trove.map.hash.TLongIntHashMap; import gnu.trove.set.TIntSet; -import org.geotools.geojson.geom.GeometryJSON; import org.locationtech.jts.geom.Coordinate; import org.locationtech.jts.geom.Envelope; import org.locationtech.jts.geom.Geometry; @@ -53,6 +52,7 @@ import java.util.stream.LongStream; import static com.conveyal.r5.analyst.scenario.PickupWaitTimes.NO_WAIT_ALL_STOPS; +import static com.conveyal.r5.common.GeometryUtils.checkWgsEnvelopeSize; import static com.conveyal.r5.streets.VertexStore.fixedDegreeGeometryToFloating; /** @@ -380,6 +380,7 @@ void loadFromOsm (OSM osm, boolean removeIslands, boolean saveVertexIndex) { if (!saveVertexIndex) vertexIndexForOsmNode = null; + checkWgsEnvelopeSize(envelope, "street layer"); osm = null; } @@ -1539,24 +1540,6 @@ public Geometry addedEdgesBoundingGeometry () { } } - /** - * Given a JTS Geometry in fixed-point latitude and longitude, log it as floating-point GeoJSON. - */ - public static void logFixedPointGeometry (String label, Geometry fixedPointGeometry) { - if (fixedPointGeometry == null){ - LOG.info("{} is null.", label); - } else if (fixedPointGeometry.isEmpty()) { - LOG.info("{} is empty.", label); - } else { - String geoJson = new GeometryJSON().toString(fixedDegreeGeometryToFloating(fixedPointGeometry)); - if (geoJson == null) { - LOG.info("Could not convert non-null geometry to GeoJSON"); - } else { - LOG.info("{} {}", label, geoJson); - } - } - } - /** * Finds all the P+R stations in given envelope. This might overselect (doesn't filter the objects from the * spatial index) but it's only used in visualizations. diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java index 358c24dc0..5927a15e4 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java @@ -113,6 +113,9 @@ public void rebuildTransientIndexes() { * doesn't really matter, particularly for analytics. Loading all he feeds into memory simulataneously shouldn't be * so bad with mapdb-based feeds, but it's still not great (due to instance caching, off heap allocations etc.) 
* Therefore we create the feeds within a stream which loads them one by one on demand. + * + * NOTE the feedId of the gtfs feeds loaded here will be the ones declared by the feeds or based on their filenames. + * This method makes no effort to impose the more unique feed IDs created by the Analysis backend. */ public static TransportNetwork fromFiles ( String osmSourceFile, @@ -194,7 +197,12 @@ public static TransportNetwork fromInputs (TNBuilderConfig tnBuilderConfig, OSM return transportNetwork; } - /** Scan a directory detecting all the files that are network inputs, then build a network from those files. */ + /** + * Scan a directory detecting all the files that are network inputs, then build a network from those files. + * + * NOTE the feedId of the gtfs feeds loaded here will be the ones declared by the feeds or based on their filenames. + * This method makes no effort to impose the more unique feed IDs created by the Analysis backend. + */ public static TransportNetwork fromDirectory (File directory) throws DuplicateFeedException { File osmFile = null; List gtfsFiles = new ArrayList<>(); diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java index b8587feaf..916ea7837 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java @@ -94,20 +94,22 @@ public void rememberScenario (Scenario scenario) { } /** - * Find or create a TransportNetwork for the scenario specified in a ProfileRequest. - * ProfileRequests may contain an embedded complete scenario, or it may contain only the ID of a scenario that - * must be fetched from S3. - * By design a particular scenario is always defined relative to a single base graph (it's never applied to multiple - * different base graphs). Therefore we can look up cached scenario networks based solely on their scenarioId - * rather than a compound key of (networkId, scenarioId). + * Find or create a TransportNetwork for the scenario specified in a ProfileRequest. ProfileRequests may contain an + * embedded complete scenario, or it may contain only the ID of a scenario that must be fetched from S3. By design + * a particular scenario is always defined relative to a single base graph (it's never applied to multiple different + * base graphs). Therefore we can look up cached scenario networks based solely on their scenarioId rather than a + * compound key of (networkId, scenarioId). * - * The fact that scenario networks are cached means that PointSet linkages will be automatically reused when + * The fact that scenario networks are cached means that PointSet linkages will be automatically reused. * TODO it seems to me that this method should just take a Scenario as its second parameter, and that resolving - * the scenario against caches on S3 or local disk should be pulled out into a separate function - * the problem is that then you resolve the scenario every time, even when the ID is enough to look up the already built network. - * So we need to pass the whole task in here, so either the ID or full scenario are visible. - * FIXME the fact that this whole thing is synchronized will cause each new scenario to be applied in sequence. - * I guess that's good as long as building distance tables is already parallelized. + * the scenario against caches on S3 or local disk should be pulled out into a separate function.
+ * The problem is that then you resolve the scenario every time, even when the ID is enough to look up the already + * built network. So we need to pass the whole task in here, so either the ID or full scenario are visible. + * + * Thread safety notes: This entire method is synchronized so access by multiple threads will be sequential. + * The first thread will have a chance to build and store the requested scenario before any others see it. + * This means each new scenario will be applied one after the other. This is probably OK as long as building egress + * tables is already parallelized. */ public synchronized TransportNetwork getNetworkForScenario (String networkId, String scenarioId) { // If the networkId is different than previous calls, a new network will be loaded. Its transient nested map @@ -138,15 +140,15 @@ public synchronized TransportNetwork getNetworkForScenario (String networkId, St return scenarioNetwork; } - private String getScenarioFilename(String networkId, String scenarioId) { + public static String getScenarioFilename (String networkId, String scenarioId) { return String.format("%s_%s.json", networkId, scenarioId); } - private String getR5NetworkFilename(String networkId) { + private static String getR5NetworkFilename (String networkId) { return String.format("%s_%s.dat", networkId, KryoNetworkSerializer.NETWORK_FORMAT_VERSION); } - private FileStorageKey getR5NetworkFileStorageKey (String networkId) { + private static FileStorageKey getR5NetworkFileStorageKey (String networkId) { return new FileStorageKey(BUNDLES, getR5NetworkFilename(networkId)); } diff --git a/src/main/java/com/conveyal/r5/util/ShapefileReader.java b/src/main/java/com/conveyal/r5/util/ShapefileReader.java index fd7540aa0..ade224ed2 100644 --- a/src/main/java/com/conveyal/r5/util/ShapefileReader.java +++ b/src/main/java/com/conveyal/r5/util/ShapefileReader.java @@ -1,5 +1,7 @@ package com.conveyal.r5.util; +import com.conveyal.analysis.datasource.DataSourceException; +import com.conveyal.analysis.datasource.SpatialAttribute; import org.geotools.data.DataStore; import org.geotools.data.DataStoreFinder; import org.geotools.data.FeatureSource; @@ -11,33 +13,42 @@ import org.geotools.referencing.CRS; import org.geotools.referencing.crs.DefaultGeographicCRS; import org.locationtech.jts.geom.Envelope; -import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.geom.Lineal; +import org.locationtech.jts.geom.Polygonal; +import org.locationtech.jts.geom.Puntal; import org.opengis.feature.simple.SimpleFeature; import org.opengis.feature.simple.SimpleFeatureType; +import org.opengis.feature.type.AttributeDescriptor; +import org.opengis.feature.type.AttributeType; import org.opengis.filter.Filter; import org.opengis.referencing.FactoryException; import org.opengis.referencing.crs.CoordinateReferenceSystem; import org.opengis.referencing.operation.MathTransform; import org.opengis.referencing.operation.TransformException; +import java.io.Closeable; import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Spliterator; import java.util.Spliterators; +import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; /** * Encapsulate Shapefile reading logic */ -public class ShapefileReader { +public class ShapefileReader implements Closeable { private final FeatureCollection features; private final 
DataStore store; private final FeatureSource source; - private final CoordinateReferenceSystem crs; + public final CoordinateReferenceSystem crs; private final MathTransform transform; public ShapefileReader (File shapefile) throws IOException, FactoryException { @@ -65,7 +76,7 @@ public Stream stream () throws IOException { FeatureIterator wrapped = features.features(); @Override - public boolean hasNext() { + public boolean hasNext () { boolean hasNext = wrapped.hasNext(); if (!hasNext) { // Prevent keeping a lock on the shapefile. @@ -77,7 +88,7 @@ public boolean hasNext() { } @Override - public SimpleFeature next() { + public SimpleFeature next () { return wrapped.next(); } }; @@ -89,6 +100,10 @@ public ReferencedEnvelope getBounds () throws IOException { return source.getBounds(); } + public List numericAttributes () { + return features.getSchema().getAttributeDescriptors().stream().filter(d -> Number.class.isAssignableFrom(d.getType().getBinding())).map(AttributeDescriptor::getLocalName).collect(Collectors.toList()); + } + public double getAreaSqKm () throws IOException, TransformException, FactoryException { CoordinateReferenceSystem webMercatorCRS = CRS.decode("EPSG:3857"); MathTransform webMercatorTransform = CRS.findMathTransform(crs, webMercatorCRS, true); @@ -99,7 +114,10 @@ public double getAreaSqKm () throws IOException, TransformException, FactoryExce public Stream wgs84Stream () throws IOException, TransformException { return stream().map(f -> { - Geometry g = (Geometry) f.getDefaultGeometry(); + org.locationtech.jts.geom.Geometry g = (org.locationtech.jts.geom.Geometry) f.getDefaultGeometry(); + if (g == null) { + throw new DataSourceException("Null (missing) geometry on feature: " + f.getID()); + } try { // TODO does this leak beyond this function? f.setDefaultGeometry(JTS.transform(g, transform)); @@ -117,12 +135,57 @@ public Envelope wgs84Bounds () throws IOException, TransformException { /** * Failure to call this will leave the shapefile locked, which may mess with future attempts to use it. */ + @Override public void close () { // Note that you also have to close the iterator, see iterator wrapper code above. store.dispose(); } - public int getFeatureCount() throws IOException { + public int featureCount () throws IOException { return source.getCount(Query.ALL); } + + public List attributes () { + return attributes(features.getSchema()); + } + + /** Static utility method for reuse in other classes importing GeoTools FeatureCollections. */ + public static List attributes (SimpleFeatureType schema) { + List attributes = new ArrayList<>(); + HashSet uniqueAttributes = new HashSet<>(); + schema.getAttributeDescriptors().forEach(d -> { + String attributeName = d.getLocalName(); + AttributeType type = d.getType(); + if (type != null) { + attributes.add(new SpatialAttribute(attributeName, type)); + uniqueAttributes.add(attributeName); + } + }); + if (attributes.size() != uniqueAttributes.size()) { + throw new DataSourceException("Spatial layer has attributes with duplicate names."); + } + return attributes; + } + + /** These are very broad. For example, line includes linestring and multilinestring. 
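Because ShapefileReader now implements Closeable, callers can use try-with-resources to guarantee the GeoTools store is disposed and the file lock released. A minimal usage sketch follows; the input path is hypothetical and the methods are the ones declared in this diff.

```java
import com.conveyal.r5.util.ShapefileReader;

import java.io.File;

public class ShapefileReaderSketch {
    public static void main (String[] args) throws Exception {
        // Hypothetical input file; the constructor may throw IOException or FactoryException.
        try (ShapefileReader reader = new ShapefileReader(new File("zones.shp"))) {
            System.out.println("Features: " + reader.featureCount());
            System.out.println("Geometry type: " + reader.geometryType());
            System.out.println("Numeric attributes: " + reader.numericAttributes());
        } // close() disposes the DataStore, releasing the lock on the shapefile.
    }
}
```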
*/ + public enum GeometryType { + POLYGON, POINT, LINE, PIXEL; + public static GeometryType forBindingClass (Class binding) { + if (Polygonal.class.isAssignableFrom(binding)) return POLYGON; + if (Puntal.class.isAssignableFrom(binding)) return POINT; + if (Lineal.class.isAssignableFrom(binding)) return LINE; + throw new IllegalArgumentException("Could not determine geometry type of features in DataSource."); + } + } + + public GeometryType geometryType () { + return geometryType(features); + } + + /** Static utility method for reuse in other classes importing GeoTools FeatureCollections. */ + public static GeometryType geometryType (FeatureCollection featureCollection) { + Class geometryClass = featureCollection.getSchema().getGeometryDescriptor().getType().getBinding(); + return GeometryType.forBindingClass(geometryClass); + } + } diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml index 955c22851..00e0cdabf 100644 --- a/src/main/resources/logback.xml +++ b/src/main/resources/logback.xml @@ -1,17 +1,19 @@ + + - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + %date [%thread] %-5level %logger{10} - %msg%n - + + + diff --git a/src/test/java/com/conveyal/analysis/datasource/GeoJsonDataSourceIngesterTest.java b/src/test/java/com/conveyal/analysis/datasource/GeoJsonDataSourceIngesterTest.java new file mode 100644 index 000000000..35d3ccee6 --- /dev/null +++ b/src/test/java/com/conveyal/analysis/datasource/GeoJsonDataSourceIngesterTest.java @@ -0,0 +1,61 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.analysis.models.SpatialDataSource; +import org.junit.jupiter.api.Test; + +import static com.conveyal.analysis.datasource.SpatialDataSourceIngesterTest.assertIngestException; +import static com.conveyal.analysis.datasource.SpatialDataSourceIngesterTest.testIngest; +import static com.conveyal.analysis.models.DataSourceValidationIssue.Level.ERROR; +import static com.conveyal.file.FileStorageFormat.GEOJSON; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Beyond the standard cases in SpatialDataSourceIngesterTest, special cases for GeoJSON ingestion. + * TODO Maybe instead of loading from files, build GeoJSON programmatically, serialize it to temp files, then load it. + */ +class GeoJsonDataSourceIngesterTest { + + @Test + void typeMismatch () { + SpatialDataSource spatialDataSource = testIngest(GEOJSON, "hkzones-type-mismatch"); + // Currently we can't detect problems with inconsistent schema across features. + // GeoTools seems to report the same schema on every feature. + assertTrue(spatialDataSource.issues.isEmpty()); + } + + @Test + void extraAttribute () { + SpatialDataSource spatialDataSource = testIngest(GEOJSON, "hkzones-extra-attribute"); + // Currently we can't detect problems with inconsistent schema across features. + // GeoTools seems to report the same schema on every feature. + assertTrue(spatialDataSource.issues.isEmpty()); + } + + @Test + void mixedNumeric () { + SpatialDataSource spatialDataSource = testIngest(GEOJSON, "hkzones-mixed-numeric"); + // Currently we can't detect problems with inconsistent schema across features. + // GeoTools seems to report the same schema on every feature. 
+ assertTrue(spatialDataSource.issues.isEmpty()); + } + + @Test + void mixedGeometries () { + SpatialDataSource spatialDataSource = testIngest(GEOJSON, "hkzones-mixed-geometries"); + // Inconsistent geometry between features is detected. + assertTrue(spatialDataSource.issues.stream().anyMatch(i -> i.level == ERROR)); + } + + @Test + void fileEmpty () { + assertIngestException(GEOJSON, "empty", DataSourceException.class, "length"); + } + + @Test + void fileTooSmall () { + assertIngestException(GEOJSON, "too-small", DataSourceException.class, "length"); + } + +} diff --git a/src/test/java/com/conveyal/analysis/datasource/ShapefileDataSourceIngesterTest.java b/src/test/java/com/conveyal/analysis/datasource/ShapefileDataSourceIngesterTest.java new file mode 100644 index 000000000..78157ead3 --- /dev/null +++ b/src/test/java/com/conveyal/analysis/datasource/ShapefileDataSourceIngesterTest.java @@ -0,0 +1,45 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.analysis.models.SpatialDataSource; +import org.junit.jupiter.api.Test; + +import static com.conveyal.analysis.datasource.SpatialDataSourceIngesterTest.assertIngestException; +import static com.conveyal.analysis.datasource.SpatialDataSourceIngesterTest.testIngest; +import static com.conveyal.file.FileStorageFormat.GEOJSON; +import static com.conveyal.file.FileStorageFormat.SHP; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Beyond the standard cases in SpatialDataSourceIngesterTest, special cases for ESRI Shapefile ingestion. + * Test that we can correctly read Shapefiles with many different characteristics, and can detect problematic inputs + * including ones we've encountered in practice. + */ +class ShapefileDataSourceIngesterTest { + + /** + * Test on a shapefile that has mostly geometries of type "point" but some geometries of type "null", which is to + * say that some of the records in the shapefile have missing geometries. The GeoTools shapefile reader will not + * tolerate geometries of mixed types, but will tolerate inputs like this silently. We want to detect and refuse. + */ + @Test + void nullPointGeometries () { + assertIngestException(SHP, "nl-null-points", DataSourceException.class, "missing"); + } + + /** + * Test on a shapefile that has two attributes with the same name (one text and one integer). This is actually + * possible and has been encountered in the wild, probably due to names being truncated to a fixed length (the + * DBF file used in shapefiles allows field names of at most ten characters). Apparently this has been "fixed" in + * Geotools which now silently renames one of the columns with a numeric suffix. It would be preferable to just + * refuse this kind of input, since the fields are likely to have different names when opened in different software. + */ + @Test + void duplicateAttributeNames () { + SpatialDataSource spatialDataSource = testIngest(SHP, "duplicate-fields"); + // id, the_geom, DDDDDDDDDD, and DDDDDDDDDD. The final one will be renamed on the fly to DDDDDDDDDD1. 
+ assertTrue(spatialDataSource.attributes.size() == 4); + assertTrue(spatialDataSource.attributes.get(3).name.endsWith("1")); + } + +} \ No newline at end of file diff --git a/src/test/java/com/conveyal/analysis/datasource/SpatialDataSourceIngesterTest.java b/src/test/java/com/conveyal/analysis/datasource/SpatialDataSourceIngesterTest.java new file mode 100644 index 000000000..e32cdf9d9 --- /dev/null +++ b/src/test/java/com/conveyal/analysis/datasource/SpatialDataSourceIngesterTest.java @@ -0,0 +1,111 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.analysis.UserPermissions; +import com.conveyal.analysis.models.SpatialDataSource; +import com.conveyal.file.FileStorageFormat; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; +import org.locationtech.jts.geom.Envelope; + +import java.io.File; +import java.util.List; +import java.util.Optional; + +import static com.conveyal.analysis.datasource.SpatialAttribute.Type.NUMBER; +import static com.conveyal.analysis.datasource.SpatialAttribute.Type.TEXT; +import static com.conveyal.file.FileStorageFormat.GEOJSON; +import static com.conveyal.file.FileStorageFormat.SHP; +import static com.conveyal.r5.util.ShapefileReader.GeometryType.POLYGON; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Test ingestion of all different GeoTools based geographic (spatial) data source files, including GeoPackage. + */ +class SpatialDataSourceIngesterTest { + + // TODO parameter provider method for typed FileStorageFormat enum values instead of String arrays. + // Or a separate local enum that gets mapped to the FileStorageFormat enum. + + /** Envelope around Hong Kong Island, Kowloon, and Lamma. */ + public static final Envelope HK_ENVELOPE = new Envelope(114.09, 114.40, 22.18, 22.34); + + /** + * Test on small basic data sets with no errors, but projected into some relatively obscure local coordinate system. + */ + @ParameterizedTest + @EnumSource(names = {"GEOPACKAGE", "GEOJSON", "SHP"}) + void basicValid (FileStorageFormat format) { + // JUnit can't yet do cartesian products of parameters - iterate within this method. + // According to Github issues, soon we should have List Arguments.cartesianProduct(Set... sets) + // TODO for (String geomType : List.of("point", "polygon", "linestring")) { + // For now all test files are polygons with three features and two additional attributes (name and count). + for (String fileSuffix : List.of("wgs84", "projected")) { + if (format == GEOJSON && "projected".equals(fileSuffix)) { + // GeoTools silently ignores (illegal) non-WGS84 CRS in GeoJSON files. + assertIngestException(format, "valid-polygon-" + fileSuffix, DataSourceException.class, "value"); + } else { + SpatialDataSource spatialDataSource = testIngest(format, "valid-polygon-" + fileSuffix); + assertTrue(spatialDataSource.issues.isEmpty()); + assertTrue(spatialDataSource.geometryType == POLYGON); + assertTrue(spatialDataSource.featureCount == 3); + assertTrue(hasAttribute(spatialDataSource.attributes, "Name", TEXT)); + assertTrue(hasAttribute(spatialDataSource.attributes, "Count", NUMBER)); + assertFalse(hasAttribute(spatialDataSource.attributes, "Count", TEXT)); + // FIXME projected DataSources are returning projected bounds, not WGS84. 
+ assertTrue(HK_ENVELOPE.contains(spatialDataSource.wgsBounds.envelope())); + } + } + } + + /** Test on files containing huge shapes: the continents of Africa, South America, and Australia. */ + @ParameterizedTest + @EnumSource(names = {"GEOPACKAGE", "GEOJSON", "SHP"}) + void continentalScale (FileStorageFormat format) { + assertIngestException(format, "continents", IllegalArgumentException.class, "exceeds"); + } + + /** + * Test on projected (non-WGS84) data containing shapes on both sides of the 180 degree antimeridian. + * This case was encountered in the wild: the North Island and the Chatham islands, both part of New Zealand. + */ + @ParameterizedTest + @EnumSource(names = {"GEOPACKAGE", "GEOJSON", "SHP"}) + void newZealandAntimeridian (FileStorageFormat format) { + assertIngestException(format, "new-zealand-antimeridian", IllegalArgumentException.class, "180"); + } + + public static SpatialDataSource testIngest (FileStorageFormat format, String inputFile) { + TestingProgressListener progressListener = new TestingProgressListener(); + DataSourceIngester ingester = DataSourceIngester.forFormat(format); + ingester.initializeDataSource("TestName", "Test Description", "test_region_id", + new UserPermissions("test@email.com", false, "test_group")); + File resourceFile = getResourceAsFile(String.join(".", inputFile, format.extension)); + ingester.ingest(resourceFile, progressListener); + progressListener.assertUsedCorrectly(); + return ((SpatialDataSource) ingester.dataSource()); + } + + public static void assertIngestException ( + FileStorageFormat format, String inputFile, Class exceptionType, String messageWord + ) { + Throwable throwable = assertThrows(exceptionType, () -> testIngest(format, inputFile), + "Expected failure with exception type: " + exceptionType.getSimpleName()); + assertTrue(throwable.getMessage().contains(messageWord), + "Exception message is expected to contain the text: " + messageWord); + } + + /** Method is static, so resolution is always relative to the package of the class where it's defined. */ + private static File getResourceAsFile (String resource) { + // This just removes the protocol and query parameter part of the URL, which for File URLs leaves a file path. + return new File(SpatialDataSourceIngesterTest.class.getResource(resource).getFile()); + } + + protected static boolean hasAttribute (List attributes, String name, SpatialAttribute.Type type) { + Optional optional = attributes.stream().filter(a -> a.name.equals(name)).findFirst(); + return optional.isPresent() && optional.get().type == type; + // && attribute.occurrances > 0 + } + +} diff --git a/src/test/java/com/conveyal/analysis/datasource/TestingProgressListener.java b/src/test/java/com/conveyal/analysis/datasource/TestingProgressListener.java new file mode 100644 index 000000000..ac7cffd21 --- /dev/null +++ b/src/test/java/com/conveyal/analysis/datasource/TestingProgressListener.java @@ -0,0 +1,47 @@ +package com.conveyal.analysis.datasource; + +import com.conveyal.r5.analyst.progress.ProgressListener; +import com.conveyal.r5.analyst.progress.WorkProduct; +import org.junit.jupiter.api.Assertions; + +import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * A mock ProgressListener for use in tests, which makes sure all the interface methods are called and shows progress. 
+ */ +public class TestingProgressListener implements ProgressListener { + + private String description; + private WorkProduct workProduct; + private int taskCount = 0; + private int totalElements = 0; + private int elementsCompleted = 0; + + @Override + public void beginTask (String description, int totalElements) { + this.description = description; + this.totalElements = totalElements; + taskCount += 1; + } + + @Override + public void increment (int n) { + elementsCompleted += n; + assertTrue(elementsCompleted <= totalElements); + } + + @Override + public void setWorkProduct (WorkProduct workProduct) { + this.workProduct = workProduct; + } + + public void assertUsedCorrectly () { + assertNotNull(description); + assertNotNull(workProduct); + assertTrue(taskCount > 0); + assertTrue(elementsCompleted > 0); + assertEquals(totalElements, elementsCompleted); + } + +} diff --git a/src/test/java/com/conveyal/r5/analyst/GridTest.java b/src/test/java/com/conveyal/r5/analyst/GridTest.java index 27b3afc7d..6798f548e 100644 --- a/src/test/java/com/conveyal/r5/analyst/GridTest.java +++ b/src/test/java/com/conveyal/r5/analyst/GridTest.java @@ -7,12 +7,11 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; -import java.util.Arrays; import java.util.List; import java.util.Random; -import java.util.stream.DoubleStream; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; /** * Tests for the Grid class, which holds destination counts in tiled spherical mercator pixels. @@ -31,7 +30,7 @@ public void testGetMercatorEnvelopeMeters() throws Exception { int zoom = 4; int xTile = 14; int yTile = 9; - Grid grid = new Grid(zoom, 256, 256, 256 * yTile, 256 * xTile); + Grid grid = new Grid(256 * xTile, 256 * yTile, 256, 256, zoom); ReferencedEnvelope envelope = grid.getWebMercatorExtents().getMercatorEnvelopeMeters(); assertEquals(15028131.257091936, envelope.getMinX(), 0.1); assertEquals(-5009377.085697312, envelope.getMinY(), 0.1); @@ -42,7 +41,7 @@ public void testGetMercatorEnvelopeMeters() throws Exception { zoom = 5; xTile = 16; yTile = 11; - grid = new Grid(zoom, 256, 256, 256 * yTile, 256 * xTile); + grid = new Grid(256 * xTile, 256 * yTile, 256, 256, zoom); envelope = grid.getWebMercatorExtents().getMercatorEnvelopeMeters(); assertEquals(0, envelope.getMinX(), 0.1); assertEquals(5009377.085697312, envelope.getMinY(), 0.1); @@ -106,9 +105,9 @@ private static Grid generateRandomGrid (Random random, boolean wholeNumbersOnly) int width = random.nextInt(MAX_GRID_WIDTH_PIXELS) + 1; int height = random.nextInt(MAX_GRID_WIDTH_PIXELS) + 1; - Grid grid = new Grid(zoom, width, height, north, west); - for (int y = 0; y < grid.height; y++) { - for (int x = 0; x < grid.width; x++) { + Grid grid = new Grid(west, north, width, height, zoom); + for (int y = 0; y < grid.extents.height; y++) { + for (int x = 0; x < grid.extents.width; x++) { double amount = random.nextDouble() * MAX_AMOUNT; if (wholeNumbersOnly) { amount = Math.round(amount); @@ -121,11 +120,7 @@ private static Grid generateRandomGrid (Random random, boolean wholeNumbersOnly) private static void assertGridSemanticEquals(Grid g1, Grid g2, boolean tolerateRounding) { // Note that the name field is excluded because it does not survive serialization. 
- assertEquals(g1.zoom, g2.zoom); - assertEquals(g1.north, g2.north); - assertEquals(g1.west, g2.west); - assertEquals(g1.width, g2.width); - assertEquals(g1.height, g2.height); + assertTrue(g1.hasEqualExtents(g2)); assertArrayEquals(g1.grid, g2.grid, tolerateRounding); } diff --git a/src/test/java/com/conveyal/r5/analyst/GridTransformWrapperTest.java b/src/test/java/com/conveyal/r5/analyst/GridTransformWrapperTest.java index 9e5f33eb0..a74fc3f87 100644 --- a/src/test/java/com/conveyal/r5/analyst/GridTransformWrapperTest.java +++ b/src/test/java/com/conveyal/r5/analyst/GridTransformWrapperTest.java @@ -4,7 +4,6 @@ import java.util.ArrayList; import java.util.List; -import java.util.Random; import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.*; @@ -18,11 +17,11 @@ class GridTransformWrapperTest { void testTwoAdjacentGrids () { // Two grids side by side, right one bigger than than the left, with top 20 pixels lower - Grid leftGrid = new Grid(10, 200, 300, 1000, 2000); - Grid rightGrid = new Grid(10, 300, 400, 1020, 2200); + Grid leftGrid = new Grid(2000, 1000, 200, 300, 10); + Grid rightGrid = new Grid(2200, 1020, 300, 400, 10); // One minimum bounding grid exactly encompassing the other two. - Grid superGrid = new Grid(10, 500, 400, 1000, 2000); + Grid superGrid = new Grid(2000, 1000, 500, 400, 10); // Make a column of pixel weights 2 pixels wide and 26 pixels high. List weights = new ArrayList<>(); diff --git a/src/test/java/com/conveyal/r5/analyst/network/GridGtfsGenerator.java b/src/test/java/com/conveyal/r5/analyst/network/GridGtfsGenerator.java index 36648f113..6699dc879 100644 --- a/src/test/java/com/conveyal/r5/analyst/network/GridGtfsGenerator.java +++ b/src/test/java/com/conveyal/r5/analyst/network/GridGtfsGenerator.java @@ -2,7 +2,6 @@ import com.conveyal.gtfs.GTFSFeed; import com.conveyal.gtfs.model.Agency; -import com.conveyal.gtfs.model.Calendar; import com.conveyal.gtfs.model.CalendarDate; import com.conveyal.gtfs.model.Frequency; import com.conveyal.gtfs.model.Route; @@ -10,11 +9,15 @@ import com.conveyal.gtfs.model.Stop; import com.conveyal.gtfs.model.StopTime; import com.conveyal.gtfs.model.Trip; -import org.checkerframework.checker.units.qual.A; import org.mapdb.Fun; import java.time.LocalDate; -import java.util.stream.IntStream; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; + +import static com.conveyal.r5.analyst.network.GridRoute.Materialization.EXACT_TIMES; +import static com.conveyal.r5.analyst.network.GridRoute.Materialization.STOP_TIMES; /** * Create a MapDB backed GTFS object from a GridLayout, not necessarily to be written out as a standard CSV/ZIP feed, @@ -24,9 +27,9 @@ public class GridGtfsGenerator { public static final String FEED_ID = "GRID"; public static final String AGENCY_ID = "AGENCY"; - public static final String SERVICE_ID = "ALL"; - public static final LocalDate GTFS_DATE = LocalDate.of(2020, 1, 1); + public static final LocalDate WEEKDAY_DATE = LocalDate.of(2020, 1, 1); + public static final LocalDate WEEKEND_DATE = LocalDate.of(2020, 1, 4); public final GridLayout gridLayout; @@ -54,14 +57,21 @@ private void addCommonTables () { agency.agency_id = AGENCY_ID; agency.agency_name = AGENCY_ID; feed.agency.put(agency.agency_id, agency); + addService(GridRoute.Services.WEEKDAY, 1, 2, 3); + addService(GridRoute.Services.WEEKEND, 4, 5); + addService(GridRoute.Services.ALL, 1, 2, 3, 4, 5); + } - Service service = new Service(SERVICE_ID); - CalendarDate calendarDate = new CalendarDate(); - 
calendarDate.date = LocalDate.of(2020, 01, 01); - calendarDate.service_id = SERVICE_ID; - calendarDate.exception_type = 1; - service.calendar_dates.put(calendarDate.date, calendarDate); - feed.services.put(service.service_id, service); + private void addService (GridRoute.Services grs, int... daysOfJanuary2020) { + Service gtfsService = new Service(grs.name()); + for (int day : daysOfJanuary2020) { + CalendarDate calendarDate = new CalendarDate(); + calendarDate.date = LocalDate.of(2020, 01, day); + calendarDate.service_id = gtfsService.service_id; + calendarDate.exception_type = 1; + gtfsService.calendar_dates.put(calendarDate.date, calendarDate); + } + feed.services.put(gtfsService.service_id, gtfsService); + } private void addRoute (GridRoute gridRoute) { @@ -98,40 +108,52 @@ public void addStopsForRoute (GridRoute route, boolean back) { } public void addTripsForRoute (GridRoute route, boolean back) { - if (route.headwayMinutes > 0) { - int tripIndex = 0; - int start = route.startHour * 60 * 60; - int end = route.endHour * 60 * 60; - int headway = route.headwayMinutes * 60; - - // Maybe we should use exact_times = 1 instead of generating individual trips. - for (int startTime = start; startTime < end; startTime += headway, tripIndex++) { - String tripId = addTrip(route, back, startTime, tripIndex); - if (route.pureFrequency) { - Frequency frequency = new Frequency(); - frequency.start_time = start; - frequency.end_time = end; - frequency.headway_secs = headway; - frequency.exact_times = 0; - feed.frequencies.add(new Fun.Tuple2<>(tripId, frequency)); - // Do not make any additional trips, frequency entry represents them. - break; - } - } - } else { + // An explicit array of trip start times takes precedence over timetables. + if (route.startTimes != null) { for (int i = 0; i < route.startTimes.length; i++) { - addTrip(route, back, route.startTimes[i], i); + addTrip(route, back, route.startTimes[i], i, GridRoute.Services.ALL); + } + return; + } + // For the non-STOP_TIMES case, a single trip per service that will be referenced by all the timetables. + // We should somehow also allow for different travel speeds per timetable, and default fallback speeds. + Map<GridRoute.Services, String> tripIdForService = new HashMap<>(); + int tripIndex = 0; + for (GridRoute.Timetable timetable : route.timetables) { + int start = timetable.startHour * 60 * 60; + int end = timetable.endHour * 60 * 60; + int headway = timetable.headwayMinutes * 60; + if (route.materialization == STOP_TIMES) { + // For STOP_TIMES, make N different trips. + for (int startTime = start; startTime < end; startTime += headway, tripIndex++) { + addTrip(route, back, startTime, tripIndex, timetable.service); + } + } else { + // Not STOP_TIMES, so this is a frequency entry (either EXACT_TIMES or PURE_FREQ). + // Make only one trip per service ID, all frequency entries reference this single trip. + String tripId = tripIdForService.get(timetable.service); + if (tripId == null) { + tripId = addTrip(route, back, 0, tripIndex, timetable.service); + tripIdForService.put(timetable.service, tripId); + tripIndex++; + } + Frequency frequency = new Frequency(); + frequency.start_time = start; + frequency.end_time = end; + frequency.headway_secs = headway; + frequency.exact_times = (route.materialization == EXACT_TIMES) ?
1 : 0; + feed.frequencies.add(new Fun.Tuple2<>(tripId, frequency)); } } } - private String addTrip (GridRoute route, boolean back, int startTime, int tripIndex) { + private String addTrip (GridRoute route, boolean back, int startTime, int tripIndex, GridRoute.Services service) { Trip trip = new Trip(); trip.direction_id = back ? 1 : 0; String tripId = String.format("%s:%d:%d", route.id, tripIndex, trip.direction_id); trip.trip_id = tripId; trip.route_id = route.id; - trip.service_id = SERVICE_ID; + trip.service_id = service.name(); feed.trips.put(trip.trip_id, trip); int dwell = gridLayout.transitDwellSeconds; int departureTime = startTime; diff --git a/src/test/java/com/conveyal/r5/analyst/network/GridRoute.java b/src/test/java/com/conveyal/r5/analyst/network/GridRoute.java index d269a7ed7..a108204bf 100644 --- a/src/test/java/com/conveyal/r5/analyst/network/GridRoute.java +++ b/src/test/java/com/conveyal/r5/analyst/network/GridRoute.java @@ -1,5 +1,7 @@ package com.conveyal.r5.analyst.network; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.stream.Stream; @@ -18,24 +20,37 @@ public class GridRoute { public int stopSpacingBlocks; public Orientation orientation; public boolean bidirectional; - public int startHour; - public int endHour; + /** Explicit departure times from first stop; if set, timetables will be ignored. */ public int[] startTimes; - /** - * Override default hop times. Map of (trip, stopAtStartOfHop) to factor by which default hop is multiplied - */ + + /** Override default hop times. Map of (trip, stopAtStartOfHop) to factor by which default hop is multiplied. */ public Map hopTimeScaling; - public int headwayMinutes; - public boolean pureFrequency; + + /** These will be service codes, and can be referenced in timetables. */ + public static enum Services { ALL, WEEKDAY, WEEKEND } + + /** How a Timetable will be translated into GTFS data - stop_times or frequencies with or without exact_times. */ + public static enum Materialization { STOP_TIMES, PURE_FREQ, EXACT_TIMES } + + /** All Timetables on a GridRoute will be materialized in the same way, according to this field. */ + public Materialization materialization = Materialization.STOP_TIMES; + + /** This defines something like a frequency in GTFS, but can also be used to generate normal stop_times trips.
*/ + public static class Timetable { + Services service; + public int startHour; + public int endHour; + int headwayMinutes; + } + + public List<Timetable> timetables = new ArrayList<>(); + private Stream stopIds() { return null; } - public static enum Orientation { - HORIZONTAL, VERTICAL - } + public static enum Orientation { HORIZONTAL, VERTICAL } public int nBlocksLength () { return (nStops - 1) * stopSpacingBlocks; @@ -88,11 +103,9 @@ private static GridRoute newBareRoute (GridLayout gridLayout, int headwayMinutes route.id = gridLayout.nextIntegerId(); // Avoid collisions when same route is added multiple times route.stopSpacingBlocks = 1; route.gridLayout = gridLayout; - route.startHour = 5; - route.endHour = 10; route.bidirectional = true; - route.headwayMinutes = headwayMinutes; route.nStops = gridLayout.widthAndHeightInBlocks + 1; + route.addTimetable(Services.WEEKDAY, 5, 10, headwayMinutes); return route; } @@ -105,6 +118,21 @@ public static GridRoute newHorizontalRoute (GridLayout gridLayout, int row, int return route; } + public GridRoute pureFrequency () { + this.materialization = Materialization.PURE_FREQ; + return this; + } + + public GridRoute addTimetable (Services service, int startHour, int endHour, int headwayMinutes) { + Timetable timetable = new Timetable(); + timetable.service = service; + timetable.startHour = startHour; + timetable.endHour = endHour; + timetable.headwayMinutes = headwayMinutes; + this.timetables.add(timetable); + return this; + } + public static GridRoute newVerticalRoute (GridLayout gridLayout, int col, int headwayMinutes) { GridRoute route = newBareRoute(gridLayout, headwayMinutes); route.orientation = Orientation.VERTICAL; @@ -114,12 +142,7 @@ public static GridRoute newVerticalRoute (GridLayout gridLayout, int col, int he return route; } - public GridRoute pureFrequency () { - pureFrequency = true; - return this; - } - - public static class TripHop{ + public static class TripHop { int trip; int hop; diff --git a/src/test/java/com/conveyal/r5/analyst/network/GridSinglePointTaskBuilder.java b/src/test/java/com/conveyal/r5/analyst/network/GridSinglePointTaskBuilder.java index 97dcba40b..a4b71fd9c 100644 --- a/src/test/java/com/conveyal/r5/analyst/network/GridSinglePointTaskBuilder.java +++ b/src/test/java/com/conveyal/r5/analyst/network/GridSinglePointTaskBuilder.java @@ -14,7 +14,8 @@ import java.util.EnumSet; import java.util.stream.IntStream; -import static com.conveyal.r5.analyst.network.GridGtfsGenerator.GTFS_DATE; +import static com.conveyal.r5.analyst.network.GridGtfsGenerator.WEEKDAY_DATE; +import static com.conveyal.r5.analyst.network.GridGtfsGenerator.WEEKEND_DATE; /** * This creates a task for use in tests. It uses a builder pattern but for a non-immutable task object. @@ -35,7 +36,7 @@ public GridSinglePointTaskBuilder (GridLayout gridLayout) { this.gridLayout = gridLayout; // We will accumulate settings into this task. task = new TravelTimeSurfaceTask(); - task.date = GTFS_DATE; + task.date = WEEKDAY_DATE; // Set defaults that can be overridden by calling builder methods.
task.accessModes = EnumSet.of(LegMode.WALK); task.egressModes = EnumSet.of(LegMode.WALK); @@ -78,6 +79,18 @@ public GridSinglePointTaskBuilder setDestination (int gridX, int gridY) { return this; } + public GridSinglePointTaskBuilder weekdayMorningPeak () { + task.date = WEEKDAY_DATE; + morningPeak(); + return this; + } + + public GridSinglePointTaskBuilder weekendMorningPeak () { + task.date = WEEKEND_DATE; + morningPeak(); + return this; + } + public GridSinglePointTaskBuilder morningPeak () { task.fromTime = LocalTime.of(7, 00).toSecondOfDay(); task.toTime = LocalTime.of(9, 00).toSecondOfDay(); @@ -102,11 +115,11 @@ public GridSinglePointTaskBuilder uniformOpportunityDensity (double density) { // In a single point task, the grid of destinations is given with these fields, not from the pointset object. // The destination point set (containing the opportunity densities) must then match these same dimensions. - task.zoom = grid.zoom; - task.north = grid.north; - task.west = grid.west; - task.width = grid.width; - task.height = grid.height; + task.zoom = grid.extents.zoom; + task.north = grid.extents.north; + task.west = grid.extents.west; + task.width = grid.extents.width; + task.height = grid.extents.height; return this; } diff --git a/src/test/java/com/conveyal/r5/analyst/network/RandomFrequencyPhasingTests.java b/src/test/java/com/conveyal/r5/analyst/network/RandomFrequencyPhasingTests.java new file mode 100644 index 000000000..4f762a631 --- /dev/null +++ b/src/test/java/com/conveyal/r5/analyst/network/RandomFrequencyPhasingTests.java @@ -0,0 +1,62 @@ +package com.conveyal.r5.analyst.network; + +import com.conveyal.r5.OneOriginResult; +import com.conveyal.r5.analyst.TravelTimeComputer; +import com.conveyal.r5.analyst.cluster.AnalysisWorkerTask; +import com.conveyal.r5.transit.TransportNetwork; +import org.junit.jupiter.api.Test; + +import static com.conveyal.r5.analyst.network.SimpsonDesertTests.SIMPSON_DESERT_CORNER; + +public class RandomFrequencyPhasingTests { + + /** + * This recreates the problem in issue #740, inspired by the structure of STM Montréal GTFS métro schedules. + * This involves a pattern with two different frequency trips on two different services. The first service has + * N freq entries, and the second has more than N entries. + * + * Here we make one weekday trip with a single frequency entry, and one weekend trip with two frequency entries. + * We search on the weekend, causing the weekday trip to be filtered out. The Monte Carlo boarding code expects + * an array of two offsets for the weekend trip, but sees a single offset intended for the weekday trip. In addition + * the offsets generated for the weekday service will be in the range (0...30) while the headway of the weekend + * service is much shorter - many generated offsets will exceed the headway. So at least two different assertions + * can fail here, but whether they will actually fail depends on the order of the TripSchedules in the TripPattern + * (the weekend trip must come after the weekday one, so filtering causes a decrease in trip index). This depends + * in turn on the iteration order of the GtfsFeed.trips map, which follows the natural order of its keys so should + * be determined by the lexical order of trip IDs, which are determined by the order in which timetables are added + * to the GridRoute. + * + * The problem exists even if the mismatched trips have the same number of entries but can fail silently as offsets + * are computed for the wrong headways. 
The output travel time range will even be correct, but the distribution will + * be skewed by the offsets being randomly selected from a different headway, while the headway itself is accurate. + * + * The structure created here can only be created by input GTFS, and cannot be produced by our Conveyal scenarios. + * In scenarios, we always produce exactly one frequency entry per trip. This does not mean scenarios are immune + * to the problem of using offsets from the wrong trip - it's just much harder to detect the problem as all arrays + * are the same length, so it can fail silently. + */ + @Test + public void testFilteredTripRandomization () throws Exception { + + GridLayout gridLayout = new GridLayout(SIMPSON_DESERT_CORNER, 40); + // TODO DSL revision: + // gridLayout.newRoute(/*construct and add to routes*/).horizontal(20).addTimetable()... + gridLayout.routes.add(GridRoute + .newHorizontalRoute(gridLayout, 20, 30) + .addTimetable(GridRoute.Services.WEEKEND, 6, 10, 4) + .addTimetable(GridRoute.Services.WEEKEND, 10, 22, 8) + .pureFrequency() + ); + TransportNetwork network = gridLayout.generateNetwork(); + AnalysisWorkerTask task = gridLayout.newTaskBuilder() + .weekendMorningPeak() + .setOrigin(20, 20) + .monteCarloDraws(1000) + .build(); + + TravelTimeComputer computer = new TravelTimeComputer(task, network); + OneOriginResult oneOriginResult = computer.computeTravelTimes(); + + } + +} diff --git a/src/test/java/com/conveyal/r5/analyst/network/SimpsonDesertTests.java b/src/test/java/com/conveyal/r5/analyst/network/SimpsonDesertTests.java index a702fd621..327716429 100644 --- a/src/test/java/com/conveyal/r5/analyst/network/SimpsonDesertTests.java +++ b/src/test/java/com/conveyal/r5/analyst/network/SimpsonDesertTests.java @@ -46,7 +46,7 @@ public void testGridScheduled () throws Exception { // gridLayout.exportFiles("test"); AnalysisWorkerTask task = gridLayout.newTaskBuilder() - .morningPeak() + .weekdayMorningPeak() .setOrigin(20, 20) .uniformOpportunityDensity(10) .build(); @@ -85,7 +85,7 @@ public void testGridFrequency () throws Exception { TransportNetwork network = gridLayout.generateNetwork(); AnalysisWorkerTask task = gridLayout.newTaskBuilder() - .morningPeak() + .weekdayMorningPeak() .setOrigin(20, 20) .uniformOpportunityDensity(10) .build(); @@ -120,7 +120,7 @@ public void testGridFrequencyAlternatives () throws Exception { TransportNetwork network = gridLayout.generateNetwork(); AnalysisWorkerTask task = gridLayout.newTaskBuilder() - .morningPeak() + .weekdayMorningPeak() .setOrigin(20, 20) .uniformOpportunityDensity(10) .monteCarloDraws(20000) @@ -229,7 +229,7 @@ public void testExperiments () throws Exception { TransportNetwork network = gridLayout.generateNetwork(); AnalysisWorkerTask task = gridLayout.newTaskBuilder() - .morningPeak() + .weekdayMorningPeak() .setOrigin(20, 20) .uniformOpportunityDensity(10) .monteCarloDraws(4000) diff --git a/src/test/java/com/conveyal/r5/transit/FrequencyRandomOffsetsTest.java b/src/test/java/com/conveyal/r5/transit/FrequencyRandomOffsetsTest.java index 21774d9bb..f0fd04763 100644 --- a/src/test/java/com/conveyal/r5/transit/FrequencyRandomOffsetsTest.java +++ b/src/test/java/com/conveyal/r5/transit/FrequencyRandomOffsetsTest.java @@ -74,8 +74,8 @@ public void testPhasing () { // check that phasing is correct // offset indices are trip pattern, trip, frequency entry - int timeAtTargetStop = ts2.startTimes[0] + ts2.departures[1] + fro.offsets.get(1)[0][0]; - int timeAtSourceStop = ts1.startTimes[0] + ts1.departures[2] + 
fro.offsets.get(0)[0][0]; + int timeAtTargetStop = ts2.startTimes[0] + ts2.departures[1] + fro.getOffsetSeconds(ts2, 0); + int timeAtSourceStop = ts1.startTimes[0] + ts1.departures[2] + fro.getOffsetSeconds(ts1, 0); int timeDifference = timeAtTargetStop - timeAtSourceStop; // Depending on how large the offset on the first route is, the new route may come 10 minutes after on its first // trip, or 20 minutes before (which is the same phasing, just changing which route arrives first). @@ -141,8 +141,8 @@ public void testPhasingAtLastStop () { // check that phasing is correct // offset indices are trip pattern, trip, frequency entry - int timeAtTargetStop = ts2.startTimes[0] + ts2.arrivals[3] + fro.offsets.get(1)[0][0]; - int timeAtSourceStop = ts1.startTimes[0] + ts1.arrivals[3] + fro.offsets.get(0)[0][0]; + int timeAtTargetStop = ts2.startTimes[0] + ts2.arrivals[3] + fro.getOffsetSeconds(ts2, 0); + int timeAtSourceStop = ts1.startTimes[0] + ts1.arrivals[3] + fro.getOffsetSeconds(ts1, 0); int timeDifference = timeAtTargetStop - timeAtSourceStop; // Depending on how large the offset on the first route is, the new route may come 10 minutes after on its first // trip, or 20 minutes before (which is the same phasing, just changing which route arrives first). diff --git a/src/test/resources/com/conveyal/analysis/datasource/cities.csv b/src/test/resources/com/conveyal/analysis/datasource/cities.csv new file mode 100644 index 000000000..69701e1bf --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/cities.csv @@ -0,0 +1,5 @@ +id,name,lon,lat,metropop +1,Hong Kong,114.1621809593,22.2783740471,7.501 +2,Shanghai,121.4682654779,31.2461374983,24.871 +3,Tokyo,139.7514493987,35.681394636,37.468 +4,Singapore,103.8451699322,1.2908769445,5.454 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/continents.cpg b/src/test/resources/com/conveyal/analysis/datasource/continents.cpg new file mode 100644 index 000000000..3ad133c04 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/continents.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/continents.dbf b/src/test/resources/com/conveyal/analysis/datasource/continents.dbf new file mode 100644 index 000000000..bf28e82c8 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/continents.dbf differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/continents.geojson b/src/test/resources/com/conveyal/analysis/datasource/continents.geojson new file mode 100644 index 000000000..c67cb39d8 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/continents.geojson @@ -0,0 +1,10 @@ +{ +"type": "FeatureCollection", +"name": "continents", +"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, +"features": [ +{ "type": "Feature", "properties": { "fid": 1, "name": "Africa" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 9.207694490603865, 37.031796371490152 ], [ -6.080595230445032, 34.579651671503846 ], [ -16.328789439060223, 23.287688550137954 ], [ -16.664795806555794, 12.597733003075595 ], [ -9.10465253790524, 3.625886161837885 ], [ 5.343621264404692, 7.137840582034341 ], [ 14.415793186785351, -9.929863765964264 ], [ 11.895745430568505, -18.232380852417666 ], [ 19.959898250462437, -35.378955258057417 ], [ 30.040089275329819, -31.740130864171554 ], [ 39.784273932701652, -15.984586995814144 ], [ 39.448267565206059, -4.431419378524498 ], 
[ 48.352436303838942, 4.631276414504119 ], [ 51.880503162542546, 11.118015293717692 ], [ 43.816350342648612, 10.788130399274309 ], [ 36.760216625241441, 18.582014034731003 ], [ 32.056127480303303, 31.050832539233635 ], [ 21.135920536696972, 32.903323939834031 ], [ 19.287885515471263, 30.038022943348466 ], [ 11.39173587932514, 33.325471229721181 ], [ 9.207694490603865, 37.031796371490152 ] ] ] } }, +{ "type": "Feature", "properties": { "fid": 2, "name": "South America" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -77.145941955760179, 8.635523261373871 ], [ -70.5938177895964, 12.433722214759614 ], [ -53.289489863574026, 5.13345555534787 ], [ -51.105448474852764, 0.772231253506057 ], [ -34.809139651317125, -5.101104401164461 ], [ -39.849235163750834, -14.525882518121586 ], [ -40.689251082489783, -22.017404821663259 ], [ -49.257413453627066, -24.63966044038937 ], [ -53.79349941481739, -34.414421960907688 ], [ -65.553722277162692, -43.899052241499916 ], [ -69.753801870857458, -52.185762756047239 ], [ -66.729744563397233, -55.169562644237949 ], [ -72.945862362065455, -54.394508247767995 ], [ -75.633913302030095, -49.317041396205042 ], [ -70.5938177895964, -22.328562552841117 ], [ -70.425814605848615, -17.59294697454715 ], [ -76.305926037021251, -14.200379200318192 ], [ -81.178018365707146, -4.933745041447287 ], [ -77.145941955760179, 8.635523261373871 ] ] ] } }, +{ "type": "Feature", "properties": { "fid": 3, "name": "Australia" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 113.293323157283623, -23.550070942145858 ], [ 115.998531266430973, -35.068814078417311 ], [ 131.017100424110822, -31.720787288707626 ], [ 140.718536401742483, -38.431244158386313 ], [ 147.248349078994636, -38.650132286799682 ], [ 153.871444794493158, -29.635056114562499 ], [ 150.793104532360076, -21.82888297473276 ], [ 142.21106501368584, -10.471242467141616 ], [ 140.811819439988966, -17.52621132938086 ], [ 135.681252336433744, -15.2892740028045 ], [ 135.961101451173136, -12.482324116300157 ], [ 132.50962903605415, -11.844004871667885 ], [ 125.699967244062677, -14.56820380385734 ], [ 116.185097342923868, -21.047429637696247 ], [ 113.293323157283623, -23.550070942145858 ] ] ] } } +] +} diff --git a/src/test/resources/com/conveyal/analysis/datasource/continents.gpkg b/src/test/resources/com/conveyal/analysis/datasource/continents.gpkg new file mode 100644 index 000000000..f60a0999f Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/continents.gpkg differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/continents.prj b/src/test/resources/com/conveyal/analysis/datasource/continents.prj new file mode 100644 index 000000000..f45cbadf0 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/continents.prj @@ -0,0 +1 @@ +GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/continents.shp b/src/test/resources/com/conveyal/analysis/datasource/continents.shp new file mode 100644 index 000000000..b01eb2280 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/continents.shp differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/continents.shx b/src/test/resources/com/conveyal/analysis/datasource/continents.shx new file mode 100644 index 000000000..d344afc02 Binary files /dev/null and 
b/src/test/resources/com/conveyal/analysis/datasource/continents.shx differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.cpg b/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.cpg new file mode 100644 index 000000000..3ad133c04 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.dbf b/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.dbf new file mode 100644 index 000000000..1a09bbbfe Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.dbf differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.prj b/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.prj new file mode 100644 index 000000000..e5e7cad96 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.prj @@ -0,0 +1 @@ +PROJCS["Hong_Kong_1980_Grid",GEOGCS["GCS_Hong_Kong_1980",DATUM["D_Hong_Kong_1980",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",836694.05],PARAMETER["False_Northing",819069.8],PARAMETER["Central_Meridian",114.178555555556],PARAMETER["Scale_Factor",1.0],PARAMETER["Latitude_Of_Origin",22.3121333333333],UNIT["Meter",1.0]] \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.shp b/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.shp new file mode 100644 index 000000000..460dbb430 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.shp differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.shx b/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.shx new file mode 100644 index 000000000..8e5fa1a3e Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/duplicate-fields.shx differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/empty.geojson b/src/test/resources/com/conveyal/analysis/datasource/empty.geojson new file mode 100644 index 000000000..e69de29bb diff --git a/src/test/resources/com/conveyal/analysis/datasource/hkzones-extra-attribute.geojson b/src/test/resources/com/conveyal/analysis/datasource/hkzones-extra-attribute.geojson new file mode 100644 index 000000000..5d3ede896 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/hkzones-extra-attribute.geojson @@ -0,0 +1,11 @@ +{ +"type": "FeatureCollection", +"name": "hkzones-extra-attribute", +"description": "Some features have attributes that others do not.", +"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, +"features": [ +{ "type": "Feature", "properties": { "Name": "Kowloon", "Count": 5, "ExtraAttribute": "This attribute exists only in Kowloon." 
}, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.159122921974657, 22.32164790382194 ], [ 114.168719395665576, 22.296045686522024 ], [ 114.190255668018438, 22.302549920319244 ], [ 114.190255668018438, 22.322473703634543 ], [ 114.159122921974657, 22.32164790382194 ] ] ] } }, +{ "type": "Feature", "properties": { "Name": "Kennedy Town", "Count": 2 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.105703481652796, 22.233478265431106 ], [ 114.122266697678228, 22.203817018165608 ], [ 114.128240316572658, 22.205199673266289 ], [ 114.137472273045859, 22.216511794259716 ], [ 114.119687180428357, 22.241143923358219 ], [ 114.105703481652796, 22.233478265431106 ] ] ] } }, +{ "type": "Feature", "properties": { "Name": "North Lamma", "Count": 45, "OtherExtraAttribute": 1234 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.124474544327114, 22.282936206858849 ], [ 114.12956699448219, 22.275726416719557 ], [ 114.132978936086076, 22.27709300679836 ], [ 114.132418766569032, 22.28811948640017 ], [ 114.124474544327114, 22.282936206858849 ] ] ] } } +] +} diff --git a/src/test/resources/com/conveyal/analysis/datasource/hkzones-mixed-geometries.geojson b/src/test/resources/com/conveyal/analysis/datasource/hkzones-mixed-geometries.geojson new file mode 100644 index 000000000..5cef126bd --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/hkzones-mixed-geometries.geojson @@ -0,0 +1,11 @@ +{ +"type": "FeatureCollection", +"name": "hkzones-mixed-geometries", +"description": "Each feature in this file is of a different geometry type. We only accept feature collections where all features have the same type.", +"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, +"features": [ +{ "type": "Feature", "properties": { "Name": "Kowloon", "Count": 5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.159122921974657, 22.32164790382194 ], [ 114.168719395665576, 22.296045686522024 ], [ 114.190255668018438, 22.302549920319244 ], [ 114.190255668018438, 22.322473703634543 ], [ 114.159122921974657, 22.32164790382194 ] ] ] } }, +{ "type": "Feature", "properties": { "Name": "Kennedy Town", "Count": 2 }, "geometry": { "type": "LineString", "coordinates": [ [ 114.105703481652796, 22.233478265431106 ], [ 114.122266697678228, 22.203817018165608 ], [ 114.128240316572658, 22.205199673266289 ], [ 114.137472273045859, 22.216511794259716 ], [ 114.119687180428357, 22.241143923358219 ], [ 114.105703481652796, 22.233478265431106 ] ] } }, +{ "type": "Feature", "properties": { "Name": "North Lamma", "Count": 45 }, "geometry": { "type": "Point", "coordinates": [ 114.124474544327114, 22.282936206858849 ] } } +] +} diff --git a/src/test/resources/com/conveyal/analysis/datasource/hkzones-mixed-numeric.geojson b/src/test/resources/com/conveyal/analysis/datasource/hkzones-mixed-numeric.geojson new file mode 100644 index 000000000..729139897 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/hkzones-mixed-numeric.geojson @@ -0,0 +1,11 @@ +{ +"type": "FeatureCollection", +"name": "hkzones-mixed-numeric", +"description": "Each feature's Count field has a different apparent numeric type.", +"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, +"features": [ +{ "type": "Feature", "properties": { "Name": "Kowloon", "Count": -5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.159122921974657, 22.32164790382194 ], [ 114.168719395665576, 22.296045686522024 ], [ 114.190255668018438, 22.302549920319244 
], [ 114.190255668018438, 22.322473703634543 ], [ 114.159122921974657, 22.32164790382194 ] ] ] } }, +{ "type": "Feature", "properties": { "Name": "Kennedy Town", "Count": 2.345 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.105703481652796, 22.233478265431106 ], [ 114.122266697678228, 22.203817018165608 ], [ 114.128240316572658, 22.205199673266289 ], [ 114.137472273045859, 22.216511794259716 ], [ 114.119687180428357, 22.241143923358219 ], [ 114.105703481652796, 22.233478265431106 ] ] ] } }, +{ "type": "Feature", "properties": { "Name": "North Lamma", "Count": 6.02e23 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.124474544327114, 22.282936206858849 ], [ 114.12956699448219, 22.275726416719557 ], [ 114.132978936086076, 22.27709300679836 ], [ 114.132418766569032, 22.28811948640017 ], [ 114.124474544327114, 22.282936206858849 ] ] ] } } +] +} diff --git a/src/test/resources/com/conveyal/analysis/datasource/hkzones-no-collection.geojson b/src/test/resources/com/conveyal/analysis/datasource/hkzones-no-collection.geojson new file mode 100644 index 000000000..6a0af731d --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/hkzones-no-collection.geojson @@ -0,0 +1,6 @@ +{ +"type": "Feature", +"description": "This file has no FeatureCollection, just a single unwrapped Feature.", +"properties": { "Name": "Kowloon", "Count": 5 }, +"geometry": { "type": "Polygon", "coordinates": [ [ [ 114.159122921974657, 22.32164790382194 ], [ 114.168719395665576, 22.296045686522024 ], [ 114.190255668018438, 22.302549920319244 ], [ 114.190255668018438, 22.322473703634543 ], [ 114.159122921974657, 22.32164790382194 ] ] ] } +} diff --git a/src/test/resources/com/conveyal/analysis/datasource/hkzones-type-mismatch.geojson b/src/test/resources/com/conveyal/analysis/datasource/hkzones-type-mismatch.geojson new file mode 100644 index 000000000..a7afee6e4 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/hkzones-type-mismatch.geojson @@ -0,0 +1,11 @@ +{ +"type": "FeatureCollection", +"name": "hkzones-type-mismatch", +"description": "Each feature has a value of a different type in the TypeMismatch property.", +"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, +"features": [ +{ "type": "Feature", "properties": { "Name": "Kowloon", "Count": 5, "TypeMismatch": 1234 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.159122921974657, 22.32164790382194 ], [ 114.168719395665576, 22.296045686522024 ], [ 114.190255668018438, 22.302549920319244 ], [ 114.190255668018438, 22.322473703634543 ], [ 114.159122921974657, 22.32164790382194 ] ] ] } }, +{ "type": "Feature", "properties": { "Name": "Kennedy Town", "Count": 2, "TypeMismatch": "This is text." 
}, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.105703481652796, 22.233478265431106 ], [ 114.122266697678228, 22.203817018165608 ], [ 114.128240316572658, 22.205199673266289 ], [ 114.137472273045859, 22.216511794259716 ], [ 114.119687180428357, 22.241143923358219 ], [ 114.105703481652796, 22.233478265431106 ] ] ] } }, +{ "type": "Feature", "properties": { "Name": "North Lamma", "Count": 45, "TypeMismatch": {"description": "This is an object."} }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.124474544327114, 22.282936206858849 ], [ 114.12956699448219, 22.275726416719557 ], [ 114.132978936086076, 22.27709300679836 ], [ 114.132418766569032, 22.28811948640017 ], [ 114.124474544327114, 22.282936206858849 ] ] ] } } +] +} diff --git a/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.cpg b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.cpg new file mode 100644 index 000000000..3ad133c04 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.dbf b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.dbf new file mode 100644 index 000000000..cff192237 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.dbf differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.geojson b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.geojson new file mode 100644 index 000000000..a1d5020ac --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.geojson @@ -0,0 +1,9 @@ +{ +"type": "FeatureCollection", +"name": "new-zealand-antimeridian", +"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, +"features": [ +{ "type": "Feature", "properties": { "fid": 1.0, "name": "North Island" }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 172.572710025251382, -34.449590915441085 ], [ 173.785736803146222, -34.966202723182299 ], [ 175.481051336107583, -36.449524966478741 ], [ 175.963339091174248, -37.593008885343913 ], [ 177.234824990895362, -38.066281456152623 ], [ 177.950949233267011, -37.51190315761761 ], [ 178.579384792899276, -37.662457805684703 ], [ 177.892490111440736, -39.094488481162173 ], [ 176.898685040394383, -39.27573699709766 ], [ 177.103291966786287, -39.670586853627256 ], [ 175.349518311998594, -41.60014497816146 ], [ 174.589549728257225, -41.238484667718865 ], [ 175.232600068346017, -40.253075826063075 ], [ 173.712662900863393, -39.320976075507708 ], [ 174.428787143234985, -38.969609155026646 ], [ 174.925689678758204, -37.905017570549084 ], [ 172.572710025251382, -34.449590915441085 ] ] ] ] } }, +{ "type": "Feature", "properties": { "fid": 2.0, "name": "Chatham Islands" }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ -176.887089808985451, -43.805876271548826 ], [ -176.687138796566614, -43.658184940888368 ], [ -176.193926299220777, -43.696748226980496 ], [ -176.131719317490592, -44.370768875002504 ], [ -176.216143078297137, -44.373945030874339 ], [ -176.620488459031236, -44.141633001555689 ], [ -176.887089808985451, -43.805876271548826 ] ] ] ] } } +] +} diff --git a/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.gpkg 
b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.gpkg new file mode 100644 index 000000000..5eaca6b3c Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.gpkg differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.prj b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.prj new file mode 100644 index 000000000..02412c37e --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.prj @@ -0,0 +1 @@ +PROJCS["GD_1949_New_Zealand_Map_Grid",GEOGCS["GCS_New_Zealand_1949",DATUM["D_New_Zealand_1949",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["New_Zealand_Map_Grid"],PARAMETER["False_Easting",2510000.0],PARAMETER["False_Northing",6023150.0],PARAMETER["Longitude_Of_Origin",173.0],PARAMETER["Latitude_Of_Origin",-41.0],UNIT["Meter",1.0]] \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.shp b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.shp new file mode 100644 index 000000000..c94c3cec4 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.shp differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.shx b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.shx new file mode 100644 index 000000000..81449df34 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/new-zealand-antimeridian.shx differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.cpg b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.cpg new file mode 100644 index 000000000..3ad133c04 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.dbf b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.dbf new file mode 100644 index 000000000..858657489 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.dbf differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.prj b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.prj new file mode 100644 index 000000000..4a3b7cad7 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.prj @@ -0,0 +1 @@ +PROJCS["Amersfoort_RD_New",GEOGCS["GCS_Amersfoort",DATUM["D_Amersfoort",SPHEROID["Bessel_1841",6377397.155,299.1528128]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Double_Stereographic"],PARAMETER["latitude_of_origin",52.15616055555555],PARAMETER["central_meridian",5.38763888888889],PARAMETER["scale_factor",0.9999079],PARAMETER["false_easting",155000],PARAMETER["false_northing",463000],UNIT["Meter",1]] \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.qpj b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.qpj new file mode 100644 index 000000000..ac9c05d13 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.qpj @@ -0,0 +1 @@ +PROJCS["Amersfoort / RD New",GEOGCS["Amersfoort",DATUM["Amersfoort",SPHEROID["Bessel 
1841",6377397.155,299.1528128,AUTHORITY["EPSG","7004"]],TOWGS84[565.2369,50.0087,465.658,-0.406857,0.350733,-1.87035,4.0812],AUTHORITY["EPSG","6289"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4289"]],PROJECTION["Oblique_Stereographic"],PARAMETER["latitude_of_origin",52.15616055555555],PARAMETER["central_meridian",5.38763888888889],PARAMETER["scale_factor",0.9999079],PARAMETER["false_easting",155000],PARAMETER["false_northing",463000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["X",EAST],AXIS["Y",NORTH],AUTHORITY["EPSG","28992"]] diff --git a/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.shp b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.shp new file mode 100644 index 000000000..5d0fc2398 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.shp differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.shx b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.shx new file mode 100644 index 000000000..4603632c0 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/nl-null-points.shx differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/too-small.geojson b/src/test/resources/com/conveyal/analysis/datasource/too-small.geojson new file mode 100644 index 000000000..31ca215bc --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/too-small.geojson @@ -0,0 +1 @@ +{"type":"feature"} \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.cpg b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.cpg new file mode 100644 index 000000000..3ad133c04 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.dbf b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.dbf new file mode 100644 index 000000000..23fa132f6 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.dbf differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.geojson b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.geojson new file mode 100644 index 000000000..047325a8f --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.geojson @@ -0,0 +1,11 @@ +{ +"type": "FeatureCollection", +"name": "hkzones-mercator", +"description": "This file is projected into spherical web Mercator. 
GeoJSON CRS entries are obsolete, it is suppposed to always be in WGS84.", +"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:EPSG::3857" } }, +"features": [ +{ "type": "Feature", "properties": { "Name": "Kowloon", "Count": 5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 12708135.433080945163965, 2550186.97031799983233 ], [ 12709203.707645628601313, 2547106.364596586674452 ], [ 12711601.114517534151673, 2547888.937824204098433 ], [ 12711601.114517534151673, 2550286.344696110114455 ], [ 12708135.433080945163965, 2550186.97031799983233 ] ] ] } }, +{ "type": "Feature", "properties": { "Name": "Kennedy Town", "Count": 2 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 12702188.80818585306406, 2539580.266773413866758 ], [ 12704032.616959704086185, 2536013.554719078820199 ], [ 12704697.597173223271966, 2536179.799772459082305 ], [ 12705725.293866846710443, 2537539.98657284071669 ], [ 12703745.466412955895066, 2540502.17116033937782 ], [ 12702188.80818585306406, 2539580.266773413866758 ] ] ] } }, +{ "type": "Feature", "properties": { "Name": "North Lamma", "Count": 45 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 12704278.393324406817555, 2545529.175306653603911 ], [ 12704845.282282559201121, 2544661.835200681351125 ], [ 12705225.097884520888329, 2544826.232998545281589 ], [ 12705162.740099124610424, 2546152.753160620573908 ], [ 12704278.393324406817555, 2545529.175306653603911 ] ] ] } } +] +} diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.gpkg b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.gpkg new file mode 100644 index 000000000..495a28e6d Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.gpkg differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.prj b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.prj new file mode 100644 index 000000000..e5e7cad96 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.prj @@ -0,0 +1 @@ +PROJCS["Hong_Kong_1980_Grid",GEOGCS["GCS_Hong_Kong_1980",DATUM["D_Hong_Kong_1980",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",836694.05],PARAMETER["False_Northing",819069.8],PARAMETER["Central_Meridian",114.178555555556],PARAMETER["Scale_Factor",1.0],PARAMETER["Latitude_Of_Origin",22.3121333333333],UNIT["Meter",1.0]] \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.shp b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.shp new file mode 100644 index 000000000..f711da82f Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.shp differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.shx b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.shx new file mode 100644 index 000000000..2d47ba971 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-projected.shx differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.cpg b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.cpg new file mode 100644 index 000000000..3ad133c04 --- /dev/null +++ 
b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.dbf b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.dbf new file mode 100644 index 000000000..23fa132f6 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.dbf differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.geojson b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.geojson new file mode 100644 index 000000000..3935903bb --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.geojson @@ -0,0 +1,11 @@ +{ +"type": "FeatureCollection", +"name": "hkzones-wgs84", +"description": "This is the original file with no errors, in WGS84 coordinates.", +"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, +"features": [ +{ "type": "Feature", "properties": { "Name": "Kowloon", "Count": 5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.159122921974657, 22.32164790382194 ], [ 114.168719395665576, 22.296045686522024 ], [ 114.190255668018438, 22.302549920319244 ], [ 114.190255668018438, 22.322473703634543 ], [ 114.159122921974657, 22.32164790382194 ] ] ] } }, +{ "type": "Feature", "properties": { "Name": "Kennedy Town", "Count": 2 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.105703481652796, 22.233478265431106 ], [ 114.122266697678228, 22.203817018165608 ], [ 114.128240316572658, 22.205199673266289 ], [ 114.137472273045859, 22.216511794259716 ], [ 114.119687180428357, 22.241143923358219 ], [ 114.105703481652796, 22.233478265431106 ] ] ] } }, +{ "type": "Feature", "properties": { "Name": "North Lamma", "Count": 45 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 114.124474544327114, 22.282936206858849 ], [ 114.12956699448219, 22.275726416719557 ], [ 114.132978936086076, 22.27709300679836 ], [ 114.132418766569032, 22.28811948640017 ], [ 114.124474544327114, 22.282936206858849 ] ] ] } } +] +} diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.gpkg b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.gpkg new file mode 100644 index 000000000..593fa0329 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.gpkg differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.prj b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.prj new file mode 100644 index 000000000..f45cbadf0 --- /dev/null +++ b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.prj @@ -0,0 +1 @@ +GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] \ No newline at end of file diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.shp b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.shp new file mode 100644 index 000000000..7a2b11b62 Binary files /dev/null and b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.shp differ diff --git a/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.shx b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.shx new file mode 100644 index 000000000..8778a8c1e Binary files 
/dev/null and b/src/test/resources/com/conveyal/analysis/datasource/valid-polygon-wgs84.shx differ diff --git a/src/test/resources/com/conveyal/gtfs/fiji-ferry/agency.txt b/src/test/resources/com/conveyal/gtfs/fiji-ferry/agency.txt new file mode 100644 index 000000000..cf1f80ac7 --- /dev/null +++ b/src/test/resources/com/conveyal/gtfs/fiji-ferry/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_timezone,agency_lang +FijiFerry,Fiji Antimeridian Ferry Authority,"https://tidesandcurrents.noaa.gov/stationhome.html?id=1910000",Pacific/Fiji,en diff --git a/src/test/resources/com/conveyal/gtfs/fiji-ferry/calendar.txt b/src/test/resources/com/conveyal/gtfs/fiji-ferry/calendar.txt new file mode 100644 index 000000000..0c07264f8 --- /dev/null +++ b/src/test/resources/com/conveyal/gtfs/fiji-ferry/calendar.txt @@ -0,0 +1,2 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +all,1,1,1,1,1,1,1,20200901,20201231 diff --git a/src/test/resources/com/conveyal/gtfs/fiji-ferry/feed_info.txt b/src/test/resources/com/conveyal/gtfs/fiji-ferry/feed_info.txt new file mode 100644 index 000000000..998d039df --- /dev/null +++ b/src/test/resources/com/conveyal/gtfs/fiji-ferry/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_start_date,feed_end_date,feed_version,feed_contact_email +FIJI,Conveyal LLC,http://conveyal.com,en,20200901,20201231,1,contact@conveyal.com diff --git a/src/test/resources/com/conveyal/gtfs/fiji-ferry/routes.txt b/src/test/resources/com/conveyal/gtfs/fiji-ferry/routes.txt new file mode 100644 index 000000000..afa999fd6 --- /dev/null +++ b/src/test/resources/com/conveyal/gtfs/fiji-ferry/routes.txt @@ -0,0 +1,2 @@ +route_id,agency_id,route_short_name,route_long_name,route_desc,route_type +FF,FijiFerry,FF1,Fiji Ferry,The Ferry operates across the 180 degree antimeridian,4 diff --git a/src/test/resources/com/conveyal/gtfs/fiji-ferry/stop_times.txt b/src/test/resources/com/conveyal/gtfs/fiji-ferry/stop_times.txt new file mode 100644 index 000000000..64590d78b --- /dev/null +++ b/src/test/resources/com/conveyal/gtfs/fiji-ferry/stop_times.txt @@ -0,0 +1,5 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence +out,00:00:00,00:00:00,0,1 +out,11:59:00,12:00:00,1,2 +back,11:59:00,12:00:00,1,1 +back,23:59:00,24:00:00,0,2 diff --git a/src/test/resources/com/conveyal/gtfs/fiji-ferry/stops.txt b/src/test/resources/com/conveyal/gtfs/fiji-ferry/stops.txt new file mode 100644 index 000000000..602b0987e --- /dev/null +++ b/src/test/resources/com/conveyal/gtfs/fiji-ferry/stops.txt @@ -0,0 +1,3 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,location_type +0,SUVA,Suva,Suva Ferry Terminal,-18.135791,178.4234745, +1,MATEI,Matei,Naselesele Point,-16.6862018,-179.8820305, diff --git a/src/test/resources/com/conveyal/gtfs/fiji-ferry/trips.txt b/src/test/resources/com/conveyal/gtfs/fiji-ferry/trips.txt new file mode 100644 index 000000000..ca40683df --- /dev/null +++ b/src/test/resources/com/conveyal/gtfs/fiji-ferry/trips.txt @@ -0,0 +1,3 @@ +route_id,service_id,trip_id,trip_headsign,direction_id +FF,all,out,Outbound,0 +FF,all,back,Inbound,1 diff --git a/src/test/resources/com/conveyal/r5/streets/fiji-extract.sh b/src/test/resources/com/conveyal/r5/streets/fiji-extract.sh new file mode 100644 index 000000000..6493d5b7f --- /dev/null +++ b/src/test/resources/com/conveyal/r5/streets/fiji-extract.sh @@ -0,0 +1,12 @@ +# Commands used to create OSM PBF data corresponding to the Fiji Ferry test GTFS +# Process 
is a bit roundabout to avoid using bounding boxes that span the 180 degree meridian. + +# First filter Geofabrik Fiji data to only roads and platforms +osmium tags-filter fiji-latest.osm.pbf w/highway w/public_transport=platform w/railway=platform w/park_ride=yes r/type=restriction -o fiji-filtered.pbf -f pbf,add_metadata=false,pbf_dense_nodes=true + +# Extract two small sections, one around each stop on either side of the antimeridian +osmium extract --strategy complete_ways --bbox 178.4032589647,-18.1706713885,178.4764627685,-18.1213456347 fiji-filtered.pbf -o fiji-suva.pbf +osmium extract --strategy complete_ways --bbox -179.9970112547,-16.8025646734,-179.8150897003,-16.6356004526 fiji-filtered.pbf -o fiji-matei.pbf + +# Combine the two pieces into a single OSM PBF file +osmium cat fiji-suva.pbf fiji-matei.pbf -o fiji-ferry.pbf diff --git a/src/test/resources/com/conveyal/r5/streets/fiji-ferry.pbf b/src/test/resources/com/conveyal/r5/streets/fiji-ferry.pbf new file mode 100644 index 000000000..9324d8756 Binary files /dev/null and b/src/test/resources/com/conveyal/r5/streets/fiji-ferry.pbf differ