Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MTC Field length validation #276

Merged
merged 19 commits into from
Apr 13, 2020
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
b85a922
refactor(Feed): allow additional validators
landonreed Apr 1, 2020
8b72831
test(MTC): Set up test files for MTC field length validation.
binh-dam-ibigroup Apr 1, 2020
eeb6b32
feat(MTCValidator): Implement MTC validator and complete test.
binh-dam-ibigroup Apr 2, 2020
3f42515
docs: Refine comments and error messages.
binh-dam-ibigroup Apr 2, 2020
4058c9b
refactor(FieldLengthError): Remove unused class.
binh-dam-ibigroup Apr 2, 2020
d423983
refactor: Replace calling reflection on FeedValidator constructors wi…
binh-dam-ibigroup Apr 2, 2020
9b94a0e
refactor(Feed): Replace callback interface to create MTCValidator wit…
binh-dam-ibigroup Apr 2, 2020
13696e5
refactor(validate): remove list from custom validator interface
landonreed Apr 2, 2020
df84840
refactor(validator): rename validator creator
landonreed Apr 3, 2020
d449b12
refactor(validator): refactor Feed class and add javadoc
landonreed Apr 3, 2020
b6aea3b
refactor(validator): fix npe
landonreed Apr 3, 2020
19db9c2
Merge branch 'field-length-validator' into field-length-validator-ltr-2
landonreed Apr 3, 2020
fd249da
refactor: fix build/imports
landonreed Apr 3, 2020
28b1dda
refactor(GTFSTest): Use FeedValidatorCreator... syntax to remove test…
binh-dam-ibigroup Apr 3, 2020
707a119
refactor(MTCValidator): Rename validation method + add overload, adju…
binh-dam-ibigroup Apr 6, 2020
7e97461
fix(tests): Fix tests
binh-dam-ibigroup Apr 6, 2020
be1fc6d
refactor(MTCValidator): Rename validateFieldLength; Add null check!
binh-dam-ibigroup Apr 6, 2020
ffd601a
Merge branch 'field-length-validator' of https://github.com/conveyal/…
binh-dam-ibigroup Apr 6, 2020
12ff728
refactor(NewGTFSErrorType): Reorganize error types per PR comment.
binh-dam-ibigroup Apr 7, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions src/main/java/com/conveyal/gtfs/GTFS.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import com.conveyal.gtfs.loader.JdbcGtfsSnapshotter;
import com.conveyal.gtfs.loader.SnapshotResult;
import com.conveyal.gtfs.util.InvalidNamespaceException;
import com.conveyal.gtfs.validator.FeedValidatorCreator;
import com.conveyal.gtfs.validator.ValidationResult;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.io.Files;
Expand All @@ -15,10 +16,10 @@
import org.apache.commons.dbcp2.DriverManagerConnectionFactory;
import org.apache.commons.dbcp2.PoolableConnectionFactory;
import org.apache.commons.dbcp2.PoolingDataSource;
import org.apache.commons.dbutils.DbUtils;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.sql.DataSource;
import java.io.File;
import java.io.IOException;
Expand All @@ -28,7 +29,8 @@
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;
import java.util.function.BiFunction;

import static com.conveyal.gtfs.util.Util.ensureValidNamespace;

Expand Down Expand Up @@ -94,9 +96,9 @@ public static SnapshotResult makeSnapshot (String feedId, DataSource dataSource)
/**
* Once a feed has been loaded into the database, examine its contents looking for various problems and errors.
*/
public static ValidationResult validate (String feedId, DataSource dataSource) {
public static ValidationResult validate (String feedId, DataSource dataSource, FeedValidatorCreator... additionalValidators) {
Feed feed = new Feed(dataSource, feedId);
ValidationResult result = feed.validate();
ValidationResult result = feed.validate(additionalValidators);
return result;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public enum NewGTFSErrorType {
URL_FORMAT(Priority.MEDIUM, "URL format should be <scheme>://<authority><path>?<query>#<fragment>"),
LANGUAGE_FORMAT(Priority.LOW, "Language should be specified with a valid BCP47 tag."),
ILLEGAL_FIELD_VALUE(Priority.MEDIUM, "Fields may not contain tabs, carriage returns or new lines."),
FIELD_VALUE_TOO_LONG(Priority.MEDIUM, "Field value has too many characters."),
binh-dam-ibigroup marked this conversation as resolved.
Show resolved Hide resolved
INTEGER_FORMAT(Priority.MEDIUM, "Incorrect integer format."),
FARE_TRANSFER_MISMATCH(Priority.MEDIUM, "A fare that does not permit transfers has a non-zero transfer duration."),
FREQUENCY_PERIOD_OVERLAP(Priority.MEDIUM, "A frequency for a trip overlaps with another frequency defined for the same trip."),
Expand Down
62 changes: 31 additions & 31 deletions src/main/java/com/conveyal/gtfs/loader/Feed.java
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
package com.conveyal.gtfs.loader;

import com.conveyal.gtfs.error.GTFSError;
import com.conveyal.gtfs.error.NewGTFSError;
import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.model.*;
import com.conveyal.gtfs.storage.StorageException;
import com.conveyal.gtfs.util.InvalidNamespaceException;
import com.conveyal.gtfs.validator.*;
import com.google.common.collect.Lists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static com.conveyal.gtfs.error.NewGTFSErrorType.VALIDATOR_FAILED;
Expand All @@ -32,20 +30,15 @@ public class Feed {
// This may be the empty string if the feed is stored in the root ("public") schema.
public final String tablePrefix;

public final TableReader<Agency> agencies;
public final TableReader<Calendar> calendars;
public final TableReader<CalendarDate> calendarDates;
public final TableReader<Agency> agencies;
public final TableReader<Calendar> calendars;
public final TableReader<CalendarDate> calendarDates;
public final TableReader<FareAttribute> fareAttributes;
public final TableReader<Frequency> frequencies;
public final TableReader<Route> routes;
public final TableReader<Stop> stops;
public final TableReader<Trip> trips;
// public final TableReader<ShapePoint> shapePoints;
public final TableReader<StopTime> stopTimes;

/* A place to accumulate errors while the feed is loaded. Tolerate as many errors as possible and keep on loading. */
// TODO remove this and use only NewGTFSErrors in Validators, loaded into a JDBC table
public final List<GTFSError> errors = new ArrayList<>();
public final TableReader<Frequency> frequencies;
public final TableReader<Route> routes;
public final TableReader<Stop> stops;
public final TableReader<Trip> trips;
public final TableReader<StopTime> stopTimes;

/**
* Create a feed that reads tables over a JDBC connection. The connection should already be set to the right
Expand All @@ -65,44 +58,51 @@ public Feed (DataSource dataSource, String tablePrefix) {
routes = new JDBCTableReader(Table.ROUTES, dataSource, tablePrefix, EntityPopulator.ROUTE);
stops = new JDBCTableReader(Table.STOPS, dataSource, tablePrefix, EntityPopulator.STOP);
trips = new JDBCTableReader(Table.TRIPS, dataSource, tablePrefix, EntityPopulator.TRIP);
// shapePoints = new JDBCTableReader(Table.SHAPES, dataSource, tablePrefix, EntityPopulator.SHAPE_POINT);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realize this has been commented out for a while since @landonreed committed 9244ff0. But why? Should we add it back?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we should. It might be worth revisiting #130.

stopTimes = new JDBCTableReader(Table.STOP_TIMES, dataSource, tablePrefix, EntityPopulator.STOP_TIME);
}

/**
* Run the standard validation checks for this feed and store the validation errors in the database. Optionally,
* takes one or more {@link FeedValidatorCreator} in the form of lambda method refs (e.g., {@code MTCValidator::new}),
* which this method will instantiate and run after the standard validation checks have been completed.
*
* TODO check whether validation has already occurred, overwrite results.
* TODO allow validation within feed loading process, so the same connection can be used, and we're certain loaded data is 100% visible.
* That would also avoid having to reconnect the error storage to the DB.
* TODO allow validation within feed loading process, so the same connection can be used, and we're certain loaded
* data is 100% visible. That would also avoid having to reconnect the error storage to the DB.
*/
public ValidationResult validate () {
public ValidationResult validate (FeedValidatorCreator... additionalValidators) {
long validationStartTime = System.currentTimeMillis();
// Create an empty validation result that will have its fields populated by certain validators.
ValidationResult validationResult = new ValidationResult();
// Error tables should already be present from the initial load.
// Reconnect to the existing error tables.
SQLErrorStorage errorStorage = null;
SQLErrorStorage errorStorage;
try {
errorStorage = new SQLErrorStorage(dataSource.getConnection(), tablePrefix, false);
} catch (SQLException | InvalidNamespaceException ex) {
throw new StorageException(ex);
}
int errorCountBeforeValidation = errorStorage.getErrorCount();

List<FeedValidator> feedValidators = Arrays.asList(
new MisplacedStopValidator(this, errorStorage, validationResult),
new DuplicateStopsValidator(this, errorStorage),
new FaresValidator(this, errorStorage),
new FrequencyValidator(this, errorStorage),
new TimeZoneValidator(this, errorStorage),
new NewTripTimesValidator(this, errorStorage),
new NamesValidator(this, errorStorage));
// Create list of standard validators to run on every feed.
List<FeedValidator> feedValidators = Lists.newArrayList(
new MisplacedStopValidator(this, errorStorage, validationResult),
new DuplicateStopsValidator(this, errorStorage),
new FaresValidator(this, errorStorage),
new FrequencyValidator(this, errorStorage),
new TimeZoneValidator(this, errorStorage),
new NewTripTimesValidator(this, errorStorage),
new NamesValidator(this, errorStorage)
);
// Create additional validators specified in this method's args and add to list of feed validators to run.
for (FeedValidatorCreator creator : additionalValidators) {
if (creator != null) feedValidators.add(creator.create(this, errorStorage));
}

for (FeedValidator feedValidator : feedValidators) {
String validatorName = feedValidator.getClass().getSimpleName();
try {
LOG.info("Running {}.", validatorName);
int errorCountBefore = errorStorage.getErrorCount();
// todo why not just pass the feed and errorstorage in here?
feedValidator.validate();
LOG.info("{} found {} errors.", validatorName, errorStorage.getErrorCount() - errorCountBefore);
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package com.conveyal.gtfs.validator;

import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.loader.Feed;

/**
* A functional interface used to create {@link FeedValidator} instances. This interface supports the ability to pass
* validators by lambda method references ({@code SomeValidator::new}) to the {@link Feed#validate(FeedValidatorCreator...)}
* method in order to run special validation checks on specific feeds (e.g., {@link MTCValidator} should be run only on
* GTFS files loaded for those MTC operators). To instantiate the FeedValidator, simply call the
* {@link #create(Feed, SQLErrorStorage)} method, passing in the feed and errorStorage arguments. This is in lieu of
* instantiating the FeedValidator with those arguments in the constructor because these objects are not available before
* the validate method is invoked.
*/
@FunctionalInterface
public interface FeedValidatorCreator {
/**
* The callback that instantiates and returns instances of custom FeedValidator objects
* constructed using the provided feed and error storage objects.
* @param feed The feed being validated.
* @param errorStorage The object that handles error storage.
*/
FeedValidator create(Feed feed, SQLErrorStorage errorStorage);
}
56 changes: 56 additions & 0 deletions src/main/java/com/conveyal/gtfs/validator/MTCValidator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package com.conveyal.gtfs.validator;

import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.loader.Feed;
import com.conveyal.gtfs.model.*;

import static com.conveyal.gtfs.error.NewGTFSErrorType.FIELD_VALUE_TOO_LONG;

/**
* MTCValidator checks in a GTFS feed that the length of certain field values
* do not exceed the 511 MTC guidelines. (TODO: add guidelines URL.)
* To refer to specific limits, search the guidelines for the word 'character'.
binh-dam-ibigroup marked this conversation as resolved.
Show resolved Hide resolved
*/
public class MTCValidator extends FeedValidator {

public MTCValidator(Feed feed, SQLErrorStorage errorStorage) {
super(feed, errorStorage);
}

@Override
public void validate() {
for (Agency agency : feed.agencies) {
fieldLengthShouldNotExceed(agency, agency.agency_id, 50);
fieldLengthShouldNotExceed(agency, agency.agency_name, 50);
fieldLengthShouldNotExceed(agency, agency.agency_url, 500);
}

for (Stop stop : feed.stops) {
fieldLengthShouldNotExceed(stop, stop.stop_name, 100);
}

for (Trip trip : feed.trips) {
fieldLengthShouldNotExceed(trip, trip.trip_headsign, 120);
fieldLengthShouldNotExceed(trip, trip.trip_short_name, 50);
}

// TODO: Handle calendar_attributes.txt?
binh-dam-ibigroup marked this conversation as resolved.
Show resolved Hide resolved
}

/**
* Checks that the length of a string (or Object.toString()) does not exceed a length.
* Reports an error if the length is exceeded.
* @param entity The containing GTFS entity (for error reporting purposes).
* @param objValue The value to check.
* @param maxLength The length to check, should be positive or zero.
* @return true if the length of objValue.toString() is maxLength or less or if objValue is null; false otherwise.
*/
public boolean fieldLengthShouldNotExceed(Entity entity, Object objValue, int maxLength) {
binh-dam-ibigroup marked this conversation as resolved.
Show resolved Hide resolved
String value = objValue != null ? objValue.toString() : "";
if (value.length() > maxLength) {
if (errorStorage != null) registerError(entity, FIELD_VALUE_TOO_LONG, "[over " + maxLength + " characters] " + value);
return false;
}
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ public boolean validate(Feed feed, boolean repair) {
if (trip.shape_id == null) {
isValid = false;
if (missingShapeErrorCount < errorLimit) {
feed.errors.add(new MissingShapeError(trip));
// FIXME store MissingShape errors
// feed.errors.add(new MissingShapeError(trip));
binh-dam-ibigroup marked this conversation as resolved.
Show resolved Hide resolved
}
missingShapeErrorCount++;
continue;
Expand Down Expand Up @@ -111,7 +112,8 @@ public boolean validate(Feed feed, boolean repair) {
// check if first stop is x times closer to end of shape than the beginning or last stop is x times closer to start than the end
if (distanceFirstStopToStart > (distanceFirstStopToEnd * distanceMultiplier) && distanceLastStopToEnd > (distanceLastStopToStart * distanceMultiplier)) {
if (reversedTripShapeErrorCount < errorLimit) {
feed.errors.add(new ReversedTripShapeError(trip));
// FIXME store ReversedTripShape errors
// feed.errors.add(new ReversedTripShapeError(trip));
}
reversedTripShapeErrorCount++;
isValid = false;
Expand All @@ -120,7 +122,8 @@ public boolean validate(Feed feed, boolean repair) {
if (missingCoordinatesErrorCount > 0) {
for (Map.Entry<ShapePoint, List<String>> shapeError : missingShapesMap.entrySet()) {
String[] tripIdList = shapeError.getValue().toArray(new String[shapeError.getValue().size()]);
feed.errors.add(new ShapeMissingCoordinatesError(shapeError.getKey(), tripIdList));
// FIXME store ShapeMissingCoordinates errors
// feed.errors.add(new ShapeMissingCoordinatesError(shapeError.getKey(), tripIdList));
}
}
return isValid;
Expand Down
52 changes: 42 additions & 10 deletions src/test/java/com/conveyal/gtfs/GTFSTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
import com.conveyal.gtfs.storage.PersistenceExpectation;
import com.conveyal.gtfs.storage.RecordExpectation;
import com.conveyal.gtfs.util.InvalidNamespaceException;
import com.conveyal.gtfs.validator.FeedValidatorCreator;
import com.conveyal.gtfs.validator.MTCValidator;
import com.conveyal.gtfs.validator.ValidationResult;
import com.csvreader.CsvReader;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.io.Files;
import org.apache.commons.dbutils.DbUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.input.BOMInputStream;
import org.hamcrest.Matcher;
Expand All @@ -32,16 +33,11 @@
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

Expand Down Expand Up @@ -306,6 +302,34 @@ public void canLoadAndExportSimpleAgencyWithOnlyCalendarDates() {
);
}

/**
* Tests that a GTFS feed with long field values generates corresponding
* validation errors per MTC guidelines.
*/
@Test
public void canLoadFeedWithLongFieldValues () {
PersistenceExpectation[] expectations = PersistenceExpectation.list();
ErrorExpectation[] errorExpectations = ErrorExpectation.list(
new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG),
new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG),
new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG),
new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG),
new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG),
new ErrorExpectation(NewGTFSErrorType.FIELD_VALUE_TOO_LONG),
new ErrorExpectation(NewGTFSErrorType.FEED_TRAVEL_TIMES_ROUNDED) // Not related, not worrying about this one.
);
assertThat(
"Long-field-value test passes",
runIntegrationTestOnFolder(
"fake-agency-mtc-long-fields",
nullValue(),
expectations,
errorExpectations,
MTCValidator::new
),
equalTo(true)
);
}

/**
* A helper method that will zip a specified folder in test/main/resources and call
Expand All @@ -315,7 +339,8 @@ private boolean runIntegrationTestOnFolder(
String folderName,
Matcher<Object> fatalExceptionExpectation,
PersistenceExpectation[] persistenceExpectations,
ErrorExpectation[] errorExpectations
ErrorExpectation[] errorExpectations,
FeedValidatorCreator... customValidators
) {
LOG.info("Running integration test on folder {}", folderName);
// zip up test folder into temp zip file
Expand All @@ -326,7 +351,13 @@ private boolean runIntegrationTestOnFolder(
e.printStackTrace();
return false;
}
return runIntegrationTestOnZipFile(zipFileName, fatalExceptionExpectation, persistenceExpectations, errorExpectations);
return runIntegrationTestOnZipFile(
zipFileName,
fatalExceptionExpectation,
persistenceExpectations,
errorExpectations,
customValidators
);
}

/**
Expand All @@ -342,7 +373,8 @@ private boolean runIntegrationTestOnZipFile(
String zipFileName,
Matcher<Object> fatalExceptionExpectation,
PersistenceExpectation[] persistenceExpectations,
ErrorExpectation[] errorExpectations
ErrorExpectation[] errorExpectations,
FeedValidatorCreator... customValidators
) {
String testDBName = TestUtils.generateNewDB();
String dbConnectionUrl = String.join("/", JDBC_URL, testDBName);
Expand All @@ -359,7 +391,7 @@ private boolean runIntegrationTestOnZipFile(
// load and validate feed
LOG.info("load and validate GTFS file {}", zipFileName);
FeedLoadResult loadResult = GTFS.load(zipFileName, dataSource);
ValidationResult validationResult = GTFS.validate(loadResult.uniqueIdentifier, dataSource);
ValidationResult validationResult = GTFS.validate(loadResult.uniqueIdentifier, dataSource, customValidators);

assertThat(validationResult.fatalException, is(fatalExceptionExpectation));
namespace = loadResult.uniqueIdentifier;
Expand Down
Loading