Add FeedReader and TransitFeed for working with complete feeds
FeedReader provides access to transit feed entries stored in a zip archive or
a plain directory.

TransitFeed is an in-memory container for transit feed entries that can be
populated from a FeedReader.

Addresses some of georust#5.
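
A rough usage sketch of the new API (the transitfeed::{FeedReader, TransitFeed}
re-exports, the feed paths, and the error handling here are illustrative only):

extern crate transitfeed;

use transitfeed::{FeedReader, TransitFeed};

fn main() {
    // Read a feed from a directory of GTFS text files (path is hypothetical)...
    let reader = FeedReader::new("examples/good_feed");
    // ...or from a zip archive, which is extracted to a temp dir behind the scenes.
    let zipped = FeedReader::from_zip("examples/good_feed.zip").expect("could not open archive");

    // Iterate lazily over a single file, skipping rows that fail to parse,
    let stops = reader.stops().expect("missing stops.txt");
    println!("parsed {} stops", stops.filter_map(Result::ok).count());

    // or load the whole feed into memory and look records up by id.
    let feed = TransitFeed::from_reader(&zipped).expect("could not load feed");
    println!("has BULLFROG: {}", feed.find_stop("BULLFROG").is_some());
}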

Thanks to @DenZip for the FeedProvider trait idea.
medwards committed May 2, 2018
1 parent eb58c9d commit 8596c1f
Showing 8 changed files with 394 additions and 1 deletion.
2 changes: 2 additions & 0 deletions Cargo.toml
@@ -10,8 +10,10 @@ keywords = ["transit", "gtfs", "transitfeed"]
[dependencies]
chrono = { version = "0.4", features = ["serde"] }
csv = "1.0.0-beta.5"
failure = "0.1.1"
serde = "1"
serde_derive = "1"
tempfile = "3.0.1"
zip = "0.2"

[lib]
11 changes: 11 additions & 0 deletions examples/invalid_csv/stops.txt
@@ -0,0 +1,11 @@
stop_id;stop_name;stop_desc;stop_lat;stop_lon;zone_id;stop_url;stop_code;location_type;parent_station
FUR_CREEK_RES;Furnace Creek Resort (Demo);;36.425288;-117.133162;;;1234;;
BEATTY_AIRPORT;Nye County Airport (Demo);;36.868446;-116.784582;;;1235;0;BEATTY_AIRPORT_STATION
BEATTY_AIRPORT_STATION;Nye County Airport (Demo);;36.868446;-116.784582;;;1235;1;
BULLFROG;Bullfrog (Demo);;36.88108;-116.81797;;;;;
STAGECOACH;Stagecoach Hotel & Casino (Demo);;36.915682;-116.751677;;;1236;;
NADAV;North Ave / D Ave N (Demo);;36.914893;-116.76821;;;1237;;
NANAA;North Ave / N A Ave (Demo);;36.914944;-116.761472;;;1238;;
DADAN;Doing Ave / D Ave N (Demo);;36.909489;-116.768242;;;;;
EMSI;E Main St / S Irving St (Demo);;36.905697;-116.76218;;;;;
AMV;Amargosa Valley (Demo);;36.641496;-116.40094;;;;;
1 change: 1 addition & 0 deletions examples/unknown_format.zip
@@ -0,0 +1 @@
not a real zip file
83 changes: 83 additions & 0 deletions src/archive.rs
@@ -0,0 +1,83 @@
use failure;
use std;
use std::io;
use std::fs;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
use zip;

pub fn extract_zip<T: io::Read + io::Seek>(
archive: &mut zip::ZipArchive<T>,
output: &std::path::Path,
) -> Result<(), failure::Error> {
for i in 0..archive.len() {
let mut file = archive.by_index(i)?;
let outpath = output.join(sanitize_filename(file.name()));

{
let comment = file.comment();
if !comment.is_empty() {
println!(" File comment: {}", comment);
}
}

let perms = convert_permissions(file.unix_mode());

// zip archives mark directory entries with a trailing '/' in the name
if file.name().ends_with('/') {
create_directory(&outpath, perms);
} else {
write_file(&mut file, &outpath, perms);
}
}
Ok(())
}

#[cfg(unix)]
fn convert_permissions(mode: Option<u32>) -> Option<fs::Permissions> {
mode.map(fs::Permissions::from_mode)
}
#[cfg(not(unix))]
fn convert_permissions(_mode: Option<u32>) -> Option<fs::Permissions> {
None
}

fn write_file(
file: &mut zip::read::ZipFile,
outpath: &std::path::Path,
perms: Option<fs::Permissions>,
) {
let mut outfile = fs::File::create(&outpath).unwrap();
io::copy(file, &mut outfile).unwrap();
if let Some(perms) = perms {
fs::set_permissions(outpath, perms).unwrap();
}
}

fn create_directory(outpath: &std::path::Path, perms: Option<fs::Permissions>) {
fs::create_dir_all(&outpath).unwrap();
if let Some(perms) = perms {
fs::set_permissions(outpath, perms).unwrap();
}
}

fn sanitize_filename(filename: &str) -> std::path::PathBuf {
let no_null_filename = match filename.find('\0') {
Some(index) => &filename[0..index],
None => filename,
};

std::path::Path::new(no_null_filename)
.components()
.filter(|component| match *component {
std::path::Component::Normal(..) => true,
_ => false,
})
.fold(std::path::PathBuf::new(), |mut path, ref cur| {
path.push(cur.as_os_str());
path
})
}
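
Roughly how this helper is meant to be called from inside the crate (mirroring
ZipFeedProvider::new in src/feed.rs below; the path arguments are placeholders):

use std::fs::File;
use std::path::Path;

use archive::extract_zip;
use failure;
use zip;

fn unpack_feed(zipfile: &str, out_dir: &Path) -> Result<(), failure::Error> {
    // Anything implementing Read + Seek works; a plain File is the usual case.
    let mut archive = zip::ZipArchive::new(File::open(zipfile)?)?;
    extract_zip(&mut archive, out_dir)
}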
288 changes: 288 additions & 0 deletions src/feed.rs
@@ -0,0 +1,288 @@
extern crate tempfile;

use csv;
use failure;
use serde;
use std;
use std::collections::HashMap;
use std::path::Path;
use std::fs::File;
use self::tempfile::{Builder, TempDir};
use zip;

use archive::extract_zip;
use transit::{Agency, Calendar, CalendarDate, FareAttribute, FareRule, FeedInfo, Frequency, Route,
ShapePoint, Stop, StopTime, Transfer, Trip};
use gtfs::GTFSIterator;
use gtfs::Error;
// TODO: re-expose trim settings once rust-csv releases it
pub use csv::Terminator;

#[derive(Debug)]
pub struct FeedReader<P>
where
P: FeedProvider,
{
provider: P,
builder: csv::ReaderBuilder,
}

pub trait FeedProvider {
fn path(&self) -> &str;
}

pub struct LocalFeedProvider {
path: String,
}

impl LocalFeedProvider {
fn new(path: &str) -> LocalFeedProvider {
LocalFeedProvider {
path: path.to_string(),
}
}
}

impl FeedProvider for LocalFeedProvider {
fn path(&self) -> &str {
return &self.path;
}
}

#[derive(Debug)]
pub struct ZipFeedProvider {
dir: TempDir,
}

impl ZipFeedProvider {
fn new(zipfile: &str) -> Result<ZipFeedProvider, failure::Error> {
let dir = Builder::new().prefix("transitfeed").tempdir()?;
let mut zip = zip::ZipArchive::new(File::open(zipfile)?)?;
extract_zip(&mut zip, dir.path())?;
Ok(ZipFeedProvider { dir: dir })
}
}

impl FeedProvider for ZipFeedProvider {
fn path(&self) -> &str {
self.dir.path().to_str().unwrap()
}
}

impl FeedReader<LocalFeedProvider> {
pub fn new(path: &str) -> Self {
FeedReader::from_provider(LocalFeedProvider::new(path))
}
}

impl FeedReader<ZipFeedProvider> {
pub fn from_zip(zipfile: &str) -> Result<Self, failure::Error> {
Ok(FeedReader::from_provider(ZipFeedProvider::new(zipfile)?))
}
}

impl<P: FeedProvider> FeedReader<P> {
pub fn from_provider(provider: P) -> Self {
FeedReader {
provider: provider,
builder: csv::ReaderBuilder::new(),
}
}

pub fn builder(&mut self) -> &mut csv::ReaderBuilder {
&mut self.builder
}

pub fn agencies(&self) -> Result<GTFSIterator<File, Agency>, Error> {
self.make_iterator("agency.txt")
}

pub fn stops(&self) -> Result<GTFSIterator<File, Stop>, Error> {
self.make_iterator("stops.txt")
}

pub fn routes(&self) -> Result<GTFSIterator<File, Route>, Error> {
self.make_iterator("routes.txt")
}

pub fn trips(&self) -> Result<GTFSIterator<File, Trip>, Error> {
self.make_iterator("trips.txt")
}

pub fn stop_times(&self) -> Result<GTFSIterator<File, StopTime>, Error> {
self.make_iterator("stop_times.txt")
}

pub fn calendars(&self) -> Result<GTFSIterator<File, Calendar>, Error> {
self.make_iterator("calendar.txt")
}

pub fn calendar_dates(&self) -> Result<GTFSIterator<File, CalendarDate>, Error> {
self.make_iterator("calendar_dates.txt")
}

pub fn fare_attributes(&self) -> Result<GTFSIterator<File, FareAttribute>, Error> {
self.make_iterator("fare_attributes.txt")
}

pub fn fare_rules(&self) -> Result<GTFSIterator<File, FareRule>, Error> {
self.make_iterator("fare_rules.txt")
}

pub fn shapes(&self) -> Result<GTFSIterator<File, ShapePoint>, Error> {
self.make_iterator("shapes.txt")
}

pub fn frequencies(&self) -> Result<GTFSIterator<File, Frequency>, Error> {
self.make_iterator("frequencies.txt")
}

pub fn transfers(&self) -> Result<GTFSIterator<File, Transfer>, Error> {
self.make_iterator("transfers.txt")
}

pub fn feed_info(&self) -> Result<GTFSIterator<File, FeedInfo>, Error> {
self.make_iterator("feed_info.txt")
}

fn make_iterator<T>(&self, filename: &str) -> Result<GTFSIterator<File, T>, Error>
where
T: serde::de::DeserializeOwned,
{
let path = match Path::new(&self.provider.path()).join(filename).to_str() {
Some(path_str) => path_str.to_string(),
None => {
return Err(Error::Feed(format!(
"failed to construct path from {} and {}",
self.provider.path(),
filename
)))
}
};
let reader = match self.builder.from_path(&path) {
Ok(reader) => reader,
Err(e) => return Err(Error::Csv(path, e)),
};
Ok(GTFSIterator::new(reader, &path)?)
}
}

/// Container for all transit records
pub struct TransitFeed {
pub agencies: Vec<Agency>,
pub stops: Vec<Stop>,
pub routes: Vec<Route>,
pub trips: Vec<Trip>,
pub stoptimes: Vec<StopTime>,
pub calendars: Vec<Calendar>,
pub calendar_dates: Option<Vec<CalendarDate>>,
pub fare_attributes: Option<Vec<FareAttribute>>,
pub fare_rules: Option<Vec<FareRule>>,
pub shapes: Option<Vec<ShapePoint>>,
pub frequencies: Option<Vec<Frequency>>,
pub transfers: Option<Vec<Transfer>>,
pub feedinfo: Option<FeedInfo>,

stop_map: HashMap<String, usize>,
route_map: HashMap<String, usize>,
trip_map: HashMap<String, usize>,
}

impl TransitFeed {
pub fn from_reader<P: FeedProvider>(reader: &FeedReader<P>) -> Result<TransitFeed, Error> {
let agencies = load_feed_file(reader.agencies()?);
let stops = load_feed_file(reader.stops()?);
let routes = load_feed_file(reader.routes()?);
let trips = load_feed_file(reader.trips()?);
let stoptimes = load_feed_file(reader.stop_times()?);
let calendars = load_feed_file(reader.calendars()?);

let stop_map = make_map(&stops, |stop: &Stop| stop.stop_id.clone());
let route_map = make_map(&routes, |route: &Route| route.route_id.clone());
let trip_map = make_map(&trips, |trip: &Trip| trip.trip_id.clone());

Ok(TransitFeed {
agencies: agencies,
stops: stops,
stop_map: stop_map,
routes: routes,
route_map: route_map,
trips: trips,
trip_map: trip_map,
stoptimes: stoptimes,
calendars: calendars,
calendar_dates: load_optional_feed_file(reader.calendar_dates()),
fare_attributes: load_optional_feed_file(reader.fare_attributes()),
fare_rules: load_optional_feed_file(reader.fare_rules()),
shapes: load_optional_feed_file(reader.shapes()),
frequencies: load_optional_feed_file(reader.frequencies()),
transfers: load_optional_feed_file(reader.transfers()),
feedinfo: match load_optional_feed_file(reader.feed_info()) {
Some(mut records) => {
if records.len() != 1 {
println!("Unexpected number of entries in feed_info.txt");
}
records.pop()
}
None => None,
},
})
}

pub fn find_stop(&self, id: &str) -> Option<&Stop> {
TransitFeed::find_record(id, &self.stop_map, &self.stops)
}

pub fn find_route(&self, id: &str) -> Option<&Route> {
TransitFeed::find_record(id, &self.route_map, &self.routes)
}

pub fn find_trip(&self, id: &str) -> Option<&Trip> {
TransitFeed::find_record(id, &self.trip_map, &self.trips)
}

fn find_record<'a, T>(
record_id: &str,
map: &HashMap<String, usize>,
records: &'a Vec<T>,
) -> Option<&'a T> {
map.get(record_id).map(|index| &records[*index])
}
}

// TODO: Need to log stuff here
fn load_feed_file<R, T>(iter: GTFSIterator<R, T>) -> Vec<T>
where
R: std::io::Read,
for<'de> T: serde::Deserialize<'de>,
{
iter.filter_map(|r| match r {
Ok(r) => Some(r),
Err(e) => {
println!("SKIPPING - {}", e);
None
}
}).collect()
}

fn load_optional_feed_file<R, T>(result: Result<GTFSIterator<R, T>, Error>) -> Option<Vec<T>>
where
R: std::io::Read,
for<'de> T: serde::Deserialize<'de>,
{
match result {
Ok(iter) => Some(load_feed_file(iter)),
Err(e) => {
println!("SKIPPING optional file - {}", e);
None
}
}
}

fn make_map<T, F: Fn(&T) -> String>(records: &Vec<T>, key_fn: F) -> HashMap<String, usize> {
records
.iter()
.enumerate()
.map(|(index, record)| (key_fn(record), index))
.collect()
}
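
The builder() hook above exposes the underlying csv::ReaderBuilder, so feeds
that stray from the spec can still be parsed; for example, a semicolon-delimited
file like the examples/invalid_csv/stops.txt fixture could in principle be read
with something along these lines (the fixture reuse here is purely illustrative):

let mut reader = FeedReader::new("examples/invalid_csv");
reader.builder().delimiter(b';');
let stops: Vec<_> = reader
    .stops()
    .expect("missing stops.txt")
    .filter_map(Result::ok)
    .collect();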