Skip to content

Commit

Permalink
updates to spark
Browse files Browse the repository at this point in the history
  • Loading branch information
herbertli committed Dec 10, 2018
1 parent 32c8633 commit 1edf797
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 5 deletions.
Binary file added screenshots/taxi/linear_reg1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added screenshots/taxi/linear_reg2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 11 additions & 5 deletions source_code/nyc-spark/src/main/scala/PredGreen.scala
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,26 @@ object PredGreen {

val df = spark.read.schema(FeatureRow)
.csv(inputPath)

val featurized = df
.withColumn("pu_month", month($"pickupTime"))
.withColumn("pu_dayofyear", dayofyear($"pickupTime"))
.withColumn("pu_dayofweek", dowUDF($"pickupTime"))
.withColumn("pu_day", dayofmonth($"pickupTime"))
.withColumn("pu_hour", hour($"pickupTime"))
.withColumn("pu_min", minute($"pickupTime"))

val pred = rfModel.transform(df)
val pred = rfModel.transform(featurized)
.drop("pu_month", "pu_dayofyear", "pu_dayofweek", "pu_day", "pu_hour", "pu_min")

pred.write
.option("header", value = true)
.option("timestampFormat", value = "yyyy-MM-dd'T'HH:mm:ss")
.csv(outputPath)
pred.show()

// pred
// .select("passengers")
// .write
// .option("header", value = true)
// .option("timestampFormat", value = "yyyy-MM-dd'T'HH:mm:ss")
// .csv(outputPath)

}

Expand Down

0 comments on commit 1edf797

Please sign in to comment.