diff --git a/data-raw/my_dates.R b/data-raw/my_dates.R new file mode 100644 index 00000000..0e52116c --- /dev/null +++ b/data-raw/my_dates.R @@ -0,0 +1,17 @@ +# Generate inst/extdata/my_dates.csv: 1001 distinct days drawn from +# 2020-01-01..2022-12-31, stored as "YYYY-MM-DD" strings. +# NOTE: sample() is unseeded, so a re-run will not reproduce the committed CSV. +data <- as.character(sample( + seq( + as.Date("2020/01/01"), + as.Date("2022/12/31"), + by = "day" + ), 1001 +)) + +dates <- tibble::tibble(dates = data) + +# Row 1000 is replaced with a separator-less date to act as the malformed entry. +dates[1000, ] <- "20220907" +# Qualify write_csv(): this script never attaches readr. +readr::write_csv(dates, "inst/extdata/my_dates.csv") diff --git a/inst/extdata/my_dates.csv b/inst/extdata/my_dates.csv new file mode 100644 index 00000000..1b6f9b24 --- /dev/null +++ b/inst/extdata/my_dates.csv @@ -0,0 +1,1002 @@ +dates +2020-08-17 +2022-04-05 +2020-10-19 +2022-10-18 +2020-06-27 +2021-09-07 +2020-02-17 +2022-11-28 +2021-05-20 +2020-07-13 +2022-04-19 +2022-03-15 +2021-10-09 +2020-07-26 +2021-10-23 +2022-11-14 +2020-03-08 +2021-02-20 +2020-06-15 +2022-11-10 +2022-09-11 +2022-10-07 +2021-04-11 +2021-12-28 +2020-05-11 +2022-07-25 +2022-06-25 +2021-03-15 +2021-12-30 +2021-01-21 +2021-09-13 +2022-10-22 +2021-09-06 +2021-05-28 +2022-06-26 +2022-01-27 +2021-04-14 +2020-12-07 +2020-07-28 +2021-02-28 +2020-06-11 +2020-10-10 +2022-01-20 +2020-08-04 +2020-01-01 +2022-07-13 +2020-06-22 +2021-08-25 +2020-09-07 +2022-06-04 +2020-11-08 +2022-05-10 +2020-04-20 +2022-11-15 +2020-03-11 +2021-04-12 +2022-11-07 +2020-08-14 +2021-02-25 +2020-05-13 +2021-10-31 +2021-09-05 +2020-11-22 +2022-05-01 +2022-10-11 +2022-08-02 +2020-12-25 +2020-06-24 +2022-10-30 +2022-09-23 +2022-03-18 +2022-12-31 +2020-06-08 +2021-09-26 +2021-11-25 +2022-04-03 +2022-05-19 +2022-10-12 +2021-12-29 +2021-03-14 +2020-02-09 +2021-12-19 +2021-06-26 +2021-10-27 +2022-05-09 +2020-09-03 +2021-06-08 +2022-08-22 +2020-08-07 +2022-06-08 +2021-07-01 +2021-07-28 +2020-01-03 +2022-12-21 +2021-07-11 +2021-03-11 +2021-10-05 +2020-03-19 +2022-09-30 +2021-12-01 +2021-01-22 +2021-11-10 +2020-01-15 +2021-11-11 +2020-01-07 +2022-03-09 +2020-03-13 +2021-07-15 +2020-11-20 +2022-09-16 +2022-08-05 +2020-05-21 +2021-03-12 +2021-04-15 +2021-02-06 +2020-06-21 +2022-11-22 +2021-09-12 
+2020-07-10 +2021-04-07 +2022-03-05 +2020-03-07 +2021-08-01 +2022-12-20 +2020-05-03 +2021-10-24 +2020-04-23 +2020-08-20 +2022-12-22 +2020-09-27 +2021-03-02 +2020-10-16 +2022-06-09 +2022-01-13 +2022-05-17 +2020-11-03 +2020-09-22 +2021-06-03 +2021-07-13 +2022-04-30 +2020-02-06 +2020-01-23 +2020-01-04 +2020-10-03 +2020-08-03 +2020-04-21 +2021-04-23 +2022-01-25 +2020-06-19 +2021-01-18 +2020-07-23 +2022-02-08 +2022-07-23 +2021-11-05 +2022-04-11 +2022-04-06 +2022-09-22 +2022-09-26 +2020-12-15 +2020-07-09 +2021-05-04 +2021-11-06 +2021-12-10 +2022-04-07 +2022-11-21 +2020-02-01 +2021-12-18 +2021-08-20 +2021-02-11 +2022-01-28 +2020-01-14 +2022-10-27 +2020-12-09 +2022-02-14 +2021-11-14 +2021-04-03 +2021-04-13 +2022-07-01 +2021-03-03 +2022-06-29 +2020-11-24 +2022-03-11 +2021-12-22 +2021-09-15 +2021-12-06 +2022-06-30 +2021-04-20 +2022-08-25 +2021-05-19 +2022-05-27 +2020-12-30 +2022-02-11 +2021-11-30 +2022-01-26 +2022-02-16 +2020-12-19 +2022-05-29 +2020-01-19 +2021-08-16 +2022-01-18 +2022-11-24 +2022-05-08 +2020-01-13 +2020-04-08 +2022-12-06 +2020-07-31 +2022-07-05 +2020-09-12 +2022-07-10 +2020-01-27 +2020-11-19 +2022-09-17 +2022-02-09 +2021-04-06 +2022-11-11 +2020-05-10 +2020-09-17 +2022-01-30 +2021-07-25 +2020-02-24 +2022-08-23 +2021-05-12 +2020-02-14 +2020-03-09 +2021-07-08 +2021-08-02 +2021-09-03 +2021-10-04 +2020-06-07 +2022-01-14 +2022-05-25 +2022-06-01 +2020-03-27 +2020-07-24 +2020-09-13 +2020-09-06 +2020-08-01 +2020-09-23 +2022-06-24 +2021-02-24 +2021-11-01 +2021-07-02 +2021-06-21 +2021-08-26 +2021-10-21 +2020-05-29 +2021-11-29 +2020-09-29 +2022-02-19 +2022-12-25 +2021-02-10 +2022-07-21 +2020-08-23 +2020-10-15 +2020-11-26 +2021-07-12 +2021-07-27 +2020-03-30 +2021-09-09 +2022-06-05 +2020-12-05 +2020-04-24 +2022-11-05 +2020-07-04 +2021-02-17 +2020-11-04 +2021-09-29 +2020-02-29 +2020-04-13 +2021-12-16 +2021-11-27 +2022-04-14 +2022-03-25 +2021-09-30 +2021-05-07 +2020-07-25 +2020-09-04 +2020-03-31 +2021-03-30 +2021-07-16 +2021-07-24 +2020-04-16 +2020-04-12 +2022-07-28 
+2022-02-03 +2021-07-04 +2020-04-07 +2021-04-16 +2021-12-27 +2020-08-09 +2022-10-19 +2022-01-08 +2021-01-16 +2022-07-12 +2022-10-10 +2022-05-11 +2022-03-21 +2020-09-11 +2022-08-12 +2021-09-22 +2021-04-25 +2020-09-10 +2021-05-18 +2022-04-18 +2021-06-09 +2020-04-11 +2022-02-13 +2021-08-19 +2022-11-08 +2022-09-24 +2022-02-23 +2022-01-23 +2021-01-11 +2020-07-03 +2020-11-02 +2020-06-09 +2022-01-02 +2022-01-29 +2020-03-02 +2022-10-26 +2022-04-09 +2021-05-27 +2020-11-28 +2022-06-28 +2021-12-14 +2022-05-02 +2020-08-10 +2021-08-24 +2021-12-08 +2020-11-11 +2020-07-08 +2022-06-11 +2020-01-22 +2022-10-14 +2022-09-21 +2020-06-05 +2021-05-25 +2020-01-10 +2021-02-26 +2020-01-29 +2021-03-01 +2021-02-14 +2021-06-25 +2021-01-26 +2020-04-06 +2020-05-01 +2022-10-02 +2020-12-18 +2021-05-13 +2021-07-30 +2022-11-09 +2022-02-20 +2021-04-02 +2022-09-19 +2022-10-17 +2022-07-11 +2022-09-13 +2022-02-18 +2020-12-27 +2020-03-28 +2021-11-19 +2021-05-05 +2021-03-24 +2021-05-11 +2020-09-01 +2020-05-27 +2022-10-06 +2020-01-06 +2021-08-22 +2020-03-16 +2021-08-03 +2022-01-10 +2022-03-13 +2022-02-02 +2020-06-03 +2022-06-07 +2020-06-04 +2022-02-22 +2020-11-30 +2020-04-22 +2021-02-04 +2020-07-17 +2021-03-13 +2020-07-05 +2022-06-03 +2020-04-01 +2021-11-17 +2021-06-01 +2022-06-16 +2021-09-02 +2020-02-16 +2022-01-12 +2022-11-12 +2021-05-10 +2022-04-29 +2022-02-06 +2020-09-09 +2022-03-31 +2020-04-29 +2021-06-18 +2022-03-07 +2021-03-06 +2021-12-13 +2021-06-02 +2020-10-02 +2021-04-09 +2020-09-30 +2020-12-10 +2022-01-15 +2020-04-17 +2022-03-24 +2021-10-30 +2021-02-01 +2021-05-30 +2022-10-15 +2020-03-15 +2021-01-07 +2020-06-10 +2022-07-20 +2021-11-13 +2021-11-18 +2022-03-20 +2021-04-26 +2020-03-14 +2020-10-09 +2021-09-01 +2021-06-07 +2022-01-06 +2022-03-17 +2020-04-09 +2022-10-08 +2022-12-14 +2022-04-02 +2021-04-05 +2020-08-29 +2021-04-24 +2020-08-08 +2021-12-07 +2022-05-16 +2021-02-16 +2022-05-07 +2021-02-19 +2022-10-31 +2022-11-06 +2022-05-23 +2022-10-28 +2020-01-12 +2022-10-04 +2021-12-24 +2020-02-05 
+2021-08-27 +2022-08-30 +2020-11-13 +2022-12-26 +2020-05-08 +2022-08-29 +2022-06-02 +2022-07-26 +2021-03-21 +2021-08-09 +2021-02-22 +2021-10-11 +2021-03-10 +2022-08-24 +2021-09-17 +2021-07-19 +2022-02-17 +2020-06-01 +2020-10-30 +2021-03-27 +2020-11-14 +2021-09-19 +2021-10-25 +2022-05-13 +2020-10-12 +2022-11-27 +2022-12-29 +2022-09-09 +2020-07-15 +2022-10-21 +2020-04-28 +2022-11-03 +2020-10-31 +2022-12-18 +2022-12-13 +2021-04-19 +2021-10-07 +2020-08-12 +2020-03-23 +2021-03-07 +2022-04-24 +2020-12-08 +2020-03-26 +2022-10-09 +2020-10-01 +2021-10-08 +2022-07-08 +2020-08-16 +2022-09-15 +2020-10-20 +2021-12-21 +2020-09-05 +2021-12-12 +2020-10-29 +2020-10-13 +2021-12-05 +2020-05-16 +2021-01-04 +2022-11-17 +2022-05-05 +2020-07-07 +2021-01-17 +2020-11-12 +2022-08-07 +2021-01-19 +2022-03-03 +2022-12-23 +2021-01-25 +2020-01-28 +2021-06-27 +2021-06-22 +2020-02-27 +2021-08-29 +2020-02-28 +2020-12-04 +2022-09-04 +2022-08-15 +2021-03-22 +2020-03-29 +2022-05-21 +2021-05-17 +2020-08-22 +2022-04-10 +2021-05-31 +2021-12-03 +2020-03-21 +2021-04-04 +2021-03-04 +2020-12-16 +2022-04-23 +2020-12-20 +2020-01-09 +2021-11-21 +2020-11-25 +2021-05-23 +2022-02-28 +2021-02-07 +2020-03-17 +2022-01-04 +2021-02-18 +2021-10-02 +2022-12-10 +2022-01-17 +2022-01-01 +2021-01-30 +2021-05-02 +2021-12-15 +2021-07-26 +2020-11-15 +2020-12-06 +2020-05-24 +2021-04-17 +2022-12-27 +2022-09-10 +2021-01-06 +2021-10-22 +2021-06-14 +2022-01-03 +2021-02-03 +2020-02-18 +2022-04-04 +2022-02-01 +2020-02-11 +2021-01-09 +2020-01-16 +2022-08-21 +2022-12-19 +2021-03-28 +2021-09-08 +2022-03-26 +2020-04-03 +2021-05-03 +2020-02-20 +2020-10-06 +2022-04-26 +2022-09-02 +2022-10-20 +2021-01-14 +2020-08-28 +2020-01-11 +2021-08-15 +2020-11-01 +2020-03-24 +2022-03-27 +2021-04-21 +2021-03-09 +2020-11-21 +2020-08-02 +2022-07-16 +2020-06-17 +2021-03-29 +2020-04-10 +2021-07-17 +2021-02-08 +2022-01-11 +2020-02-21 +2020-10-11 +2020-10-18 +2022-09-28 +2020-08-06 +2022-09-05 +2020-10-27 +2021-09-14 +2021-01-27 +2021-02-23 +2022-03-29 
+2020-12-17 +2021-04-08 +2022-04-21 +2021-10-15 +2022-10-03 +2020-06-06 +2020-05-30 +2021-11-04 +2022-11-18 +2020-01-26 +2020-05-25 +2020-06-12 +2020-02-15 +2020-02-02 +2021-05-16 +2022-09-27 +2022-12-24 +2021-01-23 +2022-09-25 +2021-07-18 +2021-09-18 +2020-07-22 +2022-11-25 +2022-05-12 +2022-05-28 +2021-11-20 +2021-11-08 +2022-11-20 +2022-08-06 +2020-01-20 +2020-10-07 +2020-11-07 +2022-06-10 +2021-08-05 +2022-02-12 +2022-07-29 +2022-12-07 +2021-07-29 +2020-07-16 +2022-08-19 +2022-03-19 +2022-12-05 +2021-02-21 +2021-08-14 +2022-12-15 +2022-09-14 +2022-01-22 +2022-05-03 +2021-10-28 +2021-06-29 +2022-09-01 +2022-03-02 +2022-08-03 +2022-05-31 +2020-06-16 +2022-10-16 +2022-06-17 +2020-12-13 +2020-04-30 +2021-04-10 +2021-06-05 +2020-09-24 +2022-10-05 +2021-05-29 +2021-07-21 +2020-03-22 +2020-03-20 +2021-02-09 +2022-09-07 +2020-12-03 +2021-01-28 +2020-10-14 +2020-06-20 +2022-08-27 +2022-05-14 +2021-05-22 +2022-08-16 +2022-11-01 +2021-02-15 +2020-09-19 +2020-10-26 +2020-05-26 +2022-12-28 +2020-12-22 +2021-10-18 +2021-03-26 +2022-04-13 +2021-10-03 +2021-12-20 +2021-08-28 +2022-02-07 +2020-01-21 +2021-10-29 +2020-09-14 +2022-02-10 +2022-07-17 +2020-01-05 +2022-05-20 +2022-09-18 +2021-03-05 +2022-06-18 +2020-04-14 +2021-11-09 +2021-03-17 +2022-08-08 +2022-12-03 +2020-11-17 +2022-12-12 +2020-05-12 +2020-02-22 +2020-12-21 +2020-07-20 +2021-06-24 +2022-01-09 +2022-08-14 +2020-01-08 +2020-01-30 +2021-10-20 +2020-05-17 +2022-03-30 +2020-12-11 +2021-09-10 +2021-07-09 +2020-10-24 +2021-08-23 +2022-04-25 +2020-04-15 +2022-03-01 +2021-04-30 +2020-06-14 +2021-05-14 +2020-10-25 +2022-02-27 +2022-04-17 +2021-11-12 +2020-01-02 +2022-02-24 +2021-10-17 +2020-06-13 +2022-11-23 +2020-10-21 +2021-07-05 +2020-12-23 +2022-06-13 +2020-08-18 +2021-01-02 +2022-06-21 +2020-11-27 +2021-08-04 +2021-10-13 +2020-03-25 +2021-01-01 +2022-10-23 +2020-09-18 +2021-06-30 +2021-08-07 +2021-09-25 +2020-05-09 +2020-08-05 +2022-11-02 +2021-12-11 +2020-10-28 +2022-07-04 +2020-02-19 +2022-09-20 +2022-11-13 
+2020-07-18 +2022-02-04 +2021-09-23 +2021-07-07 +2021-04-22 +2022-08-28 +2021-07-23 +2021-06-06 +2021-07-06 +2021-04-18 +2021-02-12 +2022-05-24 +2020-11-16 +2020-02-13 +2022-10-25 +2020-05-14 +2020-01-31 +2022-12-02 +2020-05-22 +2022-12-04 +2021-03-16 +2020-07-01 +2022-01-07 +2020-01-18 +2022-03-16 +2022-12-30 +2022-11-19 +2020-02-08 +2020-05-15 +2020-03-05 +2021-10-16 +2021-05-09 +2020-09-21 +2020-04-19 +2020-07-14 +2022-06-27 +2021-06-16 +2022-08-04 +2020-06-26 +2021-06-10 +2020-08-30 +2021-03-23 +2022-08-31 +2022-04-20 +2022-10-29 +2021-08-13 +2020-01-24 +2022-08-18 +2021-01-24 +2021-09-04 +2022-04-15 +2020-03-06 +2020-08-25 +2021-12-09 +2020-06-29 +2020-02-07 +2020-03-18 +2021-03-25 +2021-11-28 +2022-08-20 +2022-06-06 +2021-12-17 +2021-08-11 +2022-02-05 +2022-09-06 +2021-09-16 +2020-04-04 +2020-07-30 +2021-06-23 +2021-11-02 +2020-11-09 +2022-04-08 +2022-09-08 +2020-05-02 +2020-04-18 +2022-05-15 +2021-09-28 +2020-10-08 +2021-09-24 +2020-11-05 +2022-04-27 +2020-11-23 +2020-08-11 +2020-12-31 +2022-07-18 +2022-07-31 +2020-07-27 +2021-06-15 +2020-12-28 +2020-12-29 +2020-05-31 +2022-07-07 +2022-03-06 +2020-10-22 +2021-01-20 +2021-04-01 +2020-12-02 +2021-09-11 +2020-10-17 +2022-07-14 +2020-05-20 +2021-05-06 +2021-12-25 +2021-04-27 +2021-01-10 +2021-10-26 +2022-11-29 +2021-12-02 +2021-02-27 +2020-02-12 +2020-08-24 +2022-03-28 +2020-11-10 +2022-02-21 +2022-03-23 +2021-09-21 +2022-05-04 +2020-07-12 +2020-04-02 +2020-02-25 +2020-01-17 +2022-11-30 +2020-09-20 +2022-05-22 +2020-03-04 +2020-08-27 +2021-07-03 +2020-11-06 +2020-08-13 +2022-04-16 +2021-06-11 +2021-04-28 +2020-02-23 +2021-11-16 +2020-07-06 +2021-11-03 +2021-12-23 +2022-12-08 +2021-05-26 +2020-12-01 +2022-05-30 +2021-12-26 +2020-12-14 +2020-08-15 +2021-05-21 +2020-09-15 +2020-11-18 +2022-04-01 +2022-01-24 +2022-08-09 +2020-04-26 +2020-05-05 +2022-09-29 +2021-08-21 +2022-06-19 +2022-07-03 +2021-01-13 +2022-08-17 +2021-07-22 +2022-03-12 +2022-09-03 +2022-11-16 +2020-09-02 +2020-09-08 +2022-09-12 +2022-07-02 
+2020-02-10 +2020-08-31 +2022-05-06 +2020-11-29 +2021-07-10 +2021-01-15 +2021-08-06 +2021-06-19 +2021-11-23 +2021-09-20 +2021-07-31 +2020-03-03 +2021-08-17 +2022-03-22 +2021-08-30 +2020-06-02 +2020-09-26 +2022-12-11 +2021-12-31 +2020-07-19 +2020-05-04 +2021-01-05 +2021-10-12 +2021-10-14 +2022-01-05 +2021-02-05 +2020-12-26 +2021-10-19 +2022-03-08 +2020-09-25 +2020-05-07 +2020-05-18 +2021-11-26 +2022-05-26 +2020-12-12 +2022-08-13 +2021-06-13 +2022-01-19 +2021-03-08 +2021-09-27 +2022-05-18 +2022-10-13 +2022-01-16 +2020-08-21 +2021-06-17 +2020-10-23 +2022-01-21 +2020-06-18 +2022-08-01 +2022-08-10 +2020-02-04 +20220907 +2022-07-30 diff --git a/vignettes/readr.Rmd b/vignettes/readr.Rmd index 91911789..a6cc343a 100644 --- a/vignettes/readr.Rmd +++ b/vignettes/readr.Rmd @@ -127,10 +127,10 @@ parse_number("$1,234") There are two parsers that will never be guessed: `col_skip()` and `col_factor()`. You will always need to supply these explicitly. -You can see the specification that readr would generate for a column file by using `spec_csv()`, `spec_tsv()` and so on: +You can see the specification that readr would generate for a file by using `spec_csv()`, `spec_tsv()`, and so on. ```{r} -x <- spec_csv(readr_example("challenge.csv")) +x <- spec_csv(readr_example("my_dates.csv")) ``` For bigger files, you can often make the specification simpler by changing the default column type using `cols_condense()` @@ -142,14 +142,13 @@ mtcars_spec cols_condense(mtcars_spec) ``` - -By default readr only looks at the first 1000 rows. This keeps file parsing speedy, but can generate incorrect guesses. For example, in `challenge.csv` the column types change in row 1001, so readr guesses the wrong types. One way to resolve the problem is to increase the number of rows: +When guessing column types, readr selects rows interspersed throughout the file and always includes the first and last row. The default number of rows it selects is 1000. 
If the column type guess is incorrect, increasing `guess_max` may or may not improve the results. ```{r} -x <- spec_csv(readr_example("challenge.csv"), guess_max = 1001) +x <- spec_csv(readr_example("my_dates.csv"), guess_max = 1001) ``` -Another way is to manually specify the `col_type`, as described below. +The best solution is to manually specify the `col_types`, as described below. ## Rectangular parsers @@ -160,61 +159,53 @@ readr comes with five parsers for rectangular file formats: * `read_fwf()` for fixed-width files * `read_log()` for web log files -Each of these functions firsts calls `spec_xxx()` (as described above), and then parses the file according to that column specification: +If readr finds problems while parsing your data, it will emit a warning message. +For example, in our aptly named `my_dates.csv` file, we expect the column to be parsed as dates. ```{r} -df1 <- read_csv(readr_example("challenge.csv")) +df1 <- read_csv(readr_example("my_dates.csv")) ``` -The rectangular parsing functions almost always succeed; they'll only fail if the format is severely messed up. Instead, readr will generate a data frame of problems. The first few will be printed out, and you can access them all with `problems()`: +The data was successfully imported, but we get a warning message about parsing issues. +To view more details about these parsing issues, call `problems()` on your data frame. +This will generate a data frame with information about the parsing problems, with one row per problem. ```{r} problems(df1) ``` -You've already seen one way of handling bad guesses: increasing the number of rows used to guess the type of each column. +In this example, one of the dates is in an incorrect format. +Since `problems()` gives us the row number from the original file, we can view that area of data in R with a combination of `readr::read_lines()` and `writeLines()`. 
```{r} -df2 <- read_csv(readr_example("challenge.csv"), guess_max = 1001) +problem_area <- problems(df1)$row - 2 +writeLines(read_lines(readr_example("my_dates.csv"), skip = problem_area, n_max = 3)) ``` -Another approach is to manually supply the column specification. - -### Overriding the defaults - -In the previous examples, you may have noticed that readr printed the column specification that it used to parse the file: - -```{r} -#> Parsed with column specification: -#> cols( -#> x = col_integer(), -#> y = col_character() -#> ) +We can work around this parsing error by importing the date column as character and converting it to a column of dates after the fact. Importing as character and then using a more specialized package to parse the data post-import is a common solution for tricky columns. In this case, the lubridate package can parse our column into dates. + +```{r, eval = FALSE} +df1 <- read_csv(readr_example("my_dates.csv"), col_types = list( + dates = col_character() +)) +df1$dates <- lubridate::ymd(df1$dates) +df1[problem_area:(problem_area + 2), ] +#> # A tibble: 3 × 1 +#> dates +#> +#> 1 2020-02-04 +#> 2 2022-09-07 +#> 3 2022-07-30 ``` -You can also access it after the fact using `spec()`: +In general, it's good practice to supply an explicit column specification to readr. It is more work, but it ensures that you get warnings if the data changes in unexpected ways. To be really strict, you can use `stop_for_problems(df1)`. This will throw an error if there are any parsing problems, forcing you to fix those problems before proceeding with the analysis. -```{r} -spec(df1) -spec(df2) -``` - -(This also allows you to access the full column specification if you're reading a very wide file. By default, readr will only print the specification of the first 20 columns.) - -If you want to manually specify the column types, you can start by copying and pasting this code, and then tweaking it fix the parsing problems. 
+To access the full column specification after importing, you can call `spec()` on your data frame. If you want to manually specify the column types, you can start by copying and pasting this code, and then tweaking it to fix the parsing problems. ```{r} -df3 <- read_csv( - readr_example("challenge.csv"), - col_types = list( - x = col_double(), - y = col_date(format = "") - ) -) +spec(df1) ``` -In general, it's good practice to supply an explicit column specification. It is more work, but it ensures that you get warnings if the data changes in unexpected ways. To be really strict, you can use `stop_for_problems(df3)`. This will throw an error if there are any parsing problems, forcing you to fix those problems before proceeding with the analysis. - ### Available column specifications The available specifications are: (with string abbreviations in brackets) @@ -239,32 +230,32 @@ Use the `col_types` argument to override the default choices. There are two ways * With a (named) list of col objects: - ```r - read_csv("iris.csv", col_types = list( - Sepal.Length = col_double(), - Sepal.Width = col_double(), - Petal.Length = col_double(), - Petal.Width = col_double(), - Species = col_factor(c("setosa", "versicolor", "virginica")) - )) - ``` + ```{r, eval = FALSE} + read_csv("iris.csv", col_types = list( + Sepal.Length = col_double(), + Sepal.Width = col_double(), + Petal.Length = col_double(), + Petal.Width = col_double(), + Species = col_factor(c("setosa", "versicolor", "virginica")) + )) + ``` Or, with their abbreviations: - ```r - read_csv("iris.csv", col_types = list( - Sepal.Length = "d", - Sepal.Width = "d", - Petal.Length = "d", - Petal.Width = "d", - Species = col_factor(c("setosa", "versicolor", "virginica")) - )) - ``` + ```{r, eval = FALSE} + read_csv("iris.csv", col_types = list( + Sepal.Length = "d", + Sepal.Width = "d", + Petal.Length = "d", + Petal.Width = "d", + Species = col_factor(c("setosa", "versicolor", "virginica")) + )) + ``` Any omitted columns 
will be parsed automatically, so the previous call will lead to the same result as: -```r +```{r, eval = FALSE} read_csv("iris.csv", col_types = list( Species = col_factor(c("setosa", "versicolor", "virginica"))) ) @@ -273,7 +264,7 @@ read_csv("iris.csv", col_types = list( You can also set a default type that will be used instead of relying on the automatic detection for columns you don't specify: -```r +```{r, eval = FALSE} read_csv("iris.csv", col_types = list( Species = col_factor(c("setosa", "versicolor", "virginica")), .default = col_double()) @@ -282,7 +273,7 @@ read_csv("iris.csv", col_types = list( If you only want to read specified columns, use `cols_only()`: -```r +```{r, eval = FALSE} read_csv("iris.csv", col_types = cols_only( Species = col_factor(c("setosa", "versicolor", "virginica"))) )