use {
    crate::constants::{
        ALL_INSTRUMENTS, BUFFER_SIZE, CHUNK_SIZE, NSE_OPERATING_TIME_IN_SECONDS, OHLCVOI_HEADER,
        OHLCVOI_HEADER_ARRAY,
    },
    csv::{ByteRecord, WriterBuilder},
    itertools::izip,
    rand::{rng, Rng},
    rayon::iter::{IntoParallelRefIterator, ParallelIterator},
    std::{
        fs::File,
        io::{BufWriter, Write as _},
        path::{Path, PathBuf},
    },
    time::{
        macros::format_description, Date, Duration, Month, PrimitiveDateTime, Time, UtcOffset,
        Weekday,
    },
};

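/// Column-oriented OHLCV + open-interest data: parallel vectors with one element
/// per generated per-second row of a single trading session.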
#[derive(Debug)]
pub struct Ohlcvoi {
    pub timestamp: Vec<String>,
    pub open: Vec<f32>,
    pub high: Vec<f32>,
    pub low: Vec<f32>,
    pub close: Vec<f32>,
    pub volume: Vec<u32>,
    pub oi: Vec<u32>,
}

impl Ohlcvoi {
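    /// Generates random per-second data for `date`, starting at 09:15 IST.
    /// `no_of_seconds` overrides the default session length taken from
    /// `NSE_OPERATING_TIME_IN_SECONDS`.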
    pub fn random(date: Date, no_of_seconds: Option<usize>) -> Self {
        let total_points = no_of_seconds.unwrap_or(NSE_OPERATING_TIME_IN_SECONDS);
        // The NSE session runs 09:15 to 15:30 IST; rows are generated from the
        // session open, one per second.
        let start_time = Time::from_hms(9, 15, 0).unwrap();
        // IST is UTC+05:30.
        let offset = UtcOffset::from_whole_seconds(5 * 3600 + 30 * 60).unwrap();
        let timestamp: Vec<String> = (0..total_points)
            .filter_map(|seconds| {
                let duration = Duration::seconds(seconds as i64);
                let datetime = PrimitiveDateTime::new(date, start_time) + duration;
                datetime
                    .assume_offset(offset)
                    .format(format_description!(
                        "[year]-[month]-[day]T[hour]:[minute]:[second][offset_hour sign:mandatory]:[offset_minute]"
                    ))
                    .ok()
            })
            .collect();
        let mut rng = rng();
        // Each column is sampled independently, so OHLC values are not internally
        // consistent (e.g. `high` may be below `low`); this is purely dummy data.
        let open: Vec<f32> = (0..total_points)
            .map(|_| rng.random_range(100.0..100_000.0))
            .collect();
        let high: Vec<f32> = (0..total_points)
            .map(|_| rng.random_range(100.0..100_000.0))
            .collect();
        let low: Vec<f32> = (0..total_points)
            .map(|_| rng.random_range(100.0..100_000.0))
            .collect();
        let close: Vec<f32> = (0..total_points)
            .map(|_| rng.random_range(100.0..100_000.0))
            .collect();
        let volume: Vec<u32> = (0..total_points)
            .map(|_| rng.random_range(100..1_000_000))
            .collect();
        let oi: Vec<u32> = (0..total_points)
            .map(|_| rng.random_range(100..1_000_000))
            .collect();
        Self {
            timestamp,
            open,
            high,
            low,
            close,
            volume,
            oi,
        }
    }

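    /// Writes the data as CSV via `csv::Writer::write_record`, allocating a
    /// `String` per numeric field. Not used by the generation pipeline;
    /// `write_to_csv` is the active writer.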
    #[allow(dead_code)]
    fn write_csv<P: AsRef<Path>>(self, output_path: P) -> std::io::Result<()> {
        let len = self.timestamp.len();
        if [
            self.open.len(),
            self.high.len(),
            self.low.len(),
            self.close.len(),
            self.volume.len(),
            self.oi.len(),
        ]
        .iter()
        .any(|&col_len| col_len != len)
        {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "All vectors must have the same length",
            ));
        }
        let output_file = File::create(output_path)?;
        let mut writer = WriterBuilder::new()
            .buffer_capacity(BUFFER_SIZE)
            .from_writer(output_file);
        writer.write_record(OHLCVOI_HEADER_ARRAY)?;
        for chunk_start in (0..self.timestamp.len()).step_by(CHUNK_SIZE) {
            let chunk_end = (chunk_start + CHUNK_SIZE).min(self.timestamp.len());
            eprintln!("Chunk Start {} Chunk End {}", chunk_start, chunk_end);
            for i in chunk_start..chunk_end {
                writer.write_record([
                    &self.timestamp[i],
                    &self.open[i].to_string(),
                    &self.high[i].to_string(),
                    &self.low[i].to_string(),
                    &self.close[i].to_string(),
                    &self.volume[i].to_string(),
                    &self.oi[i].to_string(),
                ])?;
            }
            writer.flush()?;
        }
        Ok(())
    }

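    /// Like `write_csv`, but reuses a `ByteRecord` and a scratch byte buffer so
    /// numeric fields are formatted without allocating a `String` each. Also
    /// unused by the generation pipeline.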
    #[allow(dead_code)]
    fn to_csv<P: AsRef<Path>>(&self, output_path: P) -> std::io::Result<()> {
        let output_file = File::create(output_path)?;
        let mut writer = WriterBuilder::new()
            .buffer_capacity(BUFFER_SIZE)
            .from_writer(output_file);
        writer.write_record(OHLCVOI_HEADER_ARRAY)?;
        let mut record = ByteRecord::with_capacity(128, 7);
        let mut buffer = Vec::with_capacity(128);
        for chunk_start in (0..self.timestamp.len()).step_by(CHUNK_SIZE) {
            let chunk_end = (chunk_start + CHUNK_SIZE).min(self.timestamp.len());
            eprintln!("Chunk Start {} Chunk End {}", chunk_start, chunk_end);
            for i in chunk_start..chunk_end {
                record.clear();
                buffer.clear();
                record.push_field(self.timestamp[i].as_bytes());
                write!(buffer, "{:.2}", self.open[i])?;
                record.push_field(&buffer);
                buffer.clear();
                write!(buffer, "{:.2}", self.high[i])?;
                record.push_field(&buffer);
                buffer.clear();
                write!(buffer, "{:.2}", self.low[i])?;
                record.push_field(&buffer);
                buffer.clear();
                write!(buffer, "{:.2}", self.close[i])?;
                record.push_field(&buffer);
                buffer.clear();
                write!(buffer, "{}", self.volume[i])?;
                record.push_field(&buffer);
                buffer.clear();
                write!(buffer, "{}", self.oi[i])?;
                record.push_field(&buffer);
                writer.write_byte_record(&record)?;
            }
            writer.flush()?;
        }
        Ok(())
    }

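    /// Writes the data as CSV through a `BufWriter` with `writeln!`, flushing
    /// after every `CHUNK_SIZE` rows. This is the writer used by the generator.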
    pub fn write_to_csv<P: AsRef<Path>>(&self, output_path: P) -> std::io::Result<()> {
        let output_file = File::create(output_path)?;
        let mut writer = BufWriter::with_capacity(BUFFER_SIZE, output_file);
        writer.write_all(OHLCVOI_HEADER)?;
        for chunk_start in (0..self.timestamp.len()).step_by(CHUNK_SIZE) {
            let chunk_end = (chunk_start + CHUNK_SIZE).min(self.timestamp.len());
            for (timestamp, open, high, low, close, volume, oi) in izip!(
                &self.timestamp[chunk_start..chunk_end],
                &self.open[chunk_start..chunk_end],
                &self.high[chunk_start..chunk_end],
                &self.low[chunk_start..chunk_end],
                &self.close[chunk_start..chunk_end],
                &self.volume[chunk_start..chunk_end],
                &self.oi[chunk_start..chunk_end]
            ) {
                writeln!(
                    writer,
                    "{},{:.2},{:.2},{:.2},{:.2},{},{}",
                    timestamp, open, high, low, close, volume, oi
                )?;
            }
            writer.flush()?;
        }
        writer.flush()?;
        Ok(())
    }

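    /// Generates one `Ohlcvoi` per date in parallel; compiled only with the
    /// `parallel` feature enabled.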
    #[cfg(feature = "parallel")]
    pub fn generate_multiple(dates: &[Date], no_of_seconds: Option<usize>) -> Vec<Self> {
        dates
            .par_iter()
            .map(|&date| Self::random(date, no_of_seconds))
            .collect()
    }
}

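/// Configuration for bulk dummy-data generation: the output root, the instrument
/// list (`None` means `ALL_INSTRUMENTS`), the number of files per instrument per
/// day, and the number of rows per file (`None` means the full session length).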
pub struct CsvGenerationConfig<'a> {
    pub base_path: PathBuf,
    pub instruments: Option<&'a [&'a str]>,
    pub num_csvs_per_instrument_per_day: usize,
    pub total_points_per_csv: Option<usize>,
}

impl<'a> CsvGenerationConfig<'a> {
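    /// Convenience constructor; the fields are public, so the struct can also be
    /// built directly.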
    pub fn new(
        base_path: PathBuf,
        instruments: Option<&'a [&'a str]>,
        num_csvs_per_instrument_per_day: usize,
        total_points_per_csv: Option<usize>,
    ) -> Self {
        CsvGenerationConfig {
            base_path,
            instruments,
            num_csvs_per_instrument_per_day,
            total_points_per_csv,
        }
    }

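    /// Writes `num_csvs_per_instrument_per_day` random CSVs for one instrument on
    /// one date under `<base_path>/<instrument>/<yyyymmdd>/<instrument>-<seq_no>.csv`.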
    fn generate_instrument_day_csvs(&self, instrument: &str, date: Date) -> std::io::Result<()> {
        let instrument_dir = self.base_path.join(instrument);
        std::fs::create_dir_all(&instrument_dir)?;
        let date_dir = instrument_dir.join(
            date.format(format_description!("[year][month][day]"))
                .unwrap_or_default(),
        );
        std::fs::create_dir_all(&date_dir)?;
        for seq_no in 0..self.num_csvs_per_instrument_per_day {
            let ohlcvoi = Ohlcvoi::random(date, self.total_points_per_csv);
            let filename = format!("{}-{}.csv", instrument, seq_no);
            let csv_path = date_dir.join(filename);
            ohlcvoi.write_to_csv(&csv_path)?;
        }
        Ok(())
    }

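    /// Generates CSVs for every configured instrument across all business days of
    /// `year`, parallelising over both instruments and dates with rayon.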
    fn generate_all_instrument_csvs(&self, year: i32) -> std::io::Result<()> {
        let business_days = generate_business_days(year);
        let instruments = self.instruments.unwrap_or(ALL_INSTRUMENTS.as_slice());
        instruments.par_iter().try_for_each(|instrument| {
            business_days
                .par_iter()
                .try_for_each(|&date| self.generate_instrument_day_csvs(instrument, date))
        })?;
        Ok(())
    }

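    /// One-shot entry point: builds a `CsvGenerationConfig` from the arguments and
    /// generates dummy data for every business day of `year`.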
    pub fn generate_dummy_data(
        year: i32,
        instruments: Option<&'a [&'a str]>,
        num_csvs_per_instrument_per_day: usize,
        base_path: PathBuf,
        total_points_per_csv: Option<usize>,
    ) -> std::io::Result<()> {
        let config = CsvGenerationConfig {
            base_path,
            instruments,
            num_csvs_per_instrument_per_day,
            total_points_per_csv,
        };
        config.generate_all_instrument_csvs(year)?;
        Ok(())
    }
}

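/// Returns every weekday (Monday to Friday) of `year`. Exchange holidays are not
/// excluded, so this is a superset of the actual trading days.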
#[inline]
fn generate_business_days(year: i32) -> Vec<Date> {
    let start_date = Date::from_calendar_date(year, Month::January, 1).unwrap();
    let end_date = Date::from_calendar_date(year, Month::December, 31).unwrap();
    let total_days = (end_date - start_date).whole_days();
    let is_leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    // Rough capacity hint; the exact weekday count depends on which weekday the
    // year starts on.
    let estimated_business_days = if is_leap_year { 262 } else { 261 };
    let mut business_days = Vec::with_capacity(estimated_business_days);
    (0..=total_days)
        .map(|days| start_date + Duration::days(days))
        .filter(|date| !matches!(date.weekday(), Weekday::Saturday | Weekday::Sunday))
        .for_each(|date| business_days.push(date));
    business_days.shrink_to_fit();
    business_days
}

#[cfg(test)]
mod tests {
    use time::macros::date;

    use super::*;

    #[test]
    fn test_generate_business_days() {
        let twenty_twenty_four_dates = generate_business_days(2024);
        eprintln!("{:?}", twenty_twenty_four_dates);
        assert_eq!(twenty_twenty_four_dates.len(), 262);
    }
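
    // A small additional sanity check: `Ohlcvoi::random` should produce equal-length
    // columns matching the requested number of points. This assumes timestamp
    // formatting never fails for a valid date, which holds for the format used.
    #[test]
    fn test_random_ohlcvoi_lengths() {
        let data = Ohlcvoi::random(date!(2024 - 01 - 01), Some(10));
        assert_eq!(data.timestamp.len(), 10);
        assert_eq!(data.open.len(), 10);
        assert_eq!(data.high.len(), 10);
        assert_eq!(data.low.len(), 10);
        assert_eq!(data.close.len(), 10);
        assert_eq!(data.volume.len(), 10);
        assert_eq!(data.oi.len(), 10);
    }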
}