|
1 |
| -pub use fasta_manipulation::tpf_fasta::*; |
| 1 | +use assert_cmd::Command; |
| 2 | +use std::fs; |
| 3 | +use std::fs::File; |
| 4 | +use std::io::Write; |
| 5 | + |
| 6 | +use noodles::fasta::record::Sequence; |
| 7 | +use tempfile::Builder; |
| 8 | + |
| 9 | +use fasta_manipulation::tpf_fasta_mod::{ |
| 10 | + check_orientation, get_uniques, parse_seq, parse_tpf, save_to_fasta, subset_vec_tpf, NewFasta, |
| 11 | + Tpf, |
| 12 | +}; |
| 13 | + |
| 14 | +mod util; |
| 15 | + |
| 16 | +use util::are_files_identical; |
| 17 | + |
// `check_orientation` has to be exposed publicly so that this integration
// test can call it.
// Open question: is there a way to test private functions instead?
#[test]
fn check_orientation_inverts_sequence_if_minus() {
    // With a MINUS orientation, ATGC is expected back as GCAT
    // (its reverse complement).
    let forward = Sequence::from(b"ATGC".to_vec());
    let reoriented = check_orientation(Some(forward), String::from("MINUS"));
    assert_eq!(reoriented, String::from("GCAT"));
}
| 27 | + |
#[test]
fn check_orientation_does_not_invert_sequence_if_plus() {
    // With a PLUS orientation the sequence is expected back unchanged.
    let forward = Sequence::from(b"ATGC".to_vec());
    let reoriented = check_orientation(Some(forward), String::from("PLUS"));
    assert_eq!(reoriented, String::from("ATGC"));
}
| 35 | + |
// `get_uniques` has to be exposed publicly to be testable from here, and so
// do the fields of the `Tpf` struct.
// Open question: do we need a factory function to create Tpf structs?
#[test]
fn get_uniques_returns_unique_scaffold_names() {
    // All records share coordinates and orientation; only the names differ.
    let make_tpf = |ori: &str, new: &str| Tpf {
        ori_scaffold: ori.to_string(),
        start_coord: 1,
        end_coord: 100,
        new_scaffold: new.to_string(),
        orientation: "PLUS".to_string(),
    };
    // The first and third records are identical, so "newScaffold1" is
    // present twice in the input.
    let tpfs = vec![
        make_tpf("scaffold1", "newScaffold1"),
        make_tpf("scaffold2", "newScaffold2"),
        make_tpf("scaffold1", "newScaffold1"),
    ];

    let expected = vec![String::from("newScaffold1"), String::from("newScaffold2")];
    assert_eq!(get_uniques(&tpfs), expected);
}
| 69 | + |
// TODO: `subset_vec_tpf` needs documentation of its own, as we were not
// entirely sure what it was doing when this test was written.
#[test]
fn get_subset_of_tpfs() {
    let make_tpf = |ori: &str, new: &str| Tpf {
        ori_scaffold: ori.to_string(),
        start_coord: 1,
        end_coord: 100,
        new_scaffold: new.to_string(),
        orientation: "PLUS".to_string(),
    };
    let tpfs = vec![
        make_tpf("scaffold1", "newScaffold1"),
        make_tpf("scaffold2", "newScaffold2"),
        make_tpf("scaffold1", "newScaffold1"),
    ];

    // The second argument is a (name, number) pair passed by reference.
    let scaffold_name = String::from("scaffold1");
    let scaffold_number: usize = 1;
    let result = subset_vec_tpf(&tpfs, (&scaffold_name, &scaffold_number));

    // Two of the three records have "scaffold1" as their original scaffold;
    // presumably that is what the subset is matched on (confirm against the
    // implementation).
    assert_eq!(result.len(), 2);
}
| 100 | + |
#[test]
fn check_parse_seq() {
    let make_tpf = |ori: &str, start, end, new: &str, orientation: &str| Tpf {
        ori_scaffold: ori.to_string(),
        start_coord: start,
        end_coord: end,
        new_scaffold: new.to_string(),
        orientation: orientation.to_string(),
    };

    // 59 bases of input sequence.
    let sequence =
        Sequence::from(b"AATGGCCGGCGCGTTAAACCCAATGCCCCGGTTAANNGCTCGTCGCTTGCTTCGCAAAA".to_vec());
    let records = [
        make_tpf("scaffold1", 3, 5, "newScaffold1", "PLUS"),
        make_tpf("scaffold2", 10, 20, "newScaffold2", "MINUS"),
        make_tpf("scaffold1", 1, 58, "newScaffold1", "PLUS"),
    ];
    let tpfs: Vec<&Tpf> = records.iter().collect();

    let new_fasta = parse_seq(Some(sequence), tpfs);

    assert_eq!(new_fasta.len(), 3);
    // Bases 3 to 5 of the input, unchanged.
    assert_eq!(new_fasta[0].sequence, "TGG");
    // Bases 10 to 20 (CGCGTTAAACC), reverse-complemented for MINUS.
    assert_eq!(new_fasta[1].sequence, "GGTTTAACGCG");
    // Bases 1 to 58: everything except the final base.
    assert_eq!(
        new_fasta[2].sequence,
        "AATGGCCGGCGCGTTAAACCCAATGCCCCGGTTAANNGCTCGTCGCTTGCTTCGCAAA"
    );
}
| 140 | + |
// `parse_seq` is expected to panic when `end_coord` is larger than the
// length of the sequence.
// Open question: should this error be handled in a more graceful way?
#[test]
#[should_panic]
fn check_parse_seq_bounds_error() {
    // The sequence holds 59 bases, so an end coordinate of 60 is out of range.
    let sequence =
        Sequence::from(b"AATGGCCGGCGCGTTAAACCCAATGCCCCGGTTAANNGCTCGTCGCTTGCTTCGCAAAA".to_vec());
    let out_of_range = Tpf {
        orientation: String::from("PLUS"),
        new_scaffold: String::from("newScaffold1"),
        ori_scaffold: String::from("scaffold1"),
        end_coord: 60,
        start_coord: 10,
    };

    parse_seq(Some(sequence), vec![&out_of_range]);
}
2 | 161 |
|
#[test]
fn check_parse_tpf() {
    let path = String::from("test_data/iyAndFlav1/full/iyAndFlav1.curated_subset.tpf");
    let tpfs = parse_tpf(&path);

    // Contents of the input file:
    //
    // ?   SCAFFOLD_12:1-900734    RL_3            MINUS
    // GAP TYPE-2                  200
    // ?   SCAFFOLD_50:1-61000     RL_3            PLUS
    // ?   SCAFFOLD_26:1-201195    RL_3_unloc_1    PLUS
    // ?   SCAFFOLD_84:1-2000      SCAFFOLD_84     PLUS
    //
    // Four records are expected back, one per `?` line.
    assert_eq!(tpfs.len(), 4);

    // First record. Note that the file's `RL_3` is expected to be read back
    // as `SUPER_3`.
    let head = tpfs.first().unwrap();
    assert_eq!(head.ori_scaffold, "SCAFFOLD_12");
    assert_eq!(head.start_coord, 1);
    assert_eq!(head.end_coord, 900734);
    assert_eq!(head.new_scaffold, "SUPER_3");
    assert_eq!(head.orientation, "MINUS");

    // Last record.
    let tail = tpfs.last().unwrap();
    assert_eq!(tail.ori_scaffold, "SCAFFOLD_84");
    assert_eq!(tail.start_coord, 1);
    assert_eq!(tail.end_coord, 2000);
    assert_eq!(tail.new_scaffold, "SCAFFOLD_84");
    assert_eq!(tail.orientation, "PLUS");
}
| 188 | + |
#[test]
fn check_save_to_fasta() {
    // What `save_to_fasta` does, as worked out when this test was written.
    //
    // Inputs: vector of NewFasta types, vector of Tpf types, output path, and n_length
    //   1. Creates a data file based on the output path, and opens the created
    //      file using OpenOptions.
    //   2. Creates a debug.txt file, and opens that file.
    //   3. Retrieves the unique scaffolds based on the initial Tpf types.
    //
    // Iterating over the unique scaffold names, it:
    //   - adds a `>` symbol to the start and a new line to the end
    //   - appends the scaffold name to the data file
    //   - appends the scaffold name to file2 (debug.txt)
    //   - creates a struct called MyRecord with an empty name and sequence
    //   - assigns the unique scaffold name to the record's name
    //   - iterates over the Tpf data (which comes from the parse_tpf function)
    //       - if the new scaffold name is equal to the unique scaffold name
    //           - iterates over the new_fasta data
    //           - checks for object equality
    //           - if the object is equal, formats the Tpf into a string and
    //             writes it to file2 (debug.txt)
    //           - if the object is equal, appends the fasta sequence to the
    //             record's sequence
    //   - creates a variable line_len set to 60
    //   - creates a variable `fixed`, which is the sequence
    //   - creates a variable `n_string`, which is N repeated n_length times
    //   - creates a variable `fixed2`, which is `fixed` joined with `n_string`
    //   - creates a variable `fixed3`, which is `fixed2` converted to bytes,
    //     chunked by line_len and converted to a vector of strings
    //   - iterates over `fixed3` and writes it to the data file

    let make_tpf = |ori: &str, end, new: &str, orientation: &str| Tpf {
        ori_scaffold: ori.to_string(),
        start_coord: 1,
        end_coord: end,
        new_scaffold: new.to_string(),
        orientation: orientation.to_string(),
    };

    let new_fasta_items = vec![
        NewFasta {
            tpf: make_tpf("SCAFFOLD_1", 9, "SUPER_1", "MINUS"),
            sequence: "GGCATGCAT".to_string(),
        },
        NewFasta {
            tpf: make_tpf("SCAFFOLD_3", 5, "SUPER_2", "PLUS"),
            sequence: "AGTGT".to_string(),
        },
    ];

    // The same two records again, as the standalone Tpf list.
    let tpf_items = vec![
        make_tpf("SCAFFOLD_1", 9, "SUPER_1", "MINUS"),
        make_tpf("SCAFFOLD_3", 5, "SUPER_2", "PLUS"),
    ];

    let output = &"new.fasta".to_string();
    let n_length: usize = 200;

    save_to_fasta(new_fasta_items, tpf_items, output, n_length);

    // Compare first, but hold on to the results instead of asserting right
    // away: the generated files have to be removed even when a comparison
    // fails, otherwise they are left behind in the working directory.
    let fasta_matches =
        are_files_identical(output, "test_data/iyAndFlav1/tiny/tiny_test.output.fasta");
    let debug_matches =
        are_files_identical("debug.txt", "test_data/iyAndFlav1/tiny/tiny_test.debug.txt");

    let removed_fasta = fs::remove_file(output);
    let removed_debug = fs::remove_file("debug.txt");

    assert!(fasta_matches.unwrap());
    assert!(debug_matches.unwrap());

    // A failed removal means an expected output file was never written.
    removed_fasta.expect("File cannot be found!");
    removed_debug.expect("File cannot be found!");
}
| 278 | + |
// End-to-end run of the `curate` subcommand of the compiled binary against
// small generated inputs; only a successful exit is asserted.
//#[ignore = "Work in Progress (WIP)"]
#[test]
fn check_curate_fasta() {
    // Create temp directory that will get cleaned up when `dir` is dropped.
    let dir = Builder::new().prefix("local_tests").tempdir().unwrap();

    // Generate paths for the mock files. The output also lives inside the
    // temp directory so that nothing is left behind in the working directory.
    let fasta_path = dir.path().join("input_fasta.fa");
    let fai_path = dir.path().join("input_fasta.fa.fai");
    let tpf_path = dir.path().join("input.tpf");
    let output_path = dir.path().join("output.fa");

    // Actually generate the mock files. Each handle is a temporary, so every
    // file is closed again before the binary is started.
    File::create(&fai_path)
        .unwrap()
        .write_all(b"SCAFFOLD_1\t16\t12\t16\t17\nSCAFFOLD_3\t16\t41\t16\t17")
        .unwrap();
    File::create(&fasta_path)
        .unwrap()
        .write_all(b">SCAFFOLD_1\nATGCATGCCGTATAGA\n>SCAFFOLD_3\nAGTGTATTTTTATGCA")
        .unwrap();
    File::create(&tpf_path)
        .unwrap()
        .write_all(
            b"?\tSCAFFOLD_1:1-9\tRL_1\tMINUS\nGAP\tTYPE-2\t200\n?\tSCAFFOLD_3:1-5\tRL_2\tPLUS",
        )
        .unwrap();

    // Run inside the temp directory as well: any file the binary writes
    // relative to its working directory (presumably a debug.txt, as
    // `save_to_fasta` is described as creating one) then stays out of the
    // repository and cannot collide with tests running in parallel.
    Command::cargo_bin("fasta_manipulation")
        .unwrap()
        .current_dir(dir.path())
        .arg("curate")
        .arg("-f")
        .arg(&fasta_path)
        .arg("-t")
        .arg(&tpf_path)
        .arg("-o")
        .arg(&output_path)
        .assert()
        .success();
}
|
0 commit comments