|
72 | 72 | "|-----> 🏗️ Starting download_example_data function\n",
|
73 | 73 | "|-----> ⚙️ Download data started\n",
|
74 | 74 | "|-----------> Data found in pyaging_data/atac_example.pkl\n",
|
75 |
| - "|-----> ✅ Download data finished [0.0007s]\n", |
76 |
| - "|-----> 🎉 Done! [0.0014s]\n" |
| 75 | + "|-----> ✅ Download data finished [0.0008s]\n", |
| 76 | + "|-----> 🎉 Done! [0.0017s]\n" |
77 | 77 | ]
|
78 | 78 | }
|
79 | 79 | ],
|
|
308 | 308 | "df.head()"
|
309 | 309 | ]
|
310 | 310 | },
|
| 311 | + { |
| 312 | + "cell_type": "markdown", |
| 313 | + "id": "4ca435ec-3480-4167-ad55-b8a28b23a843", |
| 314 | + "metadata": {}, |
| 315 | + "source": [ |
| 316 | + "This is what the `adata` object looks like:" |
| 317 | + ] |
| 318 | + }, |
311 | 319 | {
|
312 | 320 | "cell_type": "markdown",
|
313 | 321 | "id": "251495e7-082f-45ae-841c-a2dd86a3cb15",
|
|
336 | 344 | "text": [
|
337 | 345 | "|-----> 🏗️ Starting df_to_adata function\n",
|
338 | 346 | "|-----> ⚙️ Create anndata object started\n",
|
339 |
| - "|-----> ✅ Create anndata object finished [0.0018s]\n", |
| 347 | + "|-----> ✅ Create anndata object finished [0.0009s]\n", |
340 | 348 | "|-----> ⚙️ Add metadata to anndata started\n",
|
341 | 349 | "|-----------? No metadata provided. Leaving adata.obs empty\n",
|
342 |
| - "|-----> ⚠️ Add metadata to anndata finished [0.0082s]\n", |
| 350 | + "|-----> ⚠️ Add metadata to anndata finished [0.0021s]\n", |
343 | 351 | "|-----> ⚙️ Log data statistics started\n",
|
344 | 352 | "|-----------> There are 10 observations\n",
|
345 | 353 | "|-----------> There are 80400 features\n",
|
346 | 354 | "|-----------> Total missing values: 0\n",
|
347 | 355 | "|-----------> Percentage of missing values: 0.00%\n",
|
348 |
| - "|-----> ✅ Log data statistics finished [0.0026s]\n", |
| 356 | + "|-----> ✅ Log data statistics finished [0.0011s]\n", |
349 | 357 | "|-----> ⚙️ Impute missing values started\n",
|
350 | 358 | "|-----------> No missing values found. No imputation necessary\n",
|
351 |
| - "|-----> ✅ Impute missing values finished [0.0056s]\n", |
| 359 | + "|-----> ✅ Impute missing values finished [0.0022s]\n", |
352 | 360 | "|-----> ⚙️ Add unstructured data to anndata started\n",
|
353 |
| - "|-----> ✅ Add unstructured data to anndata finished [0.0074s]\n", |
354 |
| - "|-----> 🎉 Done! [0.0288s]\n" |
| 361 | + "|-----> ✅ Add unstructured data to anndata finished [0.0042s]\n", |
| 362 | + "|-----> 🎉 Done! [0.0111s]\n" |
355 | 363 | ]
|
356 | 364 | }
|
357 | 365 | ],
|
358 | 366 | "source": [
|
359 | 367 | "adata = pya.preprocess.df_to_adata(df)"
|
360 | 368 | ]
|
361 | 369 | },
|
| 370 | + { |
| 371 | + "cell_type": "markdown", |
| 372 | + "id": "5042e04f-17c0-4eb2-8c5d-2c2fc5d6d2d6", |
| 373 | + "metadata": {}, |
| 374 | + "source": [ |
| 375 | + "This is what the `adata` object looks like:" |
| 376 | + ] |
| 377 | + }, |
| 378 | + { |
| 379 | + "cell_type": "code", |
| 380 | + "execution_count": 6, |
| 381 | + "id": "503da312-2256-4e67-9747-107f5c4587ec", |
| 382 | + "metadata": {}, |
| 383 | + "outputs": [ |
| 384 | + { |
| 385 | + "data": { |
| 386 | + "text/plain": [ |
| 387 | + "AnnData object with n_obs × n_vars = 10 × 80400\n", |
| 388 | + " var: 'percent_na'\n", |
| 389 | + " uns: 'imputer_strategy', 'data_type'" |
| 390 | + ] |
| 391 | + }, |
| 392 | + "execution_count": 6, |
| 393 | + "metadata": {}, |
| 394 | + "output_type": "execute_result" |
| 395 | + } |
| 396 | + ], |
| 397 | + "source": [ |
| 398 | + "adata" |
| 399 | + ] |
| 400 | + }, |
362 | 401 | {
|
363 | 402 | "cell_type": "markdown",
|
364 | 403 | "id": "c072990d-0f54-49b3-bb7a-7bbd13301e2a",
|
|
377 | 416 | },
|
378 | 417 | {
|
379 | 418 | "cell_type": "code",
|
380 |
| - "execution_count": 6, |
| 419 | + "execution_count": 7, |
381 | 420 | "id": "26398785-d1ea-4ce8-b1d9-7234f8f46ef6",
|
382 | 421 | "metadata": {},
|
383 | 422 | "outputs": [
|
|
388 | 427 | "|-----> 🏗️ Starting predict_age function\n",
|
389 | 428 | "|-----> ⚙️ Set PyTorch device started\n",
|
390 | 429 | "|-----------> Using device: cpu\n",
|
391 |
| - "|-----> ✅ Set PyTorch device finished [0.0021s]\n", |
| 430 | + "|-----> ✅ Set PyTorch device finished [0.0009s]\n", |
392 | 431 | "|-----> Processing clock: OcampoATAC1\n",
|
393 | 432 | "|-----------> ⚙️ Load clock started\n",
|
394 | 433 | "|-----------> ⚙️ Download data started\n",
|
395 | 434 | "|-----------> Data found in pyaging_data/ocampoatac1.pt\n",
|
396 |
| - "|-----------> ✅ Download data finished [0.0006s]\n", |
397 |
| - "|-----------> ✅ Load clock finished [0.0006s]\n", |
| 435 | + "|-----------> ✅ Download data finished [0.0003s]\n", |
| 436 | + "|-----------> ✅ Load clock finished [0.0003s]\n", |
398 | 437 | "|-----------> ⚙️ Check features in adata started\n",
|
399 | 438 | "|-----------> All features are present in adata.var_names.\n",
|
400 |
| - "|-----------> ✅ Check features in adata finished [0.0062s]\n", |
| 439 | + "|-----------> ✅ Check features in adata finished [0.0030s]\n", |
401 | 440 | "|-----------> ⚙️ Filter features and extract data matrix started\n",
|
402 |
| - "|-----------> ✅ Filter features and extract data matrix finished [0.0030s]\n", |
| 441 | + "|-----------> ✅ Filter features and extract data matrix finished [0.0012s]\n", |
403 | 442 | "|-----------> ⚙️ Preprocess data started\n",
|
404 | 443 | "|-----------------> Preprocessing data with function log1p\n",
|
405 |
| - "|-----------> ✅ Preprocess data finished [0.0054s]\n", |
| 444 | + "|-----------> ✅ Preprocess data finished [0.0027s]\n", |
406 | 445 | "|-----------> ⚙️ Convert numpy array to tensor started\n",
|
407 |
| - "|-----------> ✅ Convert numpy array to tensor finished [0.0028s]\n", |
| 446 | + "|-----------> ✅ Convert numpy array to tensor finished [0.0015s]\n", |
408 | 447 | "|-----------> ⚙️ Initialize model started\n",
|
409 |
| - "|-----------> ✅ Initialize model finished [0.0023s]\n", |
| 448 | + "|-----------> ✅ Initialize model finished [0.0013s]\n", |
410 | 449 | "|-----------> ⚙️ Predict ages with model started\n",
|
411 |
| - "|-----------> ✅ Predict ages with model finished [0.0036s]\n", |
| 450 | + "|-----------> ✅ Predict ages with model finished [0.0015s]\n", |
412 | 451 | "|-----------> ⚙️ Convert tensor to numpy array started\n",
|
413 |
| - "|-----------> ✅ Convert tensor to numpy array finished [0.0016s]\n", |
| 452 | + "|-----------> ✅ Convert tensor to numpy array finished [0.0015s]\n", |
414 | 453 | "|-----------> ⚙️ Add predicted ages to adata started\n",
|
415 | 454 | "|-----------> ✅ Add predicted ages to adata finished [0.0013s]\n",
|
416 | 455 | "|-----------> ⚙️ Load all clock metadata started\n",
|
417 | 456 | "|-----------> ⚙️ Download data started\n",
|
418 | 457 | "|-----------> Data found in pyaging_data/all_clock_metadata.pt\n",
|
419 |
| - "|-----------> ✅ Download data finished [0.0005s]\n", |
420 |
| - "|-----------> ✅ Load all clock metadata finished [0.0005s]\n", |
| 458 | + "|-----------> ✅ Download data finished [0.0003s]\n", |
| 459 | + "|-----------> ✅ Load all clock metadata finished [0.0003s]\n", |
421 | 460 | "|-----------> ⚙️ Add clock metadata to adata.uns started\n",
|
422 | 461 | "|-----------> ✅ Add clock metadata to adata.uns finished [0.0039s]\n",
|
423 | 462 | "|-----> Processing clock: OcampoATAC2\n",
|
424 | 463 | "|-----------> ⚙️ Load clock started\n",
|
425 | 464 | "|-----------> ⚙️ Download data started\n",
|
426 | 465 | "|-----------> Data found in pyaging_data/ocampoatac2.pt\n",
|
427 |
| - "|-----------> ✅ Download data finished [0.0004s]\n", |
428 |
| - "|-----------> ✅ Load clock finished [0.0004s]\n", |
| 466 | + "|-----------> ✅ Download data finished [0.0006s]\n", |
| 467 | + "|-----------> ✅ Load clock finished [0.0006s]\n", |
429 | 468 | "|-----------> ⚙️ Check features in adata started\n",
|
430 | 469 | "|-----------> All features are present in adata.var_names.\n",
|
431 |
| - "|-----------> ✅ Check features in adata finished [0.0034s]\n", |
| 470 | + "|-----------> ✅ Check features in adata finished [0.0027s]\n", |
432 | 471 | "|-----------> ⚙️ Filter features and extract data matrix started\n",
|
433 |
| - "|-----------> ✅ Filter features and extract data matrix finished [0.0022s]\n", |
| 472 | + "|-----------> ✅ Filter features and extract data matrix finished [0.0013s]\n", |
434 | 473 | "|-----------> ⚙️ Preprocess data started\n",
|
435 | 474 | "|-----------------> Preprocessing data with function log1p\n",
|
436 |
| - "|-----------> ✅ Preprocess data finished [0.0024s]\n", |
| 475 | + "|-----------> ✅ Preprocess data finished [0.0016s]\n", |
437 | 476 | "|-----------> ⚙️ Convert numpy array to tensor started\n",
|
438 |
| - "|-----------> ✅ Convert numpy array to tensor finished [0.0019s]\n", |
| 477 | + "|-----------> ✅ Convert numpy array to tensor finished [0.0009s]\n", |
439 | 478 | "|-----------> ⚙️ Initialize model started\n",
|
440 |
| - "|-----------> ✅ Initialize model finished [0.0021s]\n", |
| 479 | + "|-----------> ✅ Initialize model finished [0.0007s]\n", |
441 | 480 | "|-----------> ⚙️ Predict ages with model started\n",
|
442 |
| - "|-----------> ✅ Predict ages with model finished [0.0020s]\n", |
| 481 | + "|-----------> ✅ Predict ages with model finished [0.0012s]\n", |
443 | 482 | "|-----------> ⚙️ Convert tensor to numpy array started\n",
|
444 |
| - "|-----------> ✅ Convert tensor to numpy array finished [0.0010s]\n", |
| 483 | + "|-----------> ✅ Convert tensor to numpy array finished [0.0013s]\n", |
445 | 484 | "|-----------> ⚙️ Add predicted ages to adata started\n",
|
446 |
| - "|-----------> ✅ Add predicted ages to adata finished [0.0008s]\n", |
| 485 | + "|-----------> ✅ Add predicted ages to adata finished [0.0011s]\n", |
447 | 486 | "|-----------> ⚙️ Load all clock metadata started\n",
|
448 | 487 | "|-----------> ⚙️ Download data started\n",
|
449 | 488 | "|-----------> Data found in pyaging_data/all_clock_metadata.pt\n",
|
450 |
| - "|-----------> ✅ Download data finished [0.0005s]\n", |
451 |
| - "|-----------> ✅ Load all clock metadata finished [0.0005s]\n", |
| 489 | + "|-----------> ✅ Download data finished [0.0003s]\n", |
| 490 | + "|-----------> ✅ Load all clock metadata finished [0.0003s]\n", |
452 | 491 | "|-----------> ⚙️ Add clock metadata to adata.uns started\n",
|
453 |
| - "|-----------> ✅ Add clock metadata to adata.uns finished [0.0025s]\n", |
454 |
| - "|-----> 🎉 Done! [0.0612s]\n" |
| 492 | + "|-----------> ✅ Add clock metadata to adata.uns finished [0.0023s]\n", |
| 493 | + "|-----> 🎉 Done! [0.0396s]\n" |
455 | 494 | ]
|
456 | 495 | }
|
457 | 496 | ],
|
|
469 | 508 | },
|
470 | 509 | {
|
471 | 510 | "cell_type": "code",
|
472 |
| - "execution_count": 7, |
| 511 | + "execution_count": 8, |
473 | 512 | "id": "055761d9-7e22-49f3-a1db-31c3ed3749ba",
|
474 | 513 | "metadata": {},
|
475 | 514 | "outputs": [],
|
|
482 | 521 | },
|
483 | 522 | {
|
484 | 523 | "cell_type": "code",
|
485 |
| - "execution_count": 8, |
| 524 | + "execution_count": 9, |
486 | 525 | "id": "fdd9d6c2-7f0a-4f96-a095-4a492ed73f8d",
|
487 | 526 | "metadata": {},
|
488 | 527 | "outputs": [
|
|
550 | 589 | "Sample_5 38.929848 33.717129"
|
551 | 590 | ]
|
552 | 591 | },
|
553 |
| - "execution_count": 8, |
| 592 | + "execution_count": 9, |
554 | 593 | "metadata": {},
|
555 | 594 | "output_type": "execute_result"
|
556 | 595 | }
|
|
559 | 598 | "adata.obs.head()"
|
560 | 599 | ]
|
561 | 600 | },
|
| 601 | + { |
| 602 | + "cell_type": "markdown", |
| 603 | + "id": "4bad3df8-f868-4cf5-be74-00ffd02c18f5", |
| 604 | + "metadata": {}, |
| 605 | + "source": [ |
| 606 | + "After age prediction, the predicted ages from each clock are added as columns to `adata.obs`. In addition, the percentage of missing values for each clock and the clock metadata are stored in `adata.uns`." |
| 607 | + ] |
| 608 | + }, |
| 609 | + { |
| 610 | + "cell_type": "code", |
| 611 | + "execution_count": 10, |
| 612 | + "id": "0d13fb55-8a12-4d28-83e9-ec7c9fbbe30c", |
| 613 | + "metadata": {}, |
| 614 | + "outputs": [ |
| 615 | + { |
| 616 | + "data": { |
| 617 | + "text/plain": [ |
| 618 | + "AnnData object with n_obs × n_vars = 10 × 80400\n", |
| 619 | + " obs: 'ocampoatac1', 'ocampoatac2'\n", |
| 620 | + " var: 'percent_na'\n", |
| 621 | + " uns: 'imputer_strategy', 'data_type', 'ocampoatac1_percent_na', 'ocampoatac1_metadata', 'ocampoatac2_percent_na', 'ocampoatac2_metadata'" |
| 622 | + ] |
| 623 | + }, |
| 624 | + "execution_count": 10, |
| 625 | + "metadata": {}, |
| 626 | + "output_type": "execute_result" |
| 627 | + } |
| 628 | + ], |
| 629 | + "source": [ |
| 630 | + "adata" |
| 631 | + ] |
| 632 | + }, |
562 | 633 | {
|
563 | 634 | "cell_type": "markdown",
|
564 | 635 | "id": "a4e7ad8d-44ae-4ced-a626-f9e3b2d04114",
|
|
577 | 648 | },
|
578 | 649 | {
|
579 | 650 | "cell_type": "code",
|
580 |
| - "execution_count": 10, |
| 651 | + "execution_count": 11, |
581 | 652 | "id": "6b368506-55d1-4b74-be61-817bcf575ade",
|
582 | 653 | "metadata": {},
|
583 | 654 | "outputs": [
|
|
591 | 662 | " 'doi': 'https://doi.org/10.1007/s11357-023-00986-0'}"
|
592 | 663 | ]
|
593 | 664 | },
|
594 |
| - "execution_count": 10, |
| 665 | + "execution_count": 11, |
595 | 666 | "metadata": {},
|
596 | 667 | "output_type": "execute_result"
|
597 | 668 | }
|
|
0 commit comments